lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4F50678C.6010800@openvz.org>
Date:	Fri, 02 Mar 2012 10:24:12 +0400
From:	Konstantin Khlebnikov <khlebnikov@...nvz.org>
To:	KAMEZAWA Hiroyuki <kamezawa.hiroyu@...fujitsu.com>
CC:	Andrew Morton <akpm@...ux-foundation.org>,
	Hugh Dickins <hughd@...gle.com>,
	Johannes Weiner <jweiner@...hat.com>,
	"linux-mm@...ck.org" <linux-mm@...ck.org>,
	"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH 6/7] mm/memcg: rework inactive_ratio calculation

KAMEZAWA Hiroyuki wrote:
> On Wed, 29 Feb 2012 13:16:00 +0400
> Konstantin Khlebnikov<khlebnikov@...nvz.org>  wrote:
>
>> This patch removes precalculated zone->inactive_ratio.
>> Now it always calculated in inactive_anon_is_low() from current lru size.
>> After that we can merge memcg and non-memcg cases and drop duplicated code.
>>
>> We can drop precalculated ratio, because its calculation fast enough to do it
>> each time. Plus precalculation uses zone size as basis, this estimation not
>> always match with page lru size, for example if a significant proportion
>> of memory occupied by kernel objects. This is more reason
>> from memory cgroup which is triggered this memory reclaim.
>>
>> Signed-off-by: Konstantin Khlebnikov<khlebnikov@...nvz.org>
>
> Maybe good....but please don't change the user interface /proc/zoneinfo implicitly.
> How about calculating inactive_ratio at reading /proc/zoneinfo ?

I don't know... Anybody need this?
Plus now it work in per-lruvec manner, why we should show it per-zone?

This field was introduced not long ago, in v2.6.27-5589-g556adec
For example prev_priority was there before v2.6.12 and was killed in v2.6.35-5854-g25edde0

>
> Thanks,
> -Kame
>
>
>
>
>> ---
>>   include/linux/memcontrol.h |   16 --------
>>   include/linux/mmzone.h     |    7 ----
>>   mm/memcontrol.c            |   38 -------------------
>>   mm/page_alloc.c            |   44 ----------------------
>>   mm/vmscan.c                |   88 ++++++++++++++++++++++++++++----------------
>>   mm/vmstat.c                |    6 +--
>>   6 files changed, 58 insertions(+), 141 deletions(-)
>>
>> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
>> index e2e1fac..7e114f8 100644
>> --- a/include/linux/memcontrol.h
>> +++ b/include/linux/memcontrol.h
>> @@ -117,10 +117,6 @@ void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
>>   /*
>>    * For memory reclaim.
>>    */
>> -int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg,
>> -                                 struct zone *zone);
>> -int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg,
>> -                                 struct zone *zone);
>>   int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);
>>   unsigned long mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg,
>>                                        int nid, int zid, unsigned int lrumask);
>> @@ -334,18 +330,6 @@ static inline bool mem_cgroup_disabled(void)
>>        return true;
>>   }
>>
>> -static inline int
>> -mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone)
>> -{
>> -     return 1;
>> -}
>> -
>> -static inline int
>> -mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, struct zone *zone)
>> -{
>> -     return 1;
>> -}
>> -
>>   static inline unsigned long
>>   mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, int nid, int zid,
>>                                unsigned int lru_mask)
>> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
>> index fdcd683..7edcf17 100644
>> --- a/include/linux/mmzone.h
>> +++ b/include/linux/mmzone.h
>> @@ -384,13 +384,6 @@ struct zone {
>>        /* Zone statistics */
>>        atomic_long_t           vm_stat[NR_VM_ZONE_STAT_ITEMS];
>>
>> -     /*
>> -      * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
>> -      * this zone's LRU.  Maintained by the pageout code.
>> -      */
>> -     unsigned int inactive_ratio;
>> -
>> -
>>        ZONE_PADDING(_pad2_)
>>        /* Rarely used or read-mostly fields */
>>
>> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
>> index 2809531..4bc6835 100644
>> --- a/mm/memcontrol.c
>> +++ b/mm/memcontrol.c
>> @@ -1171,44 +1171,6 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg)
>>        return ret;
>>   }
>>
>> -int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone)
>> -{
>> -     unsigned long inactive_ratio;
>> -     int nid = zone_to_nid(zone);
>> -     int zid = zone_idx(zone);
>> -     unsigned long inactive;
>> -     unsigned long active;
>> -     unsigned long gb;
>> -
>> -     inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
>> -                                             BIT(LRU_INACTIVE_ANON));
>> -     active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
>> -                                           BIT(LRU_ACTIVE_ANON));
>> -
>> -     gb = (inactive + active)>>  (30 - PAGE_SHIFT);
>> -     if (gb)
>> -             inactive_ratio = int_sqrt(10 * gb);
>> -     else
>> -             inactive_ratio = 1;
>> -
>> -     return inactive * inactive_ratio<  active;
>> -}
>> -
>> -int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, struct zone *zone)
>> -{
>> -     unsigned long active;
>> -     unsigned long inactive;
>> -     int zid = zone_idx(zone);
>> -     int nid = zone_to_nid(zone);
>> -
>> -     inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
>> -                                             BIT(LRU_INACTIVE_FILE));
>> -     active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
>> -                                           BIT(LRU_ACTIVE_FILE));
>> -
>> -     return (active>  inactive);
>> -}
>> -
>>   struct zone_reclaim_stat *
>>   mem_cgroup_get_reclaim_stat_from_page(struct page *page)
>>   {
>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>> index ea40034..2e90931 100644
>> --- a/mm/page_alloc.c
>> +++ b/mm/page_alloc.c
>> @@ -5051,49 +5051,6 @@ void setup_per_zone_wmarks(void)
>>   }
>>
>>   /*
>> - * The inactive anon list should be small enough that the VM never has to
>> - * do too much work, but large enough that each inactive page has a chance
>> - * to be referenced again before it is swapped out.
>> - *
>> - * The inactive_anon ratio is the target ratio of ACTIVE_ANON to
>> - * INACTIVE_ANON pages on this zone's LRU, maintained by the
>> - * pageout code. A zone->inactive_ratio of 3 means 3:1 or 25% of
>> - * the anonymous pages are kept on the inactive list.
>> - *
>> - * total     target    max
>> - * memory    ratio     inactive anon
>> - * -------------------------------------
>> - *   10MB       1         5MB
>> - *  100MB       1        50MB
>> - *    1GB       3       250MB
>> - *   10GB      10       0.9GB
>> - *  100GB      31         3GB
>> - *    1TB     101        10GB
>> - *   10TB     320        32GB
>> - */
>> -static void __meminit calculate_zone_inactive_ratio(struct zone *zone)
>> -{
>> -     unsigned int gb, ratio;
>> -
>> -     /* Zone size in gigabytes */
>> -     gb = zone->present_pages>>  (30 - PAGE_SHIFT);
>> -     if (gb)
>> -             ratio = int_sqrt(10 * gb);
>> -     else
>> -             ratio = 1;
>> -
>> -     zone->inactive_ratio = ratio;
>> -}
>> -
>> -static void __meminit setup_per_zone_inactive_ratio(void)
>> -{
>> -     struct zone *zone;
>> -
>> -     for_each_zone(zone)
>> -             calculate_zone_inactive_ratio(zone);
>> -}
>> -
>> -/*
>>    * Initialise min_free_kbytes.
>>    *
>>    * For small machines we want it small (128k min).  For large machines
>> @@ -5131,7 +5088,6 @@ int __meminit init_per_zone_wmark_min(void)
>>        setup_per_zone_wmarks();
>>        refresh_zone_stat_thresholds();
>>        setup_per_zone_lowmem_reserve();
>> -     setup_per_zone_inactive_ratio();
>>        return 0;
>>   }
>>   module_init(init_per_zone_wmark_min)
>> diff --git a/mm/vmscan.c b/mm/vmscan.c
>> index fe00a22..ab447df 100644
>> --- a/mm/vmscan.c
>> +++ b/mm/vmscan.c
>> @@ -1750,29 +1750,38 @@ static void shrink_active_list(unsigned long nr_to_scan,
>>   }
>>
>>   #ifdef CONFIG_SWAP
>> -static int inactive_anon_is_low_global(struct zone *zone)
>> -{
>> -     unsigned long active, inactive;
>> -
>> -     active = zone_page_state(zone, NR_ACTIVE_ANON);
>> -     inactive = zone_page_state(zone, NR_INACTIVE_ANON);
>> -
>> -     if (inactive * zone->inactive_ratio<  active)
>> -             return 1;
>> -
>> -     return 0;
>> -}
>> -
>>   /**
>>    * inactive_anon_is_low - check if anonymous pages need to be deactivated
>>    * @zone: zone to check
>> - * @sc:   scan control of this context
>>    *
>>    * Returns true if the zone does not have enough inactive anon pages,
>>    * meaning some active anon pages need to be deactivated.
>> + *
>> + * The inactive anon list should be small enough that the VM never has to
>> + * do too much work, but large enough that each inactive page has a chance
>> + * to be referenced again before it is swapped out.
>> + *
>> + * The inactive_anon ratio is the target ratio of ACTIVE_ANON to
>> + * INACTIVE_ANON pages on this zone's LRU, maintained by the
>> + * pageout code. A zone->inactive_ratio of 3 means 3:1 or 25% of
>> + * the anonymous pages are kept on the inactive list.
>> + *
>> + * total     target    max
>> + * memory    ratio     inactive anon
>> + * -------------------------------------
>> + *   10MB       1         5MB
>> + *  100MB       1        50MB
>> + *    1GB       3       250MB
>> + *   10GB      10       0.9GB
>> + *  100GB      31         3GB
>> + *    1TB     101        10GB
>> + *   10TB     320        32GB
>>    */
>>   static int inactive_anon_is_low(struct mem_cgroup_zone *mz)
>>   {
>> +     unsigned long active, inactive;
>> +     unsigned int gb, ratio;
>> +
>>        /*
>>         * If we don't have swap space, anonymous page deactivation
>>         * is pointless.
>> @@ -1780,11 +1789,26 @@ static int inactive_anon_is_low(struct mem_cgroup_zone *mz)
>>        if (!total_swap_pages)
>>                return 0;
>>
>> -     if (!mem_cgroup_disabled())
>> -             return mem_cgroup_inactive_anon_is_low(mz->mem_cgroup,
>> -                                                    mz->zone);
>> +     if (mem_cgroup_disabled()) {
>> +             active = zone_page_state(mz->zone, NR_ACTIVE_ANON);
>> +             inactive = zone_page_state(mz->zone, NR_INACTIVE_ANON);
>> +     } else {
>> +             active = mem_cgroup_zone_nr_lru_pages(mz->mem_cgroup,
>> +                             zone_to_nid(mz->zone), zone_idx(mz->zone),
>> +                             BIT(LRU_ACTIVE_ANON));
>> +             inactive = mem_cgroup_zone_nr_lru_pages(mz->mem_cgroup,
>> +                             zone_to_nid(mz->zone), zone_idx(mz->zone),
>> +                             BIT(LRU_INACTIVE_ANON));
>> +     }
>> +
>> +     /* Total size in gigabytes */
>> +     gb = (active + inactive)>>  (30 - PAGE_SHIFT);
>> +     if (gb)
>> +             ratio = int_sqrt(10 * gb);
>> +     else
>> +             ratio = 1;
>>
>> -     return inactive_anon_is_low_global(mz->zone);
>> +     return inactive * ratio<  active;
>>   }
>>   #else
>>   static inline int inactive_anon_is_low(struct mem_cgroup_zone *mz)
>> @@ -1793,16 +1817,6 @@ static inline int inactive_anon_is_low(struct mem_cgroup_zone *mz)
>>   }
>>   #endif
>>
>> -static int inactive_file_is_low_global(struct zone *zone)
>> -{
>> -     unsigned long active, inactive;
>> -
>> -     active = zone_page_state(zone, NR_ACTIVE_FILE);
>> -     inactive = zone_page_state(zone, NR_INACTIVE_FILE);
>> -
>> -     return (active>  inactive);
>> -}
>> -
>>   /**
>>    * inactive_file_is_low - check if file pages need to be deactivated
>>    * @mz: memory cgroup and zone to check
>> @@ -1819,11 +1833,21 @@ static int inactive_file_is_low_global(struct zone *zone)
>>    */
>>   static int inactive_file_is_low(struct mem_cgroup_zone *mz)
>>   {
>> -     if (!mem_cgroup_disabled())
>> -             return mem_cgroup_inactive_file_is_low(mz->mem_cgroup,
>> -                                                    mz->zone);
>> +     unsigned long active, inactive;
>> +
>> +     if (mem_cgroup_disabled()) {
>> +             active = zone_page_state(mz->zone, NR_ACTIVE_FILE);
>> +             inactive = zone_page_state(mz->zone, NR_INACTIVE_FILE);
>> +     } else {
>> +             active = mem_cgroup_zone_nr_lru_pages(mz->mem_cgroup,
>> +                             zone_to_nid(mz->zone), zone_idx(mz->zone),
>> +                             BIT(LRU_ACTIVE_FILE));
>> +             inactive = mem_cgroup_zone_nr_lru_pages(mz->mem_cgroup,
>> +                             zone_to_nid(mz->zone), zone_idx(mz->zone),
>> +                             BIT(LRU_INACTIVE_FILE));
>> +     }
>>
>> -     return inactive_file_is_low_global(mz->zone);
>> +     return inactive<  active;
>>   }
>>
>>   static int inactive_list_is_low(struct mem_cgroup_zone *mz, int file)
>> diff --git a/mm/vmstat.c b/mm/vmstat.c
>> index f600557..2c813e1 100644
>> --- a/mm/vmstat.c
>> +++ b/mm/vmstat.c
>> @@ -1017,11 +1017,9 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
>>        }
>>        seq_printf(m,
>>                   "\n  all_unreclaimable: %u"
>> -                "\n  start_pfn:         %lu"
>> -                "\n  inactive_ratio:    %u",
>> +                "\n  start_pfn:         %lu",
>>                   zone->all_unreclaimable,
>> -                zone->zone_start_pfn,
>> -                zone->inactive_ratio);
>> +                zone->zone_start_pfn);
>>        seq_putc(m, '\n');
>>   }
>>
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>> the body of a message to majordomo@...r.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> Please read the FAQ at  http://www.tux.org/lkml/
>>
>

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ