lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Fri, 2 Mar 2012 14:31:06 +0900
From:	KAMEZAWA Hiroyuki <kamezawa.hiroyu@...fujitsu.com>
To:	Konstantin Khlebnikov <khlebnikov@...nvz.org>
Cc:	Andrew Morton <akpm@...ux-foundation.org>,
	Hugh Dickins <hughd@...gle.com>,
	Johannes Weiner <jweiner@...hat.com>, linux-mm@...ck.org,
	linux-kernel@...r.kernel.org
Subject: Re: [PATCH 6/7] mm/memcg: rework inactive_ratio calculation

On Wed, 29 Feb 2012 13:16:00 +0400
Konstantin Khlebnikov <khlebnikov@...nvz.org> wrote:

> This patch removes precalculated zone->inactive_ratio.
> Now it always calculated in inactive_anon_is_low() from current lru size.
> After that we can merge memcg and non-memcg cases and drop duplicated code.
> 
> We can drop precalculated ratio, because its calculation fast enough to do it
> each time. Plus precalculation uses zone size as basis, this estimation not
> always match with page lru size, for example if a significant proportion
> of memory occupied by kernel objects.
> 
> Signed-off-by: Konstantin Khlebnikov <khlebnikov@...nvz.org>

Maybe good....but please don't change the user interface /proc/zoneinfo implicitly.
How about calculating inactive_ratio at reading /proc/zoneinfo ?

Thanks,
-Kame




> ---
>  include/linux/memcontrol.h |   16 --------
>  include/linux/mmzone.h     |    7 ----
>  mm/memcontrol.c            |   38 -------------------
>  mm/page_alloc.c            |   44 ----------------------
>  mm/vmscan.c                |   88 ++++++++++++++++++++++++++++----------------
>  mm/vmstat.c                |    6 +--
>  6 files changed, 58 insertions(+), 141 deletions(-)
> 
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index e2e1fac..7e114f8 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -117,10 +117,6 @@ void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
>  /*
>   * For memory reclaim.
>   */
> -int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg,
> -				    struct zone *zone);
> -int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg,
> -				    struct zone *zone);
>  int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);
>  unsigned long mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg,
>  					int nid, int zid, unsigned int lrumask);
> @@ -334,18 +330,6 @@ static inline bool mem_cgroup_disabled(void)
>  	return true;
>  }
>  
> -static inline int
> -mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone)
> -{
> -	return 1;
> -}
> -
> -static inline int
> -mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, struct zone *zone)
> -{
> -	return 1;
> -}
> -
>  static inline unsigned long
>  mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, int nid, int zid,
>  				unsigned int lru_mask)
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index fdcd683..7edcf17 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -384,13 +384,6 @@ struct zone {
>  	/* Zone statistics */
>  	atomic_long_t		vm_stat[NR_VM_ZONE_STAT_ITEMS];
>  
> -	/*
> -	 * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
> -	 * this zone's LRU.  Maintained by the pageout code.
> -	 */
> -	unsigned int inactive_ratio;
> -
> -
>  	ZONE_PADDING(_pad2_)
>  	/* Rarely used or read-mostly fields */
>  
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 2809531..4bc6835 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -1171,44 +1171,6 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg)
>  	return ret;
>  }
>  
> -int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone)
> -{
> -	unsigned long inactive_ratio;
> -	int nid = zone_to_nid(zone);
> -	int zid = zone_idx(zone);
> -	unsigned long inactive;
> -	unsigned long active;
> -	unsigned long gb;
> -
> -	inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
> -						BIT(LRU_INACTIVE_ANON));
> -	active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
> -					      BIT(LRU_ACTIVE_ANON));
> -
> -	gb = (inactive + active) >> (30 - PAGE_SHIFT);
> -	if (gb)
> -		inactive_ratio = int_sqrt(10 * gb);
> -	else
> -		inactive_ratio = 1;
> -
> -	return inactive * inactive_ratio < active;
> -}
> -
> -int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, struct zone *zone)
> -{
> -	unsigned long active;
> -	unsigned long inactive;
> -	int zid = zone_idx(zone);
> -	int nid = zone_to_nid(zone);
> -
> -	inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
> -						BIT(LRU_INACTIVE_FILE));
> -	active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
> -					      BIT(LRU_ACTIVE_FILE));
> -
> -	return (active > inactive);
> -}
> -
>  struct zone_reclaim_stat *
>  mem_cgroup_get_reclaim_stat_from_page(struct page *page)
>  {
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index ea40034..2e90931 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -5051,49 +5051,6 @@ void setup_per_zone_wmarks(void)
>  }
>  
>  /*
> - * The inactive anon list should be small enough that the VM never has to
> - * do too much work, but large enough that each inactive page has a chance
> - * to be referenced again before it is swapped out.
> - *
> - * The inactive_anon ratio is the target ratio of ACTIVE_ANON to
> - * INACTIVE_ANON pages on this zone's LRU, maintained by the
> - * pageout code. A zone->inactive_ratio of 3 means 3:1 or 25% of
> - * the anonymous pages are kept on the inactive list.
> - *
> - * total     target    max
> - * memory    ratio     inactive anon
> - * -------------------------------------
> - *   10MB       1         5MB
> - *  100MB       1        50MB
> - *    1GB       3       250MB
> - *   10GB      10       0.9GB
> - *  100GB      31         3GB
> - *    1TB     101        10GB
> - *   10TB     320        32GB
> - */
> -static void __meminit calculate_zone_inactive_ratio(struct zone *zone)
> -{
> -	unsigned int gb, ratio;
> -
> -	/* Zone size in gigabytes */
> -	gb = zone->present_pages >> (30 - PAGE_SHIFT);
> -	if (gb)
> -		ratio = int_sqrt(10 * gb);
> -	else
> -		ratio = 1;
> -
> -	zone->inactive_ratio = ratio;
> -}
> -
> -static void __meminit setup_per_zone_inactive_ratio(void)
> -{
> -	struct zone *zone;
> -
> -	for_each_zone(zone)
> -		calculate_zone_inactive_ratio(zone);
> -}
> -
> -/*
>   * Initialise min_free_kbytes.
>   *
>   * For small machines we want it small (128k min).  For large machines
> @@ -5131,7 +5088,6 @@ int __meminit init_per_zone_wmark_min(void)
>  	setup_per_zone_wmarks();
>  	refresh_zone_stat_thresholds();
>  	setup_per_zone_lowmem_reserve();
> -	setup_per_zone_inactive_ratio();
>  	return 0;
>  }
>  module_init(init_per_zone_wmark_min)
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index fe00a22..ab447df 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -1750,29 +1750,38 @@ static void shrink_active_list(unsigned long nr_to_scan,
>  }
>  
>  #ifdef CONFIG_SWAP
> -static int inactive_anon_is_low_global(struct zone *zone)
> -{
> -	unsigned long active, inactive;
> -
> -	active = zone_page_state(zone, NR_ACTIVE_ANON);
> -	inactive = zone_page_state(zone, NR_INACTIVE_ANON);
> -
> -	if (inactive * zone->inactive_ratio < active)
> -		return 1;
> -
> -	return 0;
> -}
> -
>  /**
>   * inactive_anon_is_low - check if anonymous pages need to be deactivated
>   * @zone: zone to check
> - * @sc:   scan control of this context
>   *
>   * Returns true if the zone does not have enough inactive anon pages,
>   * meaning some active anon pages need to be deactivated.
> + *
> + * The inactive anon list should be small enough that the VM never has to
> + * do too much work, but large enough that each inactive page has a chance
> + * to be referenced again before it is swapped out.
> + *
> + * The inactive_anon ratio is the target ratio of ACTIVE_ANON to
> + * INACTIVE_ANON pages on this zone's LRU, maintained by the
> + * pageout code. A zone->inactive_ratio of 3 means 3:1 or 25% of
> + * the anonymous pages are kept on the inactive list.
> + *
> + * total     target    max
> + * memory    ratio     inactive anon
> + * -------------------------------------
> + *   10MB       1         5MB
> + *  100MB       1        50MB
> + *    1GB       3       250MB
> + *   10GB      10       0.9GB
> + *  100GB      31         3GB
> + *    1TB     101        10GB
> + *   10TB     320        32GB
>   */
>  static int inactive_anon_is_low(struct mem_cgroup_zone *mz)
>  {
> +	unsigned long active, inactive;
> +	unsigned int gb, ratio;
> +
>  	/*
>  	 * If we don't have swap space, anonymous page deactivation
>  	 * is pointless.
> @@ -1780,11 +1789,26 @@ static int inactive_anon_is_low(struct mem_cgroup_zone *mz)
>  	if (!total_swap_pages)
>  		return 0;
>  
> -	if (!mem_cgroup_disabled())
> -		return mem_cgroup_inactive_anon_is_low(mz->mem_cgroup,
> -						       mz->zone);
> +	if (mem_cgroup_disabled()) {
> +		active = zone_page_state(mz->zone, NR_ACTIVE_ANON);
> +		inactive = zone_page_state(mz->zone, NR_INACTIVE_ANON);
> +	} else {
> +		active = mem_cgroup_zone_nr_lru_pages(mz->mem_cgroup,
> +				zone_to_nid(mz->zone), zone_idx(mz->zone),
> +				BIT(LRU_ACTIVE_ANON));
> +		inactive = mem_cgroup_zone_nr_lru_pages(mz->mem_cgroup,
> +				zone_to_nid(mz->zone), zone_idx(mz->zone),
> +				BIT(LRU_INACTIVE_ANON));
> +	}
> +
> +	/* Total size in gigabytes */
> +	gb = (active + inactive) >> (30 - PAGE_SHIFT);
> +	if (gb)
> +		ratio = int_sqrt(10 * gb);
> +	else
> +		ratio = 1;
>  
> -	return inactive_anon_is_low_global(mz->zone);
> +	return inactive * ratio < active;
>  }
>  #else
>  static inline int inactive_anon_is_low(struct mem_cgroup_zone *mz)
> @@ -1793,16 +1817,6 @@ static inline int inactive_anon_is_low(struct mem_cgroup_zone *mz)
>  }
>  #endif
>  
> -static int inactive_file_is_low_global(struct zone *zone)
> -{
> -	unsigned long active, inactive;
> -
> -	active = zone_page_state(zone, NR_ACTIVE_FILE);
> -	inactive = zone_page_state(zone, NR_INACTIVE_FILE);
> -
> -	return (active > inactive);
> -}
> -
>  /**
>   * inactive_file_is_low - check if file pages need to be deactivated
>   * @mz: memory cgroup and zone to check
> @@ -1819,11 +1833,21 @@ static int inactive_file_is_low_global(struct zone *zone)
>   */
>  static int inactive_file_is_low(struct mem_cgroup_zone *mz)
>  {
> -	if (!mem_cgroup_disabled())
> -		return mem_cgroup_inactive_file_is_low(mz->mem_cgroup,
> -						       mz->zone);
> +	unsigned long active, inactive;
> +
> +	if (mem_cgroup_disabled()) {
> +		active = zone_page_state(mz->zone, NR_ACTIVE_FILE);
> +		inactive = zone_page_state(mz->zone, NR_INACTIVE_FILE);
> +	} else {
> +		active = mem_cgroup_zone_nr_lru_pages(mz->mem_cgroup,
> +				zone_to_nid(mz->zone), zone_idx(mz->zone),
> +				BIT(LRU_ACTIVE_FILE));
> +		inactive = mem_cgroup_zone_nr_lru_pages(mz->mem_cgroup,
> +				zone_to_nid(mz->zone), zone_idx(mz->zone),
> +				BIT(LRU_INACTIVE_FILE));
> +	}
>  
> -	return inactive_file_is_low_global(mz->zone);
> +	return inactive < active;
>  }
>  
>  static int inactive_list_is_low(struct mem_cgroup_zone *mz, int file)
> diff --git a/mm/vmstat.c b/mm/vmstat.c
> index f600557..2c813e1 100644
> --- a/mm/vmstat.c
> +++ b/mm/vmstat.c
> @@ -1017,11 +1017,9 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
>  	}
>  	seq_printf(m,
>  		   "\n  all_unreclaimable: %u"
> -		   "\n  start_pfn:         %lu"
> -		   "\n  inactive_ratio:    %u",
> +		   "\n  start_pfn:         %lu",
>  		   zone->all_unreclaimable,
> -		   zone->zone_start_pfn,
> -		   zone->inactive_ratio);
> +		   zone->zone_start_pfn);
>  	seq_putc(m, '\n');
>  }
>  
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@...r.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ