lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20080606180447.5487a6e8.akpm@linux-foundation.org>
Date:	Fri, 6 Jun 2008 18:04:47 -0700
From:	Andrew Morton <akpm@...ux-foundation.org>
To:	Rik van Riel <riel@...hat.com>
Cc:	linux-kernel@...r.kernel.org, lee.schermerhorn@...com,
	kosaki.motohiro@...fujitsu.com
Subject: Re: [PATCH -mm 08/25] add some sanity checks to get_scan_ratio

On Fri, 06 Jun 2008 16:28:46 -0400
Rik van Riel <riel@...hat.com> wrote:

> From: Rik van Riel <riel@...hat.com>
> 
> The access ratio based scan rate determination in get_scan_ratio
> works ok in most situations, but needs to be corrected in some
> corner cases:
> - if we run out of swap space, do not bother scanning the anon LRUs
> - if we have already freed all of the page cache, we need to scan
>   the anon LRUs

Strange.  We'll never free *all* the pagecache?

> - restore the *actual* access ratio based scan rate algorithm, the
>   previous versions of this patch series had the wrong version
> - scale the number of pages added to zone->nr_scan[l]
> 
> ...
>
> @@ -1180,15 +1191,19 @@ static void get_scan_ratio(struct zone *
>  	file  = zone_page_state(zone, NR_ACTIVE_FILE) +
>  		zone_page_state(zone, NR_INACTIVE_FILE);
>  
> -	rotate_sum = zone->recent_rotated_file + zone->recent_rotated_anon;
> -
>  	/* Keep a floating average of RECENT references. */
> -	if (unlikely(rotate_sum > min(anon, file))) {
> +	if (unlikely(zone->recent_scanned_anon > anon / zone->inactive_ratio)) {
>  		spin_lock_irq(&zone->lru_lock);
> -		zone->recent_rotated_file /= 2;
> +		zone->recent_scanned_anon /= 2;
>  		zone->recent_rotated_anon /= 2;
>  		spin_unlock_irq(&zone->lru_lock);
> -		rotate_sum /= 2;
> +	}
> +
> +	if (unlikely(zone->recent_scanned_file > file / 4)) {

I see nothing in the changelog about this and there are no comments. 
How can a reader possibly work out what you were thinking when this
was typed in??

> +		spin_lock_irq(&zone->lru_lock);
> +		zone->recent_scanned_file /= 2;
> +		zone->recent_rotated_file /= 2;
> +		spin_unlock_irq(&zone->lru_lock);
>  	}
>  
>  	/*
> @@ -1201,23 +1216,33 @@ static void get_scan_ratio(struct zone *
>  	/*
>  	 *                  anon       recent_rotated_anon
>  	 * %anon = 100 * ----------- / ------------------- * IO cost
> -	 *               anon + file       rotate_sum
> +	 *               anon + file   recent_scanned_anon
>  	 */
> -	ap = (anon_prio * anon) / (anon + file + 1);
> -	ap *= rotate_sum / (zone->recent_rotated_anon + 1);
> -	if (ap == 0)
> -		ap = 1;
> -	else if (ap > 100)
> -		ap = 100;
> -	percent[0] = ap;
> -
> -	fp = (file_prio * file) / (anon + file + 1);
> -	fp *= rotate_sum / (zone->recent_rotated_file + 1);
> -	if (fp == 0)
> -		fp = 1;
> -	else if (fp > 100)
> -		fp = 100;
> -	percent[1] = fp;
> +	ap = (anon_prio + 1) * (zone->recent_scanned_anon + 1);
> +	ap /= zone->recent_rotated_anon + 1;
> +
> +	fp = (file_prio + 1) * (zone->recent_scanned_file + 1);
> +	fp /= zone->recent_rotated_file + 1;
> +
> +	/* Normalize to percentages */
> +	percent[0] = 100 * ap / (ap + fp + 1);
> +	percent[1] = 100 - percent[0];
> +
> +	free = zone_page_state(zone, NR_FREE_PAGES);
> +
> +	/*
> +	 * If we have no swap space, do not bother scanning anon pages.
> +	 */
> +	if (nr_swap_pages <= 0) {
> +		percent[0] = 0;
> +		percent[1] = 100;
> +	}
> +	/*
> +	 * If we already freed most file pages, scan the anon pages
> +	 * regardless of the page access ratios or swappiness setting.
> +	 */
> +	else if (file + free <= zone->pages_high)
> +		percent[0] = 100;
>  }

Perhaps the (nr_swap_pages <= 0) test could happen earlier on.

Please quadruple-check this code like a paranoid maniac looking for
underflows, overflows and divides-by-zero.  Bear in mind that x/(y+1)
can get a div-by-zero for sufficiently-unepected values of y.

The layout of the last bit is misleading, IMO.  Better and more typical
would be:

	if (nr_swap_pages <= 0) {
		/*
		 * If we have no swap space, do not bother scanning anon pages.
		 */
		percent[0] = 0;
		percent[1] = 100;
	} else if (file + free <= zone->pages_high) {
		/*
		 * If we already freed most file pages, scan the anon pages
		 * regardless of the page access ratios or swappiness setting.
		 */
		percent[0] = 100;
	}

(Was there no need to wite percent[1] here?)

>  
> @@ -1238,13 +1263,17 @@ static unsigned long shrink_zone(int pri
>  	for_each_lru(l) {
>  		if (scan_global_lru(sc)) {
>  			int file = is_file_lru(l);
> +			int scan;
>  			/*
>  			 * Add one to nr_to_scan just to make sure that the
> -			 * kernel will slowly sift through the active list.
> +			 * kernel will slowly sift through each list.
>  			 */
> -			zone->nr_scan[l] += (zone_page_state(zone,
> -				NR_INACTIVE_ANON + l) >> priority) + 1;
> -			nr[l] = zone->nr_scan[l] * percent[file] / 100;
> +			scan = zone_page_state(zone, NR_INACTIVE_ANON + l);
> +			scan >>= priority;
> +			scan = (scan * percent[file]) / 100;

Oh, so that's what the [0] and [1] in get_scan_ratio() mean.  Perhaps
doing this:

	if (nr_swap_pages <= 0) {
		percent[0] = 0;		/* anon */
		percent[1] = 100;	/* file */

would clarify things.  But much better would be

/* comment goes here */
struct scan_ratios {
	unsigned long anon;
	unsigned long file;
};

no?


> +			zone->nr_scan[l] += scan + 1;
> +			nr[l] = zone->nr_scan[l];
>  			if (nr[l] >= sc->swap_cluster_max)
>  				zone->nr_scan[l] = 0;
>  			else
> @@ -1261,7 +1290,7 @@ static unsigned long shrink_zone(int pri
>  	}
>  
>  	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
> -						 nr[LRU_INACTIVE_FILE]) {
> +					nr[LRU_INACTIVE_FILE]) {
>  		for_each_lru(l) {
>  			if (nr[l]) {
>  				nr_to_scan = min(nr[l],
> @@ -1274,6 +1303,13 @@ static unsigned long shrink_zone(int pri
>  		}
>  	}
>  
> +	/*
> +	 * Even if we did not try to evict anon pages at all, we want to
> +	 * rebalance the anon lru active/inactive ratio.
> +	 */
> +	if (scan_global_lru(sc) && inactive_anon_low(zone))
> +		shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
> +
>  	throttle_vm_writeout(sc->gfp_mask);
>  	return nr_reclaimed;
>  }
> Index: linux-2.6.26-rc2-mm1/include/linux/mmzone.h
> ===================================================================
> --- linux-2.6.26-rc2-mm1.orig/include/linux/mmzone.h	2008-05-28 12:09:06.000000000 -0400
> +++ linux-2.6.26-rc2-mm1/include/linux/mmzone.h	2008-05-28 12:11:51.000000000 -0400
> @@ -289,6 +289,8 @@ struct zone {
>  
>  	unsigned long		recent_rotated_anon;
>  	unsigned long		recent_rotated_file;
> +	unsigned long		recent_scanned_anon;
> +	unsigned long		recent_scanned_file;

I think struct zone is sufficiently important and obscure that
field-by-field /*documentation*/ is needed.  Not as kerneldoc, please -
better to do it at the definition site

>  	unsigned long		pages_scanned;	   /* since last reclaim */
>  	unsigned long		flags;		   /* zone flags, see below */
> Index: linux-2.6.26-rc2-mm1/mm/page_alloc.c
> ===================================================================
> --- linux-2.6.26-rc2-mm1.orig/mm/page_alloc.c	2008-05-28 12:09:06.000000000 -0400
> +++ linux-2.6.26-rc2-mm1/mm/page_alloc.c	2008-05-28 12:11:51.000000000 -0400
> @@ -3512,7 +3512,8 @@ static void __paginginit free_area_init_
>  		}
>  		zone->recent_rotated_anon = 0;
>  		zone->recent_rotated_file = 0;
> -//TODO recent_scanned_* ???
> +		zone->recent_scanned_anon = 0;
> +		zone->recent_scanned_file = 0;
>  		zap_zone_vm_stats(zone);
>  		zone->flags = 0;
>  		if (!size)
> Index: linux-2.6.26-rc2-mm1/mm/swap.c
> ===================================================================
> --- linux-2.6.26-rc2-mm1.orig/mm/swap.c	2008-05-28 12:09:06.000000000 -0400
> +++ linux-2.6.26-rc2-mm1/mm/swap.c	2008-05-28 12:11:51.000000000 -0400
> @@ -176,8 +176,8 @@ void activate_page(struct page *page)
>  
>  	spin_lock_irq(&zone->lru_lock);
>  	if (PageLRU(page) && !PageActive(page)) {
> -		int lru = LRU_BASE;
> -		lru += page_file_cache(page);
> +		int file = page_file_cache(page);
> +		int lru = LRU_BASE + file;
>  		del_page_from_lru_list(zone, page, lru);
>  
>  		SetPageActive(page);
> @@ -185,6 +185,15 @@ void activate_page(struct page *page)
>  		add_page_to_lru_list(zone, page, lru);
>  		__count_vm_event(PGACTIVATE);
>  		mem_cgroup_move_lists(page, true);
> +
> +		if (file) {
> +			zone->recent_scanned_file++;
> +			zone->recent_rotated_file++;
> +		} else {
> +			/* Can this happen?  Maybe through tmpfs... */

What's the status here?

> +			zone->recent_scanned_anon++;
> +			zone->recent_rotated_anon++;
> +		}
>  	}
>  	spin_unlock_irq(&zone->lru_lock);
>  }

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ