lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Wed, 18 Jun 2014 11:00:52 +0200
From:	Jerome Marchand <jmarchan@...hat.com>
To:	Chen Yucong <slaoub@...il.com>, akpm@...ux-foundation.org
CC:	minchan@...nel.org, mgorman@...e.de, hannes@...xchg.org,
	mhocko@...e.cz, riel@...hat.com, linux-mm@...ck.org,
	linux-kernel@...r.kernel.org
Subject: Re: [PATCH] mm/vmscan.c: fix an implementation flaw in proportional
 scanning

On 06/17/2014 06:55 AM, Chen Yucong wrote:
> From https://lkml.org/lkml/2013/4/10/897, we know that the relevant design
> idea is to keep
> 
>     scan_target[anon] : scan_target[file]
>         == really_scanned_num[anon] : really_scanned_num[file]
> 
> But we can find the following snippet in shrink_lruvec():
> 
>     if (nr_file > nr_anon) {
>         ...
>     } else {
>         ...
>     }
> 
> However, the above code fragment broke the design idea. We can assume:
> 
>       nr[LRU_ACTIVE_FILE] = 30
>       nr[LRU_INACTIVE_FILE] = 30
>       nr[LRU_ACTIVE_ANON] = 0
>       nr[LRU_INACTIVE_ANON] = 40
> 
> When the value of (nr_reclaimed < nr_to_reclaim) becomes false, we have
> the following results:
> 
>       nr[LRU_ACTIVE_FILE] = 15
>       nr[LRU_INACTIVE_FILE] = 15
>       nr[LRU_ACTIVE_ANON] = 0
>       nr[LRU_INACTIVE_ANON] = 25
>       nr_file = 30
>       nr_anon = 25
>       file_percent = 30 / 60 = 0.5
>       anon_percent = 25 / 40 = 0.625
> 
> According to the above design idea, we should scan some pages from ANON,
> but in fact we take the wrong code path because of "if (nr_file > nr_anon)".
> As a result, nr[lru] is likely to become a negative number. Luckily,
> "nr[lru] -= min(nr[lru], nr_scanned)" guards against this situation,
> but the result still goes against the design idea.
> 
> Signed-off-by: Chen Yucong <slaoub@...il.com>
> ---
>  mm/vmscan.c |   39 ++++++++++++++++++---------------------
>  1 file changed, 18 insertions(+), 21 deletions(-)
> 
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index a8ffe4e..2c35e34 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -2057,8 +2057,7 @@ out:
>  static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
>  {
>  	unsigned long nr[NR_LRU_LISTS];
> -	unsigned long targets[NR_LRU_LISTS];
> -	unsigned long nr_to_scan;
> +	unsigned long file_target, anon_target;
>  	enum lru_list lru;
>  	unsigned long nr_reclaimed = 0;
>  	unsigned long nr_to_reclaim = sc->nr_to_reclaim;
> @@ -2067,8 +2066,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
>  
>  	get_scan_count(lruvec, sc, nr);
>  
> -	/* Record the original scan target for proportional adjustments later */
> -	memcpy(targets, nr, sizeof(nr));
> +	file_target = nr[LRU_INACTIVE_FILE] + nr[LRU_ACTIVE_FILE];
> +	anon_target = nr[LRU_INACTIVE_ANON] + nr[LRU_ACTIVE_ANON];

The current code adds 1 to these values to avoid a divide-by-zero error.

>  
>  	/*
>  	 * Global reclaiming within direct reclaim at DEF_PRIORITY is a normal
> @@ -2087,8 +2086,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
>  	blk_start_plug(&plug);
>  	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
>  					nr[LRU_INACTIVE_FILE]) {
> -		unsigned long nr_anon, nr_file, percentage;
> -		unsigned long nr_scanned;
> +		unsigned long nr_anon, nr_file, file_percent, anon_percent;
> +		unsigned long nr_to_scan, nr_scanned, percentage;
>  
>  		for_each_evictable_lru(lru) {
>  			if (nr[lru]) {
> @@ -2122,16 +2121,19 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
>  		if (!nr_file || !nr_anon)
>  			break;
>  
> -		if (nr_file > nr_anon) {
> -			unsigned long scan_target = targets[LRU_INACTIVE_ANON] +
> -						targets[LRU_ACTIVE_ANON] + 1;
> +		file_percent = nr_file * 100 / file_target;
> +		anon_percent = nr_anon * 100 / anon_target;

The divide-by-zero could happen here.

Jerome

> +
> +		if (file_percent > anon_percent) {
>  			lru = LRU_BASE;
> -			percentage = nr_anon * 100 / scan_target;
> +			nr_scanned = file_target - nr_file;
> +			nr_to_scan = file_target * (100 - anon_percent) / 100;
> +			percentage = nr[LRU_FILE] * 100 / nr_file;
>  		} else {
> -			unsigned long scan_target = targets[LRU_INACTIVE_FILE] +
> -						targets[LRU_ACTIVE_FILE] + 1;
>  			lru = LRU_FILE;
> -			percentage = nr_file * 100 / scan_target;
> +			nr_scanned = anon_target - nr_anon;
> +			nr_to_scan = anon_target * (100 - file_percent) / 100;
> +			percentage = nr[LRU_BASE] * 100 / nr_anon;
>  		}
>  
>  		/* Stop scanning the smaller of the LRU */
> @@ -2143,14 +2145,9 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
>  		 * scan target and the percentage scanning already complete
>  		 */
>  		lru = (lru == LRU_FILE) ? LRU_BASE : LRU_FILE;
> -		nr_scanned = targets[lru] - nr[lru];
> -		nr[lru] = targets[lru] * (100 - percentage) / 100;
> -		nr[lru] -= min(nr[lru], nr_scanned);
> -
> -		lru += LRU_ACTIVE;
> -		nr_scanned = targets[lru] - nr[lru];
> -		nr[lru] = targets[lru] * (100 - percentage) / 100;
> -		nr[lru] -= min(nr[lru], nr_scanned);
> +		nr_to_scan -= min(nr_to_scan, nr_scanned);
> +		nr[lru] = nr_to_scan * percentage / 100;
> +		nr[lru + LRU_ACTIVE] = nr_to_scan - nr[lru];
>  
>  		scan_adjusted = true;
>  	}
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ