lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Fri, 30 Oct 2015 12:10:15 +0800
From:	"Hillf Danton" <hillf.zj@...baba-inc.com>
To:	<mhocko@...nel.org>, <linux-mm@...ck.org>
Cc:	"'Andrew Morton'" <akpm@...ux-foundation.org>,
	"'Linus Torvalds'" <torvalds@...ux-foundation.org>,
	"'Mel Gorman'" <mgorman@...e.de>,
	"'Johannes Weiner'" <hannes@...xchg.org>,
	"'Rik van Riel'" <riel@...hat.com>,
	"'David Rientjes'" <rientjes@...gle.com>,
	"'Tetsuo Handa'" <penguin-kernel@...ove.SAKURA.ne.jp>,
	"'LKML'" <linux-kernel@...r.kernel.org>,
	"'Michal Hocko'" <mhocko@...e.com>
Subject: Re: [RFC 1/3] mm, oom: refactor oom detection

> +/*
> + * Number of backoff steps for potentially reclaimable pages if the direct reclaim
> + * cannot make any progress. Each step will reduce 1/MAX_STALL_BACKOFF of the
> + * reclaimable memory.
> + */
> +#define MAX_STALL_BACKOFF 16
> +
>  static inline struct page *
>  __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>  						struct alloc_context *ac)
> @@ -2984,6 +2991,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>  	enum migrate_mode migration_mode = MIGRATE_ASYNC;
>  	bool deferred_compaction = false;
>  	int contended_compaction = COMPACT_CONTENDED_NONE;
> +	struct zone *zone;
> +	struct zoneref *z;
> +	int stall_backoff = 0;
> 
>  	/*
>  	 * In the slowpath, we sanity check order to avoid ever trying to
> @@ -3135,13 +3145,56 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>  	if (gfp_mask & __GFP_NORETRY)
>  		goto noretry;
> 
> -	/* Keep reclaiming pages as long as there is reasonable progress */
> +	/*
> +	 * Do not retry high order allocations unless they are __GFP_REPEAT
> +	 * and even then do not retry endlessly.
> +	 */
>  	pages_reclaimed += did_some_progress;
> -	if ((did_some_progress && order <= PAGE_ALLOC_COSTLY_ORDER) ||
> -	    ((gfp_mask & __GFP_REPEAT) && pages_reclaimed < (1 << order))) {
> -		/* Wait for some write requests to complete then retry */
> -		wait_iff_congested(ac->preferred_zone, BLK_RW_ASYNC, HZ/50);
> -		goto retry;
> +	if (order > PAGE_ALLOC_COSTLY_ORDER) {
> +		if (!(gfp_mask & __GFP_REPEAT) || pages_reclaimed >= (1<<order))
> +			goto noretry;
> +
> +		if (did_some_progress)
> +			goto retry;
> +	}
> +
> +	/*
> +	 * Be optimistic and consider all pages on reclaimable LRUs as usable
> +	 * but make sure we converge to OOM if we cannot make any progress after
> +	 * multiple consecutive failed attempts.
> +	 */
> +	if (did_some_progress)
> +		stall_backoff = 0;
> +	else
> +		stall_backoff = min(stall_backoff+1, MAX_STALL_BACKOFF);
> +
> +	/*
> +	 * Keep reclaiming pages while there is a chance this will lead somewhere.
> +	 * If none of the target zones can satisfy our allocation request even
> +	 * if all reclaimable pages are considered then we are screwed and have
> +	 * to go OOM.
> +	 */
> +	for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx, ac->nodemask) {
> +		unsigned long free = zone_page_state(zone, NR_FREE_PAGES);
> +		unsigned long reclaimable;
> +		unsigned long target;
> +
> +		reclaimable = zone_reclaimable_pages(zone) +
> +			      zone_page_state(zone, NR_ISOLATED_FILE) +
> +			      zone_page_state(zone, NR_ISOLATED_ANON);
> +		target = reclaimable;
> +		target -= stall_backoff * (1 + target/MAX_STALL_BACKOFF);

		target = reclaimable - stall_backoff * (1 + target/MAX_STALL_BACKOFF);
		             = reclaimable - stall_backoff - stall_backoff  * (target/MAX_STALL_BACKOFF);

then the first stall_backoff looks unreasonable.
I guess you mean
		target	= reclaimable - target * (stall_backoff/MAX_STALL_BACKOFF);
			= reclaimable - stall_back * (target/MAX_STALL_BACKOFF);

> +		target += free;
> +
> +		/*
> +		 * Would the allocation succeed if we reclaimed the whole target?
> +		 */
> +		if (__zone_watermark_ok(zone, order, min_wmark_pages(zone),
> +				ac->high_zoneidx, alloc_flags, target)) {
> +			/* Wait for some write requests to complete then retry */
> +			wait_iff_congested(zone, BLK_RW_ASYNC, HZ/50);
> +			goto retry;
> +		}
>  	}
> 
[...]
/*
> @@ -2734,10 +2730,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
>  		goto retry;
>  	}
> 
> -	/* Any of the zones still reclaimable?  Don't OOM. */
> -	if (zones_reclaimable)
> -		return 1;
> -

Looks cleanup of zones_reclaimable left.
>  	return 0;
>  }
> 
> --
> 2.6.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists