lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20110311085833.874c6c0e.kamezawa.hiroyu@jp.fujitsu.com>
Date:	Fri, 11 Mar 2011 08:58:33 +0900
From:	KAMEZAWA Hiroyuki <kamezawa.hiroyu@...fujitsu.com>
To:	Minchan Kim <minchan.kim@...il.com>
Cc:	Andrew Morton <akpm@...ux-foundation.org>,
	Andrew Vagin <avagin@...il.com>,
	Andrey Vagin <avagin@...nvz.org>, Mel Gorman <mel@....ul.ie>,
	KOSAKI Motohiro <kosaki.motohiro@...fujitsu.com>,
	linux-mm@...ck.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH] mm: check zone->all_unreclaimable in
 all_unreclaimable()

On Thu, 10 Mar 2011 15:58:29 +0900
Minchan Kim <minchan.kim@...il.com> wrote:

> Hi Kame,
> 
> Sorry for late response.
> I had a time to test this issue shortly because these day I am very busy.
> This issue was interesting to me.
> So I hope taking a time for enough testing when I have a time.
> I should find out root cause of livelock.
> 

Thanks. Kosaki-san and I reproduced the bug on a swapless system.
Kosaki-san is now digging into it and has found some issues with the scheduler
boost at OOM and a lack of sufficient "wait" in vmscan.c.

I made a patch myself, like the one attached. It works well for making
all_unreclaimable() return TRUE, but the livelock (deadlock?) still happens.
I suspect vmscan itself isn't the key to fixing the issue.
So, I'd like to wait for Kosaki-san's answer ;)

I'm now wondering how to catch a fork-bomb and stop it (without using cgroups).
I think the problem is that the fork-bomb is faster than killall...

Thanks,
-Kame
==

This is just a debug patch.

---
 mm/vmscan.c |   58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 54 insertions(+), 4 deletions(-)

Index: mmotm-0303/mm/vmscan.c
===================================================================
--- mmotm-0303.orig/mm/vmscan.c
+++ mmotm-0303/mm/vmscan.c
@@ -1983,9 +1983,55 @@ static void shrink_zones(int priority, s
 	}
 }
 
-static bool zone_reclaimable(struct zone *zone)
+static bool zone_seems_empty(struct zone *zone, struct scan_control *sc)
 {
-	return zone->pages_scanned < zone_reclaimable_pages(zone) * 6;
+	unsigned long nr, wmark, free, isolated, lru;
+
+	/*
+	 * If scanned, zone->pages_scanned is incremented and this can
+ 	 * trigger OOM.
+ 	 */
+	if (sc->nr_scanned)
+		return false;
+
+	free = zone_page_state(zone, NR_FREE_PAGES);
+	isolated = zone_page_state(zone, NR_ISOLATED_FILE);
+	if (nr_swap_pages)
+		isolated += zone_page_state(zone, NR_ISOLATED_ANON);
+
+	/* If we cannot scan, don't count LRU pages. */
+	if (!zone->all_unreclaimable) {
+		lru = zone_page_state(zone, NR_ACTIVE_FILE);
+		lru += zone_page_state(zone, NR_INACTIVE_FILE);
+		if (nr_swap_pages) {
+			lru += zone_page_state(zone, NR_ACTIVE_ANON);
+			lru += zone_page_state(zone, NR_INACTIVE_ANON);
+		}
+	} else
+		lru = 0;
+	nr = free + isolated + lru;
+	wmark = min_wmark_pages(zone);
+	wmark += zone->lowmem_reserve[gfp_zone(sc->gfp_mask)];
+	wmark += 1 << sc->order;
+	printk("thread %d/%ld all %d scanned %ld pages %ld/%ld/%ld/%ld/%ld/%ld\n",
+		current->pid, sc->nr_scanned, zone->all_unreclaimable,
+		zone->pages_scanned,
+		nr,free,isolated,lru,
+		zone_reclaimable_pages(zone), wmark);
+	/*
+	 * In some case (especially noswap), almost all page cache are paged out
+	 * and we'll see the amount of reclaimable+free pages is smaller than
+	 * zone->min. In this case, we cannot expect any recovery other
+	 * than OOM-KILL. We can't reclaim memory enough for usual tasks.
+	 */
+
+	return nr <= wmark;
+}
+
+static bool zone_reclaimable(struct zone *zone, struct scan_control *sc)
+{
+	/* zone_reclaimable_pages() can return 0, we need <= */
+	return zone->pages_scanned <= zone_reclaimable_pages(zone) * 6;
 }
 
 /*
@@ -2006,11 +2052,15 @@ static bool all_unreclaimable(struct zon
 			continue;
 		if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 			continue;
-		if (zone_reclaimable(zone)) {
+		if (zone_seems_empty(zone, sc))
+			continue;
+		if (zone_reclaimable(zone, sc)) {
 			all_unreclaimable = false;
 			break;
 		}
 	}
+	if (all_unreclaimable)
+		printk("all_unreclaimable() returns TRUE\n");
 
 	return all_unreclaimable;
 }
@@ -2456,7 +2506,7 @@ loop_again:
 			if (zone->all_unreclaimable)
 				continue;
 			if (!compaction && nr_slab == 0 &&
-			    !zone_reclaimable(zone))
+			    !zone_reclaimable(zone, &sc))
 				zone->all_unreclaimable = 1;
 			/*
 			 * If we've done a decent amount of scanning and

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ