Message-Id: <1390600984-13925-3-git-send-email-hannes@cmpxchg.org>
Date:	Fri, 24 Jan 2014 17:03:04 -0500
From:	Johannes Weiner <hannes@...xchg.org>
To:	Andrew Morton <akpm@...ux-foundation.org>
Cc:	Tejun Heo <tj@...nel.org>, Rik van Riel <riel@...hat.com>,
	Mel Gorman <mgorman@...e.de>, linux-mm@...ck.org,
	linux-fsdevel@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: [patch 2/2] mm: page-writeback: do not count anon pages as dirtyable memory

The VM is currently heavily tuned to avoid swapping.  Whether that is
good or bad is a separate discussion, but as long as the VM won't swap
to make room for dirty cache, we cannot consider anonymous pages when
calculating the amount of dirtyable memory, the baseline to which
dirty_background_ratio and dirty_ratio are applied.
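
(For context, a minimal back-of-the-envelope model of the mechanics,
written as ordinary userspace C with illustrative names and values;
the real logic lives in mm/page-writeback.c:)

#include <stdio.h>

/*
 * Both knobs are percentages of the dirtyable baseline:
 * dirty_background_ratio starts background writeback,
 * dirty_ratio throttles the dirtying tasks themselves.
 */
static unsigned long thresh(unsigned long dirtyable, unsigned int ratio)
{
	return dirtyable * ratio / 100;
}

int main(void)
{
	unsigned long dirtyable = 4000000;	/* pages, ~16G at 4K */

	printf("background: %lu pages\n", thresh(dirtyable, 10));
	printf("throttle:   %lu pages\n", thresh(dirtyable, 20));
	return 0;
}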

A simple workload that occupies a significant share (40+%, depending
on memory layout, storage speeds etc.) of memory with anon/tmpfs pages
and uses the remainder for a streaming writer demonstrates this
problem.  In that case, the actual cache pages are a small fraction of
what is considered dirtyable overall, which results in a relatively
large portion of the cache pages being dirtied.  As kswapd starts
rotating these, random tasks enter direct reclaim and stall on IO.
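
(To make that mismatch concrete, a hedged numeric sketch with made-up
figures, again as plain userspace C:)

#include <stdio.h>

int main(void)
{
	/* Hypothetical ~16G machine at 4K pages; all figures are
	 * invented purely to illustrate the effect described above. */
	unsigned long anon = 1600000;	/* ~40% of memory: anon/tmpfs */
	unsigned long file =  800000;	/* remaining page cache       */
	unsigned long free = 1600000;	/* free pages                 */
	unsigned int dirty_ratio = 20;

	/* before: anon counted as dirtyable whenever swap is present */
	unsigned long old_thresh = (free + file + anon) * dirty_ratio / 100;
	/* after: only free pages and file pages are dirtyable        */
	unsigned long new_thresh = (free + file) * dirty_ratio / 100;

	printf("old threshold: %lu pages (%lu%% of the file LRU)\n",
	       old_thresh, old_thresh * 100 / file);
	printf("new threshold: %lu pages (%lu%% of the file LRU)\n",
	       new_thresh, new_thresh * 100 / file);
	return 0;
}

With these figures the old baseline lets the entire file LRU go dirty
before the throttle engages, while the new one caps dirty pages at a
fraction of the cache.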

Only consider free pages and file pages dirtyable.

Signed-off-by: Johannes Weiner <hannes@...xchg.org>
---
 include/linux/vmstat.h |  2 --
 mm/internal.h          |  1 -
 mm/page-writeback.c    |  6 ++++--
 mm/vmscan.c            | 23 +----------------------
 4 files changed, 5 insertions(+), 27 deletions(-)

diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index e4b948080d20..a67b38415768 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -142,8 +142,6 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone,
 	return x;
 }
 
-extern unsigned long global_reclaimable_pages(void);
-
 #ifdef CONFIG_NUMA
 /*
  * Determine the per node value of a stat item. This function
diff --git a/mm/internal.h b/mm/internal.h
index 684f7aa9692a..8b6cfd63b5a5 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -85,7 +85,6 @@ extern unsigned long highest_memmap_pfn;
  */
 extern int isolate_lru_page(struct page *page);
 extern void putback_lru_page(struct page *page);
-extern unsigned long zone_reclaimable_pages(struct zone *zone);
 extern bool zone_reclaimable(struct zone *zone);
 
 /*
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 79cf52b058a7..29e129478644 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -205,7 +205,8 @@ static unsigned long zone_dirtyable_memory(struct zone *zone)
 	nr_pages = zone_page_state(zone, NR_FREE_PAGES);
 	nr_pages -= min(nr_pages, zone->dirty_balance_reserve);
 
-	nr_pages += zone_reclaimable_pages(zone);
+	nr_pages += zone_page_state(zone, NR_INACTIVE_FILE);
+	nr_pages += zone_page_state(zone, NR_ACTIVE_FILE);
 
 	return nr_pages;
 }
@@ -259,7 +260,8 @@ static unsigned long global_dirtyable_memory(void)
 	x = global_page_state(NR_FREE_PAGES);
 	x -= min(x, dirty_balance_reserve);
 
-	x += global_reclaimable_pages();
+	x += global_page_state(NR_INACTIVE_FILE);
+	x += global_page_state(NR_ACTIVE_FILE);
 
 	if (!vm_highmem_is_dirtyable)
 		x -= highmem_dirtyable_memory(x);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index eea668d9cff6..05e6095159dc 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -147,7 +147,7 @@ static bool global_reclaim(struct scan_control *sc)
 }
 #endif
 
-unsigned long zone_reclaimable_pages(struct zone *zone)
+static unsigned long zone_reclaimable_pages(struct zone *zone)
 {
 	int nr;
 
@@ -3297,27 +3297,6 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
 	wake_up_interruptible(&pgdat->kswapd_wait);
 }
 
-/*
- * The reclaimable count would be mostly accurate.
- * The less reclaimable pages may be
- * - mlocked pages, which will be moved to unevictable list when encountered
- * - mapped pages, which may require several travels to be reclaimed
- * - dirty pages, which is not "instantly" reclaimable
- */
-unsigned long global_reclaimable_pages(void)
-{
-	int nr;
-
-	nr = global_page_state(NR_ACTIVE_FILE) +
-	     global_page_state(NR_INACTIVE_FILE);
-
-	if (get_nr_swap_pages() > 0)
-		nr += global_page_state(NR_ACTIVE_ANON) +
-		      global_page_state(NR_INACTIVE_ANON);
-
-	return nr;
-}
-
 #ifdef CONFIG_HIBERNATION
 /*
  * Try to free `nr_to_reclaim' of memory, system-wide, and return the number of
-- 
1.8.4.2
