linux-kernel - [PATCH] Provide an interface to limit total page cache.

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <afe668f90701150139q26e41720lf06d6ee445a917b0@mail.gmail.com>
Date:	Mon, 15 Jan 2007 17:39:46 +0800
From:	"Roy Huang" <royhuang9@...il.com>
To:	linux-kernel@...r.kernel.org
Cc:	aubreylee@...il.com, nickpiggin@...oo.com.au, torvalds@...l.org
Subject: [PATCH] Provide an interface to limit total page cache.

This patch provides an interface to limit the total page cache via
/proc/sys/vm/pagecache_ratio. The default value is 90 percent. Any
feedback is appreciated.

-Roy

diff -urp a/include/linux/pagemap.h b/include/linux/pagemap.h
--- a/include/linux/pagemap.h	2006-11-30 05:57:37.000000000 +0800
+++ b/include/linux/pagemap.h	2007-01-15 17:03:09.000000000 +0800
@@ -12,6 +12,21 @@
 #include <asm/uaccess.h>
 #include <linux/gfp.h>

+/*
+ * Page cache limit: pagecache_ratio is the tunable percentage and
+ * pagecache_limit the page count derived from it (both defined in
+ * mm/filemap.c).
+ */
+extern int pagecache_ratio;
+extern long pagecache_limit;
+
+/* Forward declaration so the prototype below is valid on its own. */
+struct ctl_table;
+
+/* sysctl handler for /proc/sys/vm/pagecache_ratio. */
+int pagecache_ratio_sysctl_handler(struct ctl_table *, int,
+			struct file *, void __user *, size_t *, loff_t *);
+
 /*
  * Bits in mapping->flags.  The lower __GFP_BITS_SHIFT bits are the page
  * allocation mode flags.
diff -urp a/include/linux/sysctl.h b/include/linux/sysctl.h
--- a/include/linux/sysctl.h	2007-01-15 17:18:46.000000000 +0800
+++ b/include/linux/sysctl.h	2007-01-15 17:03:09.000000000 +0800
@@ -202,6 +202,7 @@ enum
 	VM_PANIC_ON_OOM=33,	/* panic at out-of-memory */
 	VM_VDSO_ENABLED=34,	/* map VDSO into new processes? */
 	VM_MIN_SLAB=35,		 /* Percent pages ignored by zone reclaim */
+	VM_PAGECACHE_RATIO=36,  /* Percent memory is used as page cache */
 };


diff -urp a/kernel/sysctl.c b/kernel/sysctl.c
--- a/kernel/sysctl.c	2007-01-15 17:18:46.000000000 +0800
+++ b/kernel/sysctl.c	2007-01-15 17:03:09.000000000 +0800
@@ -1035,6 +1035,18 @@ static ctl_table vm_table[] = {
 		.extra1		= &zero,
 	},
 #endif
+	{
+		.ctl_name	= VM_PAGECACHE_RATIO,
+		.procname	= "pagecache_ratio",
+		.data		= &pagecache_ratio,
+		.maxlen		= sizeof(pagecache_ratio),
+		.mode		= 0644,
+		.proc_handler	= &pagecache_ratio_sysctl_handler,
+		.strategy	= &sysctl_intvec,
+		/* Bound writes to 0..100 (assumes sysctl.c's one_hundred helper). */
+		.extra1		= &zero,
+		.extra2		= &one_hundred,
+	},
 	{ .ctl_name = 0 }
 };

diff -urp a/mm/filemap.c b/mm/filemap.c
--- a/mm/filemap.c	2007-01-15 17:18:46.000000000 +0800
+++ b/mm/filemap.c	2007-01-15 17:03:09.000000000 +0800
@@ -30,6 +30,7 @@
 #include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/cpuset.h>
+#include <linux/sysctl.h>
 #include "filemap.h"
 #include "internal.h"

@@ -108,6 +109,73 @@ generic_file_direct_IO(int rw, struct ki
  */

 /*
+ * Percentage of the pages reported by nr_free_pagecache_pages() that the
+ * page cache may hold before kswapd is asked to release pages.  Exposed as
+ * /proc/sys/vm/pagecache_ratio.
+ */
+int pagecache_ratio __read_mostly = 90;
+
+/* pagecache_ratio converted to a page count by setup_pagecache_limit(). */
+long pagecache_limit = 0;
+
+/*
+ * Recompute pagecache_limit from the current pagecache_ratio.  Also run
+ * once through module_init() below.  Always returns 0.
+ */
+int setup_pagecache_limit(void)
+{
+	long pages = nr_free_pagecache_pages();
+
+	/*
+	 * Multiply in long so the result does not depend on the (possibly
+	 * narrower) return type of nr_free_pagecache_pages().
+	 */
+	pagecache_limit = pages * pagecache_ratio / 100;
+	return 0;
+}
+
+/*
+ * sysctl handler for pagecache_ratio: proc_dointvec_minmax() performs the
+ * read or write, then pagecache_limit is refreshed.  Returns 0 on success,
+ * otherwise the error code from proc_dointvec_minmax().
+ */
+int pagecache_ratio_sysctl_handler(ctl_table *table, int write,
+	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+{
+	int ret;
+
+	ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+	if (ret)
+		return ret;
+
+	setup_pagecache_limit();
+	return 0;
+}
+
+/*
+ * When NR_FILE_PAGES exceeds pagecache_limit, call wakeup_kswapd() on every
+ * zone slot of every online node.  Always returns 0.
+ */
+static inline int balance_pagecache(void)
+{
+	int nid, j;
+
+	if (global_page_state(NR_FILE_PAGES) <= pagecache_limit)
+		return 0;
+
+	for_each_online_node(nid) {
+		pg_data_t *pgdat = NODE_DATA(nid);
+
+		for (j = 0; j < MAX_NR_ZONES; j++)
+			wakeup_kswapd(pgdat->node_zones + j, 0);
+	}
+
+	return 0;
+}
+
+module_init(setup_pagecache_limit)
+
+/*
  * Remove a page from the page cache and free it. Caller has to make
  * sure the page is locked and that nobody else uses it - or that usage
  * is safe.  The caller must hold a write_lock on the mapping's tree_lock.
@@ -1085,6 +1153,8 @@ out:
 		page_cache_release(cached_page);
 	if (filp)
 		file_accessed(filp);
+
+	balance_pagecache();
 }
 EXPORT_SYMBOL(do_generic_mapping_read);

@@ -2212,6 +2282,8 @@ zero_length_segment:
 		status = filemap_write_and_wait(mapping);

 	pagevec_lru_add(&lru_pvec);
+	balance_pagecache();
+
 	return written ? written : status;
 }
 EXPORT_SYMBOL(generic_file_buffered_write);
diff -urp a/mm/vmscan.c b/mm/vmscan.c
--- a/mm/vmscan.c	2007-01-15 17:18:46.000000000 +0800
+++ b/mm/vmscan.c	2007-01-15 17:03:09.000000000 +0800
@@ -1316,6 +1316,7 @@ static int kswapd(void *p)
 	order = 0;
 	for ( ; ; ) {
 		unsigned long new_order;
+		long over_limit;

 		try_to_freeze();

@@ -1335,6 +1336,9 @@ static int kswapd(void *p)
 		finish_wait(&pgdat->kswapd_wait, &wait);

 		balance_pgdat(pgdat, order);
+		over_limit = global_page_state(NR_FILE_PAGES) - pagecache_limit;
+		if (over_limit > 0)
+			shrink_all_memory(over_limit);
 	}
 	return 0;
 }
@@ -1350,8 +1354,10 @@ void wakeup_kswapd(struct zone *zone, in
 		return;

 	pgdat = zone->zone_pgdat;
-	if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0))
-		return;
+	if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0)) {
+		if (global_page_state(NR_FILE_PAGES) < pagecache_limit)
+			return;
+	}
 	if (pgdat->kswapd_max_order < order)
 		pgdat->kswapd_max_order = order;
 	if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
@@ -1361,7 +1367,6 @@ void wakeup_kswapd(struct zone *zone, in
 	wake_up_interruptible(&pgdat->kswapd_wait);
 }

-#ifdef CONFIG_PM
 /*
  * Helper function for shrink_all_memory().  Tries to reclaim 'nr_pages' pages
  * from LRU lists system-wide, for given pass and priority, and returns the
@@ -1510,7 +1515,6 @@ out:

 	return ret;
 }
-#endif

 /* It's optimal to keep kswapds on the same CPUs as their memory, but
    not required for correctness.  So if the last cpu in a node goes
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/