lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20070319122740.286f602e.randy.dunlap@oracle.com>
Date:	Mon, 19 Mar 2007 12:27:40 -0700
From:	Randy Dunlap <randy.dunlap@...cle.com>
To:	"H. Peter Anvin" <hpa@...or.com>
Cc:	Andrew Morton <akpm@...l.org>, "J.H." <warthog9@...nel.org>,
	kernel list <linux-kernel@...r.kernel.org>
Subject: [PATCH] sysctl: vfs_cache_divisor

On Sat, 06 Jan 2007 12:18:39 -0800 H. Peter Anvin wrote:

> Andrew Morton wrote:
> >>>
> >>> The most fundamental problem seems to be that I can't tell current Linux 
> >>> kernels that the dcache/icache is precious, and that it's way too eager 
> >>> to dump dcache and icache in favour of data blocks.  If I could do that, 
> >>> this problem would be much, much smaller.
> > 
> > Usually people complain about the exact opposite of this.
> 
> Yeah, but we constantly have all-filesystem sweeps, and being able to 
> retain those in memory would be a key to performance, *especially* from 
> the upload latency standpoint.
> 
> >> Isn't setting the vm.vfs_cache_pressure sysctl below 100 supposed to do
> >> this?
> 
> Just tweaked it (setting it to 1).  There really should be another 
> sysctl to set the denominator instead of hardcoding it at 100, since the 
> granularity of this sysctl at the very low end is really much too coarse.
> 
> I missed this sysctl since the name isn't really all that obvious.

Peter,

Were there any patches written after this?  If so, I missed them.
If not, does this patch help any?
---

From: Randy Dunlap <randy.dunlap@...cle.com>

Add sysctl_vfs_cache_divisor (default value 100), which is used as the
divisor for sysctl_vfs_cache_pressure.  This allows a system admin to
make finer-grained pressure settings.

Signed-off-by: Randy Dunlap <randy.dunlap@...cle.com>
---
 Documentation/filesystems/proc.txt |    7 +++++++
 Documentation/sysctl/vm.txt        |    4 ++--
 fs/dcache.c                        |    6 +++++-
 fs/dquot.c                         |    4 +++-
 fs/inode.c                         |    3 ++-
 fs/mbcache.c                       |    3 ++-
 fs/nfs/dir.c                       |    4 +++-
 include/linux/dcache.h             |    1 +
 include/linux/sysctl.h             |    1 +
 kernel/sysctl.c                    |   10 ++++++++++
 10 files changed, 36 insertions(+), 7 deletions(-)

--- linux-2621-rc4.orig/fs/dcache.c
+++ linux-2621-rc4/fs/dcache.c
@@ -17,6 +17,7 @@
 #include <linux/syscalls.h>
 #include <linux/string.h>
 #include <linux/mm.h>
+#include <linux/dcache.h>
 #include <linux/fs.h>
 #include <linux/fsnotify.h>
 #include <linux/slab.h>
@@ -37,6 +38,8 @@
 
 int sysctl_vfs_cache_pressure __read_mostly = 100;
 EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
+int sysctl_vfs_cache_divisor __read_mostly = 100;
+EXPORT_SYMBOL_GPL(sysctl_vfs_cache_divisor);
 
  __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock);
 static __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
@@ -851,7 +854,8 @@ static int shrink_dcache_memory(int nr, 
 			return -1;
 		prune_dcache(nr, NULL);
 	}
-	return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
+	return (dentry_stat.nr_unused / sysctl_vfs_cache_divisor)
+		* sysctl_vfs_cache_pressure;
 }
 
 /**
--- linux-2621-rc4.orig/fs/dquot.c
+++ linux-2621-rc4/fs/dquot.c
@@ -57,6 +57,7 @@
 
 #include <linux/errno.h>
 #include <linux/kernel.h>
+#include <linux/dcache.h>
 #include <linux/fs.h>
 #include <linux/mount.h>
 #include <linux/mm.h>
@@ -536,7 +537,8 @@ static int shrink_dqcache_memory(int nr,
 		prune_dqcache(nr);
 		spin_unlock(&dq_list_lock);
 	}
-	return (dqstats.free_dquots / 100) * sysctl_vfs_cache_pressure;
+	return (dqstats.free_dquots / sysctl_vfs_cache_divisor)
+		* sysctl_vfs_cache_pressure;
 }
 
 /*
--- linux-2621-rc4.orig/fs/inode.c
+++ linux-2621-rc4/fs/inode.c
@@ -461,7 +461,8 @@ static int shrink_icache_memory(int nr, 
 			return -1;
 		prune_icache(nr);
 	}
-	return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
+	return (inodes_stat.nr_unused / sysctl_vfs_cache_divisor)
+		* sysctl_vfs_cache_pressure;
 }
 
 static void __wait_on_freeing_inode(struct inode *inode);
--- linux-2621-rc4.orig/fs/mbcache.c
+++ linux-2621-rc4/fs/mbcache.c
@@ -30,6 +30,7 @@
 #include <linux/module.h>
 
 #include <linux/hash.h>
+#include <linux/dcache.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
@@ -226,7 +227,7 @@ mb_cache_shrink_fn(int nr_to_scan, gfp_t
 						   e_lru_list), gfp_mask);
 	}
 out:
-	return (count / 100) * sysctl_vfs_cache_pressure;
+	return (count / sysctl_vfs_cache_divisor) * sysctl_vfs_cache_pressure;
 }
 
 
--- linux-2621-rc4.orig/include/linux/dcache.h
+++ linux-2621-rc4/include/linux/dcache.h
@@ -355,6 +355,7 @@ extern struct vfsmount *__lookup_mnt(str
 extern struct dentry *lookup_create(struct nameidata *nd, int is_dir);
 
 extern int sysctl_vfs_cache_pressure;
+extern int sysctl_vfs_cache_divisor;
 
 #endif /* __KERNEL__ */
 
--- linux-2621-rc4.orig/include/linux/sysctl.h
+++ linux-2621-rc4/include/linux/sysctl.h
@@ -207,6 +207,7 @@ enum
 	VM_PANIC_ON_OOM=33,	/* panic at out-of-memory */
 	VM_VDSO_ENABLED=34,	/* map VDSO into new processes? */
 	VM_MIN_SLAB=35,		 /* Percent pages ignored by zone reclaim */
+	VM_VFS_CACHE_DIVISOR=36, /* dcache/icache reclaim pressure divisor, def. 100 */
 
 	/* s390 vm cmm sysctls */
 	VM_CMM_PAGES=1111,
--- linux-2621-rc4.orig/fs/nfs/dir.c
+++ linux-2621-rc4/fs/nfs/dir.c
@@ -18,6 +18,7 @@
  */
 
 #include <linux/time.h>
+#include <linux/dcache.h>
 #include <linux/errno.h>
 #include <linux/stat.h>
 #include <linux/fcntl.h>
@@ -1773,7 +1774,8 @@ remove_lru_entry:
 		list_del(&cache->lru);
 		nfs_access_free_entry(cache);
 	}
-	return (atomic_long_read(&nfs_access_nr_entries) / 100) * sysctl_vfs_cache_pressure;
+	return (atomic_long_read(&nfs_access_nr_entries) /
+		sysctl_vfs_cache_divisor) * sysctl_vfs_cache_pressure;
 }
 
 static void __nfs_access_zap_cache(struct inode *inode)
--- linux-2621-rc4.orig/kernel/sysctl.c
+++ linux-2621-rc4/kernel/sysctl.c
@@ -800,6 +800,16 @@ static ctl_table vm_table[] = {
 		.strategy	= &sysctl_intvec,
 		.extra1		= &zero,
 	},
+	{
+		.ctl_name	= VM_VFS_CACHE_DIVISOR,
+		.procname	= "vfs_cache_divisor",
+		.data		= &sysctl_vfs_cache_divisor,
+		.maxlen		= sizeof(sysctl_vfs_cache_divisor),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+	},
 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
 	{
 		.ctl_name	= VM_LEGACY_VA_LAYOUT,
--- linux-2621-rc4.orig/Documentation/filesystems/proc.txt
+++ linux-2621-rc4/Documentation/filesystems/proc.txt
@@ -1156,6 +1156,13 @@ swapcache reclaim.  Decreasing vfs_cache
 to retain dentry and inode caches.  Increasing vfs_cache_pressure beyond 100
 causes the kernel to prefer to reclaim dentries and inodes.
 
+vfs_cache_divisor
+-----------------
+The default vfs_cache_divisor value is 100, so that vfs_cache_pressure acts
+like a percentage.  On extremely large systems where an effective
+vfs_cache_pressure of less than 1 percent is desirable, setting a larger
+vfs_cache_divisor allows such finer-grained pressure settings.
+
 dirty_background_ratio
 ----------------------
 
--- linux-2621-rc4.orig/Documentation/sysctl/vm.txt
+++ linux-2621-rc4/Documentation/sysctl/vm.txt
@@ -35,8 +35,8 @@ Currently, these files are in /proc/sys/
 ==============================================================
 
 dirty_ratio, dirty_background_ratio, dirty_expire_centisecs,
-dirty_writeback_centisecs, vfs_cache_pressure, laptop_mode,
-block_dump, swap_token_timeout, drop-caches:
+dirty_writeback_centisecs, vfs_cache_pressure, vfs_cache_divisor,
+laptop_mode, block_dump, swap_token_timeout, drop-caches:
 
 See Documentation/filesystems/proc.txt
 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ