[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1175579225.12230.504.camel@localhost.localdomain>
Date: Tue, 03 Apr 2007 15:47:05 +1000
From: Rusty Russell <rusty@...tcorp.com.au>
To: Andrew Morton <akpm@...ux-foundation.org>
Cc: lkml - Kernel Mailing List <linux-kernel@...r.kernel.org>,
linux-mm@...ck.org, xfs-masters@....sgi.com,
reiserfs-dev@...esys.com
Subject: Re: [PATCH] Cleanup and kernelify shrinker registration (rc5-mm2)
On Mon, 2007-04-02 at 21:57 -0700, Andrew Morton wrote:
> On Tue, 03 Apr 2007 14:45:02 +1000 Rusty Russell <rusty@...tcorp.com.au> wrote:
> > Does that mean the to function correctly every user needs some internal
> > cursor so it doesn't end up scanning the first N entries over and over?
> >
>
> If it wants to be well-behaved, and to behave as the VM expects, yes.
>
> There's an expectation that the callback will be performing some scan-based
> aging operation and of course to do LRU (or whatever) aging, the callback
> will need to remember where it was up to last time it was called.
>
> But it's just a guideline - callbacks could do something different but
> in-the-spirit, I guess.
Hmm, actually the callers I looked at (nfs, dcache, mbcache) seem to use
an LRU list and just walk the first "nr_to_scan" entries, and nr_to_scan
is always 128.
Someone who keeps a cursor will be disadvantaged: the other shrinkers
could well get less effective on repeated calls, but we won't. Someone
who picks entries at random might have the same issue.
I think it is clearest to describe how we expect everyone to work, and
let whoever is getting creative worry about it themselves.
How's this:
==
Cleanup and kernelify shrinker registration.
I can never remember what the function to register to receive VM pressure
is called. I have to trace down from __alloc_pages() to find it.
It's called "set_shrinker()", and it needs Your Help.
New version:
1) Don't hide struct shrinker. It contains no magic.
2) Don't allocate "struct shrinker". It's not helpful.
3) Call them "register_shrinker" and "unregister_shrinker".
4) Call the function "shrink" not "shrinker".
5) Reduce the 17 lines of waffly comments to 13, but document it properly.
Comments:
1) The comment in reiserfs4 makes me a little queasy.
2) The wrapper code in xfs might no longer be needed.
3) The placing in the x86-64 "hot function list" for seems a little
unlikely. Clearly, Andi was testing if anyone was paying attention.
Signed-off-by: Rusty Russell <rusty@...tcorp.com.au>
diff -r 0b43dab739aa arch/x86_64/kernel/functionlist
--- a/arch/x86_64/kernel/functionlist Tue Apr 03 15:37:49 2007 +1000
+++ b/arch/x86_64/kernel/functionlist Tue Apr 03 15:37:53 2007 +1000
@@ -1118,7 +1118,6 @@
*(.text.simple_strtoll)
*(.text.set_termios)
*(.text.set_task_comm)
-*(.text.set_shrinker)
*(.text.set_normalized_timespec)
*(.text.set_brk)
*(.text.serial_in)
diff -r 0b43dab739aa fs/dcache.c
--- a/fs/dcache.c Tue Apr 03 15:37:49 2007 +1000
+++ b/fs/dcache.c Tue Apr 03 15:37:53 2007 +1000
@@ -884,6 +884,11 @@ static int shrink_dcache_memory(int nr,
}
return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
}
+
+static struct shrinker dcache_shrinker = {
+ .shrink = shrink_dcache_memory,
+ .seeks = DEFAULT_SEEKS,
+};
/**
* d_alloc - allocate a dcache entry
@@ -2144,8 +2149,8 @@ static void __init dcache_init(unsigned
(SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
SLAB_MEM_SPREAD),
NULL, NULL);
-
- set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory);
+
+ register_shrinker(&dcache_shrinker);
/* Hash may have been set up in dcache_init_early */
if (!hashdist)
diff -r 0b43dab739aa fs/dquot.c
--- a/fs/dquot.c Tue Apr 03 15:37:49 2007 +1000
+++ b/fs/dquot.c Tue Apr 03 15:37:53 2007 +1000
@@ -538,6 +538,11 @@ static int shrink_dqcache_memory(int nr,
}
return (dqstats.free_dquots / 100) * sysctl_vfs_cache_pressure;
}
+
+static struct shrinker dqcache_shrinker = {
+ .shrink = shrink_dqcache_memory,
+ .seeks = DEFAULT_SEEKS,
+};
/*
* Put reference to dquot
@@ -1871,7 +1876,7 @@ static int __init dquot_init(void)
printk("Dquot-cache hash table entries: %ld (order %ld, %ld bytes)\n",
nr_hash, order, (PAGE_SIZE << order));
- set_shrinker(DEFAULT_SEEKS, shrink_dqcache_memory);
+ register_shrinker(&dqcache_shrinker);
return 0;
}
diff -r 0b43dab739aa fs/inode.c
--- a/fs/inode.c Tue Apr 03 15:37:49 2007 +1000
+++ b/fs/inode.c Tue Apr 03 15:37:53 2007 +1000
@@ -474,6 +474,11 @@ static int shrink_icache_memory(int nr,
return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
}
+static struct shrinker icache_shrinker = {
+ .shrink = shrink_icache_memory,
+ .seeks = DEFAULT_SEEKS,
+};
+
static void __wait_on_freeing_inode(struct inode *inode);
/*
* Called with the inode lock held.
@@ -1393,7 +1398,7 @@ void __init inode_init(unsigned long mem
SLAB_MEM_SPREAD),
init_once,
NULL);
- set_shrinker(DEFAULT_SEEKS, shrink_icache_memory);
+ register_shrinker(&icache_shrinker);
/* Hash may have been set up in inode_init_early */
if (!hashdist)
diff -r 0b43dab739aa fs/mbcache.c
--- a/fs/mbcache.c Tue Apr 03 15:37:49 2007 +1000
+++ b/fs/mbcache.c Tue Apr 03 15:37:53 2007 +1000
@@ -100,7 +100,6 @@ static LIST_HEAD(mb_cache_list);
static LIST_HEAD(mb_cache_list);
static LIST_HEAD(mb_cache_lru_list);
static DEFINE_SPINLOCK(mb_cache_spinlock);
-static struct shrinker *mb_shrinker;
static inline int
mb_cache_indexes(struct mb_cache *cache)
@@ -118,6 +117,10 @@ mb_cache_indexes(struct mb_cache *cache)
static int mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask);
+static struct shrinker mb_cache_shrinker = {
+ .shrink = mb_cache_shrink_fn,
+ .seeks = DEFAULT_SEEKS,
+};
static inline int
__mb_cache_entry_is_hashed(struct mb_cache_entry *ce)
@@ -662,13 +665,13 @@ mb_cache_entry_find_next(struct mb_cache
static int __init init_mbcache(void)
{
- mb_shrinker = set_shrinker(DEFAULT_SEEKS, mb_cache_shrink_fn);
+ register_shrinker(&mb_cache_shrinker);
return 0;
}
static void __exit exit_mbcache(void)
{
- remove_shrinker(mb_shrinker);
+ unregister_shrinker(&mb_cache_shrinker);
}
module_init(init_mbcache)
diff -r 0b43dab739aa fs/nfs/super.c
--- a/fs/nfs/super.c Tue Apr 03 15:37:49 2007 +1000
+++ b/fs/nfs/super.c Tue Apr 03 15:37:53 2007 +1000
@@ -138,7 +138,10 @@ static const struct super_operations nfs
};
#endif
-static struct shrinker *acl_shrinker;
+static struct shrinker acl_shrinker = {
+ .shrink = nfs_access_cache_shrinker,
+ .seeks = DEFAULT_SEEKS,
+};
/*
* Register the NFS filesystems
@@ -159,7 +162,7 @@ int __init register_nfs_fs(void)
if (ret < 0)
goto error_2;
#endif
- acl_shrinker = set_shrinker(DEFAULT_SEEKS, nfs_access_cache_shrinker);
+ register_shrinker(&acl_shrinker);
return 0;
#ifdef CONFIG_NFS_V4
@@ -177,8 +180,7 @@ error_0:
*/
void __exit unregister_nfs_fs(void)
{
- if (acl_shrinker != NULL)
- remove_shrinker(acl_shrinker);
+ unregister_shrinker(&acl_shrinker);
#ifdef CONFIG_NFS_V4
unregister_filesystem(&nfs4_fs_type);
nfs_unregister_sysctl();
diff -r 0b43dab739aa fs/reiser4/fsdata.c
--- a/fs/reiser4/fsdata.c Tue Apr 03 15:37:49 2007 +1000
+++ b/fs/reiser4/fsdata.c Tue Apr 03 15:37:53 2007 +1000
@@ -7,7 +7,6 @@
/* cache or dir_cursors */
static struct kmem_cache *d_cursor_cache;
-static struct shrinker *d_cursor_shrinker;
/* list of unused cursors */
static LIST_HEAD(cursor_cache);
@@ -53,6 +52,18 @@ static int d_cursor_shrink(int nr, gfp_t
return d_cursor_unused;
}
+/*
+ * actually, d_cursors are "priceless", because there is no way to
+ * recover information stored in them. On the other hand, we don't
+ * want to consume all kernel memory by them. As a compromise, just
+ * assign higher "seeks" value to d_cursor cache, so that it will be
+ * shrunk only if system is really tight on memory.
+ */
+static struct shrinker d_cursor_shrinker = {
+ .shrink = d_cursor_shrink,
+ .seeks = DEFAULT_SEEKS << 3,
+};
+
/**
* reiser4_init_d_cursor - create d_cursor cache
*
@@ -66,20 +77,7 @@ int reiser4_init_d_cursor(void)
if (d_cursor_cache == NULL)
return RETERR(-ENOMEM);
- /*
- * actually, d_cursors are "priceless", because there is no way to
- * recover information stored in them. On the other hand, we don't
- * want to consume all kernel memory by them. As a compromise, just
- * assign higher "seeks" value to d_cursor cache, so that it will be
- * shrunk only if system is really tight on memory.
- */
- d_cursor_shrinker = set_shrinker(DEFAULT_SEEKS << 3,
- d_cursor_shrink);
- if (d_cursor_shrinker == NULL) {
- destroy_reiser4_cache(&d_cursor_cache);
- d_cursor_cache = NULL;
- return RETERR(-ENOMEM);
- }
+ register_shrinker(&d_cursor_shrinker);
return 0;
}
@@ -90,9 +88,7 @@ int reiser4_init_d_cursor(void)
*/
void reiser4_done_d_cursor(void)
{
- BUG_ON(d_cursor_shrinker == NULL);
- remove_shrinker(d_cursor_shrinker);
- d_cursor_shrinker = NULL;
+ unregister_shrinker(&d_cursor_shrinker);
destroy_reiser4_cache(&d_cursor_cache);
}
diff -r 0b43dab739aa fs/xfs/linux-2.6/kmem.h
--- a/fs/xfs/linux-2.6/kmem.h Tue Apr 03 15:37:49 2007 +1000
+++ b/fs/xfs/linux-2.6/kmem.h Tue Apr 03 15:37:53 2007 +1000
@@ -110,13 +110,23 @@ static inline kmem_shaker_t
static inline kmem_shaker_t
kmem_shake_register(kmem_shake_func_t sfunc)
{
- return set_shrinker(DEFAULT_SEEKS, sfunc);
+ /* FIXME: Perhaps caller should setup & hand in the shrinker? */
+ struct shrinker *shrinker = kmalloc(sizeof *shrinker, GFP_ATOMIC);
+ if (shrinker) {
+ shrinker->shrink = sfunc;
+ shrinker->seeks = DEFAULT_SEEKS;
+ register_shrinker(shrinker);
+ }
+ return shrinker;
}
static inline void
kmem_shake_deregister(kmem_shaker_t shrinker)
{
- remove_shrinker(shrinker);
+ if (shrinker) {
+ unregister_shrinker(shrinker);
+ kfree(shrinker);
+ }
}
static inline int
diff -r 0b43dab739aa include/linux/mm.h
--- a/include/linux/mm.h Tue Apr 03 15:37:49 2007 +1000
+++ b/include/linux/mm.h Tue Apr 03 15:42:36 2007 +1000
@@ -813,27 +813,31 @@ extern unsigned long do_mremap(unsigned
unsigned long flags, unsigned long new_addr);
/*
- * Prototype to add a shrinker callback for ageable caches.
- *
- * These functions are passed a count `nr_to_scan' and a gfpmask. They should
- * scan `nr_to_scan' objects, attempting to free them.
- *
- * The callback must return the number of objects which remain in the cache.
- *
- * The callback will be passed nr_to_scan == 0 when the VM is querying the
- * cache size, so a fastpath for that case is appropriate.
- */
-typedef int (*shrinker_t)(int nr_to_scan, gfp_t gfp_mask);
-
-/*
- * Add an aging callback. The int is the number of 'seeks' it takes
- * to recreate one of the objects that these functions age.
- */
-
-#define DEFAULT_SEEKS 2
-struct shrinker;
-extern struct shrinker *set_shrinker(int, shrinker_t);
-extern void remove_shrinker(struct shrinker *shrinker);
+ * A callback you can register to apply pressure to ageable caches.
+ *
+ * 'shrink' is passed a count 'nr_to_scan' and a 'gfpmask'. It should
+ * look through the least-recently-used 'nr_to_scan' entries and
+ * attempt to free them up. It should return the number of objects
+ * which remain in the cache. If it returns -1, it means it cannot do
+ * any scanning at this time (eg. there is a risk of deadlock).
+ *
+ * The 'gfpmask' refers to the allocation we are currently trying to
+ * fulfil.
+ *
+ * Note that 'shrink' will be passed nr_to_scan == 0 when the VM is
+ * querying the cache size, so a fastpath for that case is appropriate.
+ */
+struct shrinker {
+ int (*shrink)(int nr_to_scan, gfp_t gfp_mask);
+ int seeks; /* seeks to recreate an obj */
+
+ /* These are for internal use */
+ struct list_head list;
+ long nr; /* objs pending delete */
+};
+#define DEFAULT_SEEKS 2 /* A good number if you don't know better. */
+extern void register_shrinker(struct shrinker *);
+extern void unregister_shrinker(struct shrinker *);
/*
* Some shared mappigns will want the pages marked read-only
diff -r 0b43dab739aa mm/vmscan.c
--- a/mm/vmscan.c Tue Apr 03 15:37:49 2007 +1000
+++ b/mm/vmscan.c Tue Apr 03 15:37:53 2007 +1000
@@ -72,17 +72,6 @@ struct scan_control {
int order;
};
-/*
- * The list of shrinker callbacks used by to apply pressure to
- * ageable caches.
- */
-struct shrinker {
- shrinker_t shrinker;
- struct list_head list;
- int seeks; /* seeks to recreate an obj */
- long nr; /* objs pending delete */
-};
-
#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
#ifdef ARCH_HAS_PREFETCH
@@ -125,34 +114,25 @@ static DECLARE_RWSEM(shrinker_rwsem);
/*
* Add a shrinker callback to be called from the vm
*/
-struct shrinker *set_shrinker(int seeks, shrinker_t theshrinker)
-{
- struct shrinker *shrinker;
-
- shrinker = kmalloc(sizeof(*shrinker), GFP_KERNEL);
- if (shrinker) {
- shrinker->shrinker = theshrinker;
- shrinker->seeks = seeks;
- shrinker->nr = 0;
- down_write(&shrinker_rwsem);
- list_add_tail(&shrinker->list, &shrinker_list);
- up_write(&shrinker_rwsem);
- }
- return shrinker;
-}
-EXPORT_SYMBOL(set_shrinker);
+void register_shrinker(struct shrinker *shrinker)
+{
+ shrinker->nr = 0;
+ down_write(&shrinker_rwsem);
+ list_add_tail(&shrinker->list, &shrinker_list);
+ up_write(&shrinker_rwsem);
+}
+EXPORT_SYMBOL(register_shrinker);
/*
* Remove one
*/
-void remove_shrinker(struct shrinker *shrinker)
+void unregister_shrinker(struct shrinker *shrinker)
{
down_write(&shrinker_rwsem);
list_del(&shrinker->list);
up_write(&shrinker_rwsem);
- kfree(shrinker);
-}
-EXPORT_SYMBOL(remove_shrinker);
+}
+EXPORT_SYMBOL(unregister_shrinker);
#define SHRINK_BATCH 128
/*
@@ -189,11 +169,11 @@ unsigned long shrink_slab(unsigned long
list_for_each_entry(shrinker, &shrinker_list, list) {
unsigned long long delta;
unsigned long total_scan;
- unsigned long max_pass = (*shrinker->shrinker)(0, gfp_mask);
+ unsigned long max_pass = (*shrinker->shrink)(0, gfp_mask);
if (!shrinker->seeks) {
print_symbol("shrinker %s has zero seeks\n",
- (unsigned long)shrinker->shrinker);
+ (unsigned long)shrinker->shrink);
delta = (4 * scanned) / DEFAULT_SEEKS;
} else {
delta = (4 * scanned) / shrinker->seeks;
@@ -223,8 +203,8 @@ unsigned long shrink_slab(unsigned long
int shrink_ret;
int nr_before;
- nr_before = (*shrinker->shrinker)(0, gfp_mask);
- shrink_ret = (*shrinker->shrinker)(this_scan, gfp_mask);
+ nr_before = (*shrinker->shrink)(0, gfp_mask);
+ shrink_ret = (*shrinker->shrink)(this_scan, gfp_mask);
if (shrink_ret == -1)
break;
if (shrink_ret < nr_before)
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists