[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <576FD63C.6020805@redhat.com>
Date: Sun, 26 Jun 2016 14:18:52 +0100
From: Steven Whitehouse <swhiteho@...hat.com>
To: Bob Peterson <rpeterso@...hat.com>, cluster-devel@...hat.com,
linux-fsdevel@...r.kernel.org, Dave Chinner <dchinner@...hat.com>
Cc: linux-kernel@...r.kernel.org, Al Viro <viro@...iv.linux.org.uk>
Subject: Re: [Cluster-devel] [PATCH 2/2] GFS2: Add a gfs2-specific
prune_icache_sb
Hi,
I think the idea looks good. A couple of comments below though...
On 24/06/16 20:50, Bob Peterson wrote:
> This patch adds a new prune_icache_sb function for the VFS slab
> shrinker to call. Trying to directly free the inodes from memory
> might deadlock because it evicts inodes, which calls into DLM
> to acquire the glock. The DLM, in turn, may block on a pending
> fence operation, which may already be blocked on memory allocation
> that caused the slab shrinker to be called in the first place.
>
> Signed-off-by: Bob Peterson <rpeterso@...hat.com>
> ---
> fs/gfs2/incore.h | 2 ++
> fs/gfs2/ops_fstype.c | 1 +
> fs/gfs2/quota.c | 25 +++++++++++++++++++++++++
> fs/gfs2/super.c | 13 +++++++++++++
> 4 files changed, 41 insertions(+)
>
> diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
> index a6a3389..a367459 100644
> --- a/fs/gfs2/incore.h
> +++ b/fs/gfs2/incore.h
> @@ -757,6 +757,8 @@ struct gfs2_sbd {
>
> struct task_struct *sd_logd_process;
> struct task_struct *sd_quotad_process;
> + int sd_iprune; /* inodes to prune */
> + spinlock_t sd_shrinkspin;
>
> /* Quota stuff */
>
> diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
> index 4546360..65a69be 100644
> --- a/fs/gfs2/ops_fstype.c
> +++ b/fs/gfs2/ops_fstype.c
> @@ -95,6 +95,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
> spin_lock_init(&sdp->sd_jindex_spin);
> mutex_init(&sdp->sd_jindex_mutex);
> init_completion(&sdp->sd_journal_ready);
> + spin_lock_init(&sdp->sd_shrinkspin);
>
> INIT_LIST_HEAD(&sdp->sd_quota_list);
> mutex_init(&sdp->sd_quota_mutex);
> diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
> index ce7d69a..5810a2c 100644
> --- a/fs/gfs2/quota.c
> +++ b/fs/gfs2/quota.c
> @@ -1528,14 +1528,39 @@ void gfs2_wake_up_statfs(struct gfs2_sbd *sdp) {
> int gfs2_quotad(void *data)
> {
> struct gfs2_sbd *sdp = data;
> + struct super_block *sb = sdp->sd_vfs;
> struct gfs2_tune *tune = &sdp->sd_tune;
> unsigned long statfs_timeo = 0;
> unsigned long quotad_timeo = 0;
> unsigned long t = 0;
> DEFINE_WAIT(wait);
> int empty;
> + int rc;
> + struct shrink_control sc = {.gfp_mask = GFP_KERNEL, };
>
> while (!kthread_should_stop()) {
> + /* TODO: Deal with shrinking of dcache */
> + /* Prune any inode cache intended by the shrinker. */
> + spin_lock(&sdp->sd_shrinkspin);
> + if (sdp->sd_iprune > 0) {
> + sc.nr_to_scan = sdp->sd_iprune;
> + if (sc.nr_to_scan > 1024)
> + sc.nr_to_scan = 1024;
> + sdp->sd_iprune -= sc.nr_to_scan;
> + spin_unlock(&sdp->sd_shrinkspin);
> + rc = prune_icache_sb(sb, &sc);
> + if (rc < 0) {
> + spin_lock(&sdp->sd_shrinkspin);
> + sdp->sd_iprune = 0;
> + spin_unlock(&sdp->sd_shrinkspin);
> + }
> + if (sdp->sd_iprune) {
> + cond_resched();
> + continue;
> + }
> + } else {
> + spin_unlock(&sdp->sd_shrinkspin);
> + }
>
> /* Update the master statfs file */
> if (sdp->sd_statfs_force_sync) {
> diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
> index 9b2ff353..75e8a85 100644
> --- a/fs/gfs2/super.c
> +++ b/fs/gfs2/super.c
> @@ -1667,6 +1667,18 @@ static void gfs2_destroy_inode(struct inode *inode)
> call_rcu(&inode->i_rcu, gfs2_i_callback);
> }
>
> +static long gfs2_prune_icache_sb(struct super_block *sb,
> + struct shrink_control *sc)
> +{
> + struct gfs2_sbd *sdp;
> +
> + sdp = sb->s_fs_info;
> + spin_lock(&sdp->sd_shrinkspin);
> + sdp->sd_iprune = sc->nr_to_scan + 1;
> + spin_unlock(&sdp->sd_shrinkspin);
> + return 0;
> +}
This doesn't wake up the thread that will do the reclaim, so there
may be a significant delay between the request to shrink and the actual
shrink. Also, using quotad is not a good plan, since it might itself
block waiting for memory. This should be done by a dedicated thread to
avoid any deadlock possibility here.
There also appears to be a limit of 1024 inodes to scan per run of quotad,
which means it would take a very long time to push out any significant
number, and it does seem a bit arbitrary - was there a reason for
selecting that number? It would probably be better to simply yield every
now and then if there are a lot of items to process.
Steve.
> +
> const struct super_operations gfs2_super_ops = {
> .alloc_inode = gfs2_alloc_inode,
> .destroy_inode = gfs2_destroy_inode,
> @@ -1681,5 +1693,6 @@ const struct super_operations gfs2_super_ops = {
> .remount_fs = gfs2_remount_fs,
> .drop_inode = gfs2_drop_inode,
> .show_options = gfs2_show_options,
> + .prune_icache_sb = gfs2_prune_icache_sb,
> };
>
Powered by blists - more mailing lists