[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <87tt4jzlia.fsf@igalia.com>
Date: Fri, 13 Jun 2025 16:53:01 +0100
From: Luis Henriques <luis@...lia.com>
To: Miklos Szeredi <miklos@...redi.hu>
Cc: Bernd Schubert <bernd@...ernd.com>, Laura Promberger
<laura.promberger@...n.ch>, Dave Chinner <david@...morbit.com>, Matt
Harvey <mharvey@...ptrading.com>, linux-fsdevel@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH v3] fuse: new workqueue to periodically invalidate
expired dentries
On Tue, May 20 2025, Luis Henriques wrote:
> This patch adds a new module parameter 'inval_wq' which is used to start a
> workqueue to periodically invalidate expired dentries. The value of this
> new parameter is the period, in seconds, of the workqueue. When it is set,
> every new dentry will be added to an rbtree, sorted by the dentry's expiry
> time.
>
> When the workqueue is executed, it will check the dentries in this tree and
> invalidate them if:
>
> - The dentry has timed-out, or if
> - The connection epoch has been incremented.
>
> The workqueue will run for, at most, 5 seconds each time. It will
> reschedule itself if the dentries tree isn't empty.
>
> The workqueue period is set per filesystem with the 'inval_wq' parameter
> value when it is mounted. This value can not be less than 5 seconds. If
> this module parameter is changed later on, the mounted filesystems will
> keep using the old value until they are remounted.
>
> Signed-off-by: Luis Henriques <luis@...lia.com>
> ---
> Hi!
>
> I've been staring at this patch for a while and hopefully it's ready for a
> fresh set of eyes to find the bugs that are hiding from me. I believe I
> addressed all Miklos comments, and decided to drop the RFC from the subject.
>
> As before, this patch depends on my previous patch (already in Miklos tree)
> with subject: "fuse: add more control over cache invalidation behaviour".
>
> Feedback is welcome.
Gentle ping, feedback on the generic approach would be great.
Cheers,
--
Luís
>
> Cheers,
> --
> Luis
>
> Changes since v2:
>
> - Major rework, the dentries tree nodes are now in fuse_dentry and they are
> tied to the actual dentry lifetime
> - Mount option is now a module parameter
> - workqueue now runs for at most 5 seconds before rescheduling
>
> fs/fuse/dir.c | 180 +++++++++++++++++++++++++++++++++++++++++------
> fs/fuse/fuse_i.h | 12 ++++
> fs/fuse/inode.c | 21 ++++++
> 3 files changed, 190 insertions(+), 23 deletions(-)
>
> diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
> index 1fb0b15a6088..257ca2b36b94 100644
> --- a/fs/fuse/dir.c
> +++ b/fs/fuse/dir.c
> @@ -34,33 +34,153 @@ static void fuse_advise_use_readdirplus(struct inode *dir)
> set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
> }
>
> -#if BITS_PER_LONG >= 64
> -static inline void __fuse_dentry_settime(struct dentry *entry, u64 time)
> +struct fuse_dentry {
> + u64 time;
> + struct rcu_head rcu;
> + struct rb_node node;
> + struct dentry *dentry;
> +};
> +
> +static void __fuse_dentry_tree_del_node(struct fuse_conn *fc,
> + struct fuse_dentry *fd)
> {
> - entry->d_fsdata = (void *) time;
> + if (!RB_EMPTY_NODE(&fd->node)) {
> + rb_erase(&fd->node, &fc->dentry_tree);
> + RB_CLEAR_NODE(&fd->node);
> + }
> }
>
> -static inline u64 fuse_dentry_time(const struct dentry *entry)
> +static void fuse_dentry_tree_del_node(struct dentry *dentry)
> {
> - return (u64)entry->d_fsdata;
> + struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
> + struct fuse_dentry *fd = dentry->d_fsdata;
> +
> + if (!fc->inval_wq)
> + return;
> +
> + spin_lock(&fc->dentry_tree_lock);
> + __fuse_dentry_tree_del_node(fc, fd);
> + spin_unlock(&fc->dentry_tree_lock);
> }
>
> -#else
> -union fuse_dentry {
> - u64 time;
> - struct rcu_head rcu;
> -};
> +static void fuse_dentry_tree_add_node(struct dentry *dentry)
> +{
> + struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
> + struct fuse_dentry *fd = dentry->d_fsdata;
> + struct fuse_dentry *cur;
> + struct rb_node **p, *parent = NULL;
> + bool start_work = false;
> +
> + if (!fc->inval_wq)
> + return;
> +
> + spin_lock(&fc->dentry_tree_lock);
> +
> + if (!fc->inval_wq) {
> + spin_unlock(&fc->dentry_tree_lock);
> + return;
> + }
> +
> + start_work = RB_EMPTY_ROOT(&fc->dentry_tree);
> + __fuse_dentry_tree_del_node(fc, fd);
> +
> + p = &fc->dentry_tree.rb_node;
> + while (*p) {
> + parent = *p;
> + cur = rb_entry(*p, struct fuse_dentry, node);
> + if (fd->time > cur->time)
> + p = &(*p)->rb_left;
> + else
> + p = &(*p)->rb_right;
> + }
> + rb_link_node(&fd->node, parent, p);
> + rb_insert_color(&fd->node, &fc->dentry_tree);
> + spin_unlock(&fc->dentry_tree_lock);
> +
> + if (start_work)
> + schedule_delayed_work(&fc->dentry_tree_work,
> + secs_to_jiffies(fc->inval_wq));
> +}
> +
> +void fuse_dentry_tree_prune(struct fuse_conn *fc)
> +{
> + struct rb_node *n;
> +
> + if (!fc->inval_wq)
> + return;
> +
> + fc->inval_wq = 0;
> + cancel_delayed_work_sync(&fc->dentry_tree_work);
> +
> + spin_lock(&fc->dentry_tree_lock);
> + while (!RB_EMPTY_ROOT(&fc->dentry_tree)) {
> + n = rb_first(&fc->dentry_tree);
> + rb_erase(n, &fc->dentry_tree);
> + RB_CLEAR_NODE(&rb_entry(n, struct fuse_dentry, node)->node);
> + }
> + spin_unlock(&fc->dentry_tree_lock);
> +}
> +
> +/*
> + * workqueue that, when enabled, will periodically check for expired dentries in
> + * the dentries tree.
> + *
> + * A dentry has expired if:
> + *
> + * 1) it has been around for too long (timeout) or if
> + *
> + * 2) the connection epoch has been incremented.
> + *
> + * The workqueue will be rescheduled itself as long as the dentries tree is not
> + * empty. Also, it will not spend more than 5 seconds invalidating dentries on
> + * each run.
> + */
> +void fuse_dentry_tree_work(struct work_struct *work)
> +{
> + struct fuse_conn *fc = container_of(work, struct fuse_conn,
> + dentry_tree_work.work);
> + struct fuse_dentry *fd;
> + struct rb_node *node;
> + u64 start, end;
> + int epoch;
> + bool reschedule;
> +
> + spin_lock(&fc->dentry_tree_lock);
> + start = get_jiffies_64();
> + /* Don't spend too much time invalidating dentries */
> + end = start + secs_to_jiffies(5);
> + epoch = atomic_read(&fc->epoch);
> +
> + node = rb_first(&fc->dentry_tree);
> + while (node && time_after64(end, get_jiffies_64())) {
> + fd = rb_entry(node, struct fuse_dentry, node);
> + if ((fd->dentry->d_time < epoch) || (fd->time < start)) {
> + rb_erase(&fd->node, &fc->dentry_tree);
> + RB_CLEAR_NODE(&fd->node);
> + spin_unlock(&fc->dentry_tree_lock);
> + d_invalidate(fd->dentry);
> + spin_lock(&fc->dentry_tree_lock);
> + } else
> + break;
> + node = rb_first(&fc->dentry_tree);
> + }
> + reschedule = !RB_EMPTY_ROOT(&fc->dentry_tree);
> + spin_unlock(&fc->dentry_tree_lock);
> +
> + if (reschedule)
> + schedule_delayed_work(&fc->dentry_tree_work,
> + secs_to_jiffies(fc->inval_wq));
> +}
>
> static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time)
> {
> - ((union fuse_dentry *) dentry->d_fsdata)->time = time;
> + ((struct fuse_dentry *) dentry->d_fsdata)->time = time;
> }
>
> static inline u64 fuse_dentry_time(const struct dentry *entry)
> {
> - return ((union fuse_dentry *) entry->d_fsdata)->time;
> + return ((struct fuse_dentry *) entry->d_fsdata)->time;
> }
> -#endif
>
> static void fuse_dentry_settime(struct dentry *dentry, u64 time)
> {
> @@ -81,6 +201,7 @@ static void fuse_dentry_settime(struct dentry *dentry, u64 time)
> }
>
> __fuse_dentry_settime(dentry, time);
> + fuse_dentry_tree_add_node(dentry);
> }
>
> /*
> @@ -283,21 +404,36 @@ static int fuse_dentry_revalidate(struct inode *dir, const struct qstr *name,
> goto out;
> }
>
> -#if BITS_PER_LONG < 64
> static int fuse_dentry_init(struct dentry *dentry)
> {
> - dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry),
> - GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE);
> + struct fuse_dentry *fd;
> +
> + fd = kzalloc(sizeof(struct fuse_dentry),
> + GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE);
> + if (!fd)
> + return -ENOMEM;
> +
> + fd->dentry = dentry;
> + RB_CLEAR_NODE(&fd->node);
> + dentry->d_fsdata = fd;
>
> - return dentry->d_fsdata ? 0 : -ENOMEM;
> + return 0;
> +}
> +
> +static void fuse_dentry_prune(struct dentry *dentry)
> +{
> + struct fuse_dentry *fd = dentry->d_fsdata;
> +
> + if (!RB_EMPTY_NODE(&fd->node))
> + fuse_dentry_tree_del_node(dentry);
> }
> +
> static void fuse_dentry_release(struct dentry *dentry)
> {
> - union fuse_dentry *fd = dentry->d_fsdata;
> + struct fuse_dentry *fd = dentry->d_fsdata;
>
> kfree_rcu(fd, rcu);
> }
> -#endif
>
> static int fuse_dentry_delete(const struct dentry *dentry)
> {
> @@ -334,18 +470,16 @@ static struct vfsmount *fuse_dentry_automount(struct path *path)
> const struct dentry_operations fuse_dentry_operations = {
> .d_revalidate = fuse_dentry_revalidate,
> .d_delete = fuse_dentry_delete,
> -#if BITS_PER_LONG < 64
> .d_init = fuse_dentry_init,
> + .d_prune = fuse_dentry_prune,
> .d_release = fuse_dentry_release,
> -#endif
> .d_automount = fuse_dentry_automount,
> };
>
> const struct dentry_operations fuse_root_dentry_operations = {
> -#if BITS_PER_LONG < 64
> .d_init = fuse_dentry_init,
> + .d_prune = fuse_dentry_prune,
> .d_release = fuse_dentry_release,
> -#endif
> };
>
> int fuse_valid_type(int m)
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index f870d53a1bcf..3b7794f21573 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -978,6 +978,15 @@ struct fuse_conn {
> /* Request timeout (in jiffies). 0 = no timeout */
> unsigned int req_timeout;
> } timeout;
> +
> + /** Cache dentries tree */
> + struct rb_root dentry_tree;
> + /** Look to protect dentry_tree access */
> + spinlock_t dentry_tree_lock;
> + /** Periodic delayed work to invalidate expired dentries */
> + struct delayed_work dentry_tree_work;
> + /** Period for the invalidation workqueue */
> + unsigned int inval_wq;
> };
>
> /*
> @@ -1262,6 +1271,9 @@ void fuse_wait_aborted(struct fuse_conn *fc);
> /* Check if any requests timed out */
> void fuse_check_timeout(struct work_struct *work);
>
> +void fuse_dentry_tree_prune(struct fuse_conn *fc);
> +void fuse_dentry_tree_work(struct work_struct *work);
> +
> /**
> * Invalidate inode attributes
> */
> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> index b399784cca5f..7dbb11937344 100644
> --- a/fs/fuse/inode.c
> +++ b/fs/fuse/inode.c
> @@ -57,6 +57,20 @@ MODULE_PARM_DESC(max_user_congthresh,
> "Global limit for the maximum congestion threshold an "
> "unprivileged user can set");
>
> +static unsigned __read_mostly inval_wq;
> +static int inval_wq_set(const char *val, const struct kernel_param *kp)
> +{
> + return param_set_uint_minmax(val, kp, 5, (unsigned int)(-1));
> +}
> +static const struct kernel_param_ops inval_wq_ops = {
> + .set = inval_wq_set,
> + .get = param_get_uint,
> +};
> +module_param_cb(inval_wq, &inval_wq_ops, &inval_wq, 0644);
> +__MODULE_PARM_TYPE(inval_wq, "uint");
> +MODULE_PARM_DESC(inval_wq,
> + "Dentries invalidation workqueue period in secs (>= 5).");
> +
> #define FUSE_DEFAULT_BLKSIZE 512
>
> /** Maximum number of outstanding background requests */
> @@ -959,6 +973,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
> memset(fc, 0, sizeof(*fc));
> spin_lock_init(&fc->lock);
> spin_lock_init(&fc->bg_lock);
> + spin_lock_init(&fc->dentry_tree_lock);
> init_rwsem(&fc->killsb);
> refcount_set(&fc->count, 1);
> atomic_set(&fc->dev_count, 1);
> @@ -968,6 +983,8 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
> INIT_LIST_HEAD(&fc->bg_queue);
> INIT_LIST_HEAD(&fc->entry);
> INIT_LIST_HEAD(&fc->devices);
> + fc->dentry_tree = RB_ROOT;
> + fc->inval_wq = 0;
> atomic_set(&fc->num_waiting, 0);
> fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND;
> fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD;
> @@ -1844,6 +1861,9 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
> fc->group_id = ctx->group_id;
> fc->legacy_opts_show = ctx->legacy_opts_show;
> fc->max_read = max_t(unsigned int, 4096, ctx->max_read);
> + fc->inval_wq = inval_wq;
> + if (fc->inval_wq > 0)
> + INIT_DELAYED_WORK(&fc->dentry_tree_work, fuse_dentry_tree_work);
> fc->destroy = ctx->destroy;
> fc->no_control = ctx->no_control;
> fc->no_force_umount = ctx->no_force_umount;
> @@ -2048,6 +2068,7 @@ void fuse_conn_destroy(struct fuse_mount *fm)
>
> fuse_abort_conn(fc);
> fuse_wait_aborted(fc);
> + fuse_dentry_tree_prune(fc);
>
> if (!list_empty(&fc->entry)) {
> mutex_lock(&fuse_mutex);
>
Powered by blists - more mailing lists