lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAJfpeguwimZgFht9AGV+WafbMrRbLK9Cp3G4Fy4xrLbAjgGARw@mail.gmail.com>
Date: Mon, 18 Aug 2025 15:21:15 +0200
From: Miklos Szeredi <miklos@...redi.hu>
To: Luis Henriques <luis@...lia.com>
Cc: Bernd Schubert <bernd@...ernd.com>, Laura Promberger <laura.promberger@...n.ch>, 
	Dave Chinner <david@...morbit.com>, linux-fsdevel@...r.kernel.org, 
	linux-kernel@...r.kernel.org, kernel-dev@...lia.com, 
	Matt Harvey <mharvey@...ptrading.com>
Subject: Re: [PATCH v4] fuse: new work queue to periodically invalidate
 expired dentries

On Mon, 7 Jul 2025 at 17:34, Luis Henriques <luis@...lia.com> wrote:

> I'm sending v4 without implementing your request to turn the dentries
> trees and work queues into global data structures.  After thinking about
> it a bit more, I'm not sure anymore that it makes sense.  And the reason
> is that the epoch is a per-connection attribute.  I couldn't find an
> elegant way of having a single work queue with a global tree to handle the
> fact that the epoch of a connection may have been incremented.  Any option
> to avoid walking through all the tree dentries when an epoch is
> incremented would be more complex than simply keeping it (and work queue)
> per connection.
>
> Does this make sense?

The global tree works very well for timeouts, but not for epoch.

So I wonder if we should just handle them separately.  Your original
patch dealt with the epoch within NOTIFY_INC_EPOCH, but the same thing
could be done with a workqueue started from the notification.

Thanks,
Miklos


>
> Changes since v3:
>
> - Use of need_resched() instead of limiting the work queue to run for 5
>   seconds
> - Restore usage of union with rcu_head, in struct fuse_dentry
> - Minor changes in comments (e.g. s/workqueue/work queue/)
>
> Changes since v2:
>
> - Major rework, the dentries tree nodes are now in fuse_dentry and they are
>   tied to the actual dentry lifetime
> - Mount option is now a module parameter
> - workqueue now runs for at most 5 seconds before rescheduling
>
>  fs/fuse/dev.c    |   2 -
>  fs/fuse/dir.c    | 179 +++++++++++++++++++++++++++++++++++++++++------
>  fs/fuse/fuse_i.h |  12 ++++
>  fs/fuse/inode.c  |  21 ++++++
>  4 files changed, 189 insertions(+), 25 deletions(-)
>
> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
> index e80cd8f2c049..2ec7fefcc1a1 100644
> --- a/fs/fuse/dev.c
> +++ b/fs/fuse/dev.c
> @@ -2034,8 +2034,6 @@ static int fuse_notify_resend(struct fuse_conn *fc)
>  /*
>   * Increments the fuse connection epoch.  This will result in dentries from
>   * previous epochs being invalidated.
> - *
> - * XXX optimization: add call to shrink_dcache_sb()?
>   */
>  static int fuse_notify_inc_epoch(struct fuse_conn *fc)
>  {
> diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
> index 45b4c3cc1396..7eba86fe52d6 100644
> --- a/fs/fuse/dir.c
> +++ b/fs/fuse/dir.c
> @@ -34,33 +34,152 @@ static void fuse_advise_use_readdirplus(struct inode *dir)
>         set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
>  }
>
> -#if BITS_PER_LONG >= 64
> -static inline void __fuse_dentry_settime(struct dentry *entry, u64 time)
> +struct fuse_dentry {
> +       u64 time;
> +       union {
> +               struct rcu_head rcu;
> +               struct rb_node node;
> +       };
> +       struct dentry *dentry;
> +};
> +
> +static void __fuse_dentry_tree_del_node(struct fuse_conn *fc,
> +                                       struct fuse_dentry *fd)
>  {
> -       entry->d_fsdata = (void *) time;
> +       if (!RB_EMPTY_NODE(&fd->node)) {
> +               rb_erase(&fd->node, &fc->dentry_tree);
> +               RB_CLEAR_NODE(&fd->node);
> +       }
>  }
>
> -static inline u64 fuse_dentry_time(const struct dentry *entry)
> +static void fuse_dentry_tree_del_node(struct dentry *dentry)
>  {
> -       return (u64)entry->d_fsdata;
> +       struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
> +       struct fuse_dentry *fd = dentry->d_fsdata;
> +
> +       if (!fc->inval_wq)
> +               return;
> +
> +       spin_lock(&fc->dentry_tree_lock);
> +       __fuse_dentry_tree_del_node(fc, fd);
> +       spin_unlock(&fc->dentry_tree_lock);
>  }
>
> -#else
> -union fuse_dentry {
> -       u64 time;
> -       struct rcu_head rcu;
> -};
> +static void fuse_dentry_tree_add_node(struct dentry *dentry)
> +{
> +       struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
> +       struct fuse_dentry *fd = dentry->d_fsdata;
> +       struct fuse_dentry *cur;
> +       struct rb_node **p, *parent = NULL;
> +       bool start_work = false;
> +
> +       if (!fc->inval_wq)
> +               return;
> +
> +       spin_lock(&fc->dentry_tree_lock);
> +
> +       if (!fc->inval_wq) {
> +               spin_unlock(&fc->dentry_tree_lock);
> +               return;
> +       }
> +
> +       start_work = RB_EMPTY_ROOT(&fc->dentry_tree);
> +       __fuse_dentry_tree_del_node(fc, fd);
> +
> +       p = &fc->dentry_tree.rb_node;
> +       while (*p) {
> +               parent = *p;
> +               cur = rb_entry(*p, struct fuse_dentry, node);
> +               if (fd->time > cur->time)
> +                       p = &(*p)->rb_left;
> +               else
> +                       p = &(*p)->rb_right;
> +       }
> +       rb_link_node(&fd->node, parent, p);
> +       rb_insert_color(&fd->node, &fc->dentry_tree);
> +       spin_unlock(&fc->dentry_tree_lock);
> +
> +       if (start_work)
> +               schedule_delayed_work(&fc->dentry_tree_work,
> +                                     secs_to_jiffies(fc->inval_wq));
> +}
> +
> +void fuse_dentry_tree_prune(struct fuse_conn *fc)
> +{
> +       struct rb_node *n;
> +
> +       if (!fc->inval_wq)
> +               return;
> +
> +       fc->inval_wq = 0;
> +       cancel_delayed_work_sync(&fc->dentry_tree_work);
> +
> +       spin_lock(&fc->dentry_tree_lock);
> +       while (!RB_EMPTY_ROOT(&fc->dentry_tree)) {
> +               n = rb_first(&fc->dentry_tree);
> +               rb_erase(n, &fc->dentry_tree);
> +               RB_CLEAR_NODE(&rb_entry(n, struct fuse_dentry, node)->node);
> +       }
> +       spin_unlock(&fc->dentry_tree_lock);
> +}
> +
> +/*
> + * work queue that, when enabled, will periodically check for expired dentries
> + * in the dentries tree.
> + *
> + * A dentry has expired if:
> + *
> + *   1) it has been around for too long (timeout) or if
> + *
> + *   2) the connection epoch has been incremented.
> + *
> + * The work queue will reschedule itself as long as the dentries tree is not
> + * empty.
> + */
> +void fuse_dentry_tree_work(struct work_struct *work)
> +{
> +       struct fuse_conn *fc = container_of(work, struct fuse_conn,
> +                                           dentry_tree_work.work);
> +       struct fuse_dentry *fd;
> +       struct rb_node *node;
> +       u64 start;
> +       int epoch;
> +       bool reschedule;
> +
> +       spin_lock(&fc->dentry_tree_lock);
> +       start = get_jiffies_64();
> +       epoch = atomic_read(&fc->epoch);
> +
> +       node = rb_first(&fc->dentry_tree);
> +       while (node && !need_resched()) {
> +               fd = rb_entry(node, struct fuse_dentry, node);
> +               if ((fd->dentry->d_time < epoch) || (fd->time < start)) {
> +                       rb_erase(&fd->node, &fc->dentry_tree);
> +                       RB_CLEAR_NODE(&fd->node);
> +                       spin_unlock(&fc->dentry_tree_lock);
> +                       d_invalidate(fd->dentry);
> +                       spin_lock(&fc->dentry_tree_lock);
> +               } else
> +                       break;
> +               node = rb_first(&fc->dentry_tree);
> +       }
> +       reschedule = !RB_EMPTY_ROOT(&fc->dentry_tree);
> +       spin_unlock(&fc->dentry_tree_lock);
> +
> +       if (reschedule)
> +               schedule_delayed_work(&fc->dentry_tree_work,
> +                                     secs_to_jiffies(fc->inval_wq));
> +}
>
>  static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time)
>  {
> -       ((union fuse_dentry *) dentry->d_fsdata)->time = time;
> +       ((struct fuse_dentry *) dentry->d_fsdata)->time = time;
>  }
>
>  static inline u64 fuse_dentry_time(const struct dentry *entry)
>  {
> -       return ((union fuse_dentry *) entry->d_fsdata)->time;
> +       return ((struct fuse_dentry *) entry->d_fsdata)->time;
>  }
> -#endif
>
>  static void fuse_dentry_settime(struct dentry *dentry, u64 time)
>  {
> @@ -81,6 +200,7 @@ static void fuse_dentry_settime(struct dentry *dentry, u64 time)
>         }
>
>         __fuse_dentry_settime(dentry, time);
> +       fuse_dentry_tree_add_node(dentry);
>  }
>
>  /*
> @@ -283,21 +403,36 @@ static int fuse_dentry_revalidate(struct inode *dir, const struct qstr *name,
>         goto out;
>  }
>
> -#if BITS_PER_LONG < 64
>  static int fuse_dentry_init(struct dentry *dentry)
>  {
> -       dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry),
> -                                  GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE);
> +       struct fuse_dentry *fd;
> +
> +       fd = kzalloc(sizeof(struct fuse_dentry),
> +                         GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE);
> +       if (!fd)
> +               return -ENOMEM;
> +
> +       fd->dentry = dentry;
> +       RB_CLEAR_NODE(&fd->node);
> +       dentry->d_fsdata = fd;
> +
> +       return 0;
> +}
> +
> +static void fuse_dentry_prune(struct dentry *dentry)
> +{
> +       struct fuse_dentry *fd = dentry->d_fsdata;
>
> -       return dentry->d_fsdata ? 0 : -ENOMEM;
> +       if (!RB_EMPTY_NODE(&fd->node))
> +               fuse_dentry_tree_del_node(dentry);
>  }
> +
>  static void fuse_dentry_release(struct dentry *dentry)
>  {
> -       union fuse_dentry *fd = dentry->d_fsdata;
> +       struct fuse_dentry *fd = dentry->d_fsdata;
>
>         kfree_rcu(fd, rcu);
>  }
> -#endif
>
>  static int fuse_dentry_delete(const struct dentry *dentry)
>  {
> @@ -331,18 +466,16 @@ static struct vfsmount *fuse_dentry_automount(struct path *path)
>  const struct dentry_operations fuse_dentry_operations = {
>         .d_revalidate   = fuse_dentry_revalidate,
>         .d_delete       = fuse_dentry_delete,
> -#if BITS_PER_LONG < 64
>         .d_init         = fuse_dentry_init,
> +       .d_prune        = fuse_dentry_prune,
>         .d_release      = fuse_dentry_release,
> -#endif
>         .d_automount    = fuse_dentry_automount,
>  };
>
>  const struct dentry_operations fuse_root_dentry_operations = {
> -#if BITS_PER_LONG < 64
>         .d_init         = fuse_dentry_init,
> +       .d_prune        = fuse_dentry_prune,
>         .d_release      = fuse_dentry_release,
> -#endif
>  };
>
>  int fuse_valid_type(int m)
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index b54f4f57789f..638d62d995a2 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -975,6 +975,15 @@ struct fuse_conn {
>                 /* Request timeout (in jiffies). 0 = no timeout */
>                 unsigned int req_timeout;
>         } timeout;
> +
> +       /** Cache dentries tree */
> +       struct rb_root dentry_tree;
> +       /** Lock to protect dentry_tree access */
> +       spinlock_t dentry_tree_lock;
> +       /** Periodic delayed work to invalidate expired dentries */
> +       struct delayed_work dentry_tree_work;
> +       /** Period for the invalidation work queue */
> +       unsigned int inval_wq;
>  };
>
>  /*
> @@ -1259,6 +1268,9 @@ void fuse_wait_aborted(struct fuse_conn *fc);
>  /* Check if any requests timed out */
>  void fuse_check_timeout(struct work_struct *work);
>
> +void fuse_dentry_tree_prune(struct fuse_conn *fc);
> +void fuse_dentry_tree_work(struct work_struct *work);
> +
>  /**
>   * Invalidate inode attributes
>   */
> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> index 9572bdef49ee..df20ff91898f 100644
> --- a/fs/fuse/inode.c
> +++ b/fs/fuse/inode.c
> @@ -58,6 +58,20 @@ MODULE_PARM_DESC(max_user_congthresh,
>   "Global limit for the maximum congestion threshold an "
>   "unprivileged user can set");
>
> +static unsigned __read_mostly inval_wq;
> +static int inval_wq_set(const char *val, const struct kernel_param *kp)
> +{
> +       return param_set_uint_minmax(val, kp, 5, (unsigned int)(-1));
> +}
> +static const struct kernel_param_ops inval_wq_ops = {
> +       .set = inval_wq_set,
> +       .get = param_get_uint,
> +};
> +module_param_cb(inval_wq, &inval_wq_ops, &inval_wq, 0644);
> +__MODULE_PARM_TYPE(inval_wq, "uint");
> +MODULE_PARM_DESC(inval_wq,
> +                "Dentries invalidation work queue period in secs (>= 5).");
> +
>  #define FUSE_DEFAULT_BLKSIZE 512
>
>  /** Maximum number of outstanding background requests */
> @@ -963,6 +977,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
>         memset(fc, 0, sizeof(*fc));
>         spin_lock_init(&fc->lock);
>         spin_lock_init(&fc->bg_lock);
> +       spin_lock_init(&fc->dentry_tree_lock);
>         init_rwsem(&fc->killsb);
>         refcount_set(&fc->count, 1);
>         atomic_set(&fc->dev_count, 1);
> @@ -972,6 +987,8 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
>         INIT_LIST_HEAD(&fc->bg_queue);
>         INIT_LIST_HEAD(&fc->entry);
>         INIT_LIST_HEAD(&fc->devices);
> +       fc->dentry_tree = RB_ROOT;
> +       fc->inval_wq = 0;
>         atomic_set(&fc->num_waiting, 0);
>         fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND;
>         fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD;
> @@ -1848,6 +1865,9 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
>         fc->group_id = ctx->group_id;
>         fc->legacy_opts_show = ctx->legacy_opts_show;
>         fc->max_read = max_t(unsigned int, 4096, ctx->max_read);
> +       fc->inval_wq = inval_wq;
> +       if (fc->inval_wq > 0)
> +               INIT_DELAYED_WORK(&fc->dentry_tree_work, fuse_dentry_tree_work);
>         fc->destroy = ctx->destroy;
>         fc->no_control = ctx->no_control;
>         fc->no_force_umount = ctx->no_force_umount;
> @@ -2052,6 +2072,7 @@ void fuse_conn_destroy(struct fuse_mount *fm)
>
>         fuse_abort_conn(fc);
>         fuse_wait_aborted(fc);
> +       fuse_dentry_tree_prune(fc);
>
>         if (!list_empty(&fc->entry)) {
>                 mutex_lock(&fuse_mutex);

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ