lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Tue, 1 Jun 2021 14:41:27 +0200
From:   Miklos Szeredi <miklos@...redi.hu>
To:     Ian Kent <raven@...maw.net>
Cc:     Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
        Tejun Heo <tj@...nel.org>, Eric Sandeen <sandeen@...deen.net>,
        Fox Chen <foxhlchen@...il.com>,
        Brice Goglin <brice.goglin@...il.com>,
        Al Viro <viro@...iv.linux.org.uk>,
        Rick Lindsley <ricklind@...ux.vnet.ibm.com>,
        David Howells <dhowells@...hat.com>,
        Marcelo Tosatti <mtosatti@...hat.com>,
        linux-fsdevel <linux-fsdevel@...r.kernel.org>,
        Kernel Mailing List <linux-kernel@...r.kernel.org>
Subject: Re: [REPOST PATCH v4 2/5] kernfs: use VFS negative dentry caching

On Fri, 28 May 2021 at 08:34, Ian Kent <raven@...maw.net> wrote:
>
> If there are many lookups for non-existent paths these negative lookups
> can lead to a lot of overhead during path walks.
>
> The VFS allows dentries to be created as negative and hashed, and caches
> them so they can be used to reduce the fairly high overhead alloc/free
> cycle that occurs during these lookups.

Obviously there's a cost associated with negative caching too.  For
normal filesystems it's trivially worth that cost, but in case of
kernfs, not sure...

Can "fairly high" be somewhat substantiated with a microbenchmark for
negative lookups?

More comments inline.

>
> Signed-off-by: Ian Kent <raven@...maw.net>
> ---
>  fs/kernfs/dir.c |   55 +++++++++++++++++++++++++++++++++----------------------
>  1 file changed, 33 insertions(+), 22 deletions(-)
>
> diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
> index 4c69e2af82dac..5151c712f06f5 100644
> --- a/fs/kernfs/dir.c
> +++ b/fs/kernfs/dir.c
> @@ -1037,12 +1037,33 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
>         if (flags & LOOKUP_RCU)
>                 return -ECHILD;
>
> -       /* Always perform fresh lookup for negatives */
> -       if (d_really_is_negative(dentry))
> -               goto out_bad_unlocked;
> +       mutex_lock(&kernfs_mutex);
>
>         kn = kernfs_dentry_node(dentry);
> -       mutex_lock(&kernfs_mutex);
> +
> +       /* Negative hashed dentry? */
> +       if (!kn) {
> +               struct kernfs_node *parent;
> +
> +               /* If the kernfs node can be found this is a stale negative
> +                * hashed dentry so it must be discarded and the lookup redone.
> +                */
> +               parent = kernfs_dentry_node(dentry->d_parent);

This doesn't look safe WRT a racing sys_rename().  In this case
d_move() is called only with parent inode locked, but not with
kernfs_mutex while ->d_revalidate() may not have parent inode locked.
After d_move() the old parent dentry can be freed, resulting in use
after free.  Easily fixed by dget_parent().

> +               if (parent) {
> +                       const void *ns = NULL;
> +
> +                       if (kernfs_ns_enabled(parent))
> +                               ns = kernfs_info(dentry->d_sb)->ns;
> +                       kn = kernfs_find_ns(parent, dentry->d_name.name, ns);

Same thing with d_name.  There's
take_dentry_name_snapshot()/release_dentry_name_snapshot() to properly
take care of that.


> +                       if (kn)
> +                               goto out_bad;
> +               }
> +
> +               /* The kernfs node doesn't exist, leave the dentry negative
> +                * and return success.
> +                */
> +               goto out;
> +       }
>
>         /* The kernfs node has been deactivated */
>         if (!kernfs_active_read(kn))
> @@ -1060,12 +1081,11 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
>         if (kn->parent && kernfs_ns_enabled(kn->parent) &&
>             kernfs_info(dentry->d_sb)->ns != kn->ns)
>                 goto out_bad;
> -
> +out:
>         mutex_unlock(&kernfs_mutex);
>         return 1;
>  out_bad:
>         mutex_unlock(&kernfs_mutex);
> -out_bad_unlocked:
>         return 0;
>  }
>
> @@ -1080,33 +1100,24 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir,
>         struct dentry *ret;
>         struct kernfs_node *parent = dir->i_private;
>         struct kernfs_node *kn;
> -       struct inode *inode;
> +       struct inode *inode = NULL;
>         const void *ns = NULL;
>
>         mutex_lock(&kernfs_mutex);
> -
>         if (kernfs_ns_enabled(parent))
>                 ns = kernfs_info(dir->i_sb)->ns;
>
>         kn = kernfs_find_ns(parent, dentry->d_name.name, ns);
> -
> -       /* no such entry */
> -       if (!kn || !kernfs_active(kn)) {
> -               ret = NULL;
> -               goto out_unlock;
> -       }
> -
>         /* attach dentry and inode */
> -       inode = kernfs_get_inode(dir->i_sb, kn);
> -       if (!inode) {
> -               ret = ERR_PTR(-ENOMEM);
> -               goto out_unlock;
> +       if (kn && kernfs_active(kn)) {
> +               inode = kernfs_get_inode(dir->i_sb, kn);
> +               if (!inode)
> +                       inode = ERR_PTR(-ENOMEM);
>         }
> -
> -       /* instantiate and hash dentry */
> +       /* instantiate and hash (possibly negative) dentry */
>         ret = d_splice_alias(inode, dentry);
> - out_unlock:
>         mutex_unlock(&kernfs_mutex);
> +
>         return ret;
>  }
>
>
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ