linux-kernel - Re: [PATCH v6 2/2] statmount: accept fd as a parameter

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CANaxB-zG3JQRzZSF+rwfGqLRP8eWvc+J1w+2yZoB5uk5jhGyPA@mail.gmail.com>
Date: Wed, 19 Nov 2025 09:20:05 -0800
From: Andrei Vagin <avagin@...il.com>
To: Bhavik Sachdev <b.sachdev1904@...il.com>
Cc: Alexander Viro <viro@...iv.linux.org.uk>, Christian Brauner <brauner@...nel.org>, 
	linux-fsdevel@...r.kernel.org, linux-kernel@...r.kernel.org, 
	criu@...ts.linux.dev, Aleksa Sarai <cyphar@...har.com>, 
	Pavel Tikhomirov <ptikhomirov@...tuozzo.com>, Jan Kara <jack@...e.cz>, 
	John Garry <john.g.garry@...cle.com>, Arnaldo Carvalho de Melo <acme@...hat.com>, 
	"Darrick J . Wong" <djwong@...nel.org>, Namhyung Kim <namhyung@...nel.org>, Ingo Molnar <mingo@...nel.org>, 
	Alexander Mikhalitsyn <alexander@...alicyn.com>, Miklos Szeredi <miklos@...redi.hu>
Subject: Re: [PATCH v6 2/2] statmount: accept fd as a parameter

On Tue, Nov 18, 2025 at 12:49 AM Bhavik Sachdev <b.sachdev1904@...il.com> wrote:
>
> Extend `struct mnt_id_req` to take in a fd and introduce STATMOUNT_BY_FD
> flag. When a valid fd is provided and STATMOUNT_BY_FD is set, statmount
> will return mountinfo about the mount the fd is on.
>
> This even works for "unmounted" mounts (mounts that have been umounted
> using umount2(mnt, MNT_DETACH)), if you have access to a file descriptor
> on that mount. These "umounted" mounts will have no mountpoint and no
> valid mount namespace. Hence, we unset the STATMOUNT_MNT_POINT and
> STATMOUNT_MNT_NS_ID in statmount.mask for "unmounted" mounts.
>
> In case of STATMOUNT_BY_FD, given that we already have access to an fd
> on the mount, accessing mount information without a capability check
> seems fine because of the following reasons:
>
> - All fs related information is available via fstatfs() without any
>   capability check.
> - Mount information is also available via /proc/pid/mountinfo (without
>   any capability check).
> - Given that we have access to a fd on the mount which tells us that we
>   had access to the mount at some point (or someone that had access gave
>   us the fd). So, we should be able to access mount info.
>
> Co-developed-by: Pavel Tikhomirov <ptikhomirov@...tuozzo.com>
> Signed-off-by: Pavel Tikhomirov <ptikhomirov@...tuozzo.com>
> Signed-off-by: Bhavik Sachdev <b.sachdev1904@...il.com>
> ---
>  fs/namespace.c             | 99 ++++++++++++++++++++++++++------------
>  include/uapi/linux/mount.h |  7 ++-
>  2 files changed, 74 insertions(+), 32 deletions(-)
>
> diff --git a/fs/namespace.c b/fs/namespace.c
> index ee36d67f1ac2..1c41c6e2304a 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -5563,29 +5563,41 @@ static int grab_requested_root(struct mnt_namespace *ns, struct path *root)
>
>  /* locks: namespace_shared */
>  static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
> -                       struct mnt_namespace *ns)
> +                       struct mnt_namespace *ns, unsigned int flags)
>  {
>         struct mount *m;
>         int err;
>
> -       /* Has the namespace already been emptied? */
> -       if (mnt_ns_id && mnt_ns_empty(ns))
> -               return -ENOENT;
> +       /* caller sets s->mnt in case of STATMOUNT_BY_FD */
> +       if (!(flags & STATMOUNT_BY_FD)) {
> +               /* Has the namespace already been emptied? */
> +               if (mnt_ns_id && mnt_ns_empty(ns))
> +                       return -ENOENT;
>
> -       s->mnt = lookup_mnt_in_ns(mnt_id, ns);
> -       if (!s->mnt)
> -               return -ENOENT;
> +               s->mnt = lookup_mnt_in_ns(mnt_id, ns);
> +               if (!s->mnt)
> +                       return -ENOENT;
> +       }
>
> -       err = grab_requested_root(ns, &s->root);
> -       if (err)
> -               return err;
> +       if (ns) {
> +               err = grab_requested_root(ns, &s->root);
> +               if (err)
> +                       return err;
> +       } else {
> +               /*
> +                * We can't set mount point and mnt_ns_id since we don't have a
> +                * ns for the mount. This can happen if the mount is unmounted
> +                * with MNT_DETACH.
> +                */
> +               s->mask &= ~(STATMOUNT_MNT_POINT | STATMOUNT_MNT_NS_ID);
> +       }
>
>         /*
>          * Don't trigger audit denials. We just want to determine what
>          * mounts to show users.
>          */
>         m = real_mount(s->mnt);
> -       if (!is_path_reachable(m, m->mnt.mnt_root, &s->root) &&
> +       if (ns && !is_path_reachable(m, m->mnt.mnt_root, &s->root) &&
>             !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
>                 return -EPERM;
>
> @@ -5709,7 +5721,7 @@ static int prepare_kstatmount(struct kstatmount *ks, struct mnt_id_req *kreq,
>  }
>
>  static int copy_mnt_id_req(const struct mnt_id_req __user *req,
> -                          struct mnt_id_req *kreq)
> +                          struct mnt_id_req *kreq, unsigned int flags)
>  {
>         int ret;
>         size_t usize;
> @@ -5727,11 +5739,18 @@ static int copy_mnt_id_req(const struct mnt_id_req __user *req,
>         ret = copy_struct_from_user(kreq, sizeof(*kreq), req, usize);
>         if (ret)
>                 return ret;
> -       if (kreq->mnt_ns_fd != 0 && kreq->mnt_ns_id)
> -               return -EINVAL;
> -       /* The first valid unique mount id is MNT_UNIQUE_ID_OFFSET + 1. */
> -       if (kreq->mnt_id <= MNT_UNIQUE_ID_OFFSET)
> -               return -EINVAL;
> +
> +       if (flags & STATMOUNT_BY_FD) {
> +               if (kreq->mnt_id || kreq->mnt_ns_id)
> +                       return -EINVAL;
> +       } else {
> +               if (kreq->fd != 0 && kreq->mnt_ns_id)
> +                       return -EINVAL;
> +
> +               /* The first valid unique mount id is MNT_UNIQUE_ID_OFFSET + 1. */
> +               if (kreq->mnt_id <= MNT_UNIQUE_ID_OFFSET)
> +                       return -EINVAL;
> +       }
>         return 0;
>  }
>
> @@ -5740,16 +5759,18 @@ static int copy_mnt_id_req(const struct mnt_id_req __user *req,
>   * that, or if not simply grab a passive reference on our mount namespace and
>   * return that.
>   */
> -static struct mnt_namespace *grab_requested_mnt_ns(const struct mnt_id_req *kreq)
> +static struct mnt_namespace *grab_requested_mnt_ns(const struct mnt_id_req *kreq,
> +                                                  unsigned int flags)

In patch, grab_requested_mnt_ns is always called with zero flags, do we
really need adding flags here?

>  {
>         struct mnt_namespace *mnt_ns;
>
>         if (kreq->mnt_ns_id) {
>                 mnt_ns = lookup_mnt_ns(kreq->mnt_ns_id);
> -       } else if (kreq->mnt_ns_fd) {
> +       /* caller sets mnt_ns in case of STATMOUNT_BY_FD */
> +       } else if (!(flags & STATMOUNT_BY_FD) && kreq->fd) {

I don't understand this part. If STATMOUNT_BY_FD is set, we take a
reference to the current mount namespace. What is the idea here?

It looks like we don't need to change this function at all.

>                 struct ns_common *ns;
>
> -               CLASS(fd, f)(kreq->mnt_ns_fd);
> +               CLASS(fd, f)(kreq->fd);
>                 if (fd_empty(f))
>                         return ERR_PTR(-EBADF);
>
> @@ -5777,25 +5798,38 @@ SYSCALL_DEFINE4(statmount, const struct mnt_id_req __user *, req,
>  {
>         struct mnt_namespace *ns __free(mnt_ns_release) = NULL;
>         struct kstatmount *ks __free(kfree) = NULL;
> +       struct file *file_from_fd __free(fput) = NULL;
> +       struct vfsmount *fd_mnt;
>         struct mnt_id_req kreq;
>         /* We currently support retrieval of 3 strings. */
>         size_t seq_size = 3 * PATH_MAX;
>         int ret;
>
> -       if (flags)
> +       if (flags & ~STATMOUNT_BY_FD)
>                 return -EINVAL;
>
> -       ret = copy_mnt_id_req(req, &kreq);
> +       ret = copy_mnt_id_req(req, &kreq, flags);
>         if (ret)
>                 return ret;
>
> -       ns = grab_requested_mnt_ns(&kreq);
> -       if (IS_ERR(ns))
> -               return PTR_ERR(ns);
> +       if (flags & STATMOUNT_BY_FD) {
> +               file_from_fd = fget_raw(kreq.fd);
> +               if (!file_from_fd)
> +                       return -EBADF;
>
> -       if (kreq.mnt_ns_id && (ns != current->nsproxy->mnt_ns) &&
> -           !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
> -               return -EPERM;
> +               fd_mnt = file_from_fd->f_path.mnt;
> +               ns = real_mount(fd_mnt)->mnt_ns;

I think accessing mnt_ns here should be guarded with a lock/rcu.

> +               if (ns)
> +                       refcount_inc(&ns->passive);
> +       } else {
> +               ns = grab_requested_mnt_ns(&kreq, 0);
> +               if (IS_ERR(ns))
> +                       return PTR_ERR(ns);
> +
> +               if (kreq.mnt_ns_id && (ns != current->nsproxy->mnt_ns) &&
> +                   !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
> +                       return -EPERM;
> +       }
>
>         ks = kmalloc(sizeof(*ks), GFP_KERNEL_ACCOUNT);
>         if (!ks)
> @@ -5806,8 +5840,11 @@ SYSCALL_DEFINE4(statmount, const struct mnt_id_req __user *, req,
>         if (ret)
>                 return ret;
>
> +       if (flags & STATMOUNT_BY_FD)
> +               ks->mnt = fd_mnt;
> +
>         scoped_guard(namespace_shared)
> -               ret = do_statmount(ks, kreq.mnt_id, kreq.mnt_ns_id, ns);
> +               ret = do_statmount(ks, kreq.mnt_id, kreq.mnt_ns_id, ns, flags);
>
>         if (!ret)
>                 ret = copy_statmount_to_user(ks);
> @@ -5916,7 +5953,7 @@ static inline int prepare_klistmount(struct klistmount *kls, struct mnt_id_req *
>         if (!kls->kmnt_ids)
>                 return -ENOMEM;
>
> -       ns = grab_requested_mnt_ns(kreq);
> +       ns = grab_requested_mnt_ns(kreq, 0);
>         if (IS_ERR(ns))
>                 return PTR_ERR(ns);
>         kls->ns = ns;
> @@ -5947,7 +5984,7 @@ SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req,
>         if (!access_ok(mnt_ids, nr_mnt_ids * sizeof(*mnt_ids)))
>                 return -EFAULT;
>
> -       ret = copy_mnt_id_req(req, &kreq);
> +       ret = copy_mnt_id_req(req, &kreq, 0);
>         if (ret)
>                 return ret;
>
> diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h
> index 5d3f8c9e3a62..a2156599ddc6 100644
> --- a/include/uapi/linux/mount.h
> +++ b/include/uapi/linux/mount.h
> @@ -197,7 +197,7 @@ struct statmount {
>   */
>  struct mnt_id_req {
>         __u32 size;
> -       __u32 mnt_ns_fd;
> +       __u32 fd;

we can consider using union here:
      union {
            __u32 mnt_ns_fd;
            __u32 mnt_fd;
       };

>         __u64 mnt_id;
>         __u64 param;
>         __u64 mnt_ns_id;
> @@ -232,4 +232,9 @@ struct mnt_id_req {
>  #define LSMT_ROOT              0xffffffffffffffff      /* root mount */
>  #define LISTMOUNT_REVERSE      (1 << 0) /* List later mounts first */
>
> +/*
> + * @flag bits for statmount(2)
> + */
> +#define STATMOUNT_BY_FD                0x00000001U     /* want mountinfo for given fd */
> +
>  #endif /* _UAPI_LINUX_MOUNT_H */
> --
> 2.51.1
>