[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <diqzilea0xqr.fsf@ackerleytng-cloudtop.c.googlers.com>
Date: Wed, 05 Apr 2023 22:29:00 +0000
From: Ackerley Tng <ackerleytng@...gle.com>
To: David Hildenbrand <david@...hat.com>
Cc: kvm@...r.kernel.org, linux-api@...r.kernel.org,
linux-arch@...r.kernel.org, linux-doc@...r.kernel.org,
linux-fsdevel@...r.kernel.org, linux-kernel@...r.kernel.org,
linux-mm@...ck.org, qemu-devel@...gnu.org, aarcange@...hat.com,
ak@...ux.intel.com, akpm@...ux-foundation.org, arnd@...db.de,
bfields@...ldses.org, bp@...en8.de, chao.p.peng@...ux.intel.com,
corbet@....net, dave.hansen@...el.com, ddutile@...hat.com,
dhildenb@...hat.com, hpa@...or.com, hughd@...gle.com,
jlayton@...nel.org, jmattson@...gle.com, joro@...tes.org,
jun.nakajima@...el.com, kirill.shutemov@...ux.intel.com,
linmiaohe@...wei.com, luto@...nel.org, mail@...iej.szmigiero.name,
mhocko@...e.com, michael.roth@....com, mingo@...hat.com,
naoya.horiguchi@....com, pbonzini@...hat.com, qperret@...gle.com,
rppt@...nel.org, seanjc@...gle.com, shuah@...nel.org,
steven.price@....com, tabba@...gle.com, tglx@...utronix.de,
vannapurve@...gle.com, vbabka@...e.cz, vkuznets@...hat.com,
wanpengli@...cent.com, wei.w.wang@...el.com, x86@...nel.org,
yu.c.zhang@...ux.intel.com
Subject: Re: [RFC PATCH v3 1/2] mm: restrictedmem: Allow userspace to specify
mount for memfd_restricted
Thanks for your review!
David Hildenbrand <david@...hat.com> writes:
> On 01.04.23 01:50, Ackerley Tng wrote:
>> ...
>> diff --git a/include/uapi/linux/restrictedmem.h
>> b/include/uapi/linux/restrictedmem.h
>> new file mode 100644
>> index 000000000000..22d6f2285f6d
>> --- /dev/null
>> +++ b/include/uapi/linux/restrictedmem.h
>> @@ -0,0 +1,8 @@
>> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
>> +#ifndef _UAPI_LINUX_RESTRICTEDMEM_H
>> +#define _UAPI_LINUX_RESTRICTEDMEM_H
>> +
>> +/* flags for memfd_restricted */
>> +#define RMFD_USERMNT 0x0001U
> I wonder if we can come up with a more expressive prefix than RMFD.
> Sounds more like "rm fd" ;) Maybe it should better match the
> "memfd_restricted" syscall name, like "MEMFD_RSTD_USERMNT".
RMFD did actually sound vulgar, I'm good with MEMFD_RSTD_USERMNT!
>> +
>> +#endif /* _UAPI_LINUX_RESTRICTEDMEM_H */
>> diff --git a/mm/restrictedmem.c b/mm/restrictedmem.c
>> index c5d869d8c2d8..f7b62364a31a 100644
>> --- a/mm/restrictedmem.c
>> +++ b/mm/restrictedmem.c
>> @@ -1,11 +1,12 @@
>> // SPDX-License-Identifier: GPL-2.0
>> -#include "linux/sbitmap.h"
> Looks like an unrelated change?
Will remove this in the next revision.
>> +#include <linux/namei.h>
>> #include <linux/pagemap.h>
>> #include <linux/pseudo_fs.h>
>> #include <linux/shmem_fs.h>
>> #include <linux/syscalls.h>
>> #include <uapi/linux/falloc.h>
>> #include <uapi/linux/magic.h>
>> +#include <uapi/linux/restrictedmem.h>
>> #include <linux/restrictedmem.h>
>> struct restrictedmem {
>> @@ -189,19 +190,20 @@ static struct file
>> *restrictedmem_file_create(struct file *memfd)
>> return file;
>> }
>> -SYSCALL_DEFINE1(memfd_restricted, unsigned int, flags)
>> +static int restrictedmem_create(struct vfsmount *mount)
>> {
>> struct file *file, *restricted_file;
>> int fd, err;
>> - if (flags)
>> - return -EINVAL;
>> -
>> fd = get_unused_fd_flags(0);
>> if (fd < 0)
>> return fd;
>> - file = shmem_file_setup("memfd:restrictedmem", 0, VM_NORESERVE);
>> + if (mount)
>> + file = shmem_file_setup_with_mnt(mount, "memfd:restrictedmem", 0,
>> VM_NORESERVE);
>> + else
>> + file = shmem_file_setup("memfd:restrictedmem", 0, VM_NORESERVE);
>> +
>> if (IS_ERR(file)) {
>> err = PTR_ERR(file);
>> goto err_fd;
>> @@ -223,6 +225,66 @@ SYSCALL_DEFINE1(memfd_restricted, unsigned int,
>> flags)
>> return err;
>> }
>> +static bool is_shmem_mount(struct vfsmount *mnt)
>> +{
>> + return mnt && mnt->mnt_sb && mnt->mnt_sb->s_magic == TMPFS_MAGIC;
>> +}
>> +
>> +static bool is_mount_root(struct file *file)
>> +{
>> + return file->f_path.dentry == file->f_path.mnt->mnt_root;
>> +}
> I'd inline at least that function, pretty self-explaining.
Will inline this in the next revision.
>> +
>> +static int restrictedmem_create_on_user_mount(int mount_fd)
>> +{
>> + int ret;
>> + struct fd f;
>> + struct vfsmount *mnt;
>> +
>> + f = fdget_raw(mount_fd);
>> + if (!f.file)
>> + return -EBADF;
>> +
>> + ret = -EINVAL;
>> + if (!is_mount_root(f.file))
>> + goto out;
>> +
>> + mnt = f.file->f_path.mnt;
>> + if (!is_shmem_mount(mnt))
>> + goto out;
>> +
>> + ret = file_permission(f.file, MAY_WRITE | MAY_EXEC);
>> + if (ret)
>> + goto out;
>> +
>> + ret = mnt_want_write(mnt);
>> + if (unlikely(ret))
>> + goto out;
>> +
>> + ret = restrictedmem_create(mnt);
>> +
>> + mnt_drop_write(mnt);
>> +out:
>> + fdput(f);
>> +
>> + return ret;
>> +}
>> +
>> +SYSCALL_DEFINE2(memfd_restricted, unsigned int, flags, int, mount_fd)
>> +{
>> + if (flags & ~RMFD_USERMNT)
>> + return -EINVAL;
>> +
>> + if (flags == RMFD_USERMNT) {
>> + if (mount_fd < 0)
>> + return -EINVAL;
>> +
>> + return restrictedmem_create_on_user_mount(mount_fd);
>> + } else {
>> + return restrictedmem_create(NULL);
>> + }
> You can drop the else case:
> if (flags == RMFD_USERMNT) {
> ...
> return restrictedmem_create_on_user_mount(mount_fd);
> }
> return restrictedmem_create(NULL);
I'll be refactoring this to adopt Kirill's suggestion of using a single
restrictedmem_create(mnt) call.
> I do wonder if you want to properly check for a flag instead of
> comparing values. Results in a more natural way to deal with flags:
> if (flags & RMFD_USERMNT) {
> }
Will use this in the next revision.
>> +}
>> +
>> int restrictedmem_bind(struct file *file, pgoff_t start, pgoff_t end,
>> struct restrictedmem_notifier *notifier, bool exclusive)
>> {
> The "memfd_restricted" vs. "restrictedmem" terminology is a bit
> unfortunate, but not your fault here.
> I'm not a FS person, but it does look good to me.
Powered by blists - more mailing lists