lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <diqzilea0xqr.fsf@ackerleytng-cloudtop.c.googlers.com>
Date:   Wed, 05 Apr 2023 22:29:00 +0000
From:   Ackerley Tng <ackerleytng@...gle.com>
To:     David Hildenbrand <david@...hat.com>
Cc:     kvm@...r.kernel.org, linux-api@...r.kernel.org,
        linux-arch@...r.kernel.org, linux-doc@...r.kernel.org,
        linux-fsdevel@...r.kernel.org, linux-kernel@...r.kernel.org,
        linux-mm@...ck.org, qemu-devel@...gnu.org, aarcange@...hat.com,
        ak@...ux.intel.com, akpm@...ux-foundation.org, arnd@...db.de,
        bfields@...ldses.org, bp@...en8.de, chao.p.peng@...ux.intel.com,
        corbet@....net, dave.hansen@...el.com, ddutile@...hat.com,
        dhildenb@...hat.com, hpa@...or.com, hughd@...gle.com,
        jlayton@...nel.org, jmattson@...gle.com, joro@...tes.org,
        jun.nakajima@...el.com, kirill.shutemov@...ux.intel.com,
        linmiaohe@...wei.com, luto@...nel.org, mail@...iej.szmigiero.name,
        mhocko@...e.com, michael.roth@....com, mingo@...hat.com,
        naoya.horiguchi@....com, pbonzini@...hat.com, qperret@...gle.com,
        rppt@...nel.org, seanjc@...gle.com, shuah@...nel.org,
        steven.price@....com, tabba@...gle.com, tglx@...utronix.de,
        vannapurve@...gle.com, vbabka@...e.cz, vkuznets@...hat.com,
        wanpengli@...cent.com, wei.w.wang@...el.com, x86@...nel.org,
        yu.c.zhang@...ux.intel.com
Subject: Re: [RFC PATCH v3 1/2] mm: restrictedmem: Allow userspace to specify
 mount for memfd_restricted


Thanks for your review!

David Hildenbrand <david@...hat.com> writes:

> On 01.04.23 01:50, Ackerley Tng wrote:

>> ...

>> diff --git a/include/uapi/linux/restrictedmem.h  
>> b/include/uapi/linux/restrictedmem.h
>> new file mode 100644
>> index 000000000000..22d6f2285f6d
>> --- /dev/null
>> +++ b/include/uapi/linux/restrictedmem.h
>> @@ -0,0 +1,8 @@
>> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
>> +#ifndef _UAPI_LINUX_RESTRICTEDMEM_H
>> +#define _UAPI_LINUX_RESTRICTEDMEM_H
>> +
>> +/* flags for memfd_restricted */
>> +#define RMFD_USERMNT		0x0001U

> I wonder if we can come up with a more expressive prefix than RMFD.
> Sounds more like "rm fd" ;) Maybe it should better match the
> "memfd_restricted" syscall name, like "MEMFD_RSTD_USERMNT".


RMFD did actually sound vulgar, I'm good with MEMFD_RSTD_USERMNT!

>> +
>> +#endif /* _UAPI_LINUX_RESTRICTEDMEM_H */
>> diff --git a/mm/restrictedmem.c b/mm/restrictedmem.c
>> index c5d869d8c2d8..f7b62364a31a 100644
>> --- a/mm/restrictedmem.c
>> +++ b/mm/restrictedmem.c
>> @@ -1,11 +1,12 @@
>>    // SPDX-License-Identifier: GPL-2.0
>> -#include "linux/sbitmap.h"

> Looks like an unrelated change?


Will remove this in the next revision.

>> +#include <linux/namei.h>
>>    #include <linux/pagemap.h>
>>    #include <linux/pseudo_fs.h>
>>    #include <linux/shmem_fs.h>
>>    #include <linux/syscalls.h>
>>    #include <uapi/linux/falloc.h>
>>    #include <uapi/linux/magic.h>
>> +#include <uapi/linux/restrictedmem.h>
>>    #include <linux/restrictedmem.h>

>>    struct restrictedmem {
>> @@ -189,19 +190,20 @@ static struct file  
>> *restrictedmem_file_create(struct file *memfd)
>>    	return file;
>>    }

>> -SYSCALL_DEFINE1(memfd_restricted, unsigned int, flags)
>> +static int restrictedmem_create(struct vfsmount *mount)
>>    {
>>    	struct file *file, *restricted_file;
>>    	int fd, err;

>> -	if (flags)
>> -		return -EINVAL;
>> -
>>    	fd = get_unused_fd_flags(0);
>>    	if (fd < 0)
>>    		return fd;

>> -	file = shmem_file_setup("memfd:restrictedmem", 0, VM_NORESERVE);
>> +	if (mount)
>> +		file = shmem_file_setup_with_mnt(mount, "memfd:restrictedmem", 0,  
>> VM_NORESERVE);
>> +	else
>> +		file = shmem_file_setup("memfd:restrictedmem", 0, VM_NORESERVE);
>> +
>>    	if (IS_ERR(file)) {
>>    		err = PTR_ERR(file);
>>    		goto err_fd;
>> @@ -223,6 +225,66 @@ SYSCALL_DEFINE1(memfd_restricted, unsigned int,  
>> flags)
>>    	return err;
>>    }

>> +static bool is_shmem_mount(struct vfsmount *mnt)
>> +{
>> +	return mnt && mnt->mnt_sb && mnt->mnt_sb->s_magic == TMPFS_MAGIC;
>> +}
>> +
>> +static bool is_mount_root(struct file *file)
>> +{
>> +	return file->f_path.dentry == file->f_path.mnt->mnt_root;
>> +}

> I'd inline at least that function, pretty self-explaining.


Will inline this in the next revision.

>> +
>> +static int restrictedmem_create_on_user_mount(int mount_fd)
>> +{
>> +	int ret;
>> +	struct fd f;
>> +	struct vfsmount *mnt;
>> +
>> +	f = fdget_raw(mount_fd);
>> +	if (!f.file)
>> +		return -EBADF;
>> +
>> +	ret = -EINVAL;
>> +	if (!is_mount_root(f.file))
>> +		goto out;
>> +
>> +	mnt = f.file->f_path.mnt;
>> +	if (!is_shmem_mount(mnt))
>> +		goto out;
>> +
>> +	ret = file_permission(f.file, MAY_WRITE | MAY_EXEC);
>> +	if (ret)
>> +		goto out;
>> +
>> +	ret = mnt_want_write(mnt);
>> +	if (unlikely(ret))
>> +		goto out;
>> +
>> +	ret = restrictedmem_create(mnt);
>> +
>> +	mnt_drop_write(mnt);
>> +out:
>> +	fdput(f);
>> +
>> +	return ret;
>> +}
>> +
>> +SYSCALL_DEFINE2(memfd_restricted, unsigned int, flags, int, mount_fd)
>> +{
>> +	if (flags & ~RMFD_USERMNT)
>> +		return -EINVAL;
>> +
>> +	if (flags == RMFD_USERMNT) {
>> +		if (mount_fd < 0)
>> +			return -EINVAL;
>> +
>> +		return restrictedmem_create_on_user_mount(mount_fd);
>> +	} else {
>> +		return restrictedmem_create(NULL);
>> +	}


> You can drop the else case:

> if (flags == RMFD_USERMNT) {
> 	...
> 	return restrictedmem_create_on_user_mount(mount_fd);
> }
> return restrictedmem_create(NULL);


I'll be refactoring this to adopt Kirill's suggestion of using a single
restrictedmem_create(mnt) call.


> I do wonder if you want to properly check for a flag instead of
> comparing values. Results in a more natural way to deal with flags:

> if (flags & RMFD_USERMNT) {

> }


Will use this in the next revision.

>> +}
>> +
>>    int restrictedmem_bind(struct file *file, pgoff_t start, pgoff_t end,
>>    		       struct restrictedmem_notifier *notifier, bool exclusive)
>>    {

> The "memfd_restricted" vs. "restrictedmem" terminology is a bit
> unfortunate, but not your fault here.


> I'm not a FS person, but it does look good to me.

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ