[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <75aaa2fc-f044-2096-bf6b-740b0075467e@oracle.com>
Date: Thu, 30 Jun 2022 17:30:35 -0600
From: Khalid Aziz <khalid.aziz@...cle.com>
To: "Darrick J. Wong" <djwong@...nel.org>
Cc: akpm@...ux-foundation.org, willy@...radead.org,
aneesh.kumar@...ux.ibm.com, arnd@...db.de, 21cnbao@...il.com,
corbet@....net, dave.hansen@...ux.intel.com, david@...hat.com,
ebiederm@...ssion.com, hagen@...u.net, jack@...e.cz,
keescook@...omium.org, kirill@...temov.name, kucharsk@...il.com,
linkinjeon@...nel.org, linux-fsdevel@...r.kernel.org,
linux-kernel@...r.kernel.org, linux-mm@...ck.org,
longpeng2@...wei.com, luto@...nel.org, markhemm@...glemail.com,
pcc@...gle.com, rppt@...nel.org, sieberf@...zon.com,
sjpark@...zon.de, surenb@...gle.com, tst@...oebel-theuer.de,
yzaikin@...gle.com
Subject: Re: [PATCH v2 6/9] mm/mshare: Add mmap operation
On 6/30/22 15:44, Darrick J. Wong wrote:
> On Wed, Jun 29, 2022 at 04:53:57PM -0600, Khalid Aziz wrote:
>> mmap is used to establish address range for mshare region and map the
>> region into process's address space. Add basic mmap operation that
>> supports setting address range. Also fix code to not allocate new
>> mm_struct for files in msharefs that exist for information and not
>> for defining a new mshare region.
>>
>> Signed-off-by: Khalid Aziz <khalid.aziz@...cle.com>
>> Signed-off-by: Matthew Wilcox (Oracle) <willy@...radead.org>
>> ---
>> mm/mshare.c | 48 +++++++++++++++++++++++++++++++++++++++++-------
>> 1 file changed, 41 insertions(+), 7 deletions(-)
>>
>> diff --git a/mm/mshare.c b/mm/mshare.c
>> index d238b68b0576..088a6cab1e93 100644
>> --- a/mm/mshare.c
>> +++ b/mm/mshare.c
>> @@ -9,7 +9,8 @@
>> *
>> *
>> * Copyright (C) 2022 Oracle Corp. All rights reserved.
>> - * Author: Khalid Aziz <khalid.aziz@...cle.com>
>> + * Authors: Khalid Aziz <khalid.aziz@...cle.com>
>> + * Matthew Wilcox <willy@...radead.org>
>> *
>> */
>>
>> @@ -60,9 +61,36 @@ msharefs_read(struct kiocb *iocb, struct iov_iter *iov)
>> return ret;
>> }
>>
>> +static int
>> +msharefs_mmap(struct file *file, struct vm_area_struct *vma)
>> +{
>> + struct mshare_data *info = file->private_data;
>> + struct mm_struct *mm = info->mm;
>> +
>> + /*
>> + * If this mshare region has been set up once already, bail out
>> + */
>> + if (mm->mmap_base != 0)
>> + return -EINVAL;
>> +
>> + if ((vma->vm_start | vma->vm_end) & (PGDIR_SIZE - 1))
>> + return -EINVAL;
>> +
>> + mm->mmap_base = vma->vm_start;
>> + mm->task_size = vma->vm_end - vma->vm_start;
>> + if (!mm->task_size)
>> + mm->task_size--;
>> + info->minfo->start = mm->mmap_base;
>> + info->minfo->size = mm->task_size;
>
> So, uh, if the second mmap() caller decides to ignore the mshare_info,
> should they get an -EINVAL here since the memory mappings won't be at
> the same process virtual address?
Yes, that is handled in patch 9. Until patch 9, a second mmap will result in EINVAL irrespective of the address and size passed to mmap.
>
>> + vma->vm_flags |= VM_SHARED_PT;
>> + vma->vm_private_data = info;
>> + return 0;
>> +}
>> +
>> static const struct file_operations msharefs_file_operations = {
>> .open = msharefs_open,
>> .read_iter = msharefs_read,
>> + .mmap = msharefs_mmap,
>> .llseek = no_llseek,
>> };
>>
>> @@ -119,7 +147,12 @@ msharefs_fill_mm(struct inode *inode)
>> goto err_free;
>> }
>> info->mm = mm;
>> - info->minfo = NULL;
>> + info->minfo = kzalloc(sizeof(struct mshare_info), GFP_KERNEL);
>> + if (info->minfo == NULL) {
>> + retval = -ENOMEM;
>> + goto err_free;
>> + }
>> +
>> refcount_set(&info->refcnt, 1);
>> inode->i_private = info;
>>
>> @@ -128,13 +161,14 @@ msharefs_fill_mm(struct inode *inode)
>> err_free:
>> if (mm)
>> mmput(mm);
>> + kfree(info->minfo);
>> kfree(info);
>> return retval;
>> }
>>
>> static struct inode
>> *msharefs_get_inode(struct super_block *sb, const struct inode *dir,
>> - umode_t mode)
>> + umode_t mode, bool newmm)
>> {
>> struct inode *inode = new_inode(sb);
>> if (inode) {
>> @@ -147,7 +181,7 @@ static struct inode
>> case S_IFREG:
>> inode->i_op = &msharefs_file_inode_ops;
>> inode->i_fop = &msharefs_file_operations;
>> - if (msharefs_fill_mm(inode) != 0) {
>> + if (newmm && msharefs_fill_mm(inode) != 0) {
>> discard_new_inode(inode);
>> inode = ERR_PTR(-ENOMEM);
>> }
>> @@ -177,7 +211,7 @@ msharefs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
>> struct inode *inode;
>> int err = 0;
>>
>> - inode = msharefs_get_inode(dir->i_sb, dir, mode);
>> + inode = msharefs_get_inode(dir->i_sb, dir, mode, true);
>> if (IS_ERR(inode))
>> return PTR_ERR(inode);
>>
>> @@ -267,7 +301,7 @@ prepopulate_files(struct super_block *s, struct inode *dir,
>> if (!dentry)
>> return -ENOMEM;
>>
>> - inode = msharefs_get_inode(s, dir, S_IFREG | files->mode);
>> + inode = msharefs_get_inode(s, dir, S_IFREG | files->mode, false);
>
> I was wondering why the information files were getting their own
> mshare_data.
>
> TBH I'm not really sure what the difference is between mshare_data and
> mshare_info, since those names are not especially distinct.
mshare_data is a superset and is internal, while mshare_info is what is sent back to userspace when it reads a file
representing an mshare region.
>
>> if (!inode) {
>> dput(dentry);
>> return -ENOMEM;
>> @@ -301,7 +335,7 @@ msharefs_fill_super(struct super_block *sb, struct fs_context *fc)
>> sb->s_d_op = &msharefs_d_ops;
>> sb->s_time_gran = 1;
>>
>> - inode = msharefs_get_inode(sb, NULL, S_IFDIR | 0777);
>> + inode = msharefs_get_inode(sb, NULL, S_IFDIR | 0777, false);
>
> Is it wise to default to world-writable? Surely whatever userspace
> software wraps an msharefs can relax permissions as needed.
>
Since this is for the root inode, the default is set so that any process can create an mshare region in msharefs, which I think is
the most flexible. Individual userspace apps can create mshare regions with any permissions they deem fit using open(). Does
that make sense?
Thanks,
Khalid
Powered by blists - more mailing lists