[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <dc6eb85f-87b6-43a1-b1f7-4727c0b834cc@amd.com>
Date: Thu, 25 Sep 2025 17:14:15 +0530
From: "Garg, Shivank" <shivankg@....com>
To: Sean Christopherson <seanjc@...gle.com>,
Ackerley Tng <ackerleytng@...gle.com>
Cc: willy@...radead.org, akpm@...ux-foundation.org, david@...hat.com,
pbonzini@...hat.com, shuah@...nel.org, vbabka@...e.cz, brauner@...nel.org,
viro@...iv.linux.org.uk, dsterba@...e.com, xiang@...nel.org,
chao@...nel.org, jaegeuk@...nel.org, clm@...com, josef@...icpanda.com,
kent.overstreet@...ux.dev, zbestahu@...il.com, jefflexu@...ux.alibaba.com,
dhavale@...gle.com, lihongbo22@...wei.com, lorenzo.stoakes@...cle.com,
Liam.Howlett@...cle.com, rppt@...nel.org, surenb@...gle.com,
mhocko@...e.com, ziy@...dia.com, matthew.brost@...el.com,
joshua.hahnjy@...il.com, rakie.kim@...com, byungchul@...com,
gourry@...rry.net, ying.huang@...ux.alibaba.com, apopple@...dia.com,
tabba@...gle.com, paul@...l-moore.com, jmorris@...ei.org, serge@...lyn.com,
pvorel@...e.cz, bfoster@...hat.com, vannapurve@...gle.com,
chao.gao@...el.com, bharata@....com, nikunj@....com, michael.day@....com,
shdhiman@....com, yan.y.zhao@...el.com, Neeraj.Upadhyay@....com,
thomas.lendacky@....com, michael.roth@....com, aik@....com, jgg@...dia.com,
kalyazin@...zon.com, peterx@...hat.com, jack@...e.cz, hch@...radead.org,
cgzones@...glemail.com, ira.weiny@...el.com, rientjes@...gle.com,
roypat@...zon.co.uk, chao.p.peng@...el.com, amit@...radead.org,
ddutile@...hat.com, dan.j.williams@...el.com, ashish.kalra@....com,
gshan@...hat.com, jgowans@...zon.com, pankaj.gupta@....com,
papaluri@....com, yuzhao@...gle.com, suzuki.poulose@....com,
quic_eberman@...cinc.com, linux-bcachefs@...r.kernel.org,
linux-btrfs@...r.kernel.org, linux-erofs@...ts.ozlabs.org,
linux-f2fs-devel@...ts.sourceforge.net, linux-fsdevel@...r.kernel.org,
linux-mm@...ck.org, linux-kernel@...r.kernel.org,
linux-security-module@...r.kernel.org, kvm@...r.kernel.org,
linux-kselftest@...r.kernel.org, linux-coco@...ts.linux.dev
Subject: Re: [PATCH kvm-next V11 4/7] KVM: guest_memfd: Use guest mem inodes
instead of anonymous inodes
On 9/25/2025 8:20 AM, Sean Christopherson wrote:
> My apologies for the super late feedback. None of this is critical (mechanical
> things that can be cleaned up after the fact), so if there's any urgency to
> getting this series into 6.18, just ignore it.
>
> On Wed, Aug 27, 2025, Ackerley Tng wrote:
>> Shivank Garg <shivankg@....com> writes:
>> @@ -463,11 +502,70 @@ bool __weak kvm_arch_supports_gmem_mmap(struct kvm *kvm)
>> return true;
>> }
>>
>> +static struct inode *kvm_gmem_inode_create(const char *name, loff_t size,
>> + u64 flags)
>> +{
>> + struct inode *inode;
>> +
>> + inode = anon_inode_make_secure_inode(kvm_gmem_mnt->mnt_sb, name, NULL);
>> + if (IS_ERR(inode))
>> + return inode;
>> +
>> + inode->i_private = (void *)(unsigned long)flags;
>> + inode->i_op = &kvm_gmem_iops;
>> + inode->i_mapping->a_ops = &kvm_gmem_aops;
>> + inode->i_mode |= S_IFREG;
>> + inode->i_size = size;
>> + mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
>> + mapping_set_inaccessible(inode->i_mapping);
>> + /* Unmovable mappings are supposed to be marked unevictable as well. */
>> + WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping));
>> +
>> + return inode;
>> +}
>> +
>> +static struct file *kvm_gmem_inode_create_getfile(void *priv, loff_t size,
>> + u64 flags)
>> +{
>> + static const char *name = "[kvm-gmem]";
>> + struct inode *inode;
>> + struct file *file;
>> + int err;
>> +
>> + err = -ENOENT;
>> + /* __fput() will take care of fops_put(). */
>> + if (!fops_get(&kvm_gmem_fops))
>> + goto err;
>> +
>> + inode = kvm_gmem_inode_create(name, size, flags);
>> + if (IS_ERR(inode)) {
>> + err = PTR_ERR(inode);
>> + goto err_fops_put;
>> + }
>> +
>> + file = alloc_file_pseudo(inode, kvm_gmem_mnt, name, O_RDWR,
>> + &kvm_gmem_fops);
>> + if (IS_ERR(file)) {
>> + err = PTR_ERR(file);
>> + goto err_put_inode;
>> + }
>> +
>> + file->f_flags |= O_LARGEFILE;
>> + file->private_data = priv;
>> +
>> + return file;
>> +
>> +err_put_inode:
>> + iput(inode);
>> +err_fops_put:
>> + fops_put(&kvm_gmem_fops);
>> +err:
>> + return ERR_PTR(err);
>> +}
>
> I don't see any reason to add two helpers. It requires quite a bit more lines
> of code due to adding more error paths and local variables, and IMO doesn't make
> the code any easier to read.
>
> Passing in "gmem" as @priv is especially ridiculous, as it adds code and
> obfuscates what file->private_data is set to.
>
> I get the sense that the code was written to be a "replacement" for common APIs,
> but that is nonsensical (no pun intended).
>
>> static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
>> {
>> - const char *anon_name = "[kvm-gmem]";
>> struct kvm_gmem *gmem;
>> - struct inode *inode;
>> struct file *file;
>> int fd, err;
>>
>> @@ -481,32 +579,16 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
>> goto err_fd;
>> }
>>
>> - file = anon_inode_create_getfile(anon_name, &kvm_gmem_fops, gmem,
>> - O_RDWR, NULL);
>> + file = kvm_gmem_inode_create_getfile(gmem, size, flags);
>> if (IS_ERR(file)) {
>> err = PTR_ERR(file);
>> goto err_gmem;
>> }
>>
>> - file->f_flags |= O_LARGEFILE;
>> -
>> - inode = file->f_inode;
>> - WARN_ON(file->f_mapping != inode->i_mapping);
>> -
>> - inode->i_private = (void *)(unsigned long)flags;
>> - inode->i_op = &kvm_gmem_iops;
>> - inode->i_mapping->a_ops = &kvm_gmem_aops;
>> - inode->i_mode |= S_IFREG;
>> - inode->i_size = size;
>> - mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
>> - mapping_set_inaccessible(inode->i_mapping);
>> - /* Unmovable mappings are supposed to be marked unevictable as well. */
>> - WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping));
>> -
>> kvm_get_kvm(kvm);
>> gmem->kvm = kvm;
>> xa_init(&gmem->bindings);
>> - list_add(&gmem->entry, &inode->i_mapping->i_private_list);
>> + list_add(&gmem->entry, &file_inode(file)->i_mapping->i_private_list);
>
> I don't understand this change? Isn't file_inode(file) == inode?
>
> Compile tested only, and again not critical, but it's -40 LoC...
>
>
Thanks.
I did functional testing and it works fine.
> ---
> include/uapi/linux/magic.h | 1 +
> virt/kvm/guest_memfd.c | 75 ++++++++++++++++++++++++++++++++------
> virt/kvm/kvm_main.c | 7 +++-
> virt/kvm/kvm_mm.h | 9 +++--
> 4 files changed, 76 insertions(+), 16 deletions(-)
>
> diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
> index bb575f3ab45e..638ca21b7a90 100644
> --- a/include/uapi/linux/magic.h
> +++ b/include/uapi/linux/magic.h
> @@ -103,5 +103,6 @@
> #define DEVMEM_MAGIC 0x454d444d /* "DMEM" */
> #define SECRETMEM_MAGIC 0x5345434d /* "SECM" */
> #define PID_FS_MAGIC 0x50494446 /* "PIDF" */
> +#define GUEST_MEMFD_MAGIC 0x474d454d /* "GMEM" */
>
> #endif /* __LINUX_MAGIC_H__ */
> diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> index 08a6bc7d25b6..73c9791879d5 100644
> --- a/virt/kvm/guest_memfd.c
> +++ b/virt/kvm/guest_memfd.c
> @@ -1,12 +1,16 @@
> // SPDX-License-Identifier: GPL-2.0
> +#include <linux/anon_inodes.h>
> #include <linux/backing-dev.h>
> #include <linux/falloc.h>
> +#include <linux/fs.h>
> #include <linux/kvm_host.h>
> +#include <linux/pseudo_fs.h>
> #include <linux/pagemap.h>
> -#include <linux/anon_inodes.h>
>
> #include "kvm_mm.h"
>
> +static struct vfsmount *kvm_gmem_mnt;
> +
> struct kvm_gmem {
> struct kvm *kvm;
> struct xarray bindings;
> @@ -385,9 +389,45 @@ static struct file_operations kvm_gmem_fops = {
> .fallocate = kvm_gmem_fallocate,
> };
>
> -void kvm_gmem_init(struct module *module)
> +static int kvm_gmem_init_fs_context(struct fs_context *fc)
> +{
> + if (!init_pseudo(fc, GUEST_MEMFD_MAGIC))
> + return -ENOMEM;
> +
> + fc->s_iflags |= SB_I_NOEXEC;
> + fc->s_iflags |= SB_I_NODEV;
> +
> + return 0;
> +}
> +
> +static struct file_system_type kvm_gmem_fs = {
> + .name = "guest_memfd",
> + .init_fs_context = kvm_gmem_init_fs_context,
> + .kill_sb = kill_anon_super,
> +};
> +
> +static int kvm_gmem_init_mount(void)
> +{
> + kvm_gmem_mnt = kern_mount(&kvm_gmem_fs);
> +
> + if (IS_ERR(kvm_gmem_mnt))
> + return PTR_ERR(kvm_gmem_mnt);
> +
> + kvm_gmem_mnt->mnt_flags |= MNT_NOEXEC;
> + return 0;
> +}
> +
> +int kvm_gmem_init(struct module *module)
> {
> kvm_gmem_fops.owner = module;
> +
> + return kvm_gmem_init_mount();
> +}
> +
> +void kvm_gmem_exit(void)
> +{
> + kern_unmount(kvm_gmem_mnt);
> + kvm_gmem_mnt = NULL;
> }
>
> static int kvm_gmem_migrate_folio(struct address_space *mapping,
> @@ -465,7 +505,7 @@ bool __weak kvm_arch_supports_gmem_mmap(struct kvm *kvm)
>
> static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
> {
> - const char *anon_name = "[kvm-gmem]";
> + static const char *name = "[kvm-gmem]";
> struct kvm_gmem *gmem;
> struct inode *inode;
> struct file *file;
> @@ -481,17 +521,17 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
> goto err_fd;
> }
>
> - file = anon_inode_create_getfile(anon_name, &kvm_gmem_fops, gmem,
> - O_RDWR, NULL);
> - if (IS_ERR(file)) {
> - err = PTR_ERR(file);
> + /* __fput() will take care of fops_put(). */
> + if (!fops_get(&kvm_gmem_fops)) {
> + err = -ENOENT;
> goto err_gmem;
> }
>
> - file->f_flags |= O_LARGEFILE;
> -
> - inode = file->f_inode;
> - WARN_ON(file->f_mapping != inode->i_mapping);
> + inode = anon_inode_make_secure_inode(kvm_gmem_mnt->mnt_sb, name, NULL);
> + if (IS_ERR(inode)) {
> + err = PTR_ERR(inode);
> + goto err_fops;
> + }
>
> inode->i_private = (void *)(unsigned long)flags;
> inode->i_op = &kvm_gmem_iops;
> @@ -503,6 +543,15 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
> /* Unmovable mappings are supposed to be marked unevictable as well. */
> WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping));
>
> + file = alloc_file_pseudo(inode, kvm_gmem_mnt, name, O_RDWR, &kvm_gmem_fops);
> + if (IS_ERR(file)) {
> + err = PTR_ERR(file);
> + goto err_inode;
> + }
> +
> + file->f_flags |= O_LARGEFILE;
> + file->private_data = gmem;
> +
> kvm_get_kvm(kvm);
> gmem->kvm = kvm;
> xa_init(&gmem->bindings);
> @@ -511,6 +560,10 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
> fd_install(fd, file);
> return fd;
>
> +err_inode:
> + iput(inode);
> +err_fops:
> + fops_put(&kvm_gmem_fops);
> err_gmem:
> kfree(gmem);
> err_fd:
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 18f29ef93543..301d48d6e00d 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -6489,7 +6489,9 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
> if (WARN_ON_ONCE(r))
> goto err_vfio;
>
> - kvm_gmem_init(module);
> + r = kvm_gmem_init(module);
> + if (r)
> + goto err_gmem;
>
> r = kvm_init_virtualization();
> if (r)
> @@ -6510,6 +6512,8 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
> err_register:
> kvm_uninit_virtualization();
> err_virt:
> + kvm_gmem_exit();
> +err_gmem:
> kvm_vfio_ops_exit();
> err_vfio:
> kvm_async_pf_deinit();
> @@ -6541,6 +6545,7 @@ void kvm_exit(void)
> for_each_possible_cpu(cpu)
> free_cpumask_var(per_cpu(cpu_kick_mask, cpu));
> kmem_cache_destroy(kvm_vcpu_cache);
> + kvm_gmem_exit();
> kvm_vfio_ops_exit();
> kvm_async_pf_deinit();
> kvm_irqfd_exit();
> diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h
> index 31defb08ccba..9fcc5d5b7f8d 100644
> --- a/virt/kvm/kvm_mm.h
> +++ b/virt/kvm/kvm_mm.h
> @@ -68,17 +68,18 @@ static inline void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
> #endif /* HAVE_KVM_PFNCACHE */
>
> #ifdef CONFIG_KVM_GUEST_MEMFD
> -void kvm_gmem_init(struct module *module);
> +int kvm_gmem_init(struct module *module);
> +void kvm_gmem_exit(void);
> int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args);
> int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot,
> unsigned int fd, loff_t offset);
> void kvm_gmem_unbind(struct kvm_memory_slot *slot);
> #else
> -static inline void kvm_gmem_init(struct module *module)
> +static inline int kvm_gmem_init(struct module *module)
> {
> -
> + return 0;
> }
> -
> +static inline void kvm_gmem_exit(void) {};
> static inline int kvm_gmem_bind(struct kvm *kvm,
> struct kvm_memory_slot *slot,
> unsigned int fd, loff_t offset)
>
> base-commit: d133892dddd6607de651b7e32510359a6af97c4c
> --
Powered by blists - more mailing lists