lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <dc6eb85f-87b6-43a1-b1f7-4727c0b834cc@amd.com>
Date: Thu, 25 Sep 2025 17:14:15 +0530
From: "Garg, Shivank" <shivankg@....com>
To: Sean Christopherson <seanjc@...gle.com>,
 Ackerley Tng <ackerleytng@...gle.com>
Cc: willy@...radead.org, akpm@...ux-foundation.org, david@...hat.com,
 pbonzini@...hat.com, shuah@...nel.org, vbabka@...e.cz, brauner@...nel.org,
 viro@...iv.linux.org.uk, dsterba@...e.com, xiang@...nel.org,
 chao@...nel.org, jaegeuk@...nel.org, clm@...com, josef@...icpanda.com,
 kent.overstreet@...ux.dev, zbestahu@...il.com, jefflexu@...ux.alibaba.com,
 dhavale@...gle.com, lihongbo22@...wei.com, lorenzo.stoakes@...cle.com,
 Liam.Howlett@...cle.com, rppt@...nel.org, surenb@...gle.com,
 mhocko@...e.com, ziy@...dia.com, matthew.brost@...el.com,
 joshua.hahnjy@...il.com, rakie.kim@...com, byungchul@...com,
 gourry@...rry.net, ying.huang@...ux.alibaba.com, apopple@...dia.com,
 tabba@...gle.com, paul@...l-moore.com, jmorris@...ei.org, serge@...lyn.com,
 pvorel@...e.cz, bfoster@...hat.com, vannapurve@...gle.com,
 chao.gao@...el.com, bharata@....com, nikunj@....com, michael.day@....com,
 shdhiman@....com, yan.y.zhao@...el.com, Neeraj.Upadhyay@....com,
 thomas.lendacky@....com, michael.roth@....com, aik@....com, jgg@...dia.com,
 kalyazin@...zon.com, peterx@...hat.com, jack@...e.cz, hch@...radead.org,
 cgzones@...glemail.com, ira.weiny@...el.com, rientjes@...gle.com,
 roypat@...zon.co.uk, chao.p.peng@...el.com, amit@...radead.org,
 ddutile@...hat.com, dan.j.williams@...el.com, ashish.kalra@....com,
 gshan@...hat.com, jgowans@...zon.com, pankaj.gupta@....com,
 papaluri@....com, yuzhao@...gle.com, suzuki.poulose@....com,
 quic_eberman@...cinc.com, linux-bcachefs@...r.kernel.org,
 linux-btrfs@...r.kernel.org, linux-erofs@...ts.ozlabs.org,
 linux-f2fs-devel@...ts.sourceforge.net, linux-fsdevel@...r.kernel.org,
 linux-mm@...ck.org, linux-kernel@...r.kernel.org,
 linux-security-module@...r.kernel.org, kvm@...r.kernel.org,
 linux-kselftest@...r.kernel.org, linux-coco@...ts.linux.dev
Subject: Re: [PATCH kvm-next V11 4/7] KVM: guest_memfd: Use guest mem inodes
 instead of anonymous inodes



On 9/25/2025 8:20 AM, Sean Christopherson wrote:
> My apologies for the super late feedback.  None of this is critical (mechanical
> things that can be cleaned up after the fact), so if there's any urgency to
> getting this series into 6.18, just ignore it.
> 
> On Wed, Aug 27, 2025, Ackerley Tng wrote:
>> Shivank Garg <shivankg@....com> writes:
>> @@ -463,11 +502,70 @@ bool __weak kvm_arch_supports_gmem_mmap(struct kvm *kvm)
>>  	return true;
>>  }
>>
>> +static struct inode *kvm_gmem_inode_create(const char *name, loff_t size,
>> +					   u64 flags)
>> +{
>> +	struct inode *inode;
>> +
>> +	inode = anon_inode_make_secure_inode(kvm_gmem_mnt->mnt_sb, name, NULL);
>> +	if (IS_ERR(inode))
>> +		return inode;
>> +
>> +	inode->i_private = (void *)(unsigned long)flags;
>> +	inode->i_op = &kvm_gmem_iops;
>> +	inode->i_mapping->a_ops = &kvm_gmem_aops;
>> +	inode->i_mode |= S_IFREG;
>> +	inode->i_size = size;
>> +	mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
>> +	mapping_set_inaccessible(inode->i_mapping);
>> +	/* Unmovable mappings are supposed to be marked unevictable as well. */
>> +	WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping));
>> +
>> +	return inode;
>> +}
>> +
>> +static struct file *kvm_gmem_inode_create_getfile(void *priv, loff_t size,
>> +						  u64 flags)
>> +{
>> +	static const char *name = "[kvm-gmem]";
>> +	struct inode *inode;
>> +	struct file *file;
>> +	int err;
>> +
>> +	err = -ENOENT;
>> +	/* __fput() will take care of fops_put(). */
>> +	if (!fops_get(&kvm_gmem_fops))
>> +		goto err;
>> +
>> +	inode = kvm_gmem_inode_create(name, size, flags);
>> +	if (IS_ERR(inode)) {
>> +		err = PTR_ERR(inode);
>> +		goto err_fops_put;
>> +	}
>> +
>> +	file = alloc_file_pseudo(inode, kvm_gmem_mnt, name, O_RDWR,
>> +				 &kvm_gmem_fops);
>> +	if (IS_ERR(file)) {
>> +		err = PTR_ERR(file);
>> +		goto err_put_inode;
>> +	}
>> +
>> +	file->f_flags |= O_LARGEFILE;
>> +	file->private_data = priv;
>> +
>> +	return file;
>> +
>> +err_put_inode:
>> +	iput(inode);
>> +err_fops_put:
>> +	fops_put(&kvm_gmem_fops);
>> +err:
>> +	return ERR_PTR(err);
>> +}
> 
> I don't see any reason to add two helpers.  It requires quite a bit more lines
> of code due to adding more error paths and local variables, and IMO doesn't make
> the code any easier to read.
> 
> Passing in "gmem" as @priv is especially ridiculous, as it adds code and
> obfuscates what file->private_data is set to.
> 
> I get the sense that the code was written to be a "replacement" for common APIs,
> but that is nonsensical (no pun intended).
> 
>>  static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
>>  {
>> -	const char *anon_name = "[kvm-gmem]";
>>  	struct kvm_gmem *gmem;
>> -	struct inode *inode;
>>  	struct file *file;
>>  	int fd, err;
>>
>> @@ -481,32 +579,16 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
>>  		goto err_fd;
>>  	}
>>
>> -	file = anon_inode_create_getfile(anon_name, &kvm_gmem_fops, gmem,
>> -					 O_RDWR, NULL);
>> +	file = kvm_gmem_inode_create_getfile(gmem, size, flags);
>>  	if (IS_ERR(file)) {
>>  		err = PTR_ERR(file);
>>  		goto err_gmem;
>>  	}
>>
>> -	file->f_flags |= O_LARGEFILE;
>> -
>> -	inode = file->f_inode;
>> -	WARN_ON(file->f_mapping != inode->i_mapping);
>> -
>> -	inode->i_private = (void *)(unsigned long)flags;
>> -	inode->i_op = &kvm_gmem_iops;
>> -	inode->i_mapping->a_ops = &kvm_gmem_aops;
>> -	inode->i_mode |= S_IFREG;
>> -	inode->i_size = size;
>> -	mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
>> -	mapping_set_inaccessible(inode->i_mapping);
>> -	/* Unmovable mappings are supposed to be marked unevictable as well. */
>> -	WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping));
>> -
>>  	kvm_get_kvm(kvm);
>>  	gmem->kvm = kvm;
>>  	xa_init(&gmem->bindings);
>> -	list_add(&gmem->entry, &inode->i_mapping->i_private_list);
>> +	list_add(&gmem->entry, &file_inode(file)->i_mapping->i_private_list);
> 
> I don't understand this change.  Isn't file_inode(file) == inode?
> 
> Compile tested only, and again not critical, but it's -40 LoC...
> 
> 

Thanks.
I ran functional tests with this version of the patch and it works fine.


> ---
>  include/uapi/linux/magic.h |  1 +
>  virt/kvm/guest_memfd.c     | 75 ++++++++++++++++++++++++++++++++------
>  virt/kvm/kvm_main.c        |  7 +++-
>  virt/kvm/kvm_mm.h          |  9 +++--
>  4 files changed, 76 insertions(+), 16 deletions(-)
> 
> diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
> index bb575f3ab45e..638ca21b7a90 100644
> --- a/include/uapi/linux/magic.h
> +++ b/include/uapi/linux/magic.h
> @@ -103,5 +103,6 @@
>  #define DEVMEM_MAGIC		0x454d444d	/* "DMEM" */
>  #define SECRETMEM_MAGIC		0x5345434d	/* "SECM" */
>  #define PID_FS_MAGIC		0x50494446	/* "PIDF" */
> +#define GUEST_MEMFD_MAGIC	0x474d454d	/* "GMEM" */
>  
>  #endif /* __LINUX_MAGIC_H__ */
> diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> index 08a6bc7d25b6..73c9791879d5 100644
> --- a/virt/kvm/guest_memfd.c
> +++ b/virt/kvm/guest_memfd.c
> @@ -1,12 +1,16 @@
>  // SPDX-License-Identifier: GPL-2.0
> +#include <linux/anon_inodes.h>
>  #include <linux/backing-dev.h>
>  #include <linux/falloc.h>
> +#include <linux/fs.h>
>  #include <linux/kvm_host.h>
> +#include <linux/pseudo_fs.h>
>  #include <linux/pagemap.h>
> -#include <linux/anon_inodes.h>
>  
>  #include "kvm_mm.h"
>  
> +static struct vfsmount *kvm_gmem_mnt;
> +
>  struct kvm_gmem {
>  	struct kvm *kvm;
>  	struct xarray bindings;
> @@ -385,9 +389,45 @@ static struct file_operations kvm_gmem_fops = {
>  	.fallocate	= kvm_gmem_fallocate,
>  };
>  
> -void kvm_gmem_init(struct module *module)
> +static int kvm_gmem_init_fs_context(struct fs_context *fc)
> +{
> +	if (!init_pseudo(fc, GUEST_MEMFD_MAGIC))
> +		return -ENOMEM;
> +
> +	fc->s_iflags |= SB_I_NOEXEC;
> +	fc->s_iflags |= SB_I_NODEV;
> +
> +	return 0;
> +}
> +
> +static struct file_system_type kvm_gmem_fs = {
> +	.name		 = "guest_memfd",
> +	.init_fs_context = kvm_gmem_init_fs_context,
> +	.kill_sb	 = kill_anon_super,
> +};
> +
> +static int kvm_gmem_init_mount(void)
> +{
> +	kvm_gmem_mnt = kern_mount(&kvm_gmem_fs);
> +
> +	if (IS_ERR(kvm_gmem_mnt))
> +		return PTR_ERR(kvm_gmem_mnt);
> +
> +	kvm_gmem_mnt->mnt_flags |= MNT_NOEXEC;
> +	return 0;
> +}
> +
> +int kvm_gmem_init(struct module *module)
>  {
>  	kvm_gmem_fops.owner = module;
> +
> +	return kvm_gmem_init_mount();
> +}
> +
> +void kvm_gmem_exit(void)
> +{
> +	kern_unmount(kvm_gmem_mnt);
> +	kvm_gmem_mnt = NULL;
>  }
>  
>  static int kvm_gmem_migrate_folio(struct address_space *mapping,
> @@ -465,7 +505,7 @@ bool __weak kvm_arch_supports_gmem_mmap(struct kvm *kvm)
>  
>  static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
>  {
> -	const char *anon_name = "[kvm-gmem]";
> +	static const char *name = "[kvm-gmem]";
>  	struct kvm_gmem *gmem;
>  	struct inode *inode;
>  	struct file *file;
> @@ -481,17 +521,17 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
>  		goto err_fd;
>  	}
>  
> -	file = anon_inode_create_getfile(anon_name, &kvm_gmem_fops, gmem,
> -					 O_RDWR, NULL);
> -	if (IS_ERR(file)) {
> -		err = PTR_ERR(file);
> +	/* __fput() will take care of fops_put(). */
> +	if (!fops_get(&kvm_gmem_fops)) {
> +		err = -ENOENT;
>  		goto err_gmem;
>  	}
>  
> -	file->f_flags |= O_LARGEFILE;
> -
> -	inode = file->f_inode;
> -	WARN_ON(file->f_mapping != inode->i_mapping);
> +	inode = anon_inode_make_secure_inode(kvm_gmem_mnt->mnt_sb, name, NULL);
> +	if (IS_ERR(inode)) {
> +		err = PTR_ERR(inode);
> +		goto err_fops;
> +	}
>  
>  	inode->i_private = (void *)(unsigned long)flags;
>  	inode->i_op = &kvm_gmem_iops;
> @@ -503,6 +543,15 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
>  	/* Unmovable mappings are supposed to be marked unevictable as well. */
>  	WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping));
>  
> +	file = alloc_file_pseudo(inode, kvm_gmem_mnt, name, O_RDWR, &kvm_gmem_fops);
> +	if (IS_ERR(file)) {
> +		err = PTR_ERR(file);
> +		goto err_inode;
> +	}
> +
> +	file->f_flags |= O_LARGEFILE;
> +	file->private_data = gmem;
> +
>  	kvm_get_kvm(kvm);
>  	gmem->kvm = kvm;
>  	xa_init(&gmem->bindings);
> @@ -511,6 +560,10 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
>  	fd_install(fd, file);
>  	return fd;
>  
> +err_inode:
> +	iput(inode);
> +err_fops:
> +	fops_put(&kvm_gmem_fops);
>  err_gmem:
>  	kfree(gmem);
>  err_fd:
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 18f29ef93543..301d48d6e00d 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -6489,7 +6489,9 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
>  	if (WARN_ON_ONCE(r))
>  		goto err_vfio;
>  
> -	kvm_gmem_init(module);
> +	r = kvm_gmem_init(module);
> +	if (r)
> +		goto err_gmem;
>  
>  	r = kvm_init_virtualization();
>  	if (r)
> @@ -6510,6 +6512,8 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
>  err_register:
>  	kvm_uninit_virtualization();
>  err_virt:
> +	kvm_gmem_exit();
> +err_gmem:
>  	kvm_vfio_ops_exit();
>  err_vfio:
>  	kvm_async_pf_deinit();
> @@ -6541,6 +6545,7 @@ void kvm_exit(void)
>  	for_each_possible_cpu(cpu)
>  		free_cpumask_var(per_cpu(cpu_kick_mask, cpu));
>  	kmem_cache_destroy(kvm_vcpu_cache);
> +	kvm_gmem_exit();
>  	kvm_vfio_ops_exit();
>  	kvm_async_pf_deinit();
>  	kvm_irqfd_exit();
> diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h
> index 31defb08ccba..9fcc5d5b7f8d 100644
> --- a/virt/kvm/kvm_mm.h
> +++ b/virt/kvm/kvm_mm.h
> @@ -68,17 +68,18 @@ static inline void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
>  #endif /* HAVE_KVM_PFNCACHE */
>  
>  #ifdef CONFIG_KVM_GUEST_MEMFD
> -void kvm_gmem_init(struct module *module);
> +int kvm_gmem_init(struct module *module);
> +void kvm_gmem_exit(void);
>  int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args);
>  int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot,
>  		  unsigned int fd, loff_t offset);
>  void kvm_gmem_unbind(struct kvm_memory_slot *slot);
>  #else
> -static inline void kvm_gmem_init(struct module *module)
> +static inline int kvm_gmem_init(struct module *module)
>  {
> -
> +	return 0;
>  }
> -
> +static inline void kvm_gmem_exit(void) {};
>  static inline int kvm_gmem_bind(struct kvm *kvm,
>  					 struct kvm_memory_slot *slot,
>  					 unsigned int fd, loff_t offset)
> 
> base-commit: d133892dddd6607de651b7e32510359a6af97c4c
> --

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ