[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <aNVQJqYLX17v-fsf@google.com>
Date: Thu, 25 Sep 2025 07:22:30 -0700
From: Sean Christopherson <seanjc@...gle.com>
To: Shivank Garg <shivankg@....com>
Cc: willy@...radead.org, akpm@...ux-foundation.org, david@...hat.com,
pbonzini@...hat.com, shuah@...nel.org, vbabka@...e.cz, brauner@...nel.org,
viro@...iv.linux.org.uk, dsterba@...e.com, xiang@...nel.org, chao@...nel.org,
jaegeuk@...nel.org, clm@...com, josef@...icpanda.com,
kent.overstreet@...ux.dev, zbestahu@...il.com, jefflexu@...ux.alibaba.com,
dhavale@...gle.com, lihongbo22@...wei.com, lorenzo.stoakes@...cle.com,
Liam.Howlett@...cle.com, rppt@...nel.org, surenb@...gle.com, mhocko@...e.com,
ziy@...dia.com, matthew.brost@...el.com, joshua.hahnjy@...il.com,
rakie.kim@...com, byungchul@...com, gourry@...rry.net,
ying.huang@...ux.alibaba.com, apopple@...dia.com, tabba@...gle.com,
ackerleytng@...gle.com, paul@...l-moore.com, jmorris@...ei.org,
serge@...lyn.com, pvorel@...e.cz, bfoster@...hat.com, vannapurve@...gle.com,
chao.gao@...el.com, bharata@....com, nikunj@....com, michael.day@....com,
shdhiman@....com, yan.y.zhao@...el.com, Neeraj.Upadhyay@....com,
thomas.lendacky@....com, michael.roth@....com, aik@....com, jgg@...dia.com,
kalyazin@...zon.com, peterx@...hat.com, jack@...e.cz, hch@...radead.org,
cgzones@...glemail.com, ira.weiny@...el.com, rientjes@...gle.com,
roypat@...zon.co.uk, chao.p.peng@...el.com, amit@...radead.org,
ddutile@...hat.com, dan.j.williams@...el.com, ashish.kalra@....com,
gshan@...hat.com, jgowans@...zon.com, pankaj.gupta@....com, papaluri@....com,
yuzhao@...gle.com, suzuki.poulose@....com, quic_eberman@...cinc.com,
linux-bcachefs@...r.kernel.org, linux-btrfs@...r.kernel.org,
linux-erofs@...ts.ozlabs.org, linux-f2fs-devel@...ts.sourceforge.net,
linux-fsdevel@...r.kernel.org, linux-mm@...ck.org,
linux-kernel@...r.kernel.org, linux-security-module@...r.kernel.org,
kvm@...r.kernel.org, linux-kselftest@...r.kernel.org,
linux-coco@...ts.linux.dev
Subject: Re: [PATCH kvm-next V11 6/7] KVM: guest_memfd: Enforce NUMA mempolicy
using shared policy
On Wed, Aug 27, 2025, Shivank Garg wrote:
> @@ -26,6 +28,9 @@ static inline struct kvm_gmem_inode_info *KVM_GMEM_I(struct inode *inode)
> return container_of(inode, struct kvm_gmem_inode_info, vfs_inode);
> }
>
> +static struct mempolicy *kvm_gmem_get_pgoff_policy(struct kvm_gmem_inode_info *info,
> + pgoff_t index);
> +
> /**
> * folio_file_pfn - like folio_file_page, but return a pfn.
> * @folio: The folio which contains this index.
> @@ -112,7 +117,25 @@ static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot,
> static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index)
> {
> /* TODO: Support huge pages. */
> - return filemap_grab_folio(inode->i_mapping, index);
> + struct mempolicy *policy;
> + struct folio *folio;
> +
> + /*
> + * Fast-path: See if folio is already present in mapping to avoid
> + * policy_lookup.
> + */
> + folio = __filemap_get_folio(inode->i_mapping, index,
> + FGP_LOCK | FGP_ACCESSED, 0);
> + if (!IS_ERR(folio))
> + return folio;
> +
> + policy = kvm_gmem_get_pgoff_policy(KVM_GMEM_I(inode), index);
> + folio = __filemap_get_folio_mpol(inode->i_mapping, index,
> + FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
> + mapping_gfp_mask(inode->i_mapping), policy);
> + mpol_cond_put(policy);
> +
> + return folio;
> }
>
> static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start,
> @@ -372,8 +395,45 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
> return ret;
> }
>
> +#ifdef CONFIG_NUMA
> +static int kvm_gmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol)
> +{
> + struct inode *inode = file_inode(vma->vm_file);
> +
> + return mpol_set_shared_policy(&KVM_GMEM_I(inode)->policy, vma, mpol);
> +}
> +
> +static struct mempolicy *kvm_gmem_get_policy(struct vm_area_struct *vma,
> + unsigned long addr, pgoff_t *pgoff)
> +{
> + struct inode *inode = file_inode(vma->vm_file);
> +
> + *pgoff = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT);
> + return mpol_shared_policy_lookup(&KVM_GMEM_I(inode)->policy, *pgoff);
> +}
> +
> +static struct mempolicy *kvm_gmem_get_pgoff_policy(struct kvm_gmem_inode_info *info,
> + pgoff_t index)
I keep reading this as "page offset policy", as opposed to "policy given a page
offset". Another oddity that is confusing is that this helper explicitly does
get_task_policy(current), while kvm_gmem_get_policy() lets the caller do that.
The end result is the same, but I think it would be helpful for gmem to be
internally consistent.
If we have kvm_gmem_get_policy() use this helper, then we can kill two birds with
one stone:
static struct mempolicy *__kvm_gmem_get_policy(struct gmem_inode *gi,
pgoff_t index)
{
struct mempolicy *mpol;
mpol = mpol_shared_policy_lookup(&gi->policy, index);
return mpol ? mpol : get_task_policy(current);
}
static struct mempolicy *kvm_gmem_get_policy(struct vm_area_struct *vma,
unsigned long addr, pgoff_t *pgoff)
{
*pgoff = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT);
return __kvm_gmem_get_policy(GMEM_I(file_inode(vma->vm_file)), *pgoff);
}
Powered by blists - more mailing lists