[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <aNVQJqYLX17v-fsf@google.com>
Date: Thu, 25 Sep 2025 07:22:30 -0700
From: Sean Christopherson <seanjc@...gle.com>
To: Shivank Garg <shivankg@....com>
Cc: willy@...radead.org, akpm@...ux-foundation.org, david@...hat.com,
pbonzini@...hat.com, shuah@...nel.org, vbabka@...e.cz, brauner@...nel.org,
viro@...iv.linux.org.uk, dsterba@...e.com, xiang@...nel.org, chao@...nel.org,
jaegeuk@...nel.org, clm@...com, josef@...icpanda.com,
kent.overstreet@...ux.dev, zbestahu@...il.com, jefflexu@...ux.alibaba.com,
dhavale@...gle.com, lihongbo22@...wei.com, lorenzo.stoakes@...cle.com,
Liam.Howlett@...cle.com, rppt@...nel.org, surenb@...gle.com, mhocko@...e.com,
ziy@...dia.com, matthew.brost@...el.com, joshua.hahnjy@...il.com,
rakie.kim@...com, byungchul@...com, gourry@...rry.net,
ying.huang@...ux.alibaba.com, apopple@...dia.com, tabba@...gle.com,
ackerleytng@...gle.com, paul@...l-moore.com, jmorris@...ei.org,
serge@...lyn.com, pvorel@...e.cz, bfoster@...hat.com, vannapurve@...gle.com,
chao.gao@...el.com, bharata@....com, nikunj@....com, michael.day@....com,
shdhiman@....com, yan.y.zhao@...el.com, Neeraj.Upadhyay@....com,
thomas.lendacky@....com, michael.roth@....com, aik@....com, jgg@...dia.com,
kalyazin@...zon.com, peterx@...hat.com, jack@...e.cz, hch@...radead.org,
cgzones@...glemail.com, ira.weiny@...el.com, rientjes@...gle.com,
roypat@...zon.co.uk, chao.p.peng@...el.com, amit@...radead.org,
ddutile@...hat.com, dan.j.williams@...el.com, ashish.kalra@....com,
gshan@...hat.com, jgowans@...zon.com, pankaj.gupta@....com, papaluri@....com,
yuzhao@...gle.com, suzuki.poulose@....com, quic_eberman@...cinc.com,
linux-bcachefs@...r.kernel.org, linux-btrfs@...r.kernel.org,
linux-erofs@...ts.ozlabs.org, linux-f2fs-devel@...ts.sourceforge.net,
linux-fsdevel@...r.kernel.org, linux-mm@...ck.org,
linux-kernel@...r.kernel.org, linux-security-module@...r.kernel.org,
kvm@...r.kernel.org, linux-kselftest@...r.kernel.org,
linux-coco@...ts.linux.dev
Subject: Re: [PATCH kvm-next V11 6/7] KVM: guest_memfd: Enforce NUMA mempolicy
using shared policy
On Wed, Aug 27, 2025, Shivank Garg wrote:
> @@ -26,6 +28,9 @@ static inline struct kvm_gmem_inode_info *KVM_GMEM_I(struct inode *inode)
> return container_of(inode, struct kvm_gmem_inode_info, vfs_inode);
> }
>
> +static struct mempolicy *kvm_gmem_get_pgoff_policy(struct kvm_gmem_inode_info *info,
> + pgoff_t index);
> +
> /**
> * folio_file_pfn - like folio_file_page, but return a pfn.
> * @folio: The folio which contains this index.
> @@ -112,7 +117,25 @@ static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot,
> static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index)
> {
> /* TODO: Support huge pages. */
> - return filemap_grab_folio(inode->i_mapping, index);
> + struct mempolicy *policy;
> + struct folio *folio;
> +
> + /*
> + * Fast-path: See if folio is already present in mapping to avoid
> + * policy_lookup.
> + */
> + folio = __filemap_get_folio(inode->i_mapping, index,
> + FGP_LOCK | FGP_ACCESSED, 0);
> + if (!IS_ERR(folio))
> + return folio;
> +
> + policy = kvm_gmem_get_pgoff_policy(KVM_GMEM_I(inode), index);
> + folio = __filemap_get_folio_mpol(inode->i_mapping, index,
> + FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
> + mapping_gfp_mask(inode->i_mapping), policy);
> + mpol_cond_put(policy);
> +
> + return folio;
> }
>
> static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start,
> @@ -372,8 +395,45 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
> return ret;
> }
>
> +#ifdef CONFIG_NUMA
> +static int kvm_gmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol)
> +{
> + struct inode *inode = file_inode(vma->vm_file);
> +
> + return mpol_set_shared_policy(&KVM_GMEM_I(inode)->policy, vma, mpol);
> +}
> +
> +static struct mempolicy *kvm_gmem_get_policy(struct vm_area_struct *vma,
> + unsigned long addr, pgoff_t *pgoff)
> +{
> + struct inode *inode = file_inode(vma->vm_file);
> +
> + *pgoff = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT);
> + return mpol_shared_policy_lookup(&KVM_GMEM_I(inode)->policy, *pgoff);
> +}
> +
> +static struct mempolicy *kvm_gmem_get_pgoff_policy(struct kvm_gmem_inode_info *info,
> + pgoff_t index)
I keep reading this as "page offset policy", as opposed to "policy given a page
offset". Another oddity that is confusing is that this helper explicitly does
get_task_policy(current), while kvm_gmem_get_policy() lets the caller do that.
The end result is the same, but I think it would be helpful for gmem to be
internally consistent.
If we have kvm_gmem_get_policy() use this helper, then we can kill two birds with
one stone:
static struct mempolicy *__kvm_gmem_get_policy(struct gmem_inode *gi,
pgoff_t index)
{
struct mempolicy *mpol;
mpol = mpol_shared_policy_lookup(&gi->policy, index);
return mpol ? mpol : get_task_policy(current);
}
static struct mempolicy *kvm_gmem_get_policy(struct vm_area_struct *vma,
unsigned long addr, pgoff_t *pgoff)
{
*pgoff = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT);
return __kvm_gmem_get_policy(GMEM_I(file_inode(vma->vm_file)), *pgoff);
}
Powered by blists - more mailing lists