Message-ID: <b66c38ba-ca16-44c5-b498-7c8eb533d805@linux.intel.com>
Date: Wed, 28 May 2025 11:16:15 +0800
From: Binbin Wu <binbin.wu@...ux.intel.com>
To: Ackerley Tng <ackerleytng@...gle.com>
Cc: kvm@...r.kernel.org, linux-mm@...ck.org, linux-kernel@...r.kernel.org,
x86@...nel.org, linux-fsdevel@...r.kernel.org, aik@....com,
ajones@...tanamicro.com, akpm@...ux-foundation.org, amoorthy@...gle.com,
anthony.yznaga@...cle.com, anup@...infault.org, aou@...s.berkeley.edu,
bfoster@...hat.com, brauner@...nel.org, catalin.marinas@....com,
chao.p.peng@...el.com, chenhuacai@...nel.org, dave.hansen@...el.com,
david@...hat.com, dmatlack@...gle.com, dwmw@...zon.co.uk,
erdemaktas@...gle.com, fan.du@...el.com, fvdl@...gle.com, graf@...zon.com,
haibo1.xu@...el.com, hch@...radead.org, hughd@...gle.com,
ira.weiny@...el.com, isaku.yamahata@...el.com, jack@...e.cz,
james.morse@....com, jarkko@...nel.org, jgg@...pe.ca, jgowans@...zon.com,
jhubbard@...dia.com, jroedel@...e.de, jthoughton@...gle.com,
jun.miao@...el.com, kai.huang@...el.com, keirf@...gle.com,
kent.overstreet@...ux.dev, kirill.shutemov@...el.com,
liam.merwick@...cle.com, maciej.wieczor-retman@...el.com,
mail@...iej.szmigiero.name, maz@...nel.org, mic@...ikod.net,
michael.roth@....com, mpe@...erman.id.au, muchun.song@...ux.dev,
nikunj@....com, nsaenz@...zon.es, oliver.upton@...ux.dev,
palmer@...belt.com, pankaj.gupta@....com, paul.walmsley@...ive.com,
pbonzini@...hat.com, pdurrant@...zon.co.uk, peterx@...hat.com,
pgonda@...gle.com, pvorel@...e.cz, qperret@...gle.com,
quic_cvanscha@...cinc.com, quic_eberman@...cinc.com,
quic_mnalajal@...cinc.com, quic_pderrin@...cinc.com,
quic_pheragu@...cinc.com, quic_svaddagi@...cinc.com, quic_tsoni@...cinc.com,
richard.weiyang@...il.com, rick.p.edgecombe@...el.com, rientjes@...gle.com,
roypat@...zon.co.uk, rppt@...nel.org, seanjc@...gle.com, shuah@...nel.org,
steven.price@....com, steven.sistare@...cle.com, suzuki.poulose@....com,
tabba@...gle.com, thomas.lendacky@....com, usama.arif@...edance.com,
vannapurve@...gle.com, vbabka@...e.cz, viro@...iv.linux.org.uk,
vkuznets@...hat.com, wei.w.wang@...el.com, will@...nel.org,
willy@...radead.org, xiaoyao.li@...el.com, yan.y.zhao@...el.com,
yilun.xu@...el.com, yuzenghui@...wei.com, zhiquan1.li@...el.com
Subject: Re: [RFC PATCH v2 04/51] KVM: guest_memfd: Introduce
KVM_GMEM_CONVERT_SHARED/PRIVATE ioctls
On 5/15/2025 7:41 AM, Ackerley Tng wrote:
[...]
> +
> +static int kvm_gmem_convert_range(struct file *file, pgoff_t start,
> +				  size_t nr_pages, bool shared,
> +				  pgoff_t *error_index)
> +{
> +	struct conversion_work *work, *tmp, *rollback_stop_item;
> +	LIST_HEAD(work_list);
> +	struct inode *inode;
> +	enum shareability m;
> +	int ret;
> +
> +	inode = file_inode(file);
> +
> +	filemap_invalidate_lock(inode->i_mapping);
> +
> +	m = shared ? SHAREABILITY_ALL : SHAREABILITY_GUEST;
> +	ret = kvm_gmem_convert_compute_work(inode, start, nr_pages, m, &work_list);
> +	if (ret || list_empty(&work_list))
> +		goto out;
> +
> +	list_for_each_entry(work, &work_list, list)
> +		kvm_gmem_convert_invalidate_begin(inode, work);
> +
> +	list_for_each_entry(work, &work_list, list) {
> +		ret = kvm_gmem_convert_should_proceed(inode, work, shared,
> +						      error_index);
Since kvm_gmem_invalidate_begin() now also handles shared memory,
kvm_gmem_convert_invalidate_begin() will zap the mappings from the page tables.
So the shared mappings could be zapped by kvm_gmem_convert_invalidate_begin()
even when kvm_gmem_convert_should_proceed() later returns an error.
The sequence is a bit confusing to me, at least in this patch so far.
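
For example, could the kvm_gmem_convert_should_proceed() checks be done before
kvm_gmem_convert_invalidate_begin(), so that nothing is zapped on the error
path? A rough sketch of the reordering (untested, and assuming the check does
not depend on the invalidation having started):

	/* Check that every range can be converted before zapping anything. */
	list_for_each_entry(work, &work_list, list) {
		ret = kvm_gmem_convert_should_proceed(inode, work, shared,
						      error_index);
		if (ret)
			goto out;
	}

	/* Begin invalidation only once the conversion is known to proceed. */
	list_for_each_entry(work, &work_list, list)
		kvm_gmem_convert_invalidate_begin(inode, work);
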
> +		if (ret)
> +			goto invalidate_end;
> +	}
> +
> +	list_for_each_entry(work, &work_list, list) {
> +		rollback_stop_item = work;
> +		ret = kvm_gmem_shareability_apply(inode, work, m);
> +		if (ret)
> +			break;
> +	}
> +
> +	if (ret) {
> +		m = shared ? SHAREABILITY_GUEST : SHAREABILITY_ALL;
> +		list_for_each_entry(work, &work_list, list) {
> +			if (work == rollback_stop_item)
> +				break;
> +
> +			WARN_ON(kvm_gmem_shareability_apply(inode, work, m));
> +		}
> +	}
> +
> +invalidate_end:
> +	list_for_each_entry(work, &work_list, list)
> +		kvm_gmem_convert_invalidate_end(inode, work);
> +out:
> +	filemap_invalidate_unlock(inode->i_mapping);
> +
> +	list_for_each_entry_safe(work, tmp, &work_list, list) {
> +		list_del(&work->list);
> +		kfree(work);
> +	}
> +
> +	return ret;
> +}
> +
[...]
> @@ -186,15 +490,26 @@ static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start,
>  	unsigned long index;
>  
>  	xa_for_each_range(&gmem->bindings, index, slot, start, end - 1) {
> +		enum kvm_gfn_range_filter filter;
>  		pgoff_t pgoff = slot->gmem.pgoff;
>  
> +		filter = KVM_FILTER_PRIVATE;
> +		if (kvm_gmem_memslot_supports_shared(slot)) {
> +			/*
> +			 * Unmapping would also cause invalidation, but cannot
> +			 * rely on mmu_notifiers to do invalidation via
> +			 * unmapping, since memory may not be mapped to
> +			 * userspace.
> +			 */
> +			filter |= KVM_FILTER_SHARED;
> +		}
> +
>  		struct kvm_gfn_range gfn_range = {
>  			.start = slot->base_gfn + max(pgoff, start) - pgoff,
>  			.end = slot->base_gfn + min(pgoff + slot->npages, end) - pgoff,
>  			.slot = slot,
>  			.may_block = true,
> -			/* guest memfd is relevant to only private mappings. */
> -			.attr_filter = KVM_FILTER_PRIVATE,
> +			.attr_filter = filter,
>  		};
>  
>  		if (!found_memslot) {
> @@ -484,11 +799,49 @@ EXPORT_SYMBOL_GPL(kvm_gmem_memslot_supports_shared);
>  #define kvm_gmem_mmap NULL
>  #endif /* CONFIG_KVM_GMEM_SHARED_MEM */
>  
[...]