[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <3d12c060-f794-417f-bcf5-4f549ea00f02@amd.com>
Date: Tue, 16 Dec 2025 04:29:33 +0100
From: "Gupta, Pankaj" <pankaj.gupta@....com>
To: Michael Roth <michael.roth@....com>, kvm@...r.kernel.org
Cc: linux-coco@...ts.linux.dev, linux-mm@...ck.org,
linux-kernel@...r.kernel.org, thomas.lendacky@....com, pbonzini@...hat.com,
seanjc@...gle.com, vbabka@...e.cz, ashish.kalra@....com,
liam.merwick@...cle.com, david@...hat.com, vannapurve@...gle.com,
ackerleytng@...gle.com, aik@....com, ira.weiny@...el.com,
yan.y.zhao@...el.com
Subject: Re: [PATCH v2 2/5] KVM: guest_memfd: Remove preparation tracking
> guest_memfd currently uses the folio uptodate flag to track:
>
> 1) whether or not a page has been cleared before initial usage
> 2) whether or not the architecture hooks have been issued to put the
> page in a private state as defined by the architecture
>
> In practice, 2) is only actually being tracked for SEV-SNP VMs, and
> there do not seem to be any plans/reasons that would suggest this will
> change in the future, so this additional tracking/complexity is not
> really providing any general benefit to guest_memfd users. Future plans
> around in-place conversion and hugepage support will make the burden of
> tracking this information within guest_memfd even more complex. Under
> those plans, the per-folio uptodate flag is to be used purely to track
> the initial clearing of folios, whereas conversion operations could
> trigger multiple transitions between 'prepared' and 'unprepared' and
> would thus need separate tracking. Since preparation generally happens
> at fault time, on the "read-side" of any global locks that might
> protect state tracked by guest_memfd, updating the "preparedness" state
> as part of handling a fault may require more complex locking schemes
> to allow concurrent handling of page faults for multiple vCPUs.
>
> Instead of keeping this current/future complexity within guest_memfd for
> what is essentially just SEV-SNP, just drop the tracking for 2) and have
> the arch-specific preparation hooks get triggered unconditionally on
> every fault so the arch-specific hooks can check the preparation state
> directly and decide whether or not a folio still needs additional
> preparation. In the case of SEV-SNP, the preparation state is already
> checked again via the preparation hooks to avoid double-preparation, so
> nothing extra needs to be done to update the handling of things there.
>
> Signed-off-by: Michael Roth <michael.roth@....com>
Reviewed-by: Pankaj Gupta <pankaj.gupta@....com>
> ---
> virt/kvm/guest_memfd.c | 44 ++++++++++++------------------------------
> 1 file changed, 12 insertions(+), 32 deletions(-)
>
> diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> index 9dafa44838fe..8b1248f42aae 100644
> --- a/virt/kvm/guest_memfd.c
> +++ b/virt/kvm/guest_memfd.c
> @@ -76,11 +76,6 @@ static int __kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slo
> return 0;
> }
>
> -static inline void kvm_gmem_mark_prepared(struct folio *folio)
> -{
> - folio_mark_uptodate(folio);
> -}
> -
> /*
> * Process @folio, which contains @gfn, so that the guest can use it.
> * The folio must be locked and the gfn must be contained in @slot.
> @@ -90,13 +85,7 @@ static inline void kvm_gmem_mark_prepared(struct folio *folio)
> static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot,
> gfn_t gfn, struct folio *folio)
> {
> - unsigned long nr_pages, i;
> pgoff_t index;
> - int r;
> -
> - nr_pages = folio_nr_pages(folio);
> - for (i = 0; i < nr_pages; i++)
> - clear_highpage(folio_page(folio, i));
>
> /*
> * Preparing huge folios should always be safe, since it should
> @@ -114,11 +103,8 @@ static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot,
> WARN_ON(!IS_ALIGNED(slot->gmem.pgoff, folio_nr_pages(folio)));
> index = kvm_gmem_get_index(slot, gfn);
> index = ALIGN_DOWN(index, folio_nr_pages(folio));
> - r = __kvm_gmem_prepare_folio(kvm, slot, index, folio);
> - if (!r)
> - kvm_gmem_mark_prepared(folio);
>
> - return r;
> + return __kvm_gmem_prepare_folio(kvm, slot, index, folio);
> }
>
> /*
> @@ -429,7 +415,7 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
>
> if (!folio_test_uptodate(folio)) {
> clear_highpage(folio_page(folio, 0));
> - kvm_gmem_mark_prepared(folio);
> + folio_mark_uptodate(folio);
> }
>
> vmf->page = folio_file_page(folio, vmf->pgoff);
> @@ -766,7 +752,7 @@ void kvm_gmem_unbind(struct kvm_memory_slot *slot)
> static struct folio *__kvm_gmem_get_pfn(struct file *file,
> struct kvm_memory_slot *slot,
> pgoff_t index, kvm_pfn_t *pfn,
> - bool *is_prepared, int *max_order)
> + int *max_order)
> {
> struct file *slot_file = READ_ONCE(slot->gmem.file);
> struct gmem_file *f = file->private_data;
> @@ -796,7 +782,6 @@ static struct folio *__kvm_gmem_get_pfn(struct file *file,
> if (max_order)
> *max_order = 0;
>
> - *is_prepared = folio_test_uptodate(folio);
> return folio;
> }
>
> @@ -806,19 +791,22 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
> {
> pgoff_t index = kvm_gmem_get_index(slot, gfn);
> struct folio *folio;
> - bool is_prepared = false;
> int r = 0;
>
> CLASS(gmem_get_file, file)(slot);
> if (!file)
> return -EFAULT;
>
> - folio = __kvm_gmem_get_pfn(file, slot, index, pfn, &is_prepared, max_order);
> + folio = __kvm_gmem_get_pfn(file, slot, index, pfn, max_order);
> if (IS_ERR(folio))
> return PTR_ERR(folio);
>
> - if (!is_prepared)
> - r = kvm_gmem_prepare_folio(kvm, slot, gfn, folio);
> + if (!folio_test_uptodate(folio)) {
> + clear_highpage(folio_page(folio, 0));
> + folio_mark_uptodate(folio);
> + }
> +
> + r = kvm_gmem_prepare_folio(kvm, slot, gfn, folio);
>
> folio_unlock(folio);
>
> @@ -861,7 +849,6 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long
> struct folio *folio;
> gfn_t gfn = start_gfn + i;
> pgoff_t index = kvm_gmem_get_index(slot, gfn);
> - bool is_prepared = false;
> kvm_pfn_t pfn;
>
> if (signal_pending(current)) {
> @@ -869,19 +856,12 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long
> break;
> }
>
> - folio = __kvm_gmem_get_pfn(file, slot, index, &pfn, &is_prepared, NULL);
> + folio = __kvm_gmem_get_pfn(file, slot, index, &pfn, NULL);
> if (IS_ERR(folio)) {
> ret = PTR_ERR(folio);
> break;
> }
>
> - if (is_prepared) {
> - folio_unlock(folio);
> - folio_put(folio);
> - ret = -EEXIST;
> - break;
> - }
> -
> folio_unlock(folio);
>
> ret = -EINVAL;
> @@ -893,7 +873,7 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long
> p = src ? src + i * PAGE_SIZE : NULL;
> ret = post_populate(kvm, gfn, pfn, p, opaque);
> if (!ret)
> - kvm_gmem_mark_prepared(folio);
> + folio_mark_uptodate(folio);
>
> put_folio_and_exit:
> folio_put(folio);
Powered by blists - more mailing lists