Message-ID: <de1b0bbc-b781-4372-88ad-81f26c9152c2@redhat.com>
Date: Tue, 11 Jun 2024 00:26:40 +0200
From: Paolo Bonzini <pbonzini@...hat.com>
To: Sean Christopherson <seanjc@...gle.com>
Cc: Isaku Yamahata <isaku.yamahata@...el.com>, linux-kernel@...r.kernel.org,
kvm@...r.kernel.org, michael.roth@....com, isaku.yamahata@...ux.intel.com
Subject: Re: [PATCH 09/11] KVM: guest_memfd: Add interface for populating gmem
pages with user data
On 6/10/24 23:48, Paolo Bonzini wrote:
> On Sat, Jun 8, 2024 at 1:03 AM Sean Christopherson <seanjc@...gle.com> wrote:
>> SNP folks and/or Paolo, what's the plan for this? I don't see how what's sitting
>> in kvm/next can possibly be correct without conditioning population on the folio
>> being !uptodate.
>
> I don't think I'll have time to look at it closely until Friday, but
> thanks for reminding me.
Ok, I'm officially confused. I think I understand what you did in your
suggested code. Limited to the bare minimum (keeping the callback
instead of CONFIG_HAVE_KVM_GMEM_INITIALIZE), it would be something
like what I include at the end of this message.
But the discussion upthread was about whether to do the check for
RMP state in sev.c, or do it in common code using folio_mark_uptodate().
I am not sure what you mean by "cannot possibly be correct", and
whether it's referring to kvm_gmem_populate() in general or the
callback in sev_gmem_post_populate().
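For reference, the sev.c side of that alternative would be roughly the
sketch below. This is only meant to illustrate the idea; the
snp_lookup_rmpentry() usage and the error codes are my guesses for the
sake of the example, not something taken from kvm/next:

/*
 * Sketch: refuse to populate a pfn whose RMP entry says it is already
 * assigned to the guest, instead of tracking "already populated" with
 * the folio's uptodate flag in common code.
 */
static int sev_check_rmp_unassigned(kvm_pfn_t pfn)
{
	bool assigned;
	int level, rc;

	rc = snp_lookup_rmpentry(pfn, &assigned, &level);
	if (rc)
		return rc;

	return assigned ? -EEXIST : 0;
}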
The change below looks like just an optimization to me, which
suggests that I'm missing something glaring.
Paolo
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index d4206e53a9c81..a0417ef5b86eb 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -52,37 +52,39 @@ static int kvm_gmem_prepare_folio(struct inode *inode, pgoff_t index, struct fol
static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index, bool prepare)
{
struct folio *folio;
+ int r;
/* TODO: Support huge pages. */
folio = filemap_grab_folio(inode->i_mapping, index);
if (IS_ERR(folio))
return folio;
- /*
- * Use the up-to-date flag to track whether or not the memory has been
- * zeroed before being handed off to the guest. There is no backing
- * storage for the memory, so the folio will remain up-to-date until
- * it's removed.
- *
- * TODO: Skip clearing pages when trusted firmware will do it when
- * assigning memory to the guest.
- */
- if (!folio_test_uptodate(folio)) {
- unsigned long nr_pages = folio_nr_pages(folio);
- unsigned long i;
+ if (prepare) {
+ /*
+ * Use the up-to-date flag to track whether or not the memory has
+ * been handed off to the guest. There is no backing storage for
+ * the memory, so the folio will remain up-to-date until it's
+ * removed.
+ *
+ * Take the occasion of the first prepare operation to clear it.
+ */
+ if (!folio_test_uptodate(folio)) {
+ unsigned long nr_pages = folio_nr_pages(folio);
+ unsigned long i;
- for (i = 0; i < nr_pages; i++)
- clear_highpage(folio_page(folio, i));
+ for (i = 0; i < nr_pages; i++)
+ clear_highpage(folio_page(folio, i));
+ }
+
+ r = kvm_gmem_prepare_folio(inode, index, folio);
+ if (r < 0)
+ goto err_unlock_put;
folio_mark_uptodate(folio);
- }
-
- if (prepare) {
- int r = kvm_gmem_prepare_folio(inode, index, folio);
- if (r < 0) {
- folio_unlock(folio);
- folio_put(folio);
- return ERR_PTR(r);
+ } else {
+ if (folio_test_uptodate(folio)) {
+ r = -EEXIST;
+ goto err_unlock_put;
}
}
@@ -91,6 +93,11 @@ static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index, bool
* unevictable and there is no storage to write back to.
*/
return folio;
+
+err_unlock_put:
+ folio_unlock(folio);
+ folio_put(folio);
+ return ERR_PTR(r);
}
static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start,
@@ -545,8 +552,15 @@ void kvm_gmem_unbind(struct kvm_memory_slot *slot)
fput(file);
}
+/* If p_folio is NULL, the folio is cleared, prepared and marked up-to-date
+ * before returning.
+ *
+ * If p_folio is not NULL, this is left to the caller, who must call
+ * folio_mark_uptodate() once the page is ready for use by the guest.
+ */
static int __kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot,
- gfn_t gfn, kvm_pfn_t *pfn, int *max_order, bool prepare)
+ gfn_t gfn, kvm_pfn_t *pfn, int *max_order,
+ struct folio **p_folio)
{
pgoff_t index = gfn - slot->base_gfn + slot->gmem.pgoff;
struct kvm_gmem *gmem = file->private_data;
@@ -565,7 +579,7 @@ static int __kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot,
return -EIO;
}
- folio = kvm_gmem_get_folio(file_inode(file), index, prepare);
+ folio = kvm_gmem_get_folio(file_inode(file), index, !p_folio);
if (IS_ERR(folio))
return PTR_ERR(folio);
@@ -577,6 +591,8 @@ static int __kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot,
page = folio_file_page(folio, index);
*pfn = page_to_pfn(page);
+ if (p_folio)
+ *p_folio = folio;
if (max_order)
*max_order = 0;
@@ -597,7 +613,7 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
if (!file)
return -EFAULT;
- r = __kvm_gmem_get_pfn(file, slot, gfn, pfn, max_order, true);
+ r = __kvm_gmem_get_pfn(file, slot, gfn, pfn, max_order, NULL);
fput(file);
return r;
}
@@ -629,10 +645,11 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long
npages = min_t(ulong, slot->npages - (start_gfn - slot->base_gfn), npages);
for (i = 0; i < npages; i += (1 << max_order)) {
+ struct folio *folio;
gfn_t gfn = start_gfn + i;
kvm_pfn_t pfn;
- ret = __kvm_gmem_get_pfn(file, slot, gfn, &pfn, &max_order, false);
+ ret = __kvm_gmem_get_pfn(file, slot, gfn, &pfn, &max_order, &folio);
if (ret)
break;
@@ -642,8 +659,10 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long
p = src ? src + i * PAGE_SIZE : NULL;
ret = post_populate(kvm, gfn, pfn, p, max_order, opaque);
+ if (!ret)
+ folio_mark_uptodate(folio);
- put_page(pfn_to_page(pfn));
+ folio_put(folio);
if (ret)
break;
}
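Just to spell out the contract the diff establishes for callers of
kvm_gmem_populate(): the callback runs on a folio that is not yet
uptodate, and only a zero return causes the folio to be marked
uptodate. A hypothetical callback, where my_copy_and_encrypt() is a
made-up stand-in for the vendor-specific work that e.g.
sev_gmem_post_populate() does, would then look like:

/*
 * Hypothetical post_populate callback, only to illustrate the new
 * semantics: the folio backing @pfn is still !uptodate here, and
 * kvm_gmem_populate() marks it uptodate only if we return 0.
 */
static int example_post_populate(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
				 void __user *src, int order, void *opaque)
{
	return my_copy_and_encrypt(kvm, pfn_to_page(pfn), src, opaque);
}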