[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <30ffe039-c9e2-b996-500d-5e11bf6ea789@linux.intel.com>
Date: Wed, 30 Aug 2023 23:12:19 +0800
From: Binbin Wu <binbin.wu@...ux.intel.com>
To: Sean Christopherson <seanjc@...gle.com>
Cc: kvm@...r.kernel.org, linux-arm-kernel@...ts.infradead.org,
kvmarm@...ts.linux.dev, linux-mips@...r.kernel.org,
linuxppc-dev@...ts.ozlabs.org, kvm-riscv@...ts.infradead.org,
linux-riscv@...ts.infradead.org, linux-fsdevel@...r.kernel.org,
linux-mm@...ck.org, linux-security-module@...r.kernel.org,
linux-kernel@...r.kernel.org, Paolo Bonzini <pbonzini@...hat.com>,
Marc Zyngier <maz@...nel.org>,
Oliver Upton <oliver.upton@...ux.dev>,
Huacai Chen <chenhuacai@...nel.org>,
Michael Ellerman <mpe@...erman.id.au>,
Anup Patel <anup@...infault.org>,
Paul Walmsley <paul.walmsley@...ive.com>,
Palmer Dabbelt <palmer@...belt.com>,
Albert Ou <aou@...s.berkeley.edu>,
"Matthew Wilcox (Oracle)" <willy@...radead.org>,
Andrew Morton <akpm@...ux-foundation.org>,
Paul Moore <paul@...l-moore.com>,
James Morris <jmorris@...ei.org>,
"Serge E. Hallyn" <serge@...lyn.com>,
Chao Peng <chao.p.peng@...ux.intel.com>,
Fuad Tabba <tabba@...gle.com>,
Jarkko Sakkinen <jarkko@...nel.org>,
Yu Zhang <yu.c.zhang@...ux.intel.com>,
Vishal Annapurve <vannapurve@...gle.com>,
Ackerley Tng <ackerleytng@...gle.com>,
Maciej Szmigiero <mail@...iej.szmigiero.name>,
Vlastimil Babka <vbabka@...e.cz>,
David Hildenbrand <david@...hat.com>,
Quentin Perret <qperret@...gle.com>,
Michael Roth <michael.roth@....com>,
Wang <wei.w.wang@...el.com>,
Liam Merwick <liam.merwick@...cle.com>,
Isaku Yamahata <isaku.yamahata@...il.com>,
"Kirill A . Shutemov" <kirill.shutemov@...ux.intel.com>
Subject: Re: [RFC PATCH v11 12/29] KVM: Add KVM_CREATE_GUEST_MEMFD ioctl() for
guest-specific backing memory
On 7/19/2023 7:44 AM, Sean Christopherson wrote:
[...]
> +
> +static struct folio *kvm_gmem_get_folio(struct file *file, pgoff_t index)
> +{
> + struct folio *folio;
> +
> + /* TODO: Support huge pages. */
> + folio = filemap_grab_folio(file->f_mapping, index);
> + if (!folio)
Should use if ((IS_ERR(folio)) instead.
> + return NULL;
> +
> + /*
> + * Use the up-to-date flag to track whether or not the memory has been
> + * zeroed before being handed off to the guest. There is no backing
> + * storage for the memory, so the folio will remain up-to-date until
> + * it's removed.
> + *
> + * TODO: Skip clearing pages when trusted firmware will do it when
> + * assigning memory to the guest.
> + */
> + if (!folio_test_uptodate(folio)) {
> + unsigned long nr_pages = folio_nr_pages(folio);
> + unsigned long i;
> +
> + for (i = 0; i < nr_pages; i++)
> + clear_highpage(folio_page(folio, i));
> +
> + folio_mark_uptodate(folio);
> + }
> +
> + /*
> + * Ignore accessed, referenced, and dirty flags. The memory is
> + * unevictable and there is no storage to write back to.
> + */
> + return folio;
> +}
[...]
> +
> +static long kvm_gmem_allocate(struct inode *inode, loff_t offset, loff_t len)
> +{
> + struct address_space *mapping = inode->i_mapping;
> + pgoff_t start, index, end;
> + int r;
> +
> + /* Dedicated guest is immutable by default. */
> + if (offset + len > i_size_read(inode))
> + return -EINVAL;
> +
> + filemap_invalidate_lock_shared(mapping);
> +
> + start = offset >> PAGE_SHIFT;
> + end = (offset + len) >> PAGE_SHIFT;
> +
> + r = 0;
> + for (index = start; index < end; ) {
> + struct folio *folio;
> +
> + if (signal_pending(current)) {
> + r = -EINTR;
> + break;
> + }
> +
> + folio = kvm_gmem_get_folio(inode, index);
> + if (!folio) {
> + r = -ENOMEM;
> + break;
> + }
> +
> + index = folio_next_index(folio);
> +
> + folio_unlock(folio);
> + folio_put(folio);
May be a dumb question, why we get the folio and then put it immediately?
Will it make the folio be released back to the page allocator?
> +
> + /* 64-bit only, wrapping the index should be impossible. */
> + if (WARN_ON_ONCE(!index))
> + break;
> +
> + cond_resched();
> + }
> +
> + filemap_invalidate_unlock_shared(mapping);
> +
> + return r;
> +}
> +
[...]
> +
> +int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot,
> + unsigned int fd, loff_t offset)
> +{
> + loff_t size = slot->npages << PAGE_SHIFT;
> + unsigned long start, end, flags;
> + struct kvm_gmem *gmem;
> + struct inode *inode;
> + struct file *file;
> +
> + BUILD_BUG_ON(sizeof(gfn_t) != sizeof(slot->gmem.pgoff));
> +
> + file = fget(fd);
> + if (!file)
> + return -EINVAL;
> +
> + if (file->f_op != &kvm_gmem_fops)
> + goto err;
> +
> + gmem = file->private_data;
> + if (gmem->kvm != kvm)
> + goto err;
> +
> + inode = file_inode(file);
> + flags = (unsigned long)inode->i_private;
> +
> + /*
> + * For simplicity, require the offset into the file and the size of the
> + * memslot to be aligned to the largest possible page size used to back
> + * the file (same as the size of the file itself).
> + */
> + if (!kvm_gmem_is_valid_size(offset, flags) ||
> + !kvm_gmem_is_valid_size(size, flags))
> + goto err;
> +
> + if (offset + size > i_size_read(inode))
> + goto err;
> +
> + filemap_invalidate_lock(inode->i_mapping);
> +
> + start = offset >> PAGE_SHIFT;
> + end = start + slot->npages;
> +
> + if (!xa_empty(&gmem->bindings) &&
> + xa_find(&gmem->bindings, &start, end - 1, XA_PRESENT)) {
> + filemap_invalidate_unlock(inode->i_mapping);
> + goto err;
> + }
> +
> + /*
> + * No synchronize_rcu() needed, any in-flight readers are guaranteed to
> + * be see either a NULL file or this new file, no need for them to go
> + * away.
> + */
> + rcu_assign_pointer(slot->gmem.file, file);
> + slot->gmem.pgoff = start;
> +
> + xa_store_range(&gmem->bindings, start, end - 1, slot, GFP_KERNEL);
> + filemap_invalidate_unlock(inode->i_mapping);
> +
> + /*
> + * Drop the reference to the file, even on success. The file pins KVM,
> + * not the other way 'round. Active bindings are invalidated if the
an extra ', or maybe around?
> + * file is closed before memslots are destroyed.
> + */
> + fput(file);
> + return 0;
> +
> +err:
> + fput(file);
> + return -EINVAL;
> +}
> +
[...]
> []
Powered by blists - more mailing lists