Message-ID: <3349f838-2c73-4ef0-aa30-a21e41fb39e5@amd.com>
Date: Thu, 21 Nov 2024 11:40:59 -0600
From: Mike Day <michael.day@....com>
To: Elliot Berman <quic_eberman@...cinc.com>,
 Paolo Bonzini <pbonzini@...hat.com>,
 Andrew Morton <akpm@...ux-foundation.org>,
 Sean Christopherson <seanjc@...gle.com>, Fuad Tabba <tabba@...gle.com>,
 Ackerley Tng <ackerleytng@...gle.com>, Mike Rapoport <rppt@...nel.org>,
 David Hildenbrand <david@...hat.com>, "H. Peter Anvin" <hpa@...or.com>,
 "Matthew Wilcox (Oracle)" <willy@...radead.org>,
 Jonathan Corbet <corbet@....net>, Trond Myklebust <trondmy@...nel.org>,
 Anna Schumaker <anna@...nel.org>, Mike Marshall <hubcap@...ibond.com>,
 Martin Brandenburg <martin@...ibond.com>,
 Alexander Viro <viro@...iv.linux.org.uk>,
 Christian Brauner <brauner@...nel.org>, Jan Kara <jack@...e.cz>
Cc: James Gowans <jgowans@...zon.com>, linux-fsdevel@...r.kernel.org,
 kvm@...r.kernel.org, linux-coco@...ts.linux.dev,
 linux-arm-msm@...r.kernel.org, linux-kernel@...r.kernel.org,
 linux-mm@...ck.org, linux-doc@...r.kernel.org, linux-nfs@...r.kernel.org,
 devel@...ts.orangefs.org, linux-arm-kernel@...ts.infradead.org
Subject: Re: [PATCH v4 2/2] mm: guestmem: Convert address_space operations to
 guestmem library



On 11/21/24 10:43, Elliot Berman wrote:
> On Wed, Nov 20, 2024 at 10:12:08AM -0800, Elliot Berman wrote:
>> diff --git a/mm/guestmem.c b/mm/guestmem.c
>> new file mode 100644
>> index 0000000000000000000000000000000000000000..19dd7e5d498f07577ec5cec5b52055f7435980f4
>> --- /dev/null
>> +++ b/mm/guestmem.c
>> @@ -0,0 +1,196 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +/*
>> + * guestmem library
>> + *
>> + * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
>> + */
>> +
>> +#include <linux/fs.h>
>> +#include <linux/guestmem.h>
>> +#include <linux/mm.h>
>> +#include <linux/pagemap.h>
>> +
>> +struct guestmem {
>> +	const struct guestmem_ops *ops;
>> +};
>> +
>> +static inline struct guestmem *folio_to_guestmem(struct folio *folio)
>> +{
>> +	struct address_space *mapping = folio->mapping;
>> +
>> +	return mapping->i_private_data;
>> +}
>> +
>> +static inline bool __guestmem_release_folio(struct address_space *mapping,
>> +					    struct folio *folio)
>> +{
>> +	struct guestmem *gmem = mapping->i_private_data;
>> +	struct list_head *entry;
>> +
>> +	if (gmem->ops->release_folio) {
>> +		list_for_each(entry, &mapping->i_private_list) {
>> +			if (!gmem->ops->release_folio(entry, folio))
>> +				return false;
>> +		}
>> +	}
>> +
>> +	return true;
>> +}
>> +
>> +static inline int
>> +__guestmem_invalidate_begin(struct address_space *const mapping, pgoff_t start,
>> +			    pgoff_t end)
>> +{
>> +	struct guestmem *gmem = mapping->i_private_data;
>> +	struct list_head *entry;
>> +	int ret = 0;
>> +
>> +	list_for_each(entry, &mapping->i_private_list) {
>> +		ret = gmem->ops->invalidate_begin(entry, start, end);
>> +		if (ret)
>> +			return ret;
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>> +static inline void
>> +__guestmem_invalidate_end(struct address_space *const mapping, pgoff_t start,
>> +			  pgoff_t end)
>> +{
>> +	struct guestmem *gmem = mapping->i_private_data;
>> +	struct list_head *entry;
>> +
>> +	if (gmem->ops->invalidate_end) {
>> +		list_for_each(entry, &mapping->i_private_list)
>> +			gmem->ops->invalidate_end(entry, start, end);
>> +	}
>> +}
>> +
>> +static void guestmem_free_folio(struct address_space *mapping,
>> +				struct folio *folio)
>> +{
>> +	WARN_ON_ONCE(!__guestmem_release_folio(mapping, folio));
>> +}
>> +
>> +static int guestmem_error_folio(struct address_space *mapping,
>> +				struct folio *folio)
>> +{
>> +	pgoff_t start, end;
>> +	int ret;
>> +
>> +	filemap_invalidate_lock_shared(mapping);
>> +
>> +	start = folio->index;
>> +	end = start + folio_nr_pages(folio);
>> +
>> +	ret = __guestmem_invalidate_begin(mapping, start, end);
>> +	if (ret)
>> +		goto out;
>> +
>> +	/*
>> +	 * Do not truncate the range, what action is taken in response to the
>> +	 * error is userspace's decision (assuming the architecture supports
>> +	 * gracefully handling memory errors).  If/when the guest attempts to
>> +	 * access a poisoned page, kvm_gmem_get_pfn() will return -EHWPOISON,
>> +	 * at which point KVM can either terminate the VM or propagate the
>> +	 * error to userspace.
>> +	 */
>> +
>> +	__guestmem_invalidate_end(mapping, start, end);
>> +
>> +out:
>> +	filemap_invalidate_unlock_shared(mapping);
>> +	return ret ? MF_DELAYED : MF_FAILED;
>> +}
>> +
>> +static int guestmem_migrate_folio(struct address_space *mapping,
>> +				  struct folio *dst, struct folio *src,
>> +				  enum migrate_mode mode)
>> +{
>> +	WARN_ON_ONCE(1);
>> +	return -EINVAL;
>> +}
>> +
>> +static const struct address_space_operations guestmem_aops = {
>> +	.dirty_folio = noop_dirty_folio,
>> +	.free_folio = guestmem_free_folio,
>> +	.error_remove_folio = guestmem_error_folio,
>> +	.migrate_folio = guestmem_migrate_folio,
>> +};
>> +
>> +int guestmem_attach_mapping(struct address_space *mapping,
>> +			    const struct guestmem_ops *const ops,
>> +			    struct list_head *data)
>> +{
>> +	struct guestmem *gmem;
>> +
>> +	if (mapping->a_ops == &guestmem_aops) {
>> +		gmem = mapping->i_private_data;
>> +		if (gmem->ops != ops)
>> +			return -EINVAL;
>> +
>> +		goto add;
>> +	}
>> +
>> +	gmem = kzalloc(sizeof(*gmem), GFP_KERNEL);
>> +	if (!gmem)
>> +		return -ENOMEM;
>> +
>> +	gmem->ops = ops;
>> +
>> +	mapping->a_ops = &guestmem_aops;
>> +	mapping->i_private_data = gmem;
>> +
>> +	mapping_set_gfp_mask(mapping, GFP_HIGHUSER);
>> +	mapping_set_inaccessible(mapping);
>> +	/* Unmovable mappings are supposed to be marked unevictable as well. */
>> +	WARN_ON_ONCE(!mapping_unevictable(mapping));
>> +
>> +add:
>> +	list_add(data, &mapping->i_private_list);
>> +	return 0;
>> +}
>> +EXPORT_SYMBOL_GPL(guestmem_attach_mapping);
>> +
>> +void guestmem_detach_mapping(struct address_space *mapping,
>> +			     struct list_head *data)
>> +{
>> +	list_del(data);
>> +
>> +	if (list_empty(&mapping->i_private_list)) {
>> +		kfree(mapping->i_private_data);
> 
> Mike was helping me test this out for SEV-SNP and helped find a bug
> here. Right now, when the file closes, KVM calls
> guestmem_detach_mapping(), which uninstalls the ops. At that point,
> not all of the folios have necessarily been removed from the filemap
> yet, so our free_folio() callback is never invoked for them. This
> means we skip updating the RMP entry back to shared/KVM-owned.
> 
> There are a few approaches I could take:
> 
> 1. Create a guestmem superblock so I can register a guestmem-specific
>     destroy_inode() to do the kfree() above. This requires a lot of
>     boilerplate code, and I don't think it's the preferred approach.
> 2. Update how KVM tracks the memory so it is back in the "shared" state
>     when the file closes. This requires significant rework of the page
>     state tracking compared to current guest_memfd. That rework might
>     be useful for the shared/private state machine.
> 3. Call truncate_inode_pages(mapping, 0) to force pages to be freed
>     here. It might be possible that a page is allocated after this
>     point. For that to be a problem, KVM would need to update the RMP
>     entry to guest-owned, and I don't believe that's possible after
>     the last guestmem_detach_mapping().
> 
> My preference is to go with #3 as it is the easiest thing to do.

#3 is my preference as well. The semantics are that the guest is "closing" the gmem
object, which means all of its memory is being released from the guest; a rough
sketch of that approach follows below.

Mike
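
For illustration, a minimal sketch of approach #3, assuming the truncation is
done in guestmem_detach_mapping() right before the private data is torn down.
The placement and locking here are assumptions for discussion, not part of the
posted patch:

	/*
	 * Rough sketch only: free any folios still in the page cache while
	 * the guestmem a_ops (and thus guestmem_free_folio()) are still
	 * installed, so arch hooks such as the RMP update still run for them.
	 */
	void guestmem_detach_mapping(struct address_space *mapping,
				     struct list_head *data)
	{
		list_del(data);

		if (list_empty(&mapping->i_private_list)) {
			/*
			 * Assumption (per #3 above): no new folio can be
			 * allocated and marked guest-owned past this point.
			 */
			truncate_inode_pages(mapping, 0);

			kfree(mapping->i_private_data);
			mapping->i_private_data = NULL;
			mapping->a_ops = &empty_aops;
		}
	}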
> 
>> +		mapping->i_private_data = NULL;
>> +		mapping->a_ops = &empty_aops;
>> +	}
>> +}
>> +EXPORT_SYMBOL_GPL(guestmem_detach_mapping);
>> +
>> +struct folio *guestmem_grab_folio(struct address_space *mapping, pgoff_t index)
>> +{
>> +	/* TODO: Support huge pages. */
>> +	return filemap_grab_folio(mapping, index);
>> +}
>> +EXPORT_SYMBOL_GPL(guestmem_grab_folio);
>> +
>> +int guestmem_punch_hole(struct address_space *mapping, loff_t offset,
>> +			loff_t len)
>> +{
>> +	pgoff_t start = offset >> PAGE_SHIFT;
>> +	pgoff_t end = (offset + len) >> PAGE_SHIFT;
>> +	int ret;
>> +
>> +	filemap_invalidate_lock(mapping);
>> +	ret = __guestmem_invalidate_begin(mapping, start, end);
>> +	if (ret)
>> +		goto out;
>> +
>> +	truncate_inode_pages_range(mapping, offset, offset + len - 1);
>> +
>> +	__guestmem_invalidate_end(mapping, start, end);
>> +
>> +out:
>> +	filemap_invalidate_unlock(mapping);
>> +	return ret;
>> +}
>> +EXPORT_SYMBOL_GPL(guestmem_punch_hole);
>> diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
>> index fd6a3010afa833e077623065b80bdbb5b1012250..1339098795d2e859b2ee0ef419b29045aedc8487 100644
>> --- a/virt/kvm/Kconfig
>> +++ b/virt/kvm/Kconfig
>> @@ -106,6 +106,7 @@ config KVM_GENERIC_MEMORY_ATTRIBUTES
>>   
>>   config KVM_PRIVATE_MEM
>>          select XARRAY_MULTI
>> +       select GUESTMEM
>>          bool
>>   
>>   config KVM_GENERIC_PRIVATE_MEM
>> diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
>> index 24dcbad0cb76e353509cf4718837a1999f093414..edf57d5662cb8634bbd9ca3118b293c4f7ca229a 100644
>> --- a/virt/kvm/guest_memfd.c
>> +++ b/virt/kvm/guest_memfd.c
>> @@ -1,6 +1,7 @@
>>   // SPDX-License-Identifier: GPL-2.0
>>   #include <linux/backing-dev.h>
>>   #include <linux/falloc.h>
>> +#include <linux/guestmem.h>
>>   #include <linux/kvm_host.h>
>>   #include <linux/pagemap.h>
>>   #include <linux/anon_inodes.h>
>> @@ -98,8 +99,7 @@ static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot,
>>    */
>>   static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index)
>>   {
>> -	/* TODO: Support huge pages. */
>> -	return filemap_grab_folio(inode->i_mapping, index);
>> +	return guestmem_grab_folio(inode->i_mapping, index);
>>   }
>>   
>>   static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start,
>> @@ -151,28 +151,7 @@ static void kvm_gmem_invalidate_end(struct kvm_gmem *gmem, pgoff_t start,
>>   
>>   static long kvm_gmem_punch_hole(struct inode *inode, loff_t offset, loff_t len)
>>   {
>> -	struct list_head *gmem_list = &inode->i_mapping->i_private_list;
>> -	pgoff_t start = offset >> PAGE_SHIFT;
>> -	pgoff_t end = (offset + len) >> PAGE_SHIFT;
>> -	struct kvm_gmem *gmem;
>> -
>> -	/*
>> -	 * Bindings must be stable across invalidation to ensure the start+end
>> -	 * are balanced.
>> -	 */
>> -	filemap_invalidate_lock(inode->i_mapping);
>> -
>> -	list_for_each_entry(gmem, gmem_list, entry)
>> -		kvm_gmem_invalidate_begin(gmem, start, end);
>> -
>> -	truncate_inode_pages_range(inode->i_mapping, offset, offset + len - 1);
>> -
>> -	list_for_each_entry(gmem, gmem_list, entry)
>> -		kvm_gmem_invalidate_end(gmem, start, end);
>> -
>> -	filemap_invalidate_unlock(inode->i_mapping);
>> -
>> -	return 0;
>> +	return guestmem_punch_hole(inode->i_mapping, offset, len);
>>   }
>>   
>>   static long kvm_gmem_allocate(struct inode *inode, loff_t offset, loff_t len)
>> @@ -277,7 +256,7 @@ static int kvm_gmem_release(struct inode *inode, struct file *file)
>>   	kvm_gmem_invalidate_begin(gmem, 0, -1ul);
>>   	kvm_gmem_invalidate_end(gmem, 0, -1ul);
>>   
>> -	list_del(&gmem->entry);
>> +	guestmem_detach_mapping(inode->i_mapping, &gmem->entry);
>>   
>>   	filemap_invalidate_unlock(inode->i_mapping);
>>   
>> @@ -318,63 +297,42 @@ void kvm_gmem_init(struct module *module)
>>   	kvm_gmem_fops.owner = module;
>>   }
>>   
>> -static int kvm_gmem_migrate_folio(struct address_space *mapping,
>> -				  struct folio *dst, struct folio *src,
>> -				  enum migrate_mode mode)
>> +static int kvm_guestmem_invalidate_begin(struct list_head *entry, pgoff_t start,
>> +					 pgoff_t end)
>>   {
>> -	WARN_ON_ONCE(1);
>> -	return -EINVAL;
>> -}
>> +	struct kvm_gmem *gmem = container_of(entry, struct kvm_gmem, entry);
>>   
>> -static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *folio)
>> -{
>> -	struct list_head *gmem_list = &mapping->i_private_list;
>> -	struct kvm_gmem *gmem;
>> -	pgoff_t start, end;
>> -
>> -	filemap_invalidate_lock_shared(mapping);
>> -
>> -	start = folio->index;
>> -	end = start + folio_nr_pages(folio);
>> -
>> -	list_for_each_entry(gmem, gmem_list, entry)
>> -		kvm_gmem_invalidate_begin(gmem, start, end);
>> -
>> -	/*
>> -	 * Do not truncate the range, what action is taken in response to the
>> -	 * error is userspace's decision (assuming the architecture supports
>> -	 * gracefully handling memory errors).  If/when the guest attempts to
>> -	 * access a poisoned page, kvm_gmem_get_pfn() will return -EHWPOISON,
>> -	 * at which point KVM can either terminate the VM or propagate the
>> -	 * error to userspace.
>> -	 */
>> +	kvm_gmem_invalidate_begin(gmem, start, end);
>>   
>> -	list_for_each_entry(gmem, gmem_list, entry)
>> -		kvm_gmem_invalidate_end(gmem, start, end);
>> +	return 0;
>> +}
>>   
>> -	filemap_invalidate_unlock_shared(mapping);
>> +static void kvm_guestmem_invalidate_end(struct list_head *entry, pgoff_t start,
>> +					pgoff_t end)
>> +{
>> +	struct kvm_gmem *gmem = container_of(entry, struct kvm_gmem, entry);
>>   
>> -	return MF_DELAYED;
>> +	kvm_gmem_invalidate_end(gmem, start, end);
>>   }
>>   
>>   #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
>> -static void kvm_gmem_free_folio(struct address_space *mapping,
>> -				struct folio *folio)
>> +static bool kvm_gmem_release_folio(struct list_head *entry, struct folio *folio)
>>   {
>>   	struct page *page = folio_page(folio, 0);
>>   	kvm_pfn_t pfn = page_to_pfn(page);
>>   	int order = folio_order(folio);
>>   
>>   	kvm_arch_gmem_invalidate(pfn, pfn + (1ul << order));
>> +
>> +	return true;
>>   }
>>   #endif
>>   
>> -static const struct address_space_operations kvm_gmem_aops = {
>> -	.dirty_folio = noop_dirty_folio,
>> -	.migrate_folio	= kvm_gmem_migrate_folio,
>> -	.error_remove_folio = kvm_gmem_error_folio,
>> +static const struct guestmem_ops kvm_guestmem_ops = {
>> +	.invalidate_begin = kvm_guestmem_invalidate_begin,
>> +	.invalidate_end = kvm_guestmem_invalidate_end,
>>   #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
>> -	.free_folio = kvm_gmem_free_folio,
>> +	.release_folio = kvm_gmem_release_folio,
>>   #endif
>>   };
>>   
>> @@ -430,22 +388,22 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
>>   
>>   	inode->i_private = (void *)(unsigned long)flags;
>>   	inode->i_op = &kvm_gmem_iops;
>> -	inode->i_mapping->a_ops = &kvm_gmem_aops;
>>   	inode->i_mode |= S_IFREG;
>>   	inode->i_size = size;
>> -	mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
>> -	mapping_set_inaccessible(inode->i_mapping);
>> -	/* Unmovable mappings are supposed to be marked unevictable as well. */
>> -	WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping));
>> +	err = guestmem_attach_mapping(inode->i_mapping, &kvm_guestmem_ops,
>> +				      &gmem->entry);
>> +	if (err)
>> +		goto err_putfile;
>>   
>>   	kvm_get_kvm(kvm);
>>   	gmem->kvm = kvm;
>>   	xa_init(&gmem->bindings);
>> -	list_add(&gmem->entry, &inode->i_mapping->i_private_list);
>>   
>>   	fd_install(fd, file);
>>   	return fd;
>>   
>> +err_putfile:
>> +	fput(file);
>>   err_gmem:
>>   	kfree(gmem);
>>   err_fd:
>>
>> -- 
>> 2.34.1
>>
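
For context, a minimal sketch of how another consumer might wire up the
proposed library, based only on the guestmem_attach_mapping() /
guestmem_detach_mapping() signatures and struct guestmem_ops quoted above.
The my_gmem structure, callback bodies, and helper names are purely
illustrative assumptions, not part of the series:

	#include <linux/container_of.h>
	#include <linux/fs.h>
	#include <linux/guestmem.h>

	/* Hypothetical per-file state for a guestmem consumer. */
	struct my_gmem {
		struct list_head entry;	/* linked onto mapping->i_private_list */
	};

	static int my_invalidate_begin(struct list_head *entry, pgoff_t start,
				       pgoff_t end)
	{
		struct my_gmem *mg = container_of(entry, struct my_gmem, entry);

		/* Unmap/invalidate [start, end) from the guest for this consumer. */
		(void)mg;
		return 0;
	}

	static void my_invalidate_end(struct list_head *entry, pgoff_t start,
				      pgoff_t end)
	{
		/* Undo anything torn down in my_invalidate_begin(), if needed. */
	}

	static const struct guestmem_ops my_guestmem_ops = {
		.invalidate_begin = my_invalidate_begin,
		.invalidate_end   = my_invalidate_end,
	};

	static int my_gmem_attach(struct inode *inode, struct my_gmem *mg)
	{
		return guestmem_attach_mapping(inode->i_mapping, &my_guestmem_ops,
					       &mg->entry);
	}

	static void my_gmem_release(struct inode *inode, struct my_gmem *mg)
	{
		guestmem_detach_mapping(inode->i_mapping, &mg->entry);
	}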
