lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Tue, 19 Jul 2022 11:55:24 +0200
From:   "Gupta, Pankaj" <pankaj.gupta@....com>
To:     Chao Peng <chao.p.peng@...ux.intel.com>, kvm@...r.kernel.org,
        linux-kernel@...r.kernel.org, linux-mm@...ck.org,
        linux-fsdevel@...r.kernel.org, linux-api@...r.kernel.org,
        linux-doc@...r.kernel.org, qemu-devel@...gnu.org,
        linux-kselftest@...r.kernel.org
Cc:     Paolo Bonzini <pbonzini@...hat.com>,
        Jonathan Corbet <corbet@....net>,
        Sean Christopherson <seanjc@...gle.com>,
        Vitaly Kuznetsov <vkuznets@...hat.com>,
        Wanpeng Li <wanpengli@...cent.com>,
        Jim Mattson <jmattson@...gle.com>,
        Joerg Roedel <joro@...tes.org>,
        Thomas Gleixner <tglx@...utronix.de>,
        Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>,
        x86@...nel.org, "H . Peter Anvin" <hpa@...or.com>,
        Hugh Dickins <hughd@...gle.com>,
        Jeff Layton <jlayton@...nel.org>,
        "J . Bruce Fields" <bfields@...ldses.org>,
        Andrew Morton <akpm@...ux-foundation.org>,
        Shuah Khan <shuah@...nel.org>, Mike Rapoport <rppt@...nel.org>,
        Steven Price <steven.price@....com>,
        "Maciej S . Szmigiero" <mail@...iej.szmigiero.name>,
        Vlastimil Babka <vbabka@...e.cz>,
        Vishal Annapurve <vannapurve@...gle.com>,
        Yu Zhang <yu.c.zhang@...ux.intel.com>,
        "Kirill A . Shutemov" <kirill.shutemov@...ux.intel.com>,
        luto@...nel.org, jun.nakajima@...el.com, dave.hansen@...el.com,
        ak@...ux.intel.com, david@...hat.com, aarcange@...hat.com,
        ddutile@...hat.com, dhildenb@...hat.com,
        Quentin Perret <qperret@...gle.com>,
        Michael Roth <michael.roth@....com>, mhocko@...e.com,
        Muchun Song <songmuchun@...edance.com>
Subject: Re: [PATCH v7 13/14] KVM: Enable and expose KVM_MEM_PRIVATE

> Register private memslot to fd-based memory backing store and handle the
> memfile notifiers to zap the existing mappings.
> 
> Currently the register is happened at memslot creating time and the
> initial support does not include page migration/swap.
> 
> KVM_MEM_PRIVATE is not exposed by default, architecture code can turn
> on it by implementing kvm_arch_private_mem_supported().
> 
> A 'kvm' reference is added in memslot structure since in
> memfile_notifier callbacks we can only obtain a memslot reference while
> kvm is need to do the zapping.
> 
> Co-developed-by: Yu Zhang <yu.c.zhang@...ux.intel.com>
> Signed-off-by: Yu Zhang <yu.c.zhang@...ux.intel.com>
> Signed-off-by: Chao Peng <chao.p.peng@...ux.intel.com>
> ---
>   include/linux/kvm_host.h |   1 +
>   virt/kvm/kvm_main.c      | 117 ++++++++++++++++++++++++++++++++++++---
>   2 files changed, 109 insertions(+), 9 deletions(-)
> 
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 8f56426aa1e3..4e5a0db68799 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -584,6 +584,7 @@ struct kvm_memory_slot {
>   	struct file *private_file;
>   	loff_t private_offset;
>   	struct memfile_notifier notifier;
> +	struct kvm *kvm;
>   };
>   
>   static inline bool kvm_slot_can_be_private(const struct kvm_memory_slot *slot)
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index bb714c2a4b06..d6f7e074cab2 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -941,6 +941,63 @@ static int kvm_vm_ioctl_set_encrypted_region(struct kvm *kvm, unsigned int ioctl
>   
>   	return r;
>   }
> +
> +static void kvm_memfile_notifier_invalidate(struct memfile_notifier *notifier,
> +					    pgoff_t start, pgoff_t end)
> +{
> +	struct kvm_memory_slot *slot = container_of(notifier,
> +						    struct kvm_memory_slot,
> +						    notifier);
> +	unsigned long base_pgoff = slot->private_offset >> PAGE_SHIFT;
> +	gfn_t start_gfn = slot->base_gfn;
> +	gfn_t end_gfn = slot->base_gfn + slot->npages;
> +
> +
> +	if (start > base_pgoff)
> +		start_gfn = slot->base_gfn + start - base_pgoff;
> +
> +	if (end < base_pgoff + slot->npages)
> +		end_gfn = slot->base_gfn + end - base_pgoff;
> +
> +	if (start_gfn >= end_gfn)
> +		return;
> +
> +	kvm_zap_gfn_range(slot->kvm, start_gfn, end_gfn);
> +}
> +
> +static struct memfile_notifier_ops kvm_memfile_notifier_ops = {
> +	.invalidate = kvm_memfile_notifier_invalidate,
> +};
> +
> +#define KVM_MEMFILE_FLAGS (MEMFILE_F_USER_INACCESSIBLE | \
> +			   MEMFILE_F_UNMOVABLE | \
> +			   MEMFILE_F_UNRECLAIMABLE)
> +
> +static inline int kvm_private_mem_register(struct kvm_memory_slot *slot)
> +{
> +	slot->notifier.ops = &kvm_memfile_notifier_ops;
> +	return memfile_register_notifier(slot->private_file, KVM_MEMFILE_FLAGS,
> +					 &slot->notifier);
> +}
> +
> +static inline void kvm_private_mem_unregister(struct kvm_memory_slot *slot)
> +{
> +	memfile_unregister_notifier(&slot->notifier);
> +}
> +
> +#else /* !CONFIG_HAVE_KVM_PRIVATE_MEM */
> +
> +static inline int kvm_private_mem_register(struct kvm_memory_slot *slot)
> +{
> +	WARN_ON_ONCE(1);
> +	return -EOPNOTSUPP;
> +}
> +
> +static inline void kvm_private_mem_unregister(struct kvm_memory_slot *slot)
> +{
> +	WARN_ON_ONCE(1);
> +}
> +
>   #endif /* CONFIG_HAVE_KVM_PRIVATE_MEM */
>   
>   #ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
> @@ -987,6 +1044,11 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
>   /* This does not remove the slot from struct kvm_memslots data structures */
>   static void kvm_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
>   {
> +	if (slot->flags & KVM_MEM_PRIVATE) {
> +		kvm_private_mem_unregister(slot);
> +		fput(slot->private_file);
> +	}
> +
>   	kvm_destroy_dirty_bitmap(slot);
>   
>   	kvm_arch_free_memslot(kvm, slot);
> @@ -1548,10 +1610,16 @@ bool __weak kvm_arch_private_mem_supported(struct kvm *kvm)
>   	return false;
>   }
>   
> -static int check_memory_region_flags(const struct kvm_user_mem_region *mem)
> +static int check_memory_region_flags(struct kvm *kvm,
> +				     const struct kvm_user_mem_region *mem)
>   {
>   	u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
>   
> +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> +	if (kvm_arch_private_mem_supported(kvm))
> +		valid_flags |= KVM_MEM_PRIVATE;
> +#endif
> +
>   #ifdef __KVM_HAVE_READONLY_MEM
>   	valid_flags |= KVM_MEM_READONLY;
>   #endif
> @@ -1627,6 +1695,12 @@ static int kvm_prepare_memory_region(struct kvm *kvm,
>   {
>   	int r;
>   
> +	if (change == KVM_MR_CREATE && new->flags & KVM_MEM_PRIVATE) {
> +		r = kvm_private_mem_register(new);
> +		if (r)
> +			return r;
> +	}
> +
>   	/*
>   	 * If dirty logging is disabled, nullify the bitmap; the old bitmap
>   	 * will be freed on "commit".  If logging is enabled in both old and
> @@ -1655,6 +1729,9 @@ static int kvm_prepare_memory_region(struct kvm *kvm,
>   	if (r && new && new->dirty_bitmap && (!old || !old->dirty_bitmap))
>   		kvm_destroy_dirty_bitmap(new);
>   
> +	if (r && change == KVM_MR_CREATE && new->flags & KVM_MEM_PRIVATE)
> +		kvm_private_mem_unregister(new);
> +
>   	return r;
>   }
>   
> @@ -1952,7 +2029,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
>   	int as_id, id;
>   	int r;
>   
> -	r = check_memory_region_flags(mem);
> +	r = check_memory_region_flags(kvm, mem);
>   	if (r)
>   		return r;
>   
> @@ -1971,6 +2048,10 @@ int __kvm_set_memory_region(struct kvm *kvm,
>   	     !access_ok((void __user *)(unsigned long)mem->userspace_addr,
>   			mem->memory_size))
>   		return -EINVAL;
> +	if (mem->flags & KVM_MEM_PRIVATE &&
> +		(mem->private_offset & (PAGE_SIZE - 1) ||
> +		 mem->private_offset > U64_MAX - mem->memory_size))
> +		return -EINVAL;
>   	if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_MEM_SLOTS_NUM)
>   		return -EINVAL;
>   	if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
> @@ -2009,6 +2090,9 @@ int __kvm_set_memory_region(struct kvm *kvm,
>   		if ((kvm->nr_memslot_pages + npages) < kvm->nr_memslot_pages)
>   			return -EINVAL;
>   	} else { /* Modify an existing slot. */
> +		/* Private memslots are immutable, they can only be deleted. */
> +		if (mem->flags & KVM_MEM_PRIVATE)
> +			return -EINVAL;
>   		if ((mem->userspace_addr != old->userspace_addr) ||
>   		    (npages != old->npages) ||
>   		    ((mem->flags ^ old->flags) & KVM_MEM_READONLY))
> @@ -2037,10 +2121,27 @@ int __kvm_set_memory_region(struct kvm *kvm,
>   	new->npages = npages;
>   	new->flags = mem->flags;
>   	new->userspace_addr = mem->userspace_addr;
> +	if (mem->flags & KVM_MEM_PRIVATE) {
> +		new->private_file = fget(mem->private_fd);
> +		if (!new->private_file) {
> +			r = -EINVAL;
> +			goto out;
> +		}
> +		new->private_offset = mem->private_offset;
> +	}
> +
> +	new->kvm = kvm;
>   
>   	r = kvm_set_memslot(kvm, old, new, change);
>   	if (r)
> -		kfree(new);
> +		goto out;
> +
> +	return 0;
> +
> +out:
> +	if (new->private_file)
> +		fput(new->private_file);
> +	kfree(new);
>   	return r;
>   }
>   EXPORT_SYMBOL_GPL(__kvm_set_memory_region);
> @@ -4712,12 +4813,10 @@ static long kvm_vm_ioctl(struct file *filp,
>   			(u32 __user *)(argp + offsetof(typeof(mem), flags))))
>   			goto out;
>   
> -		if (flags & KVM_MEM_PRIVATE) {
> -			r = -EINVAL;
> -			goto out;
> -		}
> -
> -		size = sizeof(struct kvm_userspace_memory_region);
> +		if (flags & KVM_MEM_PRIVATE)
> +			size = sizeof(struct kvm_userspace_memory_region_ext);

Not sure if we use kvm_userspace_memory_region_ext or 
kvm_user_mem_region, just for readability.

> +		else
> +			size = sizeof(struct kvm_userspace_memory_region);
>   
>   		if (copy_from_user(&mem, argp, size))
>   			goto out;

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ