[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <db397198-d079-faa5-691f-c4b06822d2f3@linaro.org>
Date: Tue, 21 Feb 2023 12:28:53 +0000
From: Srinivas Kandagatla <srinivas.kandagatla@...aro.org>
To: Elliot Berman <quic_eberman@...cinc.com>,
Alex Elder <elder@...aro.org>,
Prakruthi Deepak Heragu <quic_pheragu@...cinc.com>
Cc: Murali Nalajala <quic_mnalajal@...cinc.com>,
Trilok Soni <quic_tsoni@...cinc.com>,
Srivatsa Vaddagiri <quic_svaddagi@...cinc.com>,
Carl van Schaik <quic_cvanscha@...cinc.com>,
Dmitry Baryshkov <dmitry.baryshkov@...aro.org>,
Bjorn Andersson <andersson@...nel.org>,
Konrad Dybcio <konrad.dybcio@...aro.org>,
Arnd Bergmann <arnd@...db.de>,
Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
Rob Herring <robh+dt@...nel.org>,
Krzysztof Kozlowski <krzysztof.kozlowski+dt@...aro.org>,
Jonathan Corbet <corbet@....net>,
Bagas Sanjaya <bagasdotme@...il.com>,
Catalin Marinas <catalin.marinas@....com>,
Jassi Brar <jassisinghbrar@...il.com>,
linux-arm-msm@...r.kernel.org, devicetree@...r.kernel.org,
linux-kernel@...r.kernel.org, linux-doc@...r.kernel.org,
linux-arm-kernel@...ts.infradead.org
Subject: Re: [PATCH v10 12/26] gunyah: vm_mgr: Add/remove user memory regions
On 14/02/2023 21:24, Elliot Berman wrote:
>
> When launching a virtual machine, Gunyah userspace allocates memory for
> the guest and informs Gunyah about these memory regions through
> SET_USER_MEMORY_REGION ioctl.
>
> Co-developed-by: Prakruthi Deepak Heragu <quic_pheragu@...cinc.com>
> Signed-off-by: Prakruthi Deepak Heragu <quic_pheragu@...cinc.com>
> Signed-off-by: Elliot Berman <quic_eberman@...cinc.com>
> ---
> drivers/virt/gunyah/Makefile | 2 +-
> drivers/virt/gunyah/vm_mgr.c | 44 ++++++
> drivers/virt/gunyah/vm_mgr.h | 25 ++++
> drivers/virt/gunyah/vm_mgr_mm.c | 235 ++++++++++++++++++++++++++++++++
> include/uapi/linux/gunyah.h | 33 +++++
> 5 files changed, 338 insertions(+), 1 deletion(-)
> create mode 100644 drivers/virt/gunyah/vm_mgr_mm.c
>
> diff --git a/drivers/virt/gunyah/Makefile b/drivers/virt/gunyah/Makefile
> index 03951cf82023..ff8bc4925392 100644
> --- a/drivers/virt/gunyah/Makefile
> +++ b/drivers/virt/gunyah/Makefile
> @@ -2,5 +2,5 @@
>
> obj-$(CONFIG_GUNYAH) += gunyah.o
>
> -gunyah_rsc_mgr-y += rsc_mgr.o rsc_mgr_rpc.o vm_mgr.o
> +gunyah_rsc_mgr-y += rsc_mgr.o rsc_mgr_rpc.o vm_mgr.o vm_mgr_mm.o
> obj-$(CONFIG_GUNYAH) += gunyah_rsc_mgr.o
> diff --git a/drivers/virt/gunyah/vm_mgr.c b/drivers/virt/gunyah/vm_mgr.c
> index fd890a57172e..84102bac03cc 100644
> --- a/drivers/virt/gunyah/vm_mgr.c
> +++ b/drivers/virt/gunyah/vm_mgr.c
> @@ -18,8 +18,16 @@
> static void gh_vm_free(struct work_struct *work)
> {
> struct gh_vm *ghvm = container_of(work, struct gh_vm, free_work);
> + struct gh_vm_mem *mapping, *tmp;
> int ret;
>
> + mutex_lock(&ghvm->mm_lock);
> + list_for_each_entry_safe(mapping, tmp, &ghvm->memory_mappings, list) {
> + gh_vm_mem_reclaim(ghvm, mapping);
> + kfree(mapping);
> + }
> + mutex_unlock(&ghvm->mm_lock);
> +
> ret = gh_rm_dealloc_vmid(ghvm->rm, ghvm->vmid);
> if (ret)
> pr_warn("Failed to deallocate vmid: %d\n", ret);
> @@ -48,11 +56,46 @@ static __must_check struct gh_vm *gh_vm_alloc(struct gh_rm *rm)
> ghvm->vmid = vmid;
> ghvm->rm = rm;
>
> + mutex_init(&ghvm->mm_lock);
> + INIT_LIST_HEAD(&ghvm->memory_mappings);
> INIT_WORK(&ghvm->free_work, gh_vm_free);
>
> return ghvm;
> }
>
> +static long gh_vm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
> +{
> + struct gh_vm *ghvm = filp->private_data;
> + void __user *argp = (void __user *)arg;
> + long r;
> +
> + switch (cmd) {
> + case GH_VM_SET_USER_MEM_REGION: {
> + struct gh_userspace_memory_region region;
> +
> + if (copy_from_user(&region, argp, sizeof(region)))
> + return -EFAULT;
> +
> + /* All other flag bits are reserved for future use */
> + if (region.flags & ~(GH_MEM_ALLOW_READ | GH_MEM_ALLOW_WRITE | GH_MEM_ALLOW_EXEC |
> + GH_MEM_LENT))
> + return -EINVAL;
> +
> +
> + if (region.memory_size)
> + r = gh_vm_mem_alloc(ghvm, &region);
> + else
> + r = gh_vm_mem_free(ghvm, region.label);
It looks like we are repurposing GH_VM_SET_USER_MEM_REGION for both
allocation and freeing.
Should we have a corresponding GH_VM_UN_SET_USER_MEM_REGION for freeing
instead, given that label is the only relevant member of struct
gh_userspace_memory_region in the free case?
> + break;
> + }
> + default:
> + r = -ENOTTY;
> + break;
> + }
> +
> + return r;
> +}
> +
> static int gh_vm_release(struct inode *inode, struct file *filp)
> {
> struct gh_vm *ghvm = filp->private_data;
> @@ -65,6 +108,7 @@ static int gh_vm_release(struct inode *inode, struct file *filp)
> }
>
> static const struct file_operations gh_vm_fops = {
> + .unlocked_ioctl = gh_vm_ioctl,
> .release = gh_vm_release,
> .compat_ioctl = compat_ptr_ioctl,
> .llseek = noop_llseek,
> diff --git a/drivers/virt/gunyah/vm_mgr.h b/drivers/virt/gunyah/vm_mgr.h
> index 76954da706e9..97bc00c34878 100644
> --- a/drivers/virt/gunyah/vm_mgr.h
> +++ b/drivers/virt/gunyah/vm_mgr.h
> @@ -7,16 +7,41 @@
> #define _GH_PRIV_VM_MGR_H
>
> #include <linux/gunyah_rsc_mgr.h>
> +#include <linux/list.h>
> +#include <linux/miscdevice.h>
> +#include <linux/mutex.h>
>
> #include <uapi/linux/gunyah.h>
>
> long gh_dev_vm_mgr_ioctl(struct gh_rm *rm, unsigned int cmd, unsigned long arg);
>
> +enum gh_vm_mem_share_type {
> + VM_MEM_SHARE,
> + VM_MEM_LEND,
> +};
> +
> +struct gh_vm_mem {
> + struct list_head list;
> + enum gh_vm_mem_share_type share_type;
> + struct gh_rm_mem_parcel parcel;
> +
> + __u64 guest_phys_addr;
> + struct page **pages;
> + unsigned long npages;
> +};
> +
> struct gh_vm {
> u16 vmid;
> struct gh_rm *rm;
>
> struct work_struct free_work;
> + struct mutex mm_lock;
> + struct list_head memory_mappings;
> };
>
> +int gh_vm_mem_alloc(struct gh_vm *ghvm, struct gh_userspace_memory_region *region);
> +void gh_vm_mem_reclaim(struct gh_vm *ghvm, struct gh_vm_mem *mapping);
> +int gh_vm_mem_free(struct gh_vm *ghvm, u32 label);
> +struct gh_vm_mem *gh_vm_mem_find(struct gh_vm *ghvm, u32 label);
> +
> #endif
> diff --git a/drivers/virt/gunyah/vm_mgr_mm.c b/drivers/virt/gunyah/vm_mgr_mm.c
> new file mode 100644
> index 000000000000..03e71a36ea3b
> --- /dev/null
> +++ b/drivers/virt/gunyah/vm_mgr_mm.c
> @@ -0,0 +1,235 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
> + */
> +
> +#define pr_fmt(fmt) "gh_vm_mgr: " fmt
> +
> +#include <linux/gunyah_rsc_mgr.h>
> +#include <linux/mm.h>
> +
> +#include <uapi/linux/gunyah.h>
> +
> +#include "vm_mgr.h"
> +
> +static inline bool page_contiguous(phys_addr_t p, phys_addr_t t)
> +{
> + return t - p == PAGE_SIZE;
> +}
> +
> +static struct gh_vm_mem *__gh_vm_mem_find(struct gh_vm *ghvm, u32 label)
> + __must_hold(&ghvm->mm_lock)
> +{
> + struct gh_vm_mem *mapping;
> +
> + list_for_each_entry(mapping, &ghvm->memory_mappings, list)
> + if (mapping->parcel.label == label)
> + return mapping;
> +
> + return NULL;
> +}
> +
> +void gh_vm_mem_reclaim(struct gh_vm *ghvm, struct gh_vm_mem *mapping)
> + __must_hold(&ghvm->mm_lock)
> +{
> + int i, ret = 0;
> +
> + if (mapping->parcel.mem_handle != GH_MEM_HANDLE_INVAL) {
> + ret = gh_rm_mem_reclaim(ghvm->rm, &mapping->parcel);
> + if (ret)
> + pr_warn("Failed to reclaim memory parcel for label %d: %d\n",
> + mapping->parcel.label, ret);
What is the behaviour of the hypervisor if we fail to reclaim the pages?
> + }
> +
> + if (!ret)
So we will leave the user pages pinned if the hypervisor call fails, but
further down we free the mapping altogether.
I am not 100% sure whether this will have any side effects, but is it okay to
leave user pages pinned with no possibility of unpinning them in such cases?
> + for (i = 0; i < mapping->npages; i++)
> + unpin_user_page(mapping->pages[i]);
> +
> + kfree(mapping->pages);
> + kfree(mapping->parcel.acl_entries);
> + kfree(mapping->parcel.mem_entries);
> +
> + list_del(&mapping->list);
> +}
> +
> +struct gh_vm_mem *gh_vm_mem_find(struct gh_vm *ghvm, u32 label)
> +{
> + struct gh_vm_mem *mapping;
> + int ret;
> +
> + ret = mutex_lock_interruptible(&ghvm->mm_lock);
> + if (ret)
> + return ERR_PTR(ret);
new line would be nice here.
> + mapping = __gh_vm_mem_find(ghvm, label);
> + mutex_unlock(&ghvm->mm_lock);
new line would be nice here.
> + return mapping ? : ERR_PTR(-ENODEV);
> +}
> +
> +int gh_vm_mem_alloc(struct gh_vm *ghvm, struct gh_userspace_memory_region *region)
> +{
> + struct gh_vm_mem *mapping, *tmp_mapping;
> + struct gh_rm_mem_entry *mem_entries;
> + phys_addr_t curr_page, prev_page;
> + struct gh_rm_mem_parcel *parcel;
> + int i, j, pinned, ret = 0;
> + size_t entry_size;
> + u16 vmid;
> +
> + if (!gh_api_has_feature(GH_API_FEATURE_MEMEXTENT))
> + return -EOPNOTSUPP;
Should this not be the first thing to do in the ioctl, before even entering
this function?
> +
> + if (!region->memory_size || !PAGE_ALIGNED(region->memory_size) ||
> + !PAGE_ALIGNED(region->userspace_addr) || !PAGE_ALIGNED(region->guest_phys_addr))
> + return -EINVAL;
> +
> + ret = mutex_lock_interruptible(&ghvm->mm_lock);
> + if (ret)
> + return ret;
new line.
> + mapping = __gh_vm_mem_find(ghvm, region->label);
> + if (mapping) {
> + mutex_unlock(&ghvm->mm_lock);
> + return -EEXIST;
> + }
> +
> + mapping = kzalloc(sizeof(*mapping), GFP_KERNEL);
> + if (!mapping) {
> + ret = -ENOMEM;
> + goto free_mapping;
how about,
mutex_unlock(&ghvm->mm_lock);
return -ENOMEM;
> + }
> +
> + mapping->parcel.label = region->label;
> + mapping->guest_phys_addr = region->guest_phys_addr;
> + mapping->npages = region->memory_size >> PAGE_SHIFT;
> + parcel = &mapping->parcel;
> + parcel->mem_handle = GH_MEM_HANDLE_INVAL; /* to be filled later by mem_share/mem_lend */
> + parcel->mem_type = GH_RM_MEM_TYPE_NORMAL;
> +
> + /* Check for overlap */
> + list_for_each_entry(tmp_mapping, &ghvm->memory_mappings, list) {
> + if (!((mapping->guest_phys_addr + (mapping->npages << PAGE_SHIFT) <=
> + tmp_mapping->guest_phys_addr) ||
> + (mapping->guest_phys_addr >=
> + tmp_mapping->guest_phys_addr + (tmp_mapping->npages << PAGE_SHIFT)))) {
> + ret = -EEXIST;
> + goto free_mapping;
> + }
> + }
> +
> + list_add(&mapping->list, &ghvm->memory_mappings);
> +
> + mapping->pages = kcalloc(mapping->npages, sizeof(*mapping->pages), GFP_KERNEL);
> + if (!mapping->pages) {
> + ret = -ENOMEM;
> + mapping->npages = 0; /* update npages for reclaim */
> + goto reclaim;
> + }
> +
> + pinned = pin_user_pages_fast(region->userspace_addr, mapping->npages,
> + FOLL_WRITE | FOLL_LONGTERM, mapping->pages);
> + if (pinned < 0) {
> + ret = pinned;
> + mapping->npages = 0; /* update npages for reclaim */
> + goto reclaim;
> + } else if (pinned != mapping->npages) {
> + ret = -EFAULT;
> + mapping->npages = pinned; /* update npages for reclaim */
> + goto reclaim;
> + }
> +
> + if (region->flags & GH_MEM_LENT) {
> + parcel->n_acl_entries = 1;
> + mapping->share_type = VM_MEM_LEND;
> + } else {
> + parcel->n_acl_entries = 2;
> + mapping->share_type = VM_MEM_SHARE;
> + }
> + parcel->acl_entries = kcalloc(parcel->n_acl_entries, sizeof(*parcel->acl_entries),
> + GFP_KERNEL);
> + if (!parcel->acl_entries) {
> + ret = -ENOMEM;
> + goto reclaim;
> + }
> +
> + parcel->acl_entries[0].vmid = cpu_to_le16(ghvm->vmid);
new line
> + if (region->flags & GH_MEM_ALLOW_READ)
> + parcel->acl_entries[0].perms |= GH_RM_ACL_R;
> + if (region->flags & GH_MEM_ALLOW_WRITE)
> + parcel->acl_entries[0].perms |= GH_RM_ACL_W;
> + if (region->flags & GH_MEM_ALLOW_EXEC)
> + parcel->acl_entries[0].perms |= GH_RM_ACL_X;
> +
> + if (mapping->share_type == VM_MEM_SHARE) {
> + ret = gh_rm_get_vmid(ghvm->rm, &vmid);
> + if (ret)
> + goto reclaim;
> +
> + parcel->acl_entries[1].vmid = cpu_to_le16(vmid);
> + /* Host assumed to have all these permissions. Gunyah will not
> + * grant new permissions if host actually had less than RWX
> + */
> + parcel->acl_entries[1].perms |= GH_RM_ACL_R | GH_RM_ACL_W | GH_RM_ACL_X;
> + }
> +
> + mem_entries = kcalloc(mapping->npages, sizeof(*mem_entries), GFP_KERNEL);
> + if (!mem_entries) {
> + ret = -ENOMEM;
> + goto reclaim;
> + }
> +
> + /* reduce number of entries by combining contiguous pages into single memory entry */
> + prev_page = page_to_phys(mapping->pages[0]);
> + mem_entries[0].ipa_base = cpu_to_le64(prev_page);
> + entry_size = PAGE_SIZE;
new line
> + for (i = 1, j = 0; i < mapping->npages; i++) {
> + curr_page = page_to_phys(mapping->pages[i]);
> + if (page_contiguous(prev_page, curr_page)) {
> + entry_size += PAGE_SIZE;
> + } else {
> + mem_entries[j].size = cpu_to_le64(entry_size);
> + j++;
> + mem_entries[j].ipa_base = cpu_to_le64(curr_page);
> + entry_size = PAGE_SIZE;
> + }
> +
> + prev_page = curr_page;
> + }
> + mem_entries[j].size = cpu_to_le64(entry_size);
> +
> + parcel->n_mem_entries = j + 1;
> + parcel->mem_entries = kmemdup(mem_entries, sizeof(*mem_entries) * parcel->n_mem_entries,
> + GFP_KERNEL);
> + kfree(mem_entries);
> + if (!parcel->mem_entries) {
> + ret = -ENOMEM;
> + goto reclaim;
> + }
> +
> + mutex_unlock(&ghvm->mm_lock);
> + return 0;
> +reclaim:
> + gh_vm_mem_reclaim(ghvm, mapping);
> +free_mapping:
> + kfree(mapping);
> + mutex_unlock(&ghvm->mm_lock);
> + return ret;
> +}
> +
> +int gh_vm_mem_free(struct gh_vm *ghvm, u32 label)
> +{
> + struct gh_vm_mem *mapping;
> + int ret;
> +
> + ret = mutex_lock_interruptible(&ghvm->mm_lock);
> + if (ret)
> + return ret;
> +
> + mapping = __gh_vm_mem_find(ghvm, label);
> + if (!mapping)
> + goto out;
> +
> + gh_vm_mem_reclaim(ghvm, mapping);
> + kfree(mapping);
> +out:
> + mutex_unlock(&ghvm->mm_lock);
> + return ret;
> +}
> diff --git a/include/uapi/linux/gunyah.h b/include/uapi/linux/gunyah.h
> index 10ba32d2b0a6..d85d12119a48 100644
> --- a/include/uapi/linux/gunyah.h
> +++ b/include/uapi/linux/gunyah.h
> @@ -20,4 +20,37 @@
> */
> #define GH_CREATE_VM _IO(GH_IOCTL_TYPE, 0x0) /* Returns a Gunyah VM fd */
>
> +/*
> + * ioctls for VM fds
> + */
> +
> +/**
> + * struct gh_userspace_memory_region - Userspace memory descripion for GH_VM_SET_USER_MEM_REGION
> + * @label: Unique identifer to the region.
> + * @flags: Flags for memory parcel behavior
> + * @guest_phys_addr: Location of the memory region in guest's memory space (page-aligned)#
A note about overlapping regions would be useful here.
> + * @memory_size: Size of the region (page-aligned)
> + * @userspace_addr: Location of the memory region in caller (userspace)'s memory
> + *
> + * See Documentation/virt/gunyah/vm-manager.rst for further details.
> + */
> +struct gh_userspace_memory_region {
> + __u32 label;
> +#define GH_MEM_ALLOW_READ (1UL << 0)
> +#define GH_MEM_ALLOW_WRITE (1UL << 1)
> +#define GH_MEM_ALLOW_EXEC (1UL << 2)
> +/*
> + * The guest will be lent the memory instead of shared.
> + * In other words, the guest has exclusive access to the memory region and the host loses access.
> + */
> +#define GH_MEM_LENT (1UL << 3)
> + __u32 flags;
> + __u64 guest_phys_addr;
> + __u64 memory_size;
> + __u64 userspace_addr;
> +};
> +
> +#define GH_VM_SET_USER_MEM_REGION _IOW(GH_IOCTL_TYPE, 0x1, \
> + struct gh_userspace_memory_region)
> +
> #endif
Powered by blists - more mailing lists