Message-ID: <d63330fa-de83-85de-c8ec-74cc90d680e3@linux.microsoft.com>
Date: Mon, 8 Mar 2021 11:14:13 -0800
From: Nuno Das Neves <nunodasneves@...ux.microsoft.com>
To: Michael Kelley <mikelley@...rosoft.com>,
"linux-hyperv@...r.kernel.org" <linux-hyperv@...r.kernel.org>
Cc: "virtualization@...ts.linux-foundation.org"
<virtualization@...ts.linux-foundation.org>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
"viremana@...ux.microsoft.com" <viremana@...ux.microsoft.com>,
Sunil Muthuswamy <sunilmut@...rosoft.com>,
"wei.liu@...nel.org" <wei.liu@...nel.org>,
Lillian Grassin-Drake <Lillian.GrassinDrake@...rosoft.com>,
KY Srinivasan <kys@...rosoft.com>
Subject: Re: [RFC PATCH 08/18] virt/mshv: map and unmap guest memory
On 2/8/2021 11:45 AM, Michael Kelley wrote:
> From: Nuno Das Neves <nunodasneves@...ux.microsoft.com> Sent: Friday, November 20, 2020 4:30 PM
>>
>> Introduce ioctls for mapping and unmapping regions of guest memory.
>>
>> Uses a table of memory 'slots' similar to KVM, but the slot
>> number is not visible to userspace.
>>
>> For now, this simple implementation requires each new mapping to be
>> disjoint - the underlying hypercalls have no such restriction, and
>> implicitly overwrite any mappings on the pages in the specified regions.
>>
>> Co-developed-by: Lillian Grassin-Drake <ligrassi@...rosoft.com>
>> Signed-off-by: Lillian Grassin-Drake <ligrassi@...rosoft.com>
>> Signed-off-by: Nuno Das Neves <nunodasneves@...ux.microsoft.com>
>> ---
>> Documentation/virt/mshv/api.rst | 15 ++
>> include/asm-generic/hyperv-tlfs.h | 15 ++
>> include/linux/mshv.h | 14 ++
>> include/uapi/asm-generic/hyperv-tlfs.h | 9 +
>> include/uapi/linux/mshv.h | 15 ++
>> virt/mshv/mshv_main.c | 322 ++++++++++++++++++++++++-
>> 6 files changed, 388 insertions(+), 2 deletions(-)
>>
>> diff --git a/Documentation/virt/mshv/api.rst b/Documentation/virt/mshv/api.rst
>> index ce651a1738e0..530efc29d354 100644
>> --- a/Documentation/virt/mshv/api.rst
>> +++ b/Documentation/virt/mshv/api.rst
>> @@ -72,3 +72,18 @@ it is open - this ioctl can only be called once per open.
>> This ioctl creates a guest partition, returning a file descriptor to use as a
>> handle for partition ioctls.
>>
>> +3.3 MSHV_MAP_GUEST_MEMORY and MSHV_UNMAP_GUEST_MEMORY
>> +-----------------------------------------------------
>> +:Type: partition ioctl
>> +:Parameters: struct mshv_user_mem_region
>> +:Returns: 0 on success
>> +
>> +Create a mapping from a region of process memory to a region of physical memory
>> +in a guest partition.
>
> Just to be super explicit:
>
> Create a mapping from memory in the user space of the calling process (running
> in the root partition) to a region of guest physical memory in a guest partition.
>
Thanks, yes this is clearer.
>> +
>> +Mappings must be disjoint in process address space and guest address space.
>> +
>> +Note: In the current implementation, this memory is pinned to stop the pages
>> +being moved by linux and subsequently clobbered by the hypervisor. So the region
>> +is backed by physical memory.
>
> Again to be super explicit:
>
> Note: In the current implementation, this memory is pinned to real physical
> memory to stop the pages being moved by Linux in the root partition,
> and subsequently being clobbered by the hypervisor. So the region is backed
> by real physical memory.
>
Yep, I'll update this also.
>> +
>> diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
>> index 2a49503b7396..6e5072e29897 100644
>> --- a/include/asm-generic/hyperv-tlfs.h
>> +++ b/include/asm-generic/hyperv-tlfs.h
>> @@ -149,6 +149,8 @@ struct ms_hyperv_tsc_page {
>> #define HVCALL_GET_PARTITION_ID 0x0046
>> #define HVCALL_DEPOSIT_MEMORY 0x0048
>> #define HVCALL_WITHDRAW_MEMORY 0x0049
>> +#define HVCALL_MAP_GPA_PAGES 0x004b
>> +#define HVCALL_UNMAP_GPA_PAGES 0x004c
>> #define HVCALL_CREATE_VP 0x004e
>> #define HVCALL_GET_VP_REGISTERS 0x0050
>> #define HVCALL_SET_VP_REGISTERS 0x0051
>> @@ -827,4 +829,17 @@ struct hv_delete_partition {
>> u64 partition_id;
>> };
>>
>> +struct hv_map_gpa_pages {
>> + u64 target_partition_id;
>> + u64 target_gpa_base;
>> + u32 map_flags;
>
> Is there a reserved 32 bit field here? Hyper-V always aligns
> things on 64 bit boundaries.
>
The hypervisor code uses implicit padding here, and I copied it directly.
Yes, it should be 8-byte aligned. I will insert a padding field and add __packed.
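Something like this (the name of the padding field is just illustrative):

struct hv_map_gpa_pages {
	u64 target_partition_id;
	u64 target_gpa_base;
	u32 map_flags;
	u32 padding;
	u64 source_gpa_page_list[];
} __packed;

with the same padding + __packed treatment for struct hv_unmap_gpa_pages.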
>> + u64 source_gpa_page_list[];
>> +};
>> +
>> +struct hv_unmap_gpa_pages {
>> + u64 target_partition_id;
>> + u64 target_gpa_base;
>> + u32 unmap_flags;
>
> Is there a reserved 32 bit field here? Hyper-V always aligns
> things on 64 bit boundaries.
>
Ditto as above.
>> +};
>
> Add __packed to the above two structs after sorting out
> the alignment issues.
>
Yep.
>> +
>> #endif
>> diff --git a/include/linux/mshv.h b/include/linux/mshv.h
>> index fc4f35089b2c..91a742f37440 100644
>> --- a/include/linux/mshv.h
>> +++ b/include/linux/mshv.h
>> @@ -7,13 +7,27 @@
>> */
>>
>> #include <linux/spinlock.h>
>> +#include <linux/mutex.h>
>> #include <uapi/linux/mshv.h>
>>
>> #define MSHV_MAX_PARTITIONS 128
>> +#define MSHV_MAX_MEM_REGIONS 64
>> +
>> +struct mshv_mem_region {
>> + u64 size; /* bytes */
>> + u64 guest_pfn;
>> + u64 userspace_addr; /* start of the userspace allocated memory */
>> + struct page **pages;
>> +};
>>
>> struct mshv_partition {
>> u64 id;
>> refcount_t ref_count;
>> + struct mutex mutex;
>> + struct {
>> + u32 count;
>> + struct mshv_mem_region slots[MSHV_MAX_MEM_REGIONS];
>> + } regions;
>> };
>>
>> struct mshv {
>> diff --git a/include/uapi/asm-generic/hyperv-tlfs.h b/include/uapi/asm-generic/hyperv-tlfs.h
>> index 7a858226a9c5..e7b09b9f00de 100644
>> --- a/include/uapi/asm-generic/hyperv-tlfs.h
>> +++ b/include/uapi/asm-generic/hyperv-tlfs.h
>> @@ -12,4 +12,13 @@
>> #define HV_PARTITION_CREATION_FLAG_GPA_SUPER_PAGES_ENABLED BIT(4)
>> #define HV_PARTITION_CREATION_FLAG_LAPIC_ENABLED BIT(13)
>>
>> +/* HV Map GPA (Guest Physical Address) Flags */
>> +#define HV_MAP_GPA_PERMISSIONS_NONE 0x0
>> +#define HV_MAP_GPA_READABLE 0x1
>> +#define HV_MAP_GPA_WRITABLE 0x2
>> +#define HV_MAP_GPA_KERNEL_EXECUTABLE 0x4
>> +#define HV_MAP_GPA_USER_EXECUTABLE 0x8
>> +#define HV_MAP_GPA_EXECUTABLE 0xC
>> +#define HV_MAP_GPA_PERMISSIONS_MASK 0xF
>> +
>> #endif
>> diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h
>> index 4f8da9a6fde2..47be03ef4e86 100644
>> --- a/include/uapi/linux/mshv.h
>> +++ b/include/uapi/linux/mshv.h
>> @@ -18,10 +18,25 @@ struct mshv_create_partition {
>> struct hv_partition_creation_properties partition_creation_properties;
>> };
>>
>> +/*
>> + * Mappings can't overlap in GPA space or userspace
>> + * To unmap, these fields must match an existing mapping
>> + */
>> +struct mshv_user_mem_region {
>> + __u64 size; /* bytes */
>> + __u64 guest_pfn;
>> + __u64 userspace_addr; /* start of the userspace allocated memory */
>> + __u32 flags; /* ignored on unmap */
>> +};
>> +
>> #define MSHV_IOCTL 0xB8
>>
>> /* mshv device */
>> #define MSHV_REQUEST_VERSION _IOW(MSHV_IOCTL, 0x00, __u32)
>> #define MSHV_CREATE_PARTITION _IOW(MSHV_IOCTL, 0x01, struct mshv_create_partition)
>>
>> +/* partition device */
>> +#define MSHV_MAP_GUEST_MEMORY _IOW(MSHV_IOCTL, 0x02, struct mshv_user_mem_region)
>> +#define MSHV_UNMAP_GUEST_MEMORY _IOW(MSHV_IOCTL, 0x03, struct mshv_user_mem_region)
>> +
>> #endif
>> diff --git a/virt/mshv/mshv_main.c b/virt/mshv/mshv_main.c
>> index 162a1bb42a4a..ce480598e67f 100644
>> --- a/virt/mshv/mshv_main.c
>> +++ b/virt/mshv/mshv_main.c
>> @@ -60,6 +60,10 @@ static struct miscdevice mshv_dev = {
>>
>> #define HV_WITHDRAW_BATCH_SIZE (PAGE_SIZE / sizeof(u64))
>> #define HV_INIT_PARTITION_DEPOSIT_PAGES 208
>> +#define HV_MAP_GPA_MASK (0x0000000FFFFFFFFFULL)
>> +#define HV_MAP_GPA_BATCH_SIZE \
>> + (PAGE_SIZE / sizeof(struct hv_map_gpa_pages) / sizeof(u64))
>
> Hmmm. Shouldn't this be:
>
> ((HV_HYP_PAGE_SIZE - sizeof(struct hv_map_gpa_pages))/sizeof(u64))
>
>
Yes! Not sure how that happened.
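For the record, the corrected macro should read:

#define HV_MAP_GPA_BATCH_SIZE \
	((HV_HYP_PAGE_SIZE - sizeof(struct hv_map_gpa_pages)) / sizeof(u64))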
>> +#define PIN_PAGES_BATCH_SIZE (0x10000000 / PAGE_SIZE)
>>
>> static int
>> hv_call_withdraw_memory(u64 count, int node, u64 partition_id)
>> @@ -245,16 +249,318 @@ hv_call_delete_partition(u64 partition_id)
>> return -hv_status_to_errno(status);
>> }
>>
>> +static int
>> +hv_call_map_gpa_pages(u64 partition_id,
>> + u64 gpa_target,
>> + u64 page_count, u32 flags,
>> + struct page **pages)
>> +{
>> + struct hv_map_gpa_pages *input_page;
>> + int status;
>> + int i;
>> + struct page **p;
>> + u32 completed = 0;
>> + u64 hypercall_status;
>> + unsigned long remaining = page_count;
>> + int rep_count;
>> + unsigned long irq_flags;
>> + int ret = 0;
>> +
>> + while (remaining) {
>> +
>> + rep_count = min(remaining, HV_MAP_GPA_BATCH_SIZE);
>> +
>> + local_irq_save(irq_flags);
>> + input_page = (struct hv_map_gpa_pages *)(*this_cpu_ptr(
>> + hyperv_pcpu_input_arg));
>> +
>> + input_page->target_partition_id = partition_id;
>> + input_page->target_gpa_base = gpa_target;
>> + input_page->map_flags = flags;
>> +
>> + for (i = 0, p = pages; i < rep_count; i++, p++)
>> + input_page->source_gpa_page_list[i] =
>> + page_to_pfn(*p) & HV_MAP_GPA_MASK;
>
> The masking seems a bit weird. The mask allows for up to 64G page frames,
> which is 256 Tbytes of total physical memory, which is probably the current
> Hyper-V limit on memory size (48 bit physical address space, though 52 bit
> physical address spaces are coming). So the masking shouldn't ever be doing
> anything. And if it was doing something, that probably should be treated as
> an error rather than simply dropping the high bits.
Good point - it looks like the mask isn't needed.
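The loop body would then simply be:

	for (i = 0, p = pages; i < rep_count; i++, p++)
		input_page->source_gpa_page_list[i] = page_to_pfn(*p);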
>
> Note that this code does not handle the case where PAGE_SIZE !=
> HV_HYP_PAGE_SIZE. But maybe we'll never run the root partition with a
> page size other than 4K.
>
For now, on x86 it won't happen, but maybe on ARM?
It shouldn't be hard to support this case, especially since
PAGE_SIZE >= HV_HYP_PAGE_SIZE. Do you think we need it in this patch set?
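If we do end up needing it, a rough sketch (assuming PAGE_SIZE is a multiple
of HV_HYP_PAGE_SIZE, and ignoring batching) could expand each pinned page into
consecutive hypervisor-size PFNs:

	/* Hypothetical: one list entry per HV_HYP_PAGE_SIZE chunk of each page */
	int sub_pages = PAGE_SIZE / HV_HYP_PAGE_SIZE;
	u64 hyp_pfn = page_to_pfn(*p) * sub_pages;

	for (j = 0; j < sub_pages; j++)
		input_page->source_gpa_page_list[i * sub_pages + j] = hyp_pfn + j;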
>> + hypercall_status = hv_do_rep_hypercall(
>> + HVCALL_MAP_GPA_PAGES, rep_count, 0, input_page, NULL);
>> + local_irq_restore(irq_flags);
>> +
>> + status = hypercall_status & HV_HYPERCALL_RESULT_MASK;
>> + completed = (hypercall_status & HV_HYPERCALL_REP_COMP_MASK) >>
>> + HV_HYPERCALL_REP_COMP_OFFSET;
>> +
>> + if (status == HV_STATUS_INSUFFICIENT_MEMORY) {
>> + ret = hv_call_deposit_pages(NUMA_NO_NODE,
>> + partition_id, 256);
>
> Why adding 256 pages? I'm just contrasting with other places that add
> 1 page at a time. Maybe a comment to explain ....
>
Empirically determined. I'll add a #define and comment.
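Something like this, with the name being a placeholder:

/*
 * Number of pages to deposit on HV_STATUS_INSUFFICIENT_MEMORY; determined
 * empirically to avoid excessive retries.
 */
#define HV_MAP_GPA_DEPOSIT_PAGES	256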
>> + if (ret)
>> + break;
>> + } else if (status != HV_STATUS_SUCCESS) {
>> + pr_err("%s: completed %llu out of %llu, %s\n",
>> + __func__,
>> + page_count - remaining, page_count,
>> + hv_status_to_string(status));
>> + ret = -hv_status_to_errno(status);
>> + break;
>> + }
>> +
>> + pages += completed;
>> + remaining -= completed;
>> + gpa_target += completed;
>> + }
>> +
>> + if (ret && completed) {
>
> Is the above the right test? Completed could be zero from the most
> recent iteration, but still could be partially succeeded based on a previous
> successful iteration. I think this needs to check whether remaining equals
> page_count.
>
You're right; I'll change it to (ret && remaining < page_count)
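So the final check will read:

	if (ret && remaining < page_count) {
		pr_err("%s: Partially succeeded; mapped regions may be in invalid state",
		       __func__);
		ret = -EBADFD;
	}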
>> + pr_err("%s: Partially succeeded; mapped regions may be in invalid state",
>> + __func__);
>> + ret = -EBADFD;
>> + }
>> +
>> + return ret;
>> +}
>> +
>> +static int
>> +hv_call_unmap_gpa_pages(u64 partition_id,
>> + u64 gpa_target,
>> + u64 page_count, u32 flags)
>> +{
>> + struct hv_unmap_gpa_pages *input_page;
>> + int status;
>> + int ret = 0;
>> + u32 completed = 0;
>> + u64 hypercall_status;
>> + unsigned long remaining = page_count;
>> + int rep_count;
>> + unsigned long irq_flags;
>> +
>> + local_irq_save(irq_flags);
>> + input_page = (struct hv_unmap_gpa_pages *)(*this_cpu_ptr(
>> + hyperv_pcpu_input_arg));
>> +
>> + input_page->target_partition_id = partition_id;
>> + input_page->target_gpa_base = gpa_target;
>> + input_page->unmap_flags = flags;
>> +
>> + while (remaining) {
>> + rep_count = min(remaining, HV_MAP_GPA_BATCH_SIZE);
>> + hypercall_status = hv_do_rep_hypercall(
>> + HVCALL_UNMAP_GPA_PAGES, rep_count, 0, input_page, NULL);
>
> Similarly, this code doesn't handle PAGE_SIZE != HV_HYP_PAGE_SIZE.
>
As above - do we need this for this patch set? This won't happen on x86.
>> + status = hypercall_status & HV_HYPERCALL_RESULT_MASK;
>> + completed = (hypercall_status & HV_HYPERCALL_REP_COMP_MASK) >>
>> + HV_HYPERCALL_REP_COMP_OFFSET;
>> + if (status != HV_STATUS_SUCCESS) {
>> + pr_err("%s: completed %llu out of %llu, %s\n",
>> + __func__,
>> + page_count - remaining, page_count,
>> + hv_status_to_string(status));
>> + ret = -hv_status_to_errno(status);
>> + break;
>> + }
>> +
>> + remaining -= completed;
>> + gpa_target += completed;
>> + input_page->target_gpa_base = gpa_target;
>> + }
>> + local_irq_restore(irq_flags);
>
> I have some concern about holding interrupts disabled for this long.
>
How about I move the interrupt enabling/disabling inside the loop? i.e.:
	while (remaining) {
		local_irq_save(irq_flags);
		input_page = (struct hv_unmap_gpa_pages *)(*this_cpu_ptr(
			hyperv_pcpu_input_arg));

		input_page->target_partition_id = partition_id;
		input_page->target_gpa_base = gpa_target;
		input_page->unmap_flags = flags;
		rep_count = min(remaining, HV_MAP_GPA_BATCH_SIZE);
		status = hv_do_rep_hypercall(
			HVCALL_UNMAP_GPA_PAGES, rep_count, 0, input_page, NULL);
		local_irq_restore(irq_flags);

		completed = (status & HV_HYPERCALL_REP_COMP_MASK) >>
			HV_HYPERCALL_REP_COMP_OFFSET;
		status &= HV_HYPERCALL_RESULT_MASK;
		if (status != HV_STATUS_SUCCESS) {
			pr_err("%s: completed %llu out of %llu, %s\n",
			       __func__,
			       page_count - remaining, page_count,
			       hv_status_to_string(status));
			ret = -hv_status_to_errno(status);
			break;
		}

		remaining -= completed;
		gpa_target += completed;
	}
>> +
>> + if (ret && completed) {
>
> Same comment as before.
>
Ditto as above.
>> + pr_err("%s: Partially succeeded; mapped regions may be in invalid state",
>> + __func__);
>> + ret = -EBADFD;
>> + }
>> +
>> + return ret;
>> +}
>> +
>> +static long
>> +mshv_partition_ioctl_map_memory(struct mshv_partition *partition,
>> + struct mshv_user_mem_region __user *user_mem)
>> +{
>> + struct mshv_user_mem_region mem;
>> + struct mshv_mem_region *region;
>> + int completed;
>> + unsigned long remaining, batch_size;
>> + int i;
>> + struct page **pages;
>> + u64 page_count, user_start, user_end, gpfn_start, gpfn_end;
>> + u64 region_page_count, region_user_start, region_user_end;
>> + u64 region_gpfn_start, region_gpfn_end;
>> + long ret = 0;
>> +
>> + /* Check we have enough slots*/
>> + if (partition->regions.count == MSHV_MAX_MEM_REGIONS) {
>> + pr_err("%s: not enough memory region slots\n", __func__);
>> + return -ENOSPC;
>> + }
>> +
>> + if (copy_from_user(&mem, user_mem, sizeof(mem)))
>> + return -EFAULT;
>> +
>> + if (!mem.size ||
>> + mem.size & (PAGE_SIZE - 1) ||
>> + mem.userspace_addr & (PAGE_SIZE - 1) ||
>
> There's a PAGE_ALIGNED macro that expresses exactly what
> each of the previous two tests is doing.
>
Since these need to be HV_HYP_PAGE_SIZE aligned, I will add a
HV_HYP_PAGE_ALIGNED macro for this.
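Along the lines of PAGE_ALIGNED in include/linux/mm.h:

#define HV_HYP_PAGE_ALIGNED(addr)	IS_ALIGNED((unsigned long)(addr), HV_HYP_PAGE_SIZE)

so the checks above become !HV_HYP_PAGE_ALIGNED(mem.size) and
!HV_HYP_PAGE_ALIGNED(mem.userspace_addr).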
>> + !access_ok(mem.userspace_addr, mem.size))
>> + return -EINVAL;
>> +
>> + /* Reject overlapping regions */
>> + page_count = mem.size >> PAGE_SHIFT;
>> + user_start = mem.userspace_addr;
>> + user_end = mem.userspace_addr + mem.size;
>> + gpfn_start = mem.guest_pfn;
>> + gpfn_end = mem.guest_pfn + page_count;
>> + for (i = 0; i < MSHV_MAX_MEM_REGIONS; ++i) {
>> + region = &partition->regions.slots[i];
>> + if (!region->size)
>> + continue;
>> + region_page_count = region->size >> PAGE_SHIFT;
>> + region_user_start = region->userspace_addr;
>> + region_user_end = region->userspace_addr + region->size;
>> + region_gpfn_start = region->guest_pfn;
>> + region_gpfn_end = region->guest_pfn + region_page_count;
>> +
>> + if (!(
>> + (user_end <= region_user_start) ||
>> + (region_user_end <= user_start))) {
>> + return -EEXIST;
>> + }
>> + if (!(
>> + (gpfn_end <= region_gpfn_start) ||
>> + (region_gpfn_end <= gpfn_start))) {
>> + return -EEXIST;
>
> You could apply De Morgan's theorem to the conditions
> in each "if" statement and get rid of the "!". That might make
> these slightly easier to understand, but I have no strong
> preference.
>
I agree, I think that would be a bit clearer. I will change it.
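i.e.:

	if (user_end > region_user_start && region_user_end > user_start)
		return -EEXIST;

	if (gpfn_end > region_gpfn_start && region_gpfn_end > gpfn_start)
		return -EEXIST;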
>> + }
>> + }
>> +
>> + /* Pin the userspace pages */
>> + pages = vzalloc(sizeof(struct page *) * page_count);
>> + if (!pages)
>> + return -ENOMEM;
>> +
>> + remaining = page_count;
>> + while (remaining) {
>> + /*
>> + * We need to batch this, as pin_user_pages_fast with the
>> + * FOLL_LONGTERM flag does a big temporary allocation
>> + * of contiguous memory
>> + */
>> + batch_size = min(remaining, PIN_PAGES_BATCH_SIZE);
>> + completed = pin_user_pages_fast(
>> + mem.userspace_addr +
>> + (page_count - remaining) * PAGE_SIZE,
>> + batch_size,
>> + FOLL_WRITE | FOLL_LONGTERM,
>> + &pages[page_count - remaining]);
>> + if (completed < 0) {
>> + pr_err("%s: failed to pin user pages error %i\n",
>> + __func__,
>> + completed);
>> + ret = completed;
>> + goto err_unpin_pages;
>> + }
>> + remaining -= completed;
>> + }
>> +
>> + /* Map the pages to GPA pages */
>> + ret = hv_call_map_gpa_pages(partition->id, mem.guest_pfn,
>> + page_count, mem.flags, pages);
>> + if (ret)
>> + goto err_unpin_pages;
>> +
>> + /* Install the new region */
>> + for (i = 0; i < MSHV_MAX_MEM_REGIONS; ++i) {
>> + if (!partition->regions.slots[i].size) {
>> + region = &partition->regions.slots[i];
>> + break;
>> + }
>> + }
>> + region->pages = pages;
>> + region->size = mem.size;
>> + region->guest_pfn = mem.guest_pfn;
>> + region->userspace_addr = mem.userspace_addr;
>> +
>> + partition->regions.count++;
>> +
>> + return 0;
>> +
>> +err_unpin_pages:
>> + unpin_user_pages(pages, page_count - remaining);
>> + vfree(pages);
>> +
>> + return ret;
>> +}
>> +
>> +static long
>> +mshv_partition_ioctl_unmap_memory(struct mshv_partition *partition,
>> + struct mshv_user_mem_region __user *user_mem)
>> +{
>> + struct mshv_user_mem_region mem;
>> + struct mshv_mem_region *region_ptr;
>> + int i;
>> + u64 page_count;
>> + long ret;
>> +
>> + if (!partition->regions.count)
>> + return -EINVAL;
>> +
>> + if (copy_from_user(&mem, user_mem, sizeof(mem)))
>> + return -EFAULT;
>> +
>> + /* Find matching region */
>> + for (i = 0; i < MSHV_MAX_MEM_REGIONS; ++i) {
>> + if (!partition->regions.slots[i].size)
>> + continue;
>> + region_ptr = &partition->regions.slots[i];
>> + if (region_ptr->userspace_addr == mem.userspace_addr &&
>> + region_ptr->size == mem.size &&
>> + region_ptr->guest_pfn == mem.guest_pfn)
>> + break;
>> + }
>> +
>> + if (i == MSHV_MAX_MEM_REGIONS)
>> + return -EINVAL;
>> +
>> + page_count = region_ptr->size >> PAGE_SHIFT;
>> + ret = hv_call_unmap_gpa_pages(partition->id, region_ptr->guest_pfn,
>> + page_count, 0);
>> + if (ret)
>> + return ret;
>> +
>> + unpin_user_pages(region_ptr->pages, page_count);
>> + vfree(region_ptr->pages);
>> + memset(region_ptr, 0, sizeof(*region_ptr));
>> + partition->regions.count--;
>> +
>> + return 0;
>> +}
>> +
>> static long
>> mshv_partition_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
>> {
>> - return -ENOTTY;
>> + struct mshv_partition *partition = filp->private_data;
>> + long ret;
>> +
>> + if (mutex_lock_killable(&partition->mutex))
>> + return -EINTR;
>> +
>> + switch (ioctl) {
>> + case MSHV_MAP_GUEST_MEMORY:
>> + ret = mshv_partition_ioctl_map_memory(partition,
>> + (void __user *)arg);
>> + break;
>> + case MSHV_UNMAP_GUEST_MEMORY:
>> + ret = mshv_partition_ioctl_unmap_memory(partition,
>> + (void __user *)arg);
>> + break;
>> + default:
>> + ret = -ENOTTY;
>> + }
>> +
>> + mutex_unlock(&partition->mutex);
>> + return ret;
>> }
>>
>> static void
>> destroy_partition(struct mshv_partition *partition)
>> {
>> - unsigned long flags;
>> + unsigned long flags, page_count;
>> + struct mshv_mem_region *region;
>> int i;
>>
>> /* Remove from list of partitions */
>> @@ -286,6 +592,16 @@ destroy_partition(struct mshv_partition *partition)
>>
>> hv_call_delete_partition(partition->id);
>>
>> + /* Remove regions and unpin the pages */
>> + for (i = 0; i < MSHV_MAX_MEM_REGIONS; ++i) {
>> + region = &partition->regions.slots[i];
>> + if (!region->size)
>> + continue;
>> + page_count = region->size >> PAGE_SHIFT;
>> + unpin_user_pages(region->pages, page_count);
>> + vfree(region->pages);
>> + }
>> +
>> kfree(partition);
>> }
>>
>> @@ -353,6 +669,8 @@ mshv_ioctl_create_partition(void __user *user_arg)
>> if (!partition)
>> return -ENOMEM;
>>
>> + mutex_init(&partition->mutex);
>> +
>> fd = get_unused_fd_flags(O_CLOEXEC);
>> if (fd < 0) {
>> ret = fd;
>> --
>> 2.25.1