Message-ID: <0044d83d-e207-50d4-ce71-00d605c6ea7c@linux.microsoft.com>
Date: Fri, 5 Mar 2021 13:01:36 -0800
From: Nuno Das Neves <nunodasneves@...ux.microsoft.com>
To: Michael Kelley <mikelley@...rosoft.com>,
"linux-hyperv@...r.kernel.org" <linux-hyperv@...r.kernel.org>
Cc: "virtualization@...ts.linux-foundation.org"
<virtualization@...ts.linux-foundation.org>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
"viremana@...ux.microsoft.com" <viremana@...ux.microsoft.com>,
Sunil Muthuswamy <sunilmut@...rosoft.com>,
"wei.liu@...nel.org" <wei.liu@...nel.org>,
Lillian Grassin-Drake <Lillian.GrassinDrake@...rosoft.com>,
KY Srinivasan <kys@...rosoft.com>
Subject: Re: [RFC PATCH 07/18] virt/mshv: withdraw memory hypercall
On 2/8/2021 11:44 AM, Michael Kelley wrote:
> From: Nuno Das Neves <nunodasneves@...ux.microsoft.com> Sent: Friday, November 20, 2020 4:30 PM
>>
>> Withdraw the memory from a finalized partition and free the pages.
>> The partition is now cleaned up correctly when the fd is released.
>>
>> Co-developed-by: Lillian Grassin-Drake <ligrassi@...rosoft.com>
>> Signed-off-by: Lillian Grassin-Drake <ligrassi@...rosoft.com>
>> Signed-off-by: Nuno Das Neves <nunodasneves@...ux.microsoft.com>
>> ---
>> include/asm-generic/hyperv-tlfs.h | 10 ++++++
>> virt/mshv/mshv_main.c | 54 ++++++++++++++++++++++++++++++-
>> 2 files changed, 63 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
>> index ab6ae6c164f5..2a49503b7396 100644
>> --- a/include/asm-generic/hyperv-tlfs.h
>> +++ b/include/asm-generic/hyperv-tlfs.h
>> @@ -148,6 +148,7 @@ struct ms_hyperv_tsc_page {
>> #define HVCALL_DELETE_PARTITION 0x0043
>> #define HVCALL_GET_PARTITION_ID 0x0046
>> #define HVCALL_DEPOSIT_MEMORY 0x0048
>> +#define HVCALL_WITHDRAW_MEMORY 0x0049
>> #define HVCALL_CREATE_VP 0x004e
>> #define HVCALL_GET_VP_REGISTERS 0x0050
>> #define HVCALL_SET_VP_REGISTERS 0x0051
>> @@ -472,6 +473,15 @@ union hv_proximity_domain_info {
>> u64 as_uint64;
>> };
>>
>> +struct hv_withdraw_memory_in {
>> + u64 partition_id;
>> + union hv_proximity_domain_info proximity_domain_info;
>> +};
>> +
>> +struct hv_withdraw_memory_out {
>> + u64 gpa_page_list[0];
>
> For a variable size array, the Linux kernel community has an effort
> underway to replace occurrences of [0] and [1] with just []. I think
> [] can be used here.
>
It seems the compiler doesn't like that, because there are no other members in the struct:
./include/asm-generic/hyperv-tlfs.h:452:6: error: flexible array member in a struct with no named members
452 | u64 gpa_page_list[];
I'll add a comment explaining that it's a hack to work around the compiler.
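Something like this (rough, untested sketch):

	struct hv_withdraw_memory_out {
		/*
		 * Hack: the compiler rejects a flexible array member when
		 * it is the only member of the struct, so keep the [0] form.
		 */
		u64 gpa_page_list[0];
	};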
>> +};
>> +
>
> Add __packed to the above two structs.
>
Will do.
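I.e. something like (sketch):

	struct hv_withdraw_memory_in {
		u64 partition_id;
		union hv_proximity_domain_info proximity_domain_info;
	} __packed;

	struct hv_withdraw_memory_out {
		u64 gpa_page_list[0]; /* [0] hack - see above */
	} __packed;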
>> struct hv_lp_startup_status {
>> u64 hv_status;
>> u64 substatus1;
>> diff --git a/virt/mshv/mshv_main.c b/virt/mshv/mshv_main.c
>> index c4130a6508e5..162a1bb42a4a 100644
>> --- a/virt/mshv/mshv_main.c
>> +++ b/virt/mshv/mshv_main.c
>> @@ -14,6 +14,7 @@
>> #include <linux/slab.h>
>> #include <linux/file.h>
>> #include <linux/anon_inodes.h>
>> +#include <linux/mm.h>
>> #include <linux/mshv.h>
>> #include <asm/mshyperv.h>
>>
>> @@ -57,8 +58,58 @@ static struct miscdevice mshv_dev = {
>> .mode = 600,
>> };
>>
>> +#define HV_WITHDRAW_BATCH_SIZE (PAGE_SIZE / sizeof(u64))
>
> Use HV_HYP_PAGE_SIZE so that we're explicit that the dependency
> is on the page size used by Hyper-V, which might be different from the
> guest page size (at least on architectures like ARM64).
>
Yes, will do.
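I.e.:

	#define HV_WITHDRAW_BATCH_SIZE (HV_HYP_PAGE_SIZE / sizeof(u64))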
>> #define HV_INIT_PARTITION_DEPOSIT_PAGES 208
>>
>> +static int
>> +hv_call_withdraw_memory(u64 count, int node, u64 partition_id)
>> +{
>> + struct hv_withdraw_memory_in *input_page;
>> + struct hv_withdraw_memory_out *output_page;
>> + u16 completed;
>> + u64 hypercall_status;
>> + unsigned long remaining = count;
>> + int status;
>> + int i;
>> + unsigned long flags;
>> +
>> + while (remaining) {
>> + local_irq_save(flags);
>> +
>> + input_page = (struct hv_withdraw_memory_in *)(*this_cpu_ptr(
>> + hyperv_pcpu_input_arg));
>> + output_page = (struct hv_withdraw_memory_out *)(*this_cpu_ptr(
>> + hyperv_pcpu_output_arg));
>> +
>> + input_page->partition_id = partition_id;
>> + input_page->proximity_domain_info.as_uint64 = 0;
>> + hypercall_status = hv_do_rep_hypercall(
>> + HVCALL_WITHDRAW_MEMORY,
>> + min(remaining, HV_WITHDRAW_BATCH_SIZE), 0, input_page,
>> + output_page);
>> +
>> + completed = (hypercall_status & HV_HYPERCALL_REP_COMP_MASK) >>
>> + HV_HYPERCALL_REP_COMP_OFFSET;
>> +
>> + for (i = 0; i < completed; i++)
>> + __free_page(pfn_to_page(output_page->gpa_page_list[i]));
>> +
>> + local_irq_restore(flags);
>
> Seems like there's some risk that we have interrupts disabled for too long.
> We could be calling __free_page() up to 512 times. It might be better for this
> function to allocate its own page to be used as the output page, so that interrupts
> can be enabled immediately after the hypercall completes. Then the __free_page()
> loop can execute with interrupts enabled. We have the per-cpu input and output
> pages to avoid the overhead of allocating/freeing pages for each hypercall, but in this
> case a private output page might be warranted.
>
Good idea, I'll do that.
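Roughly along these lines (untested sketch; also folds in the
HV_HYP_PAGE_SIZE change from above, and initializes status in case
count is 0):

	static int
	hv_call_withdraw_memory(u64 count, int node, u64 partition_id)
	{
		struct hv_withdraw_memory_in *input_page;
		struct hv_withdraw_memory_out *output;
		u64 hypercall_status;
		unsigned long remaining = count;
		unsigned long flags;
		u16 completed;
		int status = HV_STATUS_SUCCESS;
		int i;

		/*
		 * Private output page, so interrupts only need to stay
		 * disabled across the hypercall itself.
		 */
		output = (struct hv_withdraw_memory_out *)__get_free_page(GFP_KERNEL);
		if (!output)
			return -ENOMEM;

		while (remaining) {
			local_irq_save(flags);

			input_page = (struct hv_withdraw_memory_in *)(*this_cpu_ptr(
					hyperv_pcpu_input_arg));
			input_page->partition_id = partition_id;
			input_page->proximity_domain_info.as_uint64 = 0;

			hypercall_status = hv_do_rep_hypercall(
				HVCALL_WITHDRAW_MEMORY,
				min(remaining, HV_WITHDRAW_BATCH_SIZE), 0,
				input_page, output);

			local_irq_restore(flags);

			completed = (hypercall_status & HV_HYPERCALL_REP_COMP_MASK) >>
				HV_HYPERCALL_REP_COMP_OFFSET;

			/* Free the withdrawn pages with interrupts enabled */
			for (i = 0; i < completed; i++)
				__free_page(pfn_to_page(output->gpa_page_list[i]));

			status = hypercall_status & HV_HYPERCALL_RESULT_MASK;
			if (status != HV_STATUS_SUCCESS) {
				if (status != HV_STATUS_NO_RESOURCES)
					pr_err("%s: %s\n", __func__,
					       hv_status_to_string(status));
				break;
			}

			remaining -= completed;
		}

		free_page((unsigned long)output);

		return -hv_status_to_errno(status);
	}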
>> +
>> + status = hypercall_status & HV_HYPERCALL_RESULT_MASK;
>> + if (status != HV_STATUS_SUCCESS) {
>> + if (status != HV_STATUS_NO_RESOURCES)
>> + pr_err("%s: %s\n", __func__,
>> + hv_status_to_string(status));
>> + break;
>> + }
>> +
>> + remaining -= completed;
>> + }
>> +
>> + return -hv_status_to_errno(status);
>> +}
>> +
>> static int
>> hv_call_create_partition(
>> u64 flags,
>> @@ -230,7 +281,8 @@ destroy_partition(struct mshv_partition *partition)
>>
>> /* Deallocates and unmaps everything including vcpus, GPA mappings etc */
>> hv_call_finalize_partition(partition->id);
>> - /* TODO: Withdraw and free all pages we deposited */
>> + /* Withdraw and free all pages we deposited */
>> + hv_call_withdraw_memory(U64_MAX, NUMA_NO_NODE, partition->id);
>>
>> hv_call_delete_partition(partition->id);
>>
>> --
>> 2.25.1