lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <e6cc796d-f9ee-5203-95a9-05906f95d3f8@linux.microsoft.com>
Date:   Thu, 4 Mar 2021 15:49:17 -0800
From:   Nuno Das Neves <nunodasneves@...ux.microsoft.com>
To:     Michael Kelley <mikelley@...rosoft.com>,
        "linux-hyperv@...r.kernel.org" <linux-hyperv@...r.kernel.org>
Cc:     "virtualization@...ts.linux-foundation.org" 
        <virtualization@...ts.linux-foundation.org>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
        "viremana@...ux.microsoft.com" <viremana@...ux.microsoft.com>,
        Sunil Muthuswamy <sunilmut@...rosoft.com>,
        "wei.liu@...nel.org" <wei.liu@...nel.org>,
        Lillian Grassin-Drake <Lillian.GrassinDrake@...rosoft.com>,
        KY Srinivasan <kys@...rosoft.com>
Subject: Re: [RFC PATCH 06/18] virt/mshv: create, initialize, finalize, delete
 partition hypercalls

On 2/8/2021 11:42 AM, Michael Kelley wrote:
> From: Nuno Das Neves <nunodasneves@...ux.microsoft.com> Sent: Friday, November 20, 2020 4:30 PM
>>
>> Add hypercalls for fully setting up and mostly tearing down a guest
>> partition.
>> The teardown operation will generate an error as the deposited
>> memory has not been withdrawn.
>> This is fixed in the next patch.
>>
>> Co-developed-by: Lillian Grassin-Drake <ligrassi@...rosoft.com>
>> Signed-off-by: Lillian Grassin-Drake <ligrassi@...rosoft.com>
>> Signed-off-by: Nuno Das Neves <nunodasneves@...ux.microsoft.com>
>> ---
>>  include/asm-generic/hyperv-tlfs.h      |  52 +++++++-
>>  include/uapi/asm-generic/hyperv-tlfs.h |   1 +
>>  include/uapi/linux/mshv.h              |   1 +
>>  virt/mshv/mshv_main.c                  | 169 ++++++++++++++++++++++++-
>>  4 files changed, 220 insertions(+), 3 deletions(-)
>>
>> diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
>> index 2ff580780ce4..ab6ae6c164f5 100644
>> --- a/include/asm-generic/hyperv-tlfs.h
>> +++ b/include/asm-generic/hyperv-tlfs.h
>> @@ -142,6 +142,10 @@ struct ms_hyperv_tsc_page {
>>  #define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX	0x0013
>>  #define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX	0x0014
>>  #define HVCALL_SEND_IPI_EX			0x0015
>> +#define HVCALL_CREATE_PARTITION			0x0040
>> +#define HVCALL_INITIALIZE_PARTITION		0x0041
>> +#define HVCALL_FINALIZE_PARTITION		0x0042
>> +#define HVCALL_DELETE_PARTITION			0x0043
>>  #define HVCALL_GET_PARTITION_ID			0x0046
>>  #define HVCALL_DEPOSIT_MEMORY			0x0048
>>  #define HVCALL_CREATE_VP			0x004e
>> @@ -451,7 +455,7 @@ struct hv_get_partition_id {
>>  struct hv_deposit_memory {
>>  	u64 partition_id;
>>  	u64 gpa_page_list[];
>> -} __packed;
>> +};
> 
> Why remove __packed?
> 

I change it to match the hyperv structures exactly.
I will re-add __packed here and explicitly lay out all the
structures as you have suggested.

>>
>>  struct hv_proximity_domain_flags {
>>  	u32 proximity_preferred : 1;
>> @@ -767,4 +771,50 @@ struct hv_input_unmap_device_interrupt {
>>  #define HV_SOURCE_SHADOW_NONE               0x0
>>  #define HV_SOURCE_SHADOW_BRIDGE_BUS_RANGE   0x1
>>
>> +#define HV_MAKE_COMPATIBILITY_VERSION(major_, minor_)                          \
>> +	((u32)((major_) << 8 | (minor_)))
>> +
>> +enum hv_compatibility_version {
>> +	HV_COMPATIBILITY_19_H1 = HV_MAKE_COMPATIBILITY_VERSION(0X6, 0X5),
>> +	HV_COMPATIBILITY_MANGANESE = HV_MAKE_COMPATIBILITY_VERSION(0X6, 0X7),
> 
> Avoid use of "Manganese", which is an internal code name.  I'd suggest calling it
> 20_H1 instead, which at least has some broader meaning.
> 
>> +	HV_COMPATIBILITY_PRERELEASE = HV_MAKE_COMPATIBILITY_VERSION(0XFE, 0X0),
>> +	HV_COMPATIBILITY_EXPERIMENT = HV_MAKE_COMPATIBILITY_VERSION(0XFF, 0X0),
>> +};
>> +
>> +union hv_partition_isolation_properties {
>> +	u64 as_uint64;
>> +	struct {
>> +		u64 isolation_type: 5;
>> +		u64 rsvd_z: 7;
>> +		u64 shared_gpa_boundary_page_number: 52;
>> +	};
>> +};
> 
> Add __packed.
> 

Will do.

>> +
>> +/* Non-userspace-visible partition creation flags */
>> +#define HV_PARTITION_CREATION_FLAG_EXO_PARTITION                    BIT(8)
>> +
>> +struct hv_create_partition_in {
>> +	u64 flags;
>> +	union hv_proximity_domain_info proximity_domain_info;
>> +	enum hv_compatibility_version compatibility_version;
> 
> An "enum" is a 32 bit value in gcc and I would presume that
> Hyper-V is expecting a 64 bit value.  In general, using an enum in a data
> structure with exact layout requirements is problematic because the "C"
> language doesn't specify how big an enum is.  In such cases, it's better
> to use an integer field with an explicit size (like u64) and #defines for
> the possible values.
> 

Will do.

>> +	struct hv_partition_creation_properties partition_creation_properties;
>> +	union hv_partition_isolation_properties isolation_properties;
>> +};
>> +
>> +struct hv_create_partition_out {
>> +	u64 partition_id;
>> +};
>> +
>> +struct hv_initialize_partition {
>> +	u64 partition_id;
>> +};
>> +
>> +struct hv_finalize_partition {
>> +	u64 partition_id;
>> +};
>> +
>> +struct hv_delete_partition {
>> +	u64 partition_id;
>> +};
> 
> All of the above should have __packed for consistency with the other
> Hyper-V data structures.
> 

Will do.

>> +
>>  #endif
>> diff --git a/include/uapi/asm-generic/hyperv-tlfs.h b/include/uapi/asm-generic/hyperv-
>> tlfs.h
>> index 140cc0b4f98f..7a858226a9c5 100644
>> --- a/include/uapi/asm-generic/hyperv-tlfs.h
>> +++ b/include/uapi/asm-generic/hyperv-tlfs.h
>> @@ -6,6 +6,7 @@
>>  #define BIT(X)	(1ULL << (X))
>>  #endif
>>
>> +/* Userspace-visible partition creation flags */
> 
> Could this comment be included in the earlier patch with the #defines so
> that you avoid the trivial change here?
> 

Yep, will do.

>>  #define HV_PARTITION_CREATION_FLAG_SMT_ENABLED_GUEST                BIT(0)
>>  #define HV_PARTITION_CREATION_FLAG_GPA_LARGE_PAGES_DISABLED         BIT(3)
>>  #define HV_PARTITION_CREATION_FLAG_GPA_SUPER_PAGES_ENABLED          BIT(4)
>> diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h
>> index 3788f8bc5caa..4f8da9a6fde2 100644
>> --- a/include/uapi/linux/mshv.h
>> +++ b/include/uapi/linux/mshv.h
>> @@ -9,6 +9,7 @@
>>
>>  #include <linux/types.h>
>>  #include <asm/hyperv-tlfs.h>
>> +#include <asm-generic/hyperv-tlfs.h>
> 
> Similarly, consider adding this #include in the earlier patch so that
> this trivial change isn't needed here.
> 

Will do.

>>
>>  #define MSHV_VERSION	0x0
>>
>> diff --git a/virt/mshv/mshv_main.c b/virt/mshv/mshv_main.c
>> index 4dcbe4907430..c4130a6508e5 100644
>> --- a/virt/mshv/mshv_main.c
>> +++ b/virt/mshv/mshv_main.c
>> @@ -15,6 +15,7 @@
>>  #include <linux/file.h>
>>  #include <linux/anon_inodes.h>
>>  #include <linux/mshv.h>
>> +#include <asm/mshyperv.h>
>>
>>  MODULE_AUTHOR("Microsoft");
>>  MODULE_LICENSE("GPL");
>> @@ -31,7 +32,6 @@ static struct mshv mshv = {};
>>  static void mshv_partition_put(struct mshv_partition *partition);
>>  static int mshv_partition_release(struct inode *inode, struct file *filp);
>>  static long mshv_partition_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg);
>> -
> 
> Spurious whitespace change?
> 

Yes - removed it.

>>  static int mshv_dev_open(struct inode *inode, struct file *filp);
>>  static int mshv_dev_release(struct inode *inode, struct file *filp);
>>  static long mshv_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg);
>> @@ -57,6 +57,143 @@ static struct miscdevice mshv_dev = {
>>  	.mode = 600,
>>  };
>>
>> +#define HV_INIT_PARTITION_DEPOSIT_PAGES 208
> 
> A comment about how this value is determined would be useful.
> I'm assuming it was determined empirically.
> 

Correct - I'll add a comment.

>> +
>> +static int
>> +hv_call_create_partition(
>> +		u64 flags,
>> +		struct hv_partition_creation_properties creation_properties,
>> +		u64 *partition_id)
>> +{
>> +	struct hv_create_partition_in *input;
>> +	struct hv_create_partition_out *output;
>> +	int status;
>> +	int ret;
>> +	unsigned long irq_flags;
>> +	int i;
>> +
>> +	do {
>> +		local_irq_save(irq_flags);
>> +		input = (struct hv_create_partition_in *)(*this_cpu_ptr(
>> +			hyperv_pcpu_input_arg));
>> +		output = (struct hv_create_partition_out *)(*this_cpu_ptr(
>> +			hyperv_pcpu_output_arg));
>> +
>> +		input->flags = flags;
>> +		input->proximity_domain_info.as_uint64 = 0;
>> +		input->compatibility_version = HV_COMPATIBILITY_MANGANESE;
>> +		for (i = 0; i < HV_PARTITION_PROCESSOR_FEATURE_BANKS; ++i)
>> +			input->partition_creation_properties
>> +				.disabled_processor_features.as_uint64[i] = 0;
>> +		input->partition_creation_properties
>> +			.disabled_processor_xsave_features.as_uint64 = 0;
>> +		input->isolation_properties.as_uint64 = 0;
>> +
>> +		status = hv_do_hypercall(HVCALL_CREATE_PARTITION,
>> +					 input, output);
> 
> hv_do_hypercall returns a u64, which should then be masked with
> HV_HYPERCALL_RESULT_MASK before checking the result.
> 

Yes, I'll fix this everywhere.

>> +		if (status != HV_STATUS_INSUFFICIENT_MEMORY) {
>> +			if (status == HV_STATUS_SUCCESS)
>> +				*partition_id = output->partition_id;
>> +			else
>> +				pr_err("%s: %s\n",
>> +				       __func__, hv_status_to_string(status));
>> +			local_irq_restore(irq_flags);
>> +			ret = -hv_status_to_errno(status);
>> +			break;
>> +		}
>> +		local_irq_restore(irq_flags);
>> +		ret = hv_call_deposit_pages(NUMA_NO_NODE,
>> +					    hv_current_partition_id, 1);
>> +	} while (!ret);
>> +
>> +	return ret;
>> +}
>> +
>> +static int
>> +hv_call_initialize_partition(u64 partition_id)
>> +{
>> +	struct hv_initialize_partition *input;
>> +	int status;
>> +	int ret;
>> +	unsigned long flags;
>> +
>> +	ret = hv_call_deposit_pages(
>> +				NUMA_NO_NODE,
>> +				partition_id,
>> +				HV_INIT_PARTITION_DEPOSIT_PAGES);
>> +	if (ret)
>> +		return ret;
>> +
>> +	do {
>> +		local_irq_save(flags);
>> +		input = (struct hv_initialize_partition *)(*this_cpu_ptr(
>> +			hyperv_pcpu_input_arg));
>> +		input->partition_id = partition_id;
>> +
>> +		status = hv_do_hypercall(
>> +				HVCALL_INITIALIZE_PARTITION,
>> +				input, NULL);
> 
> FWIW, since the input is a single 64 bit value, and there's no output,
> this could use hv_do_fast_hypercall8() instead, and avoid
> needing to use the input arg page and the irq save/restore.  Would have
> to check that the particular hypercall supports the "fast" version.
> 

Good idea! I tested it and confirmed it works.

>> +		local_irq_restore(flags);
>> +
>> +		if (status != HV_STATUS_INSUFFICIENT_MEMORY) {
> 
> Same comment about status being u64 and masking.
> 

Will do.

>> +			if (status != HV_STATUS_SUCCESS)
>> +				pr_err("%s: %s\n",
>> +				       __func__, hv_status_to_string(status));
>> +			ret = -hv_status_to_errno(status);
>> +			break;
>> +		}
>> +		ret = hv_call_deposit_pages(NUMA_NO_NODE, partition_id, 1);
>> +	} while (!ret);
>> +
>> +	return ret;
>> +}
>> +
>> +static int
>> +hv_call_finalize_partition(u64 partition_id)
>> +{
>> +	struct hv_finalize_partition *input;
>> +	int status;
>> +	unsigned long flags;
>> +
>> +	local_irq_save(flags);
>> +	input = (struct hv_finalize_partition *)(*this_cpu_ptr(
>> +		hyperv_pcpu_input_arg));
>> +
>> +	input->partition_id = partition_id;
>> +	status = hv_do_hypercall(
>> +			HVCALL_FINALIZE_PARTITION,
>> +			input, NULL);
>> +	local_irq_restore(flags);
> 
> 
> Same comment about hv_do_fast_hypercall8() and about status
> being a u64 and masking.
> 

Will do.

>> +
>> +	if (status != HV_STATUS_SUCCESS)
>> +		pr_err("%s: %s\n", __func__, hv_status_to_string(status));
>> +
>> +	return -hv_status_to_errno(status);
>> +}
>> +
>> +static int
>> +hv_call_delete_partition(u64 partition_id)
>> +{
>> +	struct hv_delete_partition *input;
>> +	int status;
>> +	unsigned long flags;
>> +
>> +	local_irq_save(flags);
>> +	input = (struct hv_delete_partition *)(*this_cpu_ptr(
>> +		hyperv_pcpu_input_arg));
>> +
>> +	input->partition_id = partition_id;
>> +	status = hv_do_hypercall(
>> +			HVCALL_DELETE_PARTITION,
>> +			input, NULL);
>> +	local_irq_restore(flags);
> 
> Same comments about hv_do_fast_hypercall8(), and
> the status and masking.
> 

Will do.

>> +
>> +	if (status != HV_STATUS_SUCCESS)
>> +		pr_err("%s: %s\n", __func__, hv_status_to_string(status));
>> +
>> +	return -hv_status_to_errno(status);
>> +}
>> +
>>  static long
>>  mshv_partition_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
>>  {
>> @@ -86,6 +223,17 @@ destroy_partition(struct mshv_partition *partition)
>>
>>  	spin_unlock_irqrestore(&mshv.partitions.lock, flags);
>>
>> +	/*
>> +	 * There are no remaining references to the partition or vps,
>> +	 * so the remaining cleanup can be lockless
>> +	 */
>> +
>> +	/* Deallocates and unmaps everything including vcpus, GPA mappings etc */
>> +	hv_call_finalize_partition(partition->id);
>> +	/* TODO: Withdraw and free all pages we deposited */
>> +
>> +	hv_call_delete_partition(partition->id);
>> +
>>  	kfree(partition);
>>  }
>>
>> @@ -146,6 +294,9 @@ mshv_ioctl_create_partition(void __user *user_arg)
>>  	if (copy_from_user(&args, user_arg, sizeof(args)))
>>  		return -EFAULT;
>>
>> +	/* Only support EXO partitions */
>> +	args.flags |= HV_PARTITION_CREATION_FLAG_EXO_PARTITION;
>> +
>>  	partition = kzalloc(sizeof(*partition), GFP_KERNEL);
>>  	if (!partition)
>>  		return -ENOMEM;
>> @@ -156,11 +307,21 @@ mshv_ioctl_create_partition(void __user *user_arg)
>>  		goto free_partition;
>>  	}
>>
>> +	ret = hv_call_create_partition(args.flags,
>> +				       args.partition_creation_properties,
>> +				       &partition->id);
>> +	if (ret)
>> +		goto put_fd;
>> +
>> +	ret = hv_call_initialize_partition(partition->id);
>> +	if (ret)
>> +		goto delete_partition;
>> +
>>  	file = anon_inode_getfile("mshv_partition", &mshv_partition_fops,
>>  				  partition, O_RDWR);
>>  	if (IS_ERR(file)) {
>>  		ret = PTR_ERR(file);
>> -		goto put_fd;
>> +		goto finalize_partition;
>>  	}
>>  	refcount_set(&partition->ref_count, 1);
>>
>> @@ -174,6 +335,10 @@ mshv_ioctl_create_partition(void __user *user_arg)
>>
>>  release_file:
>>  	file->f_op->release(file->f_inode, file);
>> +finalize_partition:
>> +	hv_call_finalize_partition(partition->id);
>> +delete_partition:
>> +	hv_call_delete_partition(partition->id);
>>  put_fd:
>>  	put_unused_fd(fd);
>>  free_partition:
>> --
>> 2.25.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ