linux-kernel - Re: [RFC PATCH 10/18] virt/mshv: get and set vcpu registers ioctls

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <c7972607-7d99-5dad-354d-6ca8e8be0d8a@linux.microsoft.com>
Date:   Mon, 8 Mar 2021 17:39:28 -0800
From:   Nuno Das Neves <nunodasneves@...ux.microsoft.com>
To:     Michael Kelley <mikelley@...rosoft.com>,
        "linux-hyperv@...r.kernel.org" <linux-hyperv@...r.kernel.org>
Cc:     "virtualization@...ts.linux-foundation.org" 
        <virtualization@...ts.linux-foundation.org>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
        "viremana@...ux.microsoft.com" <viremana@...ux.microsoft.com>,
        Sunil Muthuswamy <sunilmut@...rosoft.com>,
        "wei.liu@...nel.org" <wei.liu@...nel.org>,
        Lillian Grassin-Drake <Lillian.GrassinDrake@...rosoft.com>,
        KY Srinivasan <kys@...rosoft.com>
Subject: Re: [RFC PATCH 10/18] virt/mshv: get and set vcpu registers ioctls

On 2/8/2021 11:47 AM, Michael Kelley wrote:
> From: Nuno Das Neves <nunodasneves@...ux.microsoft.com> Sent: Friday, November 20, 2020 4:30 PM
>>
>> Add ioctls for getting and setting virtual processor registers.
>>
>> Co-developed-by: Lillian Grassin-Drake <ligrassi@...rosoft.com>
>> Signed-off-by: Lillian Grassin-Drake <ligrassi@...rosoft.com>
>> Signed-off-by: Nuno Das Neves <nunodasneves@...ux.microsoft.com>
>> ---
>>  Documentation/virt/mshv/api.rst         |  11 +
>>  arch/x86/include/uapi/asm/hyperv-tlfs.h | 601 ++++++++++++++++++++++++
>>  include/asm-generic/hyperv-tlfs.h       |  65 +--
>>  include/linux/mshv.h                    |   1 +
>>  include/uapi/linux/mshv.h               |  12 +
>>  virt/mshv/mshv_main.c                   | 258 +++++++++-
>>  6 files changed, 903 insertions(+), 45 deletions(-)
>>
[snip]
>> +
>> +union hv_register_value {
>> +	struct hv_u128 reg128;
>> +	__u64 reg64;
>> +	__u32 reg32;
>> +	__u16 reg16;
>> +	__u8 reg8;
>> +	union hv_x64_fp_register fp;
>> +	union hv_x64_fp_control_status_register fp_control_status;
>> +	union hv_x64_xmm_control_status_register xmm_control_status;
>> +	struct hv_x64_segment_register segment;
>> +	struct hv_x64_table_register table;
>> +	union hv_explicit_suspend_register explicit_suspend;
>> +	union hv_intercept_suspend_register intercept_suspend;
>> +	union hv_dispatch_suspend_register dispatch_suspend;
>> +	union hv_x64_interrupt_state_register interrupt_state;
>> +	union hv_x64_pending_interruption_register pending_interruption;
>> +	union hv_x64_msr_npiep_config_contents npiep_config;
>> +	union hv_x64_pending_exception_event pending_exception_event;
>> +	union hv_x64_pending_virtualization_fault_event
>> +		pending_virtualization_fault_event;
>> +};
>> +
>>  #endif
>> diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
>> index 6e5072e29897..b9295400c20b 100644
>> --- a/include/asm-generic/hyperv-tlfs.h
>> +++ b/include/asm-generic/hyperv-tlfs.h
>> @@ -622,53 +622,30 @@ struct hv_retarget_device_interrupt {
>>  } __packed __aligned(8);
>>
>>
>> -/* HvGetVpRegisters hypercall input with variable size reg name list*/
>> -struct hv_get_vp_registers_input {
>> -	struct {
>> -		u64 partitionid;
>> -		u32 vpindex;
>> -		u8  inputvtl;
>> -		u8  padding[3];
>> -	} header;
>> -	struct input {
>> -		u32 name0;
>> -		u32 name1;
>> -	} element[];
>> -} __packed;
>> -
>> +/* HvGetVpRegisters hypercall with variable size reg name list*/
>> +struct hv_get_vp_registers {
>> +	u64 partition_id;
>> +	u32 vp_index;
>> +	u8  input_vtl;
>> +	u8  rsvd_z8;
>> +	u16 rsvd_z16;
>> +	__aligned(8) enum hv_register_name names[];
>> +} __aligned(8);
>>
>> -/* HvGetVpRegisters returns an array of these output elements */
>> -struct hv_get_vp_registers_output {
>> -	union {
>> -		struct {
>> -			u32 a;
>> -			u32 b;
>> -			u32 c;
>> -			u32 d;
>> -		} as32 __packed;
>> -		struct {
>> -			u64 low;
>> -			u64 high;
>> -		} as64 __packed;
>> -	};
>> +/* HvSetVpRegisters hypercall with variable size reg name/value list*/
>> +struct hv_register_assoc {
>> +	enum hv_register_name name;
>> +	__aligned(16) union hv_register_value value;
>>  };
>>
>> -/* HvSetVpRegisters hypercall with variable size reg name/value list*/
>> -struct hv_set_vp_registers_input {
>> -	struct {
>> -		u64 partitionid;
>> -		u32 vpindex;
>> -		u8  inputvtl;
>> -		u8  padding[3];
>> -	} header;
>> -	struct {
>> -		u32 name;
>> -		u32 padding1;
>> -		u64 padding2;
>> -		u64 valuelow;
>> -		u64 valuehigh;
>> -	} element[];
>> -} __packed;
>> +struct hv_set_vp_registers {
>> +	u64 partition_id;
>> +	u32 vp_index;
>> +	u8  input_vtl;
>> +	u8  rsvd_z8;
>> +	u16 rsvd_z16;
>> +	struct hv_register_assoc elements[];
>> +} __aligned(16);
> 
> Throughout these structures, I think the approach needs to be more
> explicit about the memory layout.  The current definitions assume that
> the compiler is inserting padding in the expected places, and not in
> any unexpected places.  My previous concerns about use of enum
> also apply.
> 
> The code also removes some layouts that are used in the
> not-yet-accepted patches for ARM64.   Let sync on how to get
> those back in.
> 

I'll add __packed to all these structures.
The hv_register_name enum can be replaced by #defines, and the type can be
u32 or u64 (it only needs 32 bits).

I'll sync with you on the ARM64 structs.

>>
>>  enum hv_device_type {
>>  	HV_DEVICE_TYPE_LOGICAL = 0,
>> diff --git a/include/linux/mshv.h b/include/linux/mshv.h
>> index 50521c5f7948..dfe469f573f9 100644
>> --- a/include/linux/mshv.h
>> +++ b/include/linux/mshv.h
>> @@ -17,6 +17,7 @@
>>  struct mshv_vp {
>>  	u32 index;
>>  	struct mshv_partition *partition;
>> +	struct mutex mutex;
>>  };
>>
>>  struct mshv_mem_region {
>> diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h
>> index 1f053eae68a6..5d53ed655429 100644
>> --- a/include/uapi/linux/mshv.h
>> +++ b/include/uapi/linux/mshv.h
>> @@ -33,6 +33,14 @@ struct mshv_create_vp {
>>  	__u32 vp_index;
>>  };
>>
>> +#define MSHV_VP_MAX_REGISTERS	128
>> +
>> +struct mshv_vp_registers {
>> +	int count; /* at most MSHV_VP_MAX_REGISTERS */
>> +	enum hv_register_name *names;
>> +	union hv_register_value *values;
>> +};
> 
> Having separate arrays for the names and values results in an extra
> copy of the data down in the ioctl code.  Any reason the caller couldn't
> supply the data as an array, where each entry is already a name/value
> pair?
> 

I initially thought it would not make a difference to the number of copies,
but it turns out it does. I will change it to use hv_register_assoc everywhere.

>> +
>>  #define MSHV_IOCTL 0xB8
>>
>>  /* mshv device */
>> @@ -44,4 +52,8 @@ struct mshv_create_vp {
>>  #define MSHV_UNMAP_GUEST_MEMORY	_IOW(MSHV_IOCTL, 0x03, struct
>> mshv_user_mem_region)
>>  #define MSHV_CREATE_VP		_IOW(MSHV_IOCTL, 0x04, struct mshv_create_vp)
>>
>> +/* vp device */
>> +#define MSHV_GET_VP_REGISTERS   _IOWR(MSHV_IOCTL, 0x05, struct
>> mshv_vp_registers)
>> +#define MSHV_SET_VP_REGISTERS   _IOW(MSHV_IOCTL, 0x06, struct mshv_vp_registers)
>> +
>>  #endif
>> diff --git a/virt/mshv/mshv_main.c b/virt/mshv/mshv_main.c
>> index 3be9d9a468c1..2a10137a1e84 100644
>> --- a/virt/mshv/mshv_main.c
>> +++ b/virt/mshv/mshv_main.c
>> @@ -74,6 +74,12 @@ static struct miscdevice mshv_dev = {
>>  #define HV_MAP_GPA_BATCH_SIZE	\
>>  		(PAGE_SIZE / sizeof(struct hv_map_gpa_pages) / sizeof(u64))
>>  #define PIN_PAGES_BATCH_SIZE	(0x10000000 / PAGE_SIZE)
>> +#define HV_GET_REGISTER_BATCH_SIZE	\
>> +	(PAGE_SIZE / \
>> +	 sizeof(struct hv_get_vp_registers) / sizeof(enum hv_register_name))
>> +#define HV_SET_REGISTER_BATCH_SIZE	\
>> +	(PAGE_SIZE / \
>> +	 sizeof(struct hv_set_vp_registers) / sizeof(struct hv_register_assoc))
> 
> These new size calculations have the same bug as HV_MAP_GPA_BATCH_SIZE.
> The first divide operations should be subtraction.
> 

Yep, I'll fix it.

> With the correct calculation, HV_GET_REGISTER_BATCH_SIZE  will be
> too large.  The input page will accommodate more 32 bit register names
> than the output page will accommodate 128 bit register values.  The limit
> should be based on the latter, not the former.  Or calculate both the
> input and output limit and use the minimum.
> 

I didn't think about this previously! Will fix.

>>
>>  static int
>>  hv_call_withdraw_memory(u64 count, int node, u64 partition_id)
>> @@ -380,10 +386,258 @@ hv_call_unmap_gpa_pages(u64 partition_id,
>>  	return ret;
>>  }
>>
>> +static int
>> +hv_call_get_vp_registers(u32 vp_index,
>> +			 u64 partition_id,
>> +			 u16 count,
>> +			 const enum hv_register_name *names,
>> +			 union hv_register_value *values)
>> +{
>> +	struct hv_get_vp_registers *input_page;
>> +	union hv_register_value *output_page;
>> +	u16 completed = 0;
>> +	u64 hypercall_status;
>> +	unsigned long remaining = count;
>> +	int rep_count;
>> +	int status;
>> +	unsigned long flags;
>> +
>> +	local_irq_save(flags);
>> +
>> +	input_page = (struct hv_get_vp_registers *)(*this_cpu_ptr(
>> +		hyperv_pcpu_input_arg));
>> +	output_page = (union hv_register_value *)(*this_cpu_ptr(
>> +		hyperv_pcpu_output_arg));
>> +
>> +	input_page->partition_id = partition_id;
>> +	input_page->vp_index = vp_index;
>> +	input_page->input_vtl = 0;
>> +	input_page->rsvd_z8 = 0;
>> +	input_page->rsvd_z16 = 0;
>> +
>> +	while (remaining) {
>> +		rep_count = min(remaining, HV_GET_REGISTER_BATCH_SIZE);
>> +		memcpy(input_page->names, names,
>> +			sizeof(enum hv_register_name) * rep_count);
>> +
>> +		hypercall_status =
>> +			hv_do_rep_hypercall(HVCALL_GET_VP_REGISTERS, rep_count,
>> +					    0, input_page, output_page);
>> +		status = hypercall_status & HV_HYPERCALL_RESULT_MASK;
>> +		if (status != HV_STATUS_SUCCESS) {
>> +			pr_err("%s: completed %li out of %u, %s\n",
>> +			       __func__,
>> +			       count - remaining, count,
>> +			       hv_status_to_string(status));
>> +			break;
>> +		}
>> +		completed = (hypercall_status & HV_HYPERCALL_REP_COMP_MASK) >>
>> +			    HV_HYPERCALL_REP_COMP_OFFSET;
>> +		memcpy(values, output_page,
>> +			sizeof(union hv_register_value) * completed);
>> +
>> +		names += completed;
>> +		values += completed;
>> +		remaining -= completed;
>> +	}
>> +	local_irq_restore(flags);
>> +
>> +	return -hv_status_to_errno(status);
>> +}
>> +
>> +static int
>> +hv_call_set_vp_registers(u32 vp_index,
>> +			 u64 partition_id,
>> +			 u16 count,
>> +			 struct hv_register_assoc *registers)
>> +{
>> +	struct hv_set_vp_registers *input_page;
>> +	u16 completed = 0;
>> +	u64 hypercall_status;
>> +	unsigned long remaining = count;
>> +	int rep_count;
>> +	int status;
>> +	unsigned long flags;
>> +
>> +	local_irq_save(flags);
>> +	input_page = (struct hv_set_vp_registers *)(*this_cpu_ptr(
>> +		hyperv_pcpu_input_arg));
>> +
>> +	input_page->partition_id = partition_id;
>> +	input_page->vp_index = vp_index;
>> +	input_page->input_vtl = 0;
>> +	input_page->rsvd_z8 = 0;
>> +	input_page->rsvd_z16 = 0;
>> +
>> +	while (remaining) {
>> +		rep_count = min(remaining, HV_SET_REGISTER_BATCH_SIZE);
>> +		memcpy(input_page->elements, registers,
>> +			sizeof(struct hv_register_assoc) * rep_count);
>> +
>> +		hypercall_status =
>> +			hv_do_rep_hypercall(HVCALL_SET_VP_REGISTERS, rep_count,
>> +					    0, input_page, NULL);
>> +		status = hypercall_status & HV_HYPERCALL_RESULT_MASK;
>> +		if (status != HV_STATUS_SUCCESS) {
>> +			pr_err("%s: completed %li out of %u, %s\n",
>> +			       __func__,
>> +			       count - remaining, count,
>> +			       hv_status_to_string(status));
>> +			break;
>> +		}
>> +		completed = (hypercall_status & HV_HYPERCALL_REP_COMP_MASK) >>
>> +			    HV_HYPERCALL_REP_COMP_OFFSET;
>> +		registers += completed;
>> +		remaining -= completed;
>> +	}
>> +
>> +	local_irq_restore(flags);
>> +
>> +	return -hv_status_to_errno(status);
>> +}
>> +
>> +static long
>> +mshv_vp_ioctl_get_regs(struct mshv_vp *vp, void __user *user_args)
>> +{
>> +	struct mshv_vp_registers args;
>> +	enum hv_register_name *names;
>> +	union hv_register_value *values;
>> +	long ret;
>> +
>> +	if (copy_from_user(&args, user_args, sizeof(args)))
>> +		return -EFAULT;
>> +
>> +	if (args.count > MSHV_VP_MAX_REGISTERS)
>> +		return -EINVAL;
>> +
>> +	names = kmalloc_array(args.count,
>> +			      sizeof(enum hv_register_name),
>> +			      GFP_KERNEL);
>> +	if (!names)
>> +		return -ENOMEM;
>> +
>> +	values = kmalloc_array(args.count,
>> +			       sizeof(union hv_register_value),
>> +			       GFP_KERNEL);
>> +	if (!values) {
>> +		kfree(names);
>> +		return -ENOMEM;
>> +	}
>> +
>> +	if (copy_from_user(names, args.names,
>> +			   sizeof(enum hv_register_name) * args.count)) {
>> +		ret = -EFAULT;
>> +		goto free_return;
>> +	}
>> +
>> +	ret = hv_call_get_vp_registers(vp->index, vp->partition->id,
>> +				       args.count, names, values);
>> +	if (ret)
>> +		goto free_return;
>> +
>> +	if (copy_to_user(args.values, values,
>> +			 sizeof(union hv_register_value) * args.count)) {
>> +		ret = -EFAULT;
>> +	}
>> +
>> +free_return:
>> +	kfree(names);
>> +	kfree(values);
>> +	return ret;
>> +}
>> +
>> +static long
>> +mshv_vp_ioctl_set_regs(struct mshv_vp *vp, void __user *user_args)
>> +{
>> +	int i;
>> +	struct mshv_vp_registers args;
>> +	struct hv_register_assoc *registers;
>> +	enum hv_register_name *names;
>> +	union hv_register_value *values;
>> +	long ret;
>> +
>> +	if (copy_from_user(&args, user_args, sizeof(args)))
>> +		return -EFAULT;
>> +
>> +	if (args.count > MSHV_VP_MAX_REGISTERS)
>> +		return -EINVAL;
>> +
>> +	names = kmalloc_array(args.count,
>> +			      sizeof(enum hv_register_name),
>> +			      GFP_KERNEL);
>> +	if (!names)
>> +		return -ENOMEM;
>> +
>> +	values = kmalloc_array(args.count,
>> +			       sizeof(union hv_register_value),
>> +			       GFP_KERNEL);
>> +	if (!values) {
>> +		kfree(names);
>> +		return -ENOMEM;
>> +	}
>> +
>> +	registers = kmalloc_array(args.count,
>> +				  sizeof(struct hv_register_assoc),
>> +				  GFP_KERNEL);
>> +	if (!registers) {
>> +		kfree(values);
>> +		kfree(names);
>> +		return -ENOMEM;
>> +	}
>> +
>> +	if (copy_from_user(names, args.names,
>> +			   sizeof(enum hv_register_name) * args.count)) {
>> +		ret = -EFAULT;
>> +		goto free_return;
>> +	}
>> +
>> +	if (copy_from_user(values, args.values,
>> +			   sizeof(union hv_register_value) * args.count)) {
>> +		ret = -EFAULT;
>> +		goto free_return;
>> +	}
>> +
>> +	for (i = 0; i < args.count; i++) {
>> +		memcpy(&registers[i].name, &names[i],
>> +		       sizeof(enum hv_register_name));
>> +		memcpy(&registers[i].value, &values[i],
>> +		       sizeof(union hv_register_value));
>> +	}
> 
> The above will result in uninitialized memory being sent to
> Hyper-V, since there is implicit padding associated with the
> 32 bit name field.
> 

This shouldn't be an issue after I change this to use hv_register_assoc,
instead of separate names and values buffers.

>> +
>> +	ret = hv_call_set_vp_registers(vp->index, vp->partition->id,
>> +				       args.count, registers);
>> +
>> +free_return:
>> +	kfree(names);
>> +	kfree(values);
>> +	kfree(registers);
>> +	return ret;
>> +}
>> +
>> +
>>  static long
>>  mshv_vp_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
>>  {
>> -	return -ENOTTY;
>> +	struct mshv_vp *vp = filp->private_data;
>> +	long r = 0;
>> +
>> +	if (mutex_lock_killable(&vp->mutex))
>> +		return -EINTR;
>> +
>> +	switch (ioctl) {
>> +	case MSHV_GET_VP_REGISTERS:
>> +		r = mshv_vp_ioctl_get_regs(vp, (void __user *)arg);
>> +		break;
>> +	case MSHV_SET_VP_REGISTERS:
>> +		r = mshv_vp_ioctl_set_regs(vp, (void __user *)arg);
>> +		break;
>> +	default:
>> +		r = -ENOTTY;
>> +		break;
>> +	}
>> +	mutex_unlock(&vp->mutex);
>> +
>> +	return r;
>>  }
>>
>>  static int
>> @@ -420,6 +674,8 @@ mshv_partition_ioctl_create_vp(struct mshv_partition *partition,
>>  	if (!vp)
>>  		return -ENOMEM;
>>
>> +	mutex_init(&vp->mutex);
>> +
>>  	vp->index = args.vp_index;
>>  	vp->partition = mshv_partition_get(partition);
>>  	if (!vp->partition) {
>> --
>> 2.25.1