Message-ID: <03c90b7d-e9b8-4f8f-9267-c273791077c2@linux.microsoft.com>
Date: Fri, 25 Jul 2025 11:24:20 +0530
From: Naman Jain <namjain@...ux.microsoft.com>
To: Michael Kelley <mhklinux@...look.com>,
"K . Y . Srinivasan" <kys@...rosoft.com>,
Haiyang Zhang <haiyangz@...rosoft.com>, Wei Liu <wei.liu@...nel.org>,
Dexuan Cui <decui@...rosoft.com>
Cc: Roman Kisel <romank@...ux.microsoft.com>,
Anirudh Rayabharam <anrayabh@...ux.microsoft.com>,
Saurabh Sengar <ssengar@...ux.microsoft.com>,
Stanislav Kinsburskii <skinsburskii@...ux.microsoft.com>,
Nuno Das Neves <nunodasneves@...ux.microsoft.com>,
ALOK TIWARI <alok.a.tiwari@...cle.com>,
Markus Elfring <Markus.Elfring@....de>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
"linux-hyperv@...r.kernel.org" <linux-hyperv@...r.kernel.org>
Subject: Re: [PATCH v6 2/2] Drivers: hv: Introduce mshv_vtl driver
On 7/25/2025 8:52 AM, Michael Kelley wrote:
> From: Naman Jain <namjain@...ux.microsoft.com> Sent: Thursday, July 24, 2025 1:26 AM
>>
>
> Overall, this is looking really good. I have just a few comments embedded below.
>
>> Provide an interface for a Virtual Machine Monitor like OpenVMM and its
>> use as the OpenHCL paravisor to control VTL0 (Virtual Trust Level 0).
>> Expose devices and support IOCTLs for features like VTL creation,
>> VTL0 memory management, context switch, making hypercalls,
>> mapping VTL0 address space to VTL2 userspace, getting new VMBus
>> messages and channel events in VTL2 etc.
>>
>> Co-developed-by: Roman Kisel <romank@...ux.microsoft.com>
>> Signed-off-by: Roman Kisel <romank@...ux.microsoft.com>
>> Co-developed-by: Saurabh Sengar <ssengar@...ux.microsoft.com>
>> Signed-off-by: Saurabh Sengar <ssengar@...ux.microsoft.com>
>> Reviewed-by: Roman Kisel <romank@...ux.microsoft.com>
>> Reviewed-by: Alok Tiwari <alok.a.tiwari@...cle.com>
>> Message-ID: <20250512140432.2387503-3-namjain@...ux.microsoft.com>
>
> This "Message-ID" line looks misplaced or just spurious.
Removed it. I'm not sure where it got introduced, but I see it is not
commonly used.
>
>> Reviewed-by: Saurabh Sengar <ssengar@...ux.microsoft.com>
>> Reviewed-by: Nuno Das Neves <nunodasneves@...ux.microsoft.com>
>> Signed-off-by: Naman Jain <namjain@...ux.microsoft.com>
>> ---
>> drivers/hv/Kconfig | 22 +
>> drivers/hv/Makefile | 7 +-
>> drivers/hv/mshv_vtl.h | 52 ++
>> drivers/hv/mshv_vtl_main.c | 1508 +++++++++++++++++++++++++++++++++++
>> include/hyperv/hvgdk_mini.h | 106 +++
>> include/uapi/linux/mshv.h | 80 ++
>> 6 files changed, 1774 insertions(+), 1 deletion(-)
>
> Nice! 254 fewer lines inserted than in the previous version!
Yes :) thanks a lot for your review comments.
>
>> create mode 100644 drivers/hv/mshv_vtl.h
>> create mode 100644 drivers/hv/mshv_vtl_main.c
>>
>> diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig
>> index 57623ca7f350..2e8df09db599 100644
>> --- a/drivers/hv/Kconfig
>> +++ b/drivers/hv/Kconfig
>> @@ -73,4 +73,26 @@ config MSHV_ROOT
>>
<snip>
>> +
>> +static struct tasklet_struct msg_dpc;
>> +static wait_queue_head_t fd_wait_queue;
>> +static bool has_message;
>> +static struct eventfd_ctx *flag_eventfds[HV_EVENT_FLAGS_COUNT];
>> +static DEFINE_MUTEX(flag_lock);
>> +static bool __read_mostly mshv_has_reg_page;
>> +
>> +/* hvcall code is of type u16, allocate a bitmap of size (1 << 16) to accommodate it */
>> +#define MAX_BITMAP_SIZE BIT(16)
>
> Or maybe per my comment below, use
>
> #define MAX_BITMAP_BYTES ((U16_MAX + 1)/8)
I think I overlooked that the bitmap array is of type u8. Will change it.
>
>> +
>> +struct mshv_vtl_hvcall_fd {
>> + u8 allow_bitmap[MAX_BITMAP_SIZE];
>
> This is still too big. :-( You'll get 64K bytes, which is 512K bits. You only need
> 64K bits.
True.
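Will fix it to allocate one bit per hvcall code. Roughly (untested
sketch, using the MAX_BITMAP_BYTES name you suggested):

	/* hvcall code is u16: one bit for each of the 64K codes */
	#define MAX_BITMAP_BYTES	((U16_MAX + 1) / 8)

	struct mshv_vtl_hvcall_fd {
		u8 allow_bitmap[MAX_BITMAP_BYTES];
		bool allow_map_initialized;
		/* Protects hvcall setup in IOCTLs */
		struct mutex init_mutex;
		struct miscdevice *dev;
	};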
>
>> + bool allow_map_initialized;
>> + /*
>> + * Used to protect hvcall setup in IOCTLs
>> + */
>> + struct mutex init_mutex;
>> + struct miscdevice *dev;
>> +};
>> +
>> +struct mshv_vtl_poll_file {
>> + struct file *file;
>> + wait_queue_entry_t wait;
>> + wait_queue_head_t *wqh;
>> + poll_table pt;
>> + int cpu;
>> +};
>> +
>> +struct mshv_vtl {
>> + struct device *module_dev;
>> + u64 id;
>> +};
>> +
>> +struct mshv_vtl_per_cpu {
>> + struct mshv_vtl_run *run;
>> + struct page *reg_page;
>> +};
>> +
>> +/* SYNIC_OVERLAY_PAGE_MSR - internal, identical to hv_synic_simp */
>> +union hv_synic_overlay_page_msr {
>> + u64 as_uint64;
>> + struct {
>> + u64 enabled: 1;
>> + u64 reserved: 11;
>> + u64 pfn: 52;
>> + };
>
> Add __packed to exactly match hv_synic_simp.
Acked.
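Will put it on the inner struct so the layout matches hv_synic_simp,
i.e. something like:

	/* SYNIC_OVERLAY_PAGE_MSR - internal, identical to hv_synic_simp */
	union hv_synic_overlay_page_msr {
		u64 as_uint64;
		struct {
			u64 enabled: 1;
			u64 reserved: 11;
			u64 pfn: 52;
		} __packed;
	};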
>
>> +};
>> +
>> +static struct mutex mshv_vtl_poll_file_lock;
>> +static union hv_register_vsm_page_offsets mshv_vsm_page_offsets;
>> +static union hv_register_vsm_capabilities mshv_vsm_capabilities;
>> +
>> +static DEFINE_PER_CPU(struct mshv_vtl_poll_file, mshv_vtl_poll_file);
>> +static DEFINE_PER_CPU(unsigned long long, num_vtl0_transitions);
>> +static DEFINE_PER_CPU(struct mshv_vtl_per_cpu, mshv_vtl_per_cpu);
>> +
<snip>
>> +static int mshv_vtl_ioctl_add_vtl0_mem(struct mshv_vtl *vtl, void __user *arg)
>> +{
>> + struct mshv_vtl_ram_disposition vtl0_mem;
>> + struct dev_pagemap *pgmap;
>> + void *addr;
>> +
>> + if (copy_from_user(&vtl0_mem, arg, sizeof(vtl0_mem)))
>> + return -EFAULT;
>> + /* vlt0_mem.last_pfn is excluded in the pagemap range for VTL0 as per design */
>
> s/vlt0_mem/vtl0_mem/
Acked.
>
>> + if (vtl0_mem.last_pfn <= vtl0_mem.start_pfn) {
>> + dev_err(vtl->module_dev, "range start pfn (%llx) > end pfn (%llx)\n",
>> + vtl0_mem.start_pfn, vtl0_mem.last_pfn);
>> + return -EFAULT;
>> + }
>> +
>> + pgmap = kzalloc(sizeof(*pgmap), GFP_KERNEL);
>> + if (!pgmap)
>> + return -ENOMEM;
>> +
>> + pgmap->ranges[0].start = PFN_PHYS(vtl0_mem.start_pfn);
>> + pgmap->ranges[0].end = PFN_PHYS(vtl0_mem.last_pfn) - 1;
>> + pgmap->nr_range = 1;
>> + pgmap->type = MEMORY_DEVICE_GENERIC;
>> +
>> + /*
>> + * Determine the highest page order that can be used for the given memory range.
>> + * This works best when the range is aligned; i.e. both the start and the length.
>> + */
>> + pgmap->vmemmap_shift = count_trailing_zeros(vtl0_mem.start_pfn | vtl0_mem.last_pfn);
>> + dev_dbg(vtl->module_dev,
>> + "Add VTL0 memory: start: 0x%llx, end_pfn: 0x%llx, page order: %lu\n",
>> + vtl0_mem.start_pfn, vtl0_mem.last_pfn, pgmap->vmemmap_shift);
>> +
>> + addr = devm_memremap_pages(mem_dev, pgmap);
>> + if (IS_ERR(addr)) {
>> + dev_err(vtl->module_dev, "devm_memremap_pages error: %ld\n", PTR_ERR(addr));
>> + kfree(pgmap);
>> + return -EFAULT;
>> + }
>> +
>> + /* Don't free pgmap, since it has to stick around until the memory
>> + * is unmapped, which will never happen as there is no scenario
>> + * where VTL0 can be released/shutdown without bringing down VTL2.
>> + */
>> + return 0;
>> +}
>> +
>> +static void mshv_vtl_cancel(int cpu)
>> +{
>> + int here = get_cpu();
>> +
>> + if (here != cpu) {
>> + if (!xchg_relaxed(&mshv_vtl_cpu_run(cpu)->cancel, 1))
>> + smp_send_reschedule(cpu);
>> + } else {
>> + WRITE_ONCE(mshv_vtl_this_run()->cancel, 1);
>> + }
>> + put_cpu();
>> +}
>> +
>> +static int mshv_vtl_poll_file_wake(wait_queue_entry_t *wait, unsigned int mode, int sync, void *key)
>> +{
>> + struct mshv_vtl_poll_file *poll_file = container_of(wait, struct mshv_vtl_poll_file, wait);
>> +
>> + mshv_vtl_cancel(poll_file->cpu);
>> +
>> + return 0;
>> +}
>> +
>> +static void mshv_vtl_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, poll_table *pt)
>> +{
>> + struct mshv_vtl_poll_file *poll_file = container_of(pt, struct mshv_vtl_poll_file, pt);
>> +
>> + WARN_ON(poll_file->wqh);
>> + poll_file->wqh = wqh;
>> + add_wait_queue(wqh, &poll_file->wait);
>> +}
>> +
>> +static int mshv_vtl_ioctl_set_poll_file(struct mshv_vtl_set_poll_file __user *user_input)
>> +{
>> + struct file *file, *old_file;
>> + struct mshv_vtl_poll_file *poll_file;
>> + struct mshv_vtl_set_poll_file input;
>> +
>> + if (copy_from_user(&input, user_input, sizeof(input)))
>> + return -EFAULT;
>> +
>> + if (input.cpu >= num_possible_cpus() || !cpu_online(input.cpu))
>> + return -EINVAL;
>> + /*
>> + * Hotplug is not supported in VTL2 in OpenHCL, where this kernel driver exists.
>
> More precisely, you mean "CPU hotplug" as opposed to "memory hotplug", though
> that should be evident from the context. (Memory hotplug may not be supported
> either, but that's not relevant here.)
Acked.
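Will reword the comment to something like:

	/*
	 * CPU hotplug is not supported in VTL2 in OpenHCL, where this
	 * kernel driver exists. The CPU is expected to remain online
	 * after the above cpu_online() check.
	 */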
>
>> + * CPU is expected to remain online after above cpu_online() check.
>> + */
>> +
>> + file = NULL;
>> + file = fget(input.fd);
>> + if (!file)
>> + return -EBADFD;
>> +
>> + poll_file = per_cpu_ptr(&mshv_vtl_poll_file, READ_ONCE(input.cpu));
>> + if (!poll_file)
>> + return -EINVAL;
>> +
>> + guard(mutex)(&mshv_vtl_poll_file_lock);
>> +
>> + if (poll_file->wqh)
>> + remove_wait_queue(poll_file->wqh, &poll_file->wait);
>> + poll_file->wqh = NULL;
>> +
>> + old_file = poll_file->file;
>> + poll_file->file = file;
>> + poll_file->cpu = input.cpu;
>> +
>> + if (file) {
>> + init_waitqueue_func_entry(&poll_file->wait, mshv_vtl_poll_file_wake);
>> + init_poll_funcptr(&poll_file->pt, mshv_vtl_ptable_queue_proc);
>> + vfs_poll(file, &poll_file->pt);
>> + }
>> +
>> + if (old_file)
>> + fput(old_file);
>
> Is it safe to call fput() while holding mshv_vtl_poll_file_lock? I don't know
> the answer, but the change to using "guard" has made the fput() within
> the scope of the lock, whereas previously it was not. My inclination would
> be to *not* hold mshv_vtl_poll_file_lock when calling fput(), but I can't
> immediately point to a problem that occur if the lock is held.
>
I was also unsure about it, but I think you are right. I can revert
this to the previous implementation.
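The tail of the function would then go back to roughly this shape
(sketch), with fput() outside the lock's scope:

	mutex_lock(&mshv_vtl_poll_file_lock);

	if (poll_file->wqh)
		remove_wait_queue(poll_file->wqh, &poll_file->wait);
	poll_file->wqh = NULL;

	old_file = poll_file->file;
	poll_file->file = file;
	poll_file->cpu = input.cpu;

	if (file) {
		init_waitqueue_func_entry(&poll_file->wait, mshv_vtl_poll_file_wake);
		init_poll_funcptr(&poll_file->pt, mshv_vtl_ptable_queue_proc);
		vfs_poll(file, &poll_file->pt);
	}

	mutex_unlock(&mshv_vtl_poll_file_lock);

	if (old_file)
		fput(old_file);

	return 0;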
>> +
>> + return 0;
>> +}
>> +
>> +/* Static table mapping register names to their corresponding actions */
>> +static const struct {
>> + enum hv_register_name reg_name;
>> + int debug_reg_num; /* -1 if not a debug register */
>> + u32 msr_addr; /* 0 if not an MSR */
>> +} reg_table[] = {
>> + /* Debug registers */
>> + {HV_X64_REGISTER_DR0, 0, 0},
>> + {HV_X64_REGISTER_DR1, 1, 0},
>> + {HV_X64_REGISTER_DR2, 2, 0},
>> + {HV_X64_REGISTER_DR3, 3, 0},
>> + {HV_X64_REGISTER_DR6, 6, 0},
>> + /* MTRR MSRs */
>> + {HV_X64_REGISTER_MSR_MTRR_CAP, -1, MSR_MTRRcap},
>> + {HV_X64_REGISTER_MSR_MTRR_DEF_TYPE, -1, MSR_MTRRdefType},
>> + {HV_X64_REGISTER_MSR_MTRR_PHYS_BASE0, -1, MTRRphysBase_MSR(0)},
>> + {HV_X64_REGISTER_MSR_MTRR_PHYS_BASE1, -1, MTRRphysBase_MSR(1)},
>> + {HV_X64_REGISTER_MSR_MTRR_PHYS_BASE2, -1, MTRRphysBase_MSR(2)},
>> + {HV_X64_REGISTER_MSR_MTRR_PHYS_BASE3, -1, MTRRphysBase_MSR(3)},
>> + {HV_X64_REGISTER_MSR_MTRR_PHYS_BASE4, -1, MTRRphysBase_MSR(4)},
>> + {HV_X64_REGISTER_MSR_MTRR_PHYS_BASE5, -1, MTRRphysBase_MSR(5)},
>> + {HV_X64_REGISTER_MSR_MTRR_PHYS_BASE6, -1, MTRRphysBase_MSR(6)},
>> + {HV_X64_REGISTER_MSR_MTRR_PHYS_BASE7, -1, MTRRphysBase_MSR(7)},
>> + {HV_X64_REGISTER_MSR_MTRR_PHYS_BASE8, -1, MTRRphysBase_MSR(8)},
>> + {HV_X64_REGISTER_MSR_MTRR_PHYS_BASE9, -1, MTRRphysBase_MSR(9)},
>> + {HV_X64_REGISTER_MSR_MTRR_PHYS_BASEA, -1, MTRRphysBase_MSR(0xa)},
>> + {HV_X64_REGISTER_MSR_MTRR_PHYS_BASEB, -1, MTRRphysBase_MSR(0xb)},
>> + {HV_X64_REGISTER_MSR_MTRR_PHYS_BASEC, -1, MTRRphysBase_MSR(0xc)},
>> + {HV_X64_REGISTER_MSR_MTRR_PHYS_BASED, -1, MTRRphysBase_MSR(0xd)},
>> + {HV_X64_REGISTER_MSR_MTRR_PHYS_BASEE, -1, MTRRphysBase_MSR(0xe)},
>> + {HV_X64_REGISTER_MSR_MTRR_PHYS_BASEF, -1, MTRRphysBase_MSR(0xf)},
>> + {HV_X64_REGISTER_MSR_MTRR_PHYS_MASK0, -1, MTRRphysMask_MSR(0)},
>> + {HV_X64_REGISTER_MSR_MTRR_PHYS_MASK1, -1, MTRRphysMask_MSR(1)},
>> + {HV_X64_REGISTER_MSR_MTRR_PHYS_MASK2, -1, MTRRphysMask_MSR(2)},
>> + {HV_X64_REGISTER_MSR_MTRR_PHYS_MASK3, -1, MTRRphysMask_MSR(3)},
>> + {HV_X64_REGISTER_MSR_MTRR_PHYS_MASK4, -1, MTRRphysMask_MSR(4)},
>> + {HV_X64_REGISTER_MSR_MTRR_PHYS_MASK5, -1, MTRRphysMask_MSR(5)},
>> + {HV_X64_REGISTER_MSR_MTRR_PHYS_MASK6, -1, MTRRphysMask_MSR(6)},
>> + {HV_X64_REGISTER_MSR_MTRR_PHYS_MASK7, -1, MTRRphysMask_MSR(7)},
>> + {HV_X64_REGISTER_MSR_MTRR_PHYS_MASK8, -1, MTRRphysMask_MSR(8)},
>> + {HV_X64_REGISTER_MSR_MTRR_PHYS_MASK9, -1, MTRRphysMask_MSR(9)},
>> + {HV_X64_REGISTER_MSR_MTRR_PHYS_MASKA, -1, MTRRphysMask_MSR(0xa)},
>> + {HV_X64_REGISTER_MSR_MTRR_PHYS_MASKB, -1, MTRRphysMask_MSR(0xb)},
>> + {HV_X64_REGISTER_MSR_MTRR_PHYS_MASKC, -1, MTRRphysMask_MSR(0xc)},
>> + {HV_X64_REGISTER_MSR_MTRR_PHYS_MASKD, -1, MTRRphysMask_MSR(0xd)},
>> + {HV_X64_REGISTER_MSR_MTRR_PHYS_MASKE, -1, MTRRphysMask_MSR(0xe)},
>> + {HV_X64_REGISTER_MSR_MTRR_PHYS_MASKF, -1, MTRRphysMask_MSR(0xf)},
>> + {HV_X64_REGISTER_MSR_MTRR_FIX64K00000, -1, MSR_MTRRfix64K_00000},
>> + {HV_X64_REGISTER_MSR_MTRR_FIX16K80000, -1, MSR_MTRRfix16K_80000},
>> + {HV_X64_REGISTER_MSR_MTRR_FIX16KA0000, -1, MSR_MTRRfix16K_A0000},
>> + {HV_X64_REGISTER_MSR_MTRR_FIX4KC0000, -1, MSR_MTRRfix4K_C0000},
>> + {HV_X64_REGISTER_MSR_MTRR_FIX4KC8000, -1, MSR_MTRRfix4K_C8000},
>> + {HV_X64_REGISTER_MSR_MTRR_FIX4KD0000, -1, MSR_MTRRfix4K_D0000},
>> + {HV_X64_REGISTER_MSR_MTRR_FIX4KD8000, -1, MSR_MTRRfix4K_D8000},
>> + {HV_X64_REGISTER_MSR_MTRR_FIX4KE0000, -1, MSR_MTRRfix4K_E0000},
>> + {HV_X64_REGISTER_MSR_MTRR_FIX4KE8000, -1, MSR_MTRRfix4K_E8000},
>> + {HV_X64_REGISTER_MSR_MTRR_FIX4KF0000, -1, MSR_MTRRfix4K_F0000},
>> + {HV_X64_REGISTER_MSR_MTRR_FIX4KF8000, -1, MSR_MTRRfix4K_F8000},
>> +};
>> +
>> +static int mshv_vtl_set_reg(struct hv_register_assoc *regs)
>> +{
>> + u64 reg64;
>> + enum hv_register_name gpr_name;
>> + int i;
>> +
>> + gpr_name = regs->name;
>> + reg64 = regs->value.reg64;
>> +
>> + /* Search for the register in the table */
>> + for (i = 0; i < ARRAY_SIZE(reg_table); i++) {
>> + if (reg_table[i].reg_name == gpr_name) {
>> + if (reg_table[i].debug_reg_num != -1) {
>> + /* Handle debug registers */
>> + if (gpr_name == HV_X64_REGISTER_DR6 &&
>> + !mshv_vsm_capabilities.dr6_shared)
>> + goto hypercall;
>> + native_set_debugreg(reg_table[i].debug_reg_num, reg64);
>> + } else {
>> + /* Handle MSRs */
>> + wrmsrl(reg_table[i].msr_addr, reg64);
>> + }
>> + return 0;
>> + }
>> + }
>> +
>> +hypercall:
>> + return 1;
>> +}
>> +
>> +static int mshv_vtl_get_reg(struct hv_register_assoc *regs)
>> +{
>> + u64 *reg64;
>> + enum hv_register_name gpr_name;
>> + int i;
>> +
>> + gpr_name = regs->name;
>> + reg64 = (u64 *)&regs->value.reg64;
>> +
>> + /* Search for the register in the table */
>> + for (i = 0; i < ARRAY_SIZE(reg_table); i++) {
>> + if (reg_table[i].reg_name == gpr_name) {
>> + if (reg_table[i].debug_reg_num != -1) {
>> + /* Handle debug registers */
>> + if (gpr_name == HV_X64_REGISTER_DR6 &&
>> + !mshv_vsm_capabilities.dr6_shared)
>> + goto hypercall;
>> + *reg64 = native_get_debugreg(reg_table[i].debug_reg_num);
>> + } else {
>> + /* Handle MSRs */
>> + rdmsrl(reg_table[i].msr_addr, *reg64);
>> + }
>> + return 0;
>> + }
>> + }
>> +
>> +hypercall:
>> + return 1;
>> +}
>
> Nice! You incorporated the debug registers as well into the static
> table and lookup functions. I count 224 fewer source code lines.
Yes. Thanks.
>
>> +
>> +static void mshv_vtl_return(struct mshv_vtl_cpu_context *vtl0)
>> +{
<snip>
>> +
>> +static long
>> +mshv_vtl_ioctl_get_regs(void __user *user_args)
>> +{
>> + struct mshv_vp_registers args;
>> + struct hv_register_assoc *reg;
>> + long ret;
>> +
>> + if (copy_from_user(&args, user_args, sizeof(args)))
>> + return -EFAULT;
>> +
>> + /* This IOCTL supports processing only one register at a time. */
>> + if (args.count != 1)
>> + return -EINVAL;
>> +
>> + reg = kmalloc(sizeof(*reg), GFP_KERNEL);
>
> If handling only one register, there's no need to kmalloc. Just
> declare the struct hv_register_assoc directly on the stack
> instead of a pointer to such. Then the error paths are
> simpler because memory doesn't need to be freed.
Acked. Makes sense.
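With the struct on the stack, the function reduces to something like
this (untested sketch):

	static long mshv_vtl_ioctl_get_regs(void __user *user_args)
	{
		struct mshv_vp_registers args;
		struct hv_register_assoc reg;
		long ret;

		if (copy_from_user(&args, user_args, sizeof(args)))
			return -EFAULT;

		/* This IOCTL supports processing only one register at a time. */
		if (args.count != 1)
			return -EINVAL;

		if (copy_from_user(&reg, (void __user *)args.regs_ptr, sizeof(reg)))
			return -EFAULT;

		ret = mshv_vtl_get_reg(&reg);
		if (ret) {
			/* Not handled locally, fall back to the hypercall */
			ret = vtl_get_vp_register(&reg);
			if (ret)
				return ret;
		}

		if (copy_to_user((void __user *)args.regs_ptr, &reg, sizeof(reg)))
			return -EFAULT;

		return 0;
	}

mshv_vtl_ioctl_set_regs() will get the same treatment.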
>
>> + if (!reg)
>> + return -ENOMEM;
>> +
>> + if (copy_from_user(reg, (void __user *)args.regs_ptr,
>> + sizeof(*reg))) {
>> + ret = -EFAULT;
>> + goto free_return;
>> + }
>> +
>> + ret = mshv_vtl_get_reg(reg);
>> + if (!ret)
>> + goto copy_args; /* No need of hypercall */
>> + ret = vtl_get_vp_register(reg);
>> + if (ret)
>> + goto free_return;
>> +
>> +copy_args:
>> + if (copy_to_user((void __user *)args.regs_ptr, reg, sizeof(*reg)))
>> + ret = -EFAULT;
>> +free_return:
>> + kfree(reg);
>> +
>> + return ret;
>> +}
>> +
>> +static long
>> +mshv_vtl_ioctl_set_regs(void __user *user_args)
>> +{
>> + struct mshv_vp_registers args;
>> + struct hv_register_assoc *reg;
>> + long ret;
>> +
>> + if (copy_from_user(&args, user_args, sizeof(args)))
>> + return -EFAULT;
>> +
>> + /* This IOCTL supports processing only one register at a time. */
>> + if (args.count != 1)
>> + return -EINVAL;
>> +
>> + reg = kmalloc(sizeof(*reg), GFP_KERNEL);
>
> Same here. Declare struct hv_register_assoc on the stack.
Acked.
>
>> + if (!reg)
>> + return -ENOMEM;
>> +
>> + if (copy_from_user(reg, (void __user *)args.regs_ptr, sizeof(*reg))) {
>> + ret = -EFAULT;
>> + goto free_return;
>> + }
>> +
>> + ret = mshv_vtl_set_reg(reg);
>> + if (!ret)
>> + goto free_return; /* No need of hypercall */
>> + ret = vtl_set_vp_register(reg);
>> +
>> +free_return:
>> + kfree(reg);
>> +
>> + return ret;
>> +}
>> +
<snip>
>> +
>> +static int mshv_vtl_sint_ioctl_signal_event(struct mshv_vtl_signal_event __user *arg)
>> +{
>> + u64 input, status;
>> + struct mshv_vtl_signal_event signal_event;
>> +
>> + if (copy_from_user(&signal_event, arg, sizeof(signal_event)))
>> + return -EFAULT;
>> +
>> + input = signal_event.connection_id | ((u64)signal_event.flag << 32);
>> +
>> + status = hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, input) & HV_HYPERCALL_RESULT_MASK;
>> + if (status)
>
> Don't AND with HV_HYPERCALL_RESULT_MASK and then test the result.
> You can just do "return hv_result_to_errno(status)". The function will
> return zero if there's no error. See other uses of hv_result_to_errno() for
> the various patterns. We want to get away from AND'ing with
> HV_HYPERCALL_RESULT_MASK and instead always use the helper
> functions.
Acked.
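Will simplify it to:

	status = hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, input);

	return hv_result_to_errno(status);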
>
>> + return hv_result_to_errno(status);
>> + return 0;
>> +}
>> +
>> +static int mshv_vtl_sint_ioctl_set_eventfd(struct mshv_vtl_set_eventfd __user *arg)
>> +{
>> + struct mshv_vtl_set_eventfd set_eventfd;
>> + struct eventfd_ctx *eventfd, *old_eventfd;
>> +
>> + if (copy_from_user(&set_eventfd, arg, sizeof(set_eventfd)))
>> + return -EFAULT;
>> + if (set_eventfd.flag >= HV_EVENT_FLAGS_COUNT)
>> + return -EINVAL;
>> +
>> + eventfd = NULL;
>> + if (set_eventfd.fd >= 0) {
>> + eventfd = eventfd_ctx_fdget(set_eventfd.fd);
>> + if (IS_ERR(eventfd))
>> + return PTR_ERR(eventfd);
>> + }
>> +
>> + guard(mutex)(&flag_lock);
>> + old_eventfd = READ_ONCE(flag_eventfds[set_eventfd.flag]);
>> + WRITE_ONCE(flag_eventfds[set_eventfd.flag], eventfd);
>> +
>> + if (old_eventfd) {
>> + synchronize_rcu();
>> + eventfd_ctx_put(old_eventfd);
>
> Again, I wonder if is OK to do eventfd_ctx_put() while holding
> flag_lock, since the use of guard() changes the scope of the lock
> compared with the previous version of this patch.
>
I didn't find eventfd_ctx_put() to be a blocking operation, so I thought
of keeping guard() here. However, synchronize_rcu() is a blocking
operation. Please advise; I am OK with removing the guard, since the
lock is only used here and doing the unlock explicitly instead of via
automatic cleanup should not be an issue.
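If we drop guard() here, the update would look roughly like:

	mutex_lock(&flag_lock);
	old_eventfd = READ_ONCE(flag_eventfds[set_eventfd.flag]);
	WRITE_ONCE(flag_eventfds[set_eventfd.flag], eventfd);
	mutex_unlock(&flag_lock);

	if (old_eventfd) {
		synchronize_rcu();
		eventfd_ctx_put(old_eventfd);
	}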
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +static int mshv_vtl_sint_ioctl_pause_message_stream(struct mshv_sint_mask __user *arg)
>> +{
>> + static DEFINE_MUTEX(vtl2_vmbus_sint_mask_mutex);
>> + struct mshv_sint_mask mask;
>> +
>> + if (copy_from_user(&mask, arg, sizeof(mask)))
>> + return -EFAULT;
>> + guard(mutex)(&vtl2_vmbus_sint_mask_mutex);
>> + on_each_cpu((smp_call_func_t)mshv_vtl_synic_mask_vmbus_sint, &mask.mask, 1);
>> + WRITE_ONCE(vtl_synic_mask_vmbus_sint_masked, mask.mask != 0);
>> + if (mask.mask)
>> + wake_up_interruptible_poll(&fd_wait_queue, EPOLLIN);
>
> Doing this wakeup is probably safe to do while holding the lock.
Acked.
>
>> +
>> + return 0;
>> +}
>> +
<snip>
>> +};
>> +
>> +struct mshv_vtl_set_poll_file {
>> + __u32 cpu;
>> + __u32 fd;
>> +};
>> +
>> +struct mshv_vtl_hvcall_setup {
>> + __u64 bitmap_array_size;
>> + __u64 allow_bitmap_ptr; /* pointer to __u64 */
>
> I don't think the comment is relevant. The unit of
> memory allocation in user space doesn't affect kernel
> code. And perhaps add a comment that bitmap_array_size
> is a *byte* count! :-)
Acked.
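Will drop that comment and note the units instead, e.g.:

	struct mshv_vtl_hvcall_setup {
		__u64 bitmap_array_size;	/* size of the bitmap array in bytes */
		__u64 allow_bitmap_ptr;
	};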
>
>> +};
>> +
>> +struct mshv_vtl_hvcall {
>> + __u64 control; /* Hypercall control code */
>> + __u64 input_size; /* Size of the input data */
>> + __u64 input_ptr; /* Pointer to the input struct */
>> + __u64 status; /* Status of the hypercall (output) */
>> + __u64 output_size; /* Size of the output data */
>> + __u64 output_ptr; /* Pointer to the output struct */
>> +};
>> +
>> +struct mshv_sint_mask {
>> + __u8 mask;
>> + __u8 reserved[7];
>> +};
>> +
>> +/* /dev/mshv device IOCTL */
>> +#define MSHV_CHECK_EXTENSION _IOW(MSHV_IOCTL, 0x00, __u32)
>> +
>> +/* vtl device */
>> +#define MSHV_CREATE_VTL _IOR(MSHV_IOCTL, 0x1D, char)
>> +#define MSHV_ADD_VTL0_MEMORY _IOW(MSHV_IOCTL, 0x21, struct mshv_vtl_ram_disposition)
>> +#define MSHV_SET_POLL_FILE _IOW(MSHV_IOCTL, 0x25, struct mshv_vtl_set_poll_file)
>> +#define MSHV_RETURN_TO_LOWER_VTL _IO(MSHV_IOCTL, 0x27)
>> +#define MSHV_GET_VP_REGISTERS _IOWR(MSHV_IOCTL, 0x05, struct mshv_vp_registers)
>> +#define MSHV_SET_VP_REGISTERS _IOW(MSHV_IOCTL, 0x06, struct mshv_vp_registers)
>> +
>> +/* VMBus device IOCTLs */
>> +#define MSHV_SINT_SIGNAL_EVENT _IOW(MSHV_IOCTL, 0x22, struct mshv_vtl_signal_event)
>> +#define MSHV_SINT_POST_MESSAGE _IOW(MSHV_IOCTL, 0x23, struct mshv_vtl_sint_post_msg)
>> +#define MSHV_SINT_SET_EVENTFD _IOW(MSHV_IOCTL, 0x24, struct mshv_vtl_set_eventfd)
>> +#define MSHV_SINT_PAUSE_MESSAGE_STREAM _IOW(MSHV_IOCTL, 0x25, struct mshv_sint_mask)
>> +
>> +/* hv_hvcall device */
>> +#define MSHV_HVCALL_SETUP _IOW(MSHV_IOCTL, 0x1E, struct mshv_vtl_hvcall_setup)
>> +#define MSHV_HVCALL _IOWR(MSHV_IOCTL, 0x1F, struct mshv_vtl_hvcall)
>> #endif
>> --
>> 2.34.1
Thanks for the review comments.
Regards,
Naman