linux-kernel - Re: [PATCH v2 15/15] Drivers: hv: Add modules to expose /dev/mshv to VMMs running on Hyper-V

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <664aec4c-7ea9-447f-afab-9e31e9e106c1@linux.microsoft.com>
Date:   Fri, 18 Aug 2023 11:59:34 -0700
From:   Nuno Das Neves <nunodasneves@...ux.microsoft.com>
To:     Saurabh Singh Sengar <ssengar@...rosoft.com>,
        "linux-hyperv@...r.kernel.org" <linux-hyperv@...r.kernel.org>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
        "x86@...nel.org" <x86@...nel.org>,
        "linux-arm-kernel@...ts.infradead.org" 
        <linux-arm-kernel@...ts.infradead.org>,
        "linux-arch@...r.kernel.org" <linux-arch@...r.kernel.org>
Cc:     "patches@...ts.linux.dev" <patches@...ts.linux.dev>,
        "Michael Kelley (LINUX)" <mikelley@...rosoft.com>,
        KY Srinivasan <kys@...rosoft.com>,
        "wei.liu@...nel.org" <wei.liu@...nel.org>,
        Haiyang Zhang <haiyangz@...rosoft.com>,
        Dexuan Cui <decui@...rosoft.com>,
        "apais@...ux.microsoft.com" <apais@...ux.microsoft.com>,
        Tianyu Lan <Tianyu.Lan@...rosoft.com>,
        "ssengar@...ux.microsoft.com" <ssengar@...ux.microsoft.com>,
        MUKESH RATHOR <mukeshrathor@...rosoft.com>,
        "stanislav.kinsburskiy@...il.com" <stanislav.kinsburskiy@...il.com>,
        "jinankjain@...ux.microsoft.com" <jinankjain@...ux.microsoft.com>,
        vkuznets <vkuznets@...hat.com>,
        "tglx@...utronix.de" <tglx@...utronix.de>,
        "mingo@...hat.com" <mingo@...hat.com>,
        "bp@...en8.de" <bp@...en8.de>,
        "dave.hansen@...ux.intel.com" <dave.hansen@...ux.intel.com>,
        "hpa@...or.com" <hpa@...or.com>,
        "will@...nel.org" <will@...nel.org>,
        "catalin.marinas@....com" <catalin.marinas@....com>
Subject: Re: [PATCH v2 15/15] Drivers: hv: Add modules to expose /dev/mshv to
 VMMs running on Hyper-V

On 8/18/2023 6:08 AM, Saurabh Singh Sengar wrote:
>> +
>> +config MSHV_VTL
>> +	tristate "Microsoft Hyper-V VTL driver"
>> +	depends on MSHV
>> +	select HYPERV_VTL_MODE
>> +	select TRANSPARENT_HUGEPAGE
> 
> TRANSPARENT_HUGEPAGE can be avoided for now.
> 

I will remove it in the next version. Thanks.
>> +
>> +#define HV_GET_REGISTER_BATCH_SIZE	\
>> +	(HV_HYP_PAGE_SIZE / sizeof(union hv_register_value))
>> +#define HV_SET_REGISTER_BATCH_SIZE	\
>> +	((HV_HYP_PAGE_SIZE - sizeof(struct hv_input_set_vp_registers)) \
>> +		/ sizeof(struct hv_register_assoc))
>> +
>> +int hv_call_get_vp_registers(
>> +		u32 vp_index,
>> +		u64 partition_id,
>> +		u16 count,
>> +		union hv_input_vtl input_vtl,
>> +		struct hv_register_assoc *registers)
>> +{
>> +	struct hv_input_get_vp_registers *input_page;
>> +	union hv_register_value *output_page;
>> +	u16 completed = 0;
>> +	unsigned long remaining = count;
>> +	int rep_count, i;
>> +	u64 status;
>> +	unsigned long flags;
>> +
>> +	local_irq_save(flags);
>> +
>> +	input_page = *this_cpu_ptr(hyperv_pcpu_input_arg);
>> +	output_page = *this_cpu_ptr(hyperv_pcpu_output_arg);
>> +
>> +	input_page->partition_id = partition_id;
>> +	input_page->vp_index = vp_index;
>> +	input_page->input_vtl.as_uint8 = input_vtl.as_uint8;
>> +	input_page->rsvd_z8 = 0;
>> +	input_page->rsvd_z16 = 0;
>> +
>> +	while (remaining) {
>> +		rep_count = min(remaining, HV_GET_REGISTER_BATCH_SIZE);
>> +		for (i = 0; i < rep_count; ++i)
>> +			input_page->names[i] = registers[i].name;
>> +
>> +		status = hv_do_rep_hypercall(HVCALL_GET_VP_REGISTERS, rep_count,
>> +					     0, input_page, output_page);
> 
> Is there any possibility that count is passed as 0 by mistake? In that case
> status will remain uninitialized.
> 

These lines ensure rep_count is never 0 here:

	while (remaining) {
		rep_count = min(remaining, HV_GET_REGISTER_BATCH_SIZE);

Inside the loop body, `remaining` can't be 0 (otherwise the loop would have
exited), and HV_GET_REGISTER_BATCH_SIZE is not 0, or we would never get any
registers.

>> diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
>> index 13f972e72375..ccd76f30a638 100644
>> --- a/drivers/hv/hv_common.c
>> +++ b/drivers/hv/hv_common.c
>> @@ -62,7 +62,11 @@ EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg);
>>   */
>>  static inline bool hv_output_arg_exists(void)
>>  {
>> +#ifdef CONFIG_MSHV_VTL
> 
> Today both options work together, but I am wondering which is
> more accurate here, CONFIG_HYPERV_VTL_MODE or CONFIG_MSHV_VTL,
> for the scalability of VTL modules.
> 

Good point. Though I'm not sure it matters too much right now,
since as you mention they will always be enabled together.

Does CONFIG_HYPERV_VTL_MODE use the output arg?

>> diff --git a/drivers/hv/mshv.h b/drivers/hv/mshv.h
>> new file mode 100644
>> index 000000000000..166480a73f3f
>> --- /dev/null
>> +++ b/drivers/hv/mshv.h
>> @@ -0,0 +1,156 @@
>> +/* SPDX-License-Identifier: GPL-2.0-only */
>> +/*
>> + * Copyright (c) 2023, Microsoft Corporation.
>> + */
>> +
>> +#ifndef _MSHV_H_
>> +#define _MSHV_H_
>> +
>> +#include <linux/spinlock.h>
>> +#include <linux/mutex.h>
>> +#include <linux/semaphore.h>
>> +#include <linux/sched.h>
>> +#include <linux/srcu.h>
>> +#include <linux/wait.h>
>> +#include <uapi/linux/mshv.h>
>> +
>> +/*
>> + * Hyper-V hypercalls
>> + */
>> +
>> +int hv_call_withdraw_memory(u64 count, int node, u64 partition_id);
>> +int hv_call_create_partition(
>> +		u64 flags,
>> +		struct hv_partition_creation_properties creation_properties,
>> +		union hv_partition_isolation_properties isolation_properties,
>> +		u64 *partition_id);
>> +int hv_call_initialize_partition(u64 partition_id);
>> +int hv_call_finalize_partition(u64 partition_id);
>> +int hv_call_delete_partition(u64 partition_id);
>> +int hv_call_map_gpa_pages(
>> +		u64 partition_id,
>> +		u64 gpa_target,
>> +		u64 page_count, u32 flags,
>> +		struct page **pages);
>> +int hv_call_unmap_gpa_pages(
>> +		u64 partition_id,
>> +		u64 gpa_target,
>> +		u64 page_count, u32 flags);
>> +int hv_call_get_vp_registers(
>> +		u32 vp_index,
>> +		u64 partition_id,
>> +		u16 count,
>> +		union hv_input_vtl input_vtl,
>> +		struct hv_register_assoc *registers);
>> +int hv_call_get_gpa_access_states(
>> +		u64 partition_id,
>> +		u32 count,
>> +		u64 gpa_base_pfn,
>> +		u64 state_flags,
>> +		int *written_total,
>> +		union hv_gpa_page_access_state *states);
>> +
>> +int hv_call_set_vp_registers(
>> +		u32 vp_index,
>> +		u64 partition_id,
>> +		u16 count,
>> +		union hv_input_vtl input_vtl,
>> +		struct hv_register_assoc *registers);
> 
> Nit: Opportunity to fix many of the checkpatch.pl warnings related to line
> breaks here and in many other places.
> 

checkpatch.pl doesn't complain about anything in this file.

>> +static int
>> +mshv_assign_ioeventfd(struct mshv_partition *partition,
>> +		      struct mshv_ioeventfd *args)
>> +	__must_hold(&partition->mutex)
>> +{
>> +	struct kernel_mshv_ioeventfd *p;
>> +	struct eventfd_ctx *eventfd;
>> +	u64 doorbell_flags = 0;
>> +	int ret;
>> +
>> +	/* This mutex is currently protecting ioeventfd.items list */
>> +	WARN_ON_ONCE(!mutex_is_locked(&partition->mutex));
>> +
>> +	if (args->flags & MSHV_IOEVENTFD_FLAG_PIO)
>> +		return -EOPNOTSUPP;
>> +
>> +	/* must be natural-word sized */
>> +	switch (args->len) {
>> +	case 0:
>> +		doorbell_flags = HV_DOORBELL_FLAG_TRIGGER_SIZE_ANY;
>> +		break;
>> +	case 1:
>> +		doorbell_flags = HV_DOORBELL_FLAG_TRIGGER_SIZE_BYTE;
>> +		break;
>> +	case 2:
>> +		doorbell_flags = HV_DOORBELL_FLAG_TRIGGER_SIZE_WORD;
>> +		break;
>> +	case 4:
>> +		doorbell_flags = HV_DOORBELL_FLAG_TRIGGER_SIZE_DWORD;
>> +		break;
>> +	case 8:
>> +		doorbell_flags = HV_DOORBELL_FLAG_TRIGGER_SIZE_QWORD;
>> +		break;
>> +	default:
>> +		pr_warn("ioeventfd: invalid length specified\n");
>> +		return -EINVAL;
>> +	}
>> +
>> +	/* check for range overflow */
>> +	if (args->addr + args->len < args->addr)
>> +		return -EINVAL;
>> +
>> +	/* check for extra flags that we don't understand */
>> +	if (args->flags & ~MSHV_IOEVENTFD_VALID_FLAG_MASK)
>> +		return -EINVAL;
>> +
>> +	eventfd = eventfd_ctx_fdget(args->fd);
>> +	if (IS_ERR(eventfd))
>> +		return PTR_ERR(eventfd);
>> +
>> +	p = kzalloc(sizeof(*p), GFP_KERNEL);
>> +	if (!p) {
>> +		ret = -ENOMEM;
>> +		goto fail;
>> +	}
>> +
>> +	p->addr    = args->addr;
>> +	p->length  = args->len;
>> +	p->eventfd = eventfd;
>> +
>> +	/* The datamatch feature is optional, otherwise this is a wildcard */
>> +	if (args->flags & MSHV_IOEVENTFD_FLAG_DATAMATCH)
>> +		p->datamatch = args->datamatch;
>> +	else {
>> +		p->wildcard = true;
>> +		doorbell_flags |= HV_DOORBELL_FLAG_TRIGGER_ANY_VALUE;
>> +	}
>> +
>> +	if (ioeventfd_check_collision(partition, p)) {
>> +		ret = -EEXIST;
>> +		goto unlock_fail;
>> +	}
>> +
>> +	ret = mshv_register_doorbell(partition->id, ioeventfd_mmio_write,
>> +				     (void *)partition, p->addr,
>> +				     p->datamatch, doorbell_flags);
>> +	if (ret < 0) {
>> +		pr_err("Failed to register ioeventfd doorbell!\n");
> 
> Nit: Would we like to print the function name at the start of pr_err?
> 

Yes, we should. I will fix it. Thanks!