lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <064f51db-8114-b2b0-21dd-e47eb03da0be@linux.microsoft.com>
Date:   Fri, 2 Dec 2022 12:35:03 +0530
From:   Jinank Jain <jinankjain@...ux.microsoft.com>
To:     "Michael Kelley (LINUX)" <mikelley@...rosoft.com>,
        Jinank Jain <jinankjain@...rosoft.com>
Cc:     KY Srinivasan <kys@...rosoft.com>,
        Haiyang Zhang <haiyangz@...rosoft.com>,
        "wei.liu@...nel.org" <wei.liu@...nel.org>,
        Dexuan Cui <decui@...rosoft.com>,
        "tglx@...utronix.de" <tglx@...utronix.de>,
        "mingo@...hat.com" <mingo@...hat.com>,
        "bp@...en8.de" <bp@...en8.de>,
        "dave.hansen@...ux.intel.com" <dave.hansen@...ux.intel.com>,
        "x86@...nel.org" <x86@...nel.org>, "hpa@...or.com" <hpa@...or.com>,
        "arnd@...db.de" <arnd@...db.de>,
        "peterz@...radead.org" <peterz@...radead.org>,
        "jpoimboe@...nel.org" <jpoimboe@...nel.org>,
        "seanjc@...gle.com" <seanjc@...gle.com>,
        "kirill.shutemov@...ux.intel.com" <kirill.shutemov@...ux.intel.com>,
        "ak@...ux.intel.com" <ak@...ux.intel.com>,
        "sathyanarayanan.kuppuswamy@...ux.intel.com" 
        <sathyanarayanan.kuppuswamy@...ux.intel.com>,
        "linux-hyperv@...r.kernel.org" <linux-hyperv@...r.kernel.org>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
        "linux-arch@...r.kernel.org" <linux-arch@...r.kernel.org>,
        "anrayabh@...ux.microsoft.com" <anrayabh@...ux.microsoft.com>
Subject: Re: [PATCH v7 2/5] Drivers: hv: Setup synic registers in case of
 nested root partition


On 12/2/2022 9:30 AM, Michael Kelley (LINUX) wrote:
> From: Jinank Jain <jinankjain@...ux.microsoft.com> Sent: Thursday, December 1, 2022 3:04 AM
>> Child partitions are free to allocate SynIC message and event page but in
>> case of root partition it must use the pages allocated by Microsoft
>> Hypervisor (MSHV). Base address for these pages can be found using
>> synthetic MSRs exposed by MSHV. There is a slight difference in those MSRs
>> for nested vs non-nested root partition.
>>
>> Signed-off-by: Jinank Jain <jinankjain@...ux.microsoft.com>
>> ---
>>   arch/x86/include/asm/hyperv-tlfs.h | 11 ++++
>>   arch/x86/include/asm/mshyperv.h    | 30 ++-------
>>   arch/x86/kernel/cpu/mshyperv.c     | 69 +++++++++++++++++++++
>>   drivers/hv/hv.c                    | 99 ++++++++++++++++++++++--------
>>   include/asm-generic/mshyperv.h     |  5 +-
>>   5 files changed, 165 insertions(+), 49 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
>> index 58c03d18c235..b5019becb618 100644
>> --- a/arch/x86/include/asm/hyperv-tlfs.h
>> +++ b/arch/x86/include/asm/hyperv-tlfs.h
>> @@ -225,6 +225,17 @@ enum hv_isolation_type {
>>   #define HV_REGISTER_SINT14			0x4000009E
>>   #define HV_REGISTER_SINT15			0x4000009F
>>
>> +/*
>> + * Define synthetic interrupt controller model specific registers for
>> + * nested hypervisor.
>> + */
>> +#define HV_REGISTER_NESTED_SCONTROL            0x40001080
>> +#define HV_REGISTER_NESTED_SVERSION            0x40001081
>> +#define HV_REGISTER_NESTED_SIEFP               0x40001082
>> +#define HV_REGISTER_NESTED_SIMP                0x40001083
>> +#define HV_REGISTER_NESTED_EOM                 0x40001084
>> +#define HV_REGISTER_NESTED_SINT0               0x40001090
>> +
>>   /*
>>    * Synthetic Timer MSRs. Four timers per vcpu.
>>    */
>> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
>> index 61f0c206bff0..3197d49c888c 100644
>> --- a/arch/x86/include/asm/mshyperv.h
>> +++ b/arch/x86/include/asm/mshyperv.h
>> @@ -198,30 +198,10 @@ static inline bool hv_is_synic_reg(unsigned int reg)
>>   	return false;
>>   }
>>
>> -static inline u64 hv_get_register(unsigned int reg)
>> -{
>> -	u64 value;
>> -
>> -	if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
>> -		hv_ghcb_msr_read(reg, &value);
>> -	else
>> -		rdmsrl(reg, value);
>> -	return value;
>> -}
>> -
>> -static inline void hv_set_register(unsigned int reg, u64 value)
>> -{
>> -	if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
>> -		hv_ghcb_msr_write(reg, value);
>> -
>> -		/* Write proxy bit via wrmsl instruction */
>> -		if (reg >= HV_REGISTER_SINT0 &&
>> -		    reg <= HV_REGISTER_SINT15)
>> -			wrmsrl(reg, value | 1 << 20);
>> -	} else {
>> -		wrmsrl(reg, value);
>> -	}
>> -}
>> +u64 hv_get_register(unsigned int reg);
>> +void hv_set_register(unsigned int reg, u64 value);
>> +u64 hv_get_nested_register(unsigned int reg);
>> +void hv_set_nested_register(unsigned int reg, u64 value);
>>
>>   #else /* CONFIG_HYPERV */
>>   static inline void hyperv_init(void) {}
>> @@ -241,6 +221,8 @@ static inline int hyperv_flush_guest_mapping_range(u64 as,
>>   }
>>   static inline void hv_set_register(unsigned int reg, u64 value) { }
>>   static inline u64 hv_get_register(unsigned int reg) { return 0; }
>> +static inline void hv_set_nested_register(unsigned int reg, u64 value) { }
>> +static inline u64 hv_get_nested_register(unsigned int reg) { return 0; }
>>   static inline int hv_set_mem_host_visibility(unsigned long addr, int numpages,
>>   					     bool visible)
>>   {
>> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
>> index f9b78d4829e3..f2f6e10301a8 100644
>> --- a/arch/x86/kernel/cpu/mshyperv.c
>> +++ b/arch/x86/kernel/cpu/mshyperv.c
>> @@ -41,7 +41,76 @@ bool hv_root_partition;
>>   bool hv_nested;
>>   struct ms_hyperv_info ms_hyperv;
>>
>> +static inline unsigned int hv_get_nested_reg(unsigned int reg)
>> +{
>> +	switch (reg) {
>> +	case HV_REGISTER_SIMP:
>> +		return HV_REGISTER_NESTED_SIMP;
>> +	case HV_REGISTER_NESTED_SIEFP:
>> +		return HV_REGISTER_SIEFP;
>> +	case HV_REGISTER_SCONTROL:
>> +		return HV_REGISTER_NESTED_SCONTROL;
>> +	case HV_REGISTER_SINT0:
>> +		return HV_REGISTER_NESTED_SINT0;
>> +	case HV_REGISTER_EOM:
>> +		return HV_REGISTER_NESTED_EOM;
>> +	default:
>> +		return reg;
>> +	}
> Just a question:  You added #defines for 6 nested registers.  But
> the switch statement above maps only 5 registers.  Is it intentional
> that there's not a mapping for HV_REGISTER_SVERSION?

Good catch! Will fix it in the next revision.

>
>> +}
>> +
>>   #if IS_ENABLED(CONFIG_HYPERV)
>> +static u64 _hv_get_register(unsigned int reg, bool nested)
>> +{
>> +	u64 value;
>> +
>> +	if (nested)
>> +		reg = hv_get_nested_reg(reg);
>> +
>> +	if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
>> +		hv_ghcb_msr_read(reg, &value);
>> +	else
>> +		rdmsrl(reg, value);
>> +	return value;
>> +}
>> +
>> +static void _hv_set_register(unsigned int reg, u64 value, bool nested)
>> +{
>> +	if (nested)
>> +		reg = hv_get_nested_reg(reg);
>> +
>> +	if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
>> +		hv_ghcb_msr_write(reg, value);
>> +
>> +		/* Write proxy bit via wrmsl instruction */
>> +		if (reg >= HV_REGISTER_SINT0 &&
>> +		    reg <= HV_REGISTER_SINT15)
>> +			wrmsrl(reg, value | 1 << 20);
>> +	} else {
>> +		wrmsrl(reg, value);
>> +	}
>> +}
>> +
>> +u64 hv_get_register(unsigned int reg)
>> +{
>> +	return _hv_get_register(reg, false);
>> +}
>> +
>> +void hv_set_register(unsigned int reg, u64 value)
>> +{
>> +	_hv_set_register(reg, value, false);
>> +}
>> +
>> +u64 hv_get_nested_register(unsigned int reg)
>> +{
>> +	return _hv_get_register(reg, true);
>> +}
>> +
>> +void hv_set_nested_register(unsigned int reg, u64 value)
>> +{
>> +	_hv_set_register(reg, value, true);
>> +}
>> +
>>   static void (*vmbus_handler)(void);
>>   static void (*hv_stimer0_handler)(void);
>>   static void (*hv_kexec_handler)(void);
>> diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
>> index 4d6480d57546..0ed052f2423e 100644
>> --- a/drivers/hv/hv.c
>> +++ b/drivers/hv/hv.c
>> @@ -147,7 +147,7 @@ int hv_synic_alloc(void)
>>   		 * Synic message and event pages are allocated by paravisor.
>>   		 * Skip these pages allocation here.
>>   		 */
>> -		if (!hv_isolation_type_snp()) {
>> +		if (!hv_isolation_type_snp() && !hv_root_partition) {
>>   			hv_cpu->synic_message_page =
>>   				(void *)get_zeroed_page(GFP_ATOMIC);
>>   			if (hv_cpu->synic_message_page == NULL) {
>> @@ -188,8 +188,16 @@ void hv_synic_free(void)
>>   		struct hv_per_cpu_context *hv_cpu
>>   			= per_cpu_ptr(hv_context.cpu_context, cpu);
>>
>> -		free_page((unsigned long)hv_cpu->synic_event_page);
>> -		free_page((unsigned long)hv_cpu->synic_message_page);
>> +		if (hv_root_partition) {
>> +			if (hv_cpu->synic_event_page != NULL)
>> +				memunmap(hv_cpu->synic_event_page);
>> +
>> +			if (hv_cpu->synic_message_page != NULL)
>> +				memunmap(hv_cpu->synic_message_page);
>> +		} else {
>> +			free_page((unsigned long)hv_cpu->synic_event_page);
>> +			free_page((unsigned long)hv_cpu->synic_message_page);
>> +		}
>>   		free_page((unsigned long)hv_cpu->post_msg_page);
>>   	}
>>
>> @@ -213,10 +221,12 @@ void hv_synic_enable_regs(unsigned int cpu)
>>   	union hv_synic_scontrol sctrl;
>>
>>   	/* Setup the Synic's message page */
>> -	simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
>> +	simp.as_uint64 = hv_nested ? hv_get_nested_register(HV_REGISTER_SIMP) :
>> +				     hv_get_register(HV_REGISTER_SIMP);
> Unfortunately, this code and the similar places below will run into
> problems on ARM64.  Drivers/hv/hv.c is common code on all architectures
> so it needs to compile and run on ARM64 as well as x86/x64.  But there's
> no hv_get_nested_register() defined or implemented on the ARM64 side,
> so the code will fail to compile.
>
> I think there's a better way to do this.   Based on Nuno's comments, it
> seems like there are two hv_get_register() functions needed:
>
> 1)  Get the value of the register or its nested cousin, based on the value
> of hv_nested.  That's what you are explicitly coding here.
> 2) Get the value of the register.  Don't access the nested cousin, regardless
> of the value of hv_nested.
>
> Based on how you coded things earlier, I'm assuming #1 is what you want to
> use in most cases, and specifically here in drivers/hv/hv.c.  That's good,
> because #1 can hide the testing of hv_nested in the x86-specific
> implementation of hv_get_register(), while the ARM64 version of
> hv_get_register() continues to do whatever it does now with no changes.
>
> I'm also assuming that #2 may be used in particular cases in the code
> that is specifically related to nesting.  Give the #2 version a different
> name --- maybe hv_get_nonnested_register(), or something like that --
> and use it only in code under arch/x86 that is related to nesting.  That
> way, ARM64 won't be affected.
>
> Of course, the same approach applies to hv_set_register().
>
> hv_get_register() and hv_get_nonnested_register() will obviously
> share some code.  But rather than calling a common function starting
> with underscore like you've done above, let me suggest that
> hv_get_register() test hv_nested and potentially do the translation,
> then call hv_get_nonnested_register().  That way you'll end up
> with just two functions instead of three as above with
> hv_get_register(), hv_get_nested_register(), and _hv_get_register().

I tried the way you suggested and it worked for ARM64 this time. But 
still I would have three functions. Because the base function 
_hv_get_register() would still be required in order to avoid code 
duplication in hv_get_non_nested_register().

>
> I haven't coded up any of this, so take it as a suggestion.  There
> could be some problem with it that I haven't seen, or my assumptions
> might be wrong.  But give it a try and see if it works out.  I'm hoping
> it can all be handled on the x86 side without having to add complexity
> on the ARM64 side.
>
> Michael
>
>> +
>>   	simp.simp_enabled = 1;
>>
>> -	if (hv_isolation_type_snp()) {
>> +	if (hv_isolation_type_snp() || hv_root_partition) {
>>   		hv_cpu->synic_message_page
>>   			= memremap(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT,
>>   				   HV_HYP_PAGE_SIZE, MEMREMAP_WB);
>> @@ -227,13 +237,18 @@ void hv_synic_enable_regs(unsigned int cpu)
>>   			>> HV_HYP_PAGE_SHIFT;
>>   	}
>>
>> -	hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);
>> +	if (hv_nested)
>> +		hv_set_nested_register(HV_REGISTER_SIMP, simp.as_uint64);
>> +	else
>> +		hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);
>>
>>   	/* Setup the Synic's event page */
>> -	siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
>> +	siefp.as_uint64 = hv_nested ?
>> +				  hv_get_nested_register(HV_REGISTER_SIEFP) :
>> +				  hv_get_register(HV_REGISTER_SIEFP);
>>   	siefp.siefp_enabled = 1;
>>
>> -	if (hv_isolation_type_snp()) {
>> +	if (hv_isolation_type_snp() || hv_root_partition) {
>>   		hv_cpu->synic_event_page =
>>   			memremap(siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT,
>>   				 HV_HYP_PAGE_SIZE, MEMREMAP_WB);
>> @@ -245,13 +260,19 @@ void hv_synic_enable_regs(unsigned int cpu)
>>   			>> HV_HYP_PAGE_SHIFT;
>>   	}
>>
>> -	hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
>> +	if (hv_nested)
>> +		hv_set_nested_register(HV_REGISTER_SIEFP, siefp.as_uint64);
>> +	else
>> +		hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
>>
>>   	/* Setup the shared SINT. */
>>   	if (vmbus_irq != -1)
>>   		enable_percpu_irq(vmbus_irq, 0);
>> -	shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
>> -					VMBUS_MESSAGE_SINT);
>> +	shared_sint.as_uint64 =
>> +		hv_nested ?
>> +			hv_get_nested_register(HV_REGISTER_SINT0 +
>> +					       VMBUS_MESSAGE_SINT) :
>> +			hv_get_register(HV_REGISTER_SINT0 +
>> VMBUS_MESSAGE_SINT);
>>
>>   	shared_sint.vector = vmbus_interrupt;
>>   	shared_sint.masked = false;
>> @@ -266,14 +287,22 @@ void hv_synic_enable_regs(unsigned int cpu)
>>   #else
>>   	shared_sint.auto_eoi = 0;
>>   #endif
>> -	hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
>> +	if (hv_nested)
>> +		hv_set_nested_register(HV_REGISTER_SINT0 +
>> VMBUS_MESSAGE_SINT,
>> +				       shared_sint.as_uint64);
>> +	else
>> +		hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
>>   				shared_sint.as_uint64);
>> -
>>   	/* Enable the global synic bit */
>> -	sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
>> +	sctrl.as_uint64 = hv_nested ?
>> +				  hv_get_nested_register(HV_REGISTER_SCONTROL) :
>> +				  hv_get_register(HV_REGISTER_SCONTROL);
>>   	sctrl.enable = 1;
>>
>> -	hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
>> +	if (hv_nested)
>> +		hv_set_nested_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
>> +	else
>> +		hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
>>   }
>>
>>   int hv_synic_init(unsigned int cpu)
>> @@ -297,17 +326,25 @@ void hv_synic_disable_regs(unsigned int cpu)
>>   	union hv_synic_siefp siefp;
>>   	union hv_synic_scontrol sctrl;
>>
>> -	shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
>> -					VMBUS_MESSAGE_SINT);
>> +	shared_sint.as_uint64 =
>> +		hv_nested ?
>> +			hv_get_nested_register(HV_REGISTER_SINT0 +
>> +					       VMBUS_MESSAGE_SINT) :
>> +			hv_get_register(HV_REGISTER_SINT0 +
>> VMBUS_MESSAGE_SINT);
>>
>>   	shared_sint.masked = 1;
>>
>>   	/* Need to correctly cleanup in the case of SMP!!! */
>>   	/* Disable the interrupt */
>> -	hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
>> +	if (hv_nested)
>> +		hv_set_nested_register(HV_REGISTER_SINT0 +
>> VMBUS_MESSAGE_SINT,
>> +				       shared_sint.as_uint64);
>> +	else
>> +		hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
>>   				shared_sint.as_uint64);
>>
>> -	simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
>> +	simp.as_uint64 = hv_nested ? hv_get_nested_register(HV_REGISTER_SIMP) :
>> +				     hv_get_register(HV_REGISTER_SIMP);
>>   	/*
>>   	 * In Isolation VM, sim and sief pages are allocated by
>>   	 * paravisor. These pages also will be used by kdump
>> @@ -320,9 +357,14 @@ void hv_synic_disable_regs(unsigned int cpu)
>>   	else
>>   		simp.base_simp_gpa = 0;
>>
>> -	hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);
>> +	if (hv_nested)
>> +		hv_set_nested_register(HV_REGISTER_SIMP, simp.as_uint64);
>> +	else
>> +		hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);
>>
>> -	siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
>> +	siefp.as_uint64 = hv_nested ?
>> +				  hv_get_nested_register(HV_REGISTER_SIEFP) :
>> +				  hv_get_register(HV_REGISTER_SIEFP);
>>   	siefp.siefp_enabled = 0;
>>
>>   	if (hv_isolation_type_snp())
>> @@ -330,12 +372,21 @@ void hv_synic_disable_regs(unsigned int cpu)
>>   	else
>>   		siefp.base_siefp_gpa = 0;
>>
>> -	hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
>> +	if (hv_nested)
>> +		hv_set_nested_register(HV_REGISTER_SIEFP, siefp.as_uint64);
>> +	else
>> +		hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
>>
>>   	/* Disable the global synic bit */
>> -	sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
>> +	sctrl.as_uint64 = hv_nested ?
>> +				  hv_get_nested_register(HV_REGISTER_SCONTROL) :
>> +				  hv_get_register(HV_REGISTER_SCONTROL);
>>   	sctrl.enable = 0;
>> -	hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
>> +
>> +	if (hv_nested)
>> +		hv_set_nested_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
>> +	else
>> +		hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
>>
>>   	if (vmbus_irq != -1)
>>   		disable_percpu_irq(vmbus_irq);
>> diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
>> index f131027830c3..db0b5be1e087 100644
>> --- a/include/asm-generic/mshyperv.h
>> +++ b/include/asm-generic/mshyperv.h
>> @@ -147,7 +147,10 @@ static inline void vmbus_signal_eom(struct hv_message *msg,
>> u32 old_msg_type)
>>   		 * possibly deliver another msg from the
>>   		 * hypervisor
>>   		 */
>> -		hv_set_register(HV_REGISTER_EOM, 0);
>> +		if (hv_nested)
>> +			hv_set_nested_register(HV_REGISTER_EOM, 0);
>> +		else
>> +			hv_set_register(HV_REGISTER_EOM, 0);
>>   	}
>>   }
>>
>> --
>> 2.25.1


Regards,

Jinank

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ