lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <BYAPR21MB1688A97F70DBF8BECE92FB7CD7F39@BYAPR21MB1688.namprd21.prod.outlook.com>
Date:   Thu, 29 Dec 2022 20:57:03 +0000
From:   "Michael Kelley (LINUX)" <mikelley@...rosoft.com>
To:     Jinank Jain <jinankjain@...ux.microsoft.com>,
        Jinank Jain <jinankjain@...rosoft.com>
CC:     KY Srinivasan <kys@...rosoft.com>,
        Haiyang Zhang <haiyangz@...rosoft.com>,
        "wei.liu@...nel.org" <wei.liu@...nel.org>,
        Dexuan Cui <decui@...rosoft.com>,
        "tglx@...utronix.de" <tglx@...utronix.de>,
        "mingo@...hat.com" <mingo@...hat.com>,
        "bp@...en8.de" <bp@...en8.de>,
        "dave.hansen@...ux.intel.com" <dave.hansen@...ux.intel.com>,
        "x86@...nel.org" <x86@...nel.org>, "hpa@...or.com" <hpa@...or.com>,
        "arnd@...db.de" <arnd@...db.de>,
        "peterz@...radead.org" <peterz@...radead.org>,
        "jpoimboe@...nel.org" <jpoimboe@...nel.org>,
        "seanjc@...gle.com" <seanjc@...gle.com>,
        "kirill.shutemov@...ux.intel.com" <kirill.shutemov@...ux.intel.com>,
        "ak@...ux.intel.com" <ak@...ux.intel.com>,
        "sathyanarayanan.kuppuswamy@...ux.intel.com" 
        <sathyanarayanan.kuppuswamy@...ux.intel.com>,
        "linux-hyperv@...r.kernel.org" <linux-hyperv@...r.kernel.org>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
        "linux-arch@...r.kernel.org" <linux-arch@...r.kernel.org>,
        "anrayabh@...ux.microsoft.com" <anrayabh@...ux.microsoft.com>
Subject: RE: [PATCH v9 2/5] Drivers: hv: Setup synic registers in case of
 nested root partition

From: Jinank Jain <jinankjain@...ux.microsoft.com> Sent: Tuesday, December 13, 2022 10:33 PM
> 
> Child partitions are free to allocate SynIC message and event page but in
> case of root partition it must use the pages allocated by Microsoft
> Hypervisor (MSHV). Base address for these pages can be found using
> synthetic MSRs exposed by MSHV. There is a slight difference in those MSRs
> for nested vs non-nested root partition.
> 
> Signed-off-by: Jinank Jain <jinankjain@...ux.microsoft.com>

You have addressed all my previous comments and those areas
look good.  But I see a new issue that I had not noticed before.
See comment below.

> ---
>  arch/x86/include/asm/hyperv-tlfs.h | 11 +++++
>  arch/x86/include/asm/mshyperv.h    | 30 +++-----------
>  arch/x86/kernel/cpu/mshyperv.c     | 65 ++++++++++++++++++++++++++++++
>  drivers/hv/hv.c                    | 19 ++++++---
>  4 files changed, 96 insertions(+), 29 deletions(-)
> 
> diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
> index 58c03d18c235..b5019becb618 100644
> --- a/arch/x86/include/asm/hyperv-tlfs.h
> +++ b/arch/x86/include/asm/hyperv-tlfs.h
> @@ -225,6 +225,17 @@ enum hv_isolation_type {
>  #define HV_REGISTER_SINT14			0x4000009E
>  #define HV_REGISTER_SINT15			0x4000009F
> 
> +/*
> + * Define synthetic interrupt controller model specific registers for
> + * nested hypervisor.
> + */
> +#define HV_REGISTER_NESTED_SCONTROL            0x40001080
> +#define HV_REGISTER_NESTED_SVERSION            0x40001081
> +#define HV_REGISTER_NESTED_SIEFP               0x40001082
> +#define HV_REGISTER_NESTED_SIMP                0x40001083
> +#define HV_REGISTER_NESTED_EOM                 0x40001084
> +#define HV_REGISTER_NESTED_SINT0               0x40001090
> +
>  /*
>   * Synthetic Timer MSRs. Four timers per vcpu.
>   */
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index 61f0c206bff0..c38e4c66a3ac 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -198,30 +198,10 @@ static inline bool hv_is_synic_reg(unsigned int reg)
>  	return false;
>  }
> 
> -static inline u64 hv_get_register(unsigned int reg)
> -{
> -	u64 value;
> -
> -	if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
> -		hv_ghcb_msr_read(reg, &value);
> -	else
> -		rdmsrl(reg, value);
> -	return value;
> -}
> -
> -static inline void hv_set_register(unsigned int reg, u64 value)
> -{
> -	if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
> -		hv_ghcb_msr_write(reg, value);
> -
> -		/* Write proxy bit via wrmsl instruction */
> -		if (reg >= HV_REGISTER_SINT0 &&
> -		    reg <= HV_REGISTER_SINT15)
> -			wrmsrl(reg, value | 1 << 20);
> -	} else {
> -		wrmsrl(reg, value);
> -	}
> -}
> +u64 hv_get_register(unsigned int reg);
> +void hv_set_register(unsigned int reg, u64 value);
> +u64 hv_get_non_nested_register(unsigned int reg);
> +void hv_set_non_nested_register(unsigned int reg, u64 value);
> 
>  #else /* CONFIG_HYPERV */
>  static inline void hyperv_init(void) {}
> @@ -241,6 +221,8 @@ static inline int hyperv_flush_guest_mapping_range(u64 as,
>  }
>  static inline void hv_set_register(unsigned int reg, u64 value) { }
>  static inline u64 hv_get_register(unsigned int reg) { return 0; }
> +static inline void hv_set_non_nested_register(unsigned int reg, u64 value) { }
> +static inline u64 hv_get_non_nested_register(unsigned int reg) { return 0; }
>  static inline int hv_set_mem_host_visibility(unsigned long addr, int numpages,
>  					     bool visible)
>  {
> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
> index f9b78d4829e3..938fc82edf05 100644
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -41,7 +41,72 @@ bool hv_root_partition;
>  bool hv_nested;
>  struct ms_hyperv_info ms_hyperv;
> 
> +static inline unsigned int hv_get_nested_reg(unsigned int reg)
> +{
> +	switch (reg) {
> +	case HV_REGISTER_SIMP:
> +		return HV_REGISTER_NESTED_SIMP;
> +	case HV_REGISTER_SIEFP:
> +		return HV_REGISTER_NESTED_SIEFP;
> +	case HV_REGISTER_SVERSION:
> +		return HV_REGISTER_NESTED_SVERSION;
> +	case HV_REGISTER_SCONTROL:
> +		return HV_REGISTER_NESTED_SCONTROL;
> +	case HV_REGISTER_SINT0:
> +		return HV_REGISTER_NESTED_SINT0;
> +	case HV_REGISTER_EOM:
> +		return HV_REGISTER_NESTED_EOM;
> +	default:
> +		return reg;
> +	}
> +}
> +
>  #if IS_ENABLED(CONFIG_HYPERV)
> +u64 hv_get_non_nested_register(unsigned int reg)
> +{
> +	u64 value;
> +
> +	if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
> +		hv_ghcb_msr_read(reg, &value);
> +	else
> +		rdmsrl(reg, value);
> +	return value;
> +}
> +EXPORT_SYMBOL_GPL(hv_get_non_nested_register);
> +
> +void hv_set_non_nested_register(unsigned int reg, u64 value)
> +{
> +	if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
> +		hv_ghcb_msr_write(reg, value);
> +
> +		/* Write proxy bit via wrmsl instruction */
> +		if (reg >= HV_REGISTER_SINT0 &&
> +		    reg <= HV_REGISTER_SINT15)
> +			wrmsrl(reg, value | 1 << 20);
> +	} else {
> +		wrmsrl(reg, value);
> +	}
> +}
> +EXPORT_SYMBOL_GPL(hv_set_non_nested_register);
> +
> +u64 hv_get_register(unsigned int reg)
> +{
> +	if (hv_nested)
> +		reg = hv_get_nested_reg(reg);
> +
> +	return hv_get_non_nested_register(reg);
> +}
> +EXPORT_SYMBOL_GPL(hv_get_register);
> +
> +void hv_set_register(unsigned int reg, u64 value)
> +{
> +	if (hv_nested)
> +		reg = hv_get_nested_reg(reg);
> +
> +	hv_set_non_nested_register(reg, value);
> +}
> +EXPORT_SYMBOL_GPL(hv_set_register);
> +
>  static void (*vmbus_handler)(void);
>  static void (*hv_stimer0_handler)(void);
>  static void (*hv_kexec_handler)(void);
> diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
> index 4d6480d57546..986814a903ee 100644
> --- a/drivers/hv/hv.c
> +++ b/drivers/hv/hv.c
> @@ -147,7 +147,7 @@ int hv_synic_alloc(void)
>  		 * Synic message and event pages are allocated by paravisor.
>  		 * Skip these pages allocation here.
>  		 */
> -		if (!hv_isolation_type_snp()) {
> +		if (!hv_isolation_type_snp() && !hv_root_partition) {
>  			hv_cpu->synic_message_page =
>  				(void *)get_zeroed_page(GFP_ATOMIC);
>  			if (hv_cpu->synic_message_page == NULL) {
> @@ -188,8 +188,16 @@ void hv_synic_free(void)
>  		struct hv_per_cpu_context *hv_cpu
>  			= per_cpu_ptr(hv_context.cpu_context, cpu);
> 
> -		free_page((unsigned long)hv_cpu->synic_event_page);
> -		free_page((unsigned long)hv_cpu->synic_message_page);
> +		if (hv_root_partition) {
> +			if (hv_cpu->synic_event_page != NULL)
> +				memunmap(hv_cpu->synic_event_page);
> +
> +			if (hv_cpu->synic_message_page != NULL)
> +				memunmap(hv_cpu->synic_message_page);

These memunmap() calls seem to be done in the wrong place.  There are two
pairs of functions that should be symmetrical unless there's a really good
reason otherwise:

1. hv_synic_alloc() and hv_synic_free()
2. hv_synic_enable_regs() and hv_synic_disable_regs()

The functions in #1 handle the allocation and freeing of these three
pages: synic_event_page, synic_message_page, and post_msg_page.
If the synic_event_page and synic_message_page don't need to be
allocated because they are provided by the hypervisor or paravisor,
then the allocation is skipped in hv_synic_alloc(), and the free_page
operations in hv_synic_free() are no-ops if the corresponding pointer
is NULL.

The functions in #2 should handle the mapping and unmapping in the
case of pages provided by the hypervisor or paravisor.   It appears
that the hv_isolation_type_snp() case does this (mostly) correctly.
But your code does the unmap in hv_synic_free(), which isn't
symmetrical.  Unless there's something unique about the situation
when running in the root partition, the unmap should be done in
hv_synic_disable_regs() like it is for the SNP case.

My "mostly" correctly comment above is because the current code
in hv_synic_disable_regs() should be setting hv_cpu->synic_message_page
and hv_cpu->synic_event_page to NULL after the unmap is done.
Those pointers must be reverted to NULL so that if hv_synic_free() is
then run, it won't try to free pages that were provided by the hypervisor
or paravisor.

Michael

> +		} else {
> +			free_page((unsigned long)hv_cpu->synic_event_page);
> +			free_page((unsigned long)hv_cpu->synic_message_page);
> +		}
>  		free_page((unsigned long)hv_cpu->post_msg_page);
>  	}
> 
> @@ -214,9 +222,10 @@ void hv_synic_enable_regs(unsigned int cpu)
> 
>  	/* Setup the Synic's message page */
>  	simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
> +
>  	simp.simp_enabled = 1;
> 
> -	if (hv_isolation_type_snp()) {
> +	if (hv_isolation_type_snp() || hv_root_partition) {
>  		hv_cpu->synic_message_page
>  			= memremap(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT,
>  				   HV_HYP_PAGE_SIZE, MEMREMAP_WB);
> @@ -233,7 +242,7 @@ void hv_synic_enable_regs(unsigned int cpu)
>  	siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
>  	siefp.siefp_enabled = 1;
> 
> -	if (hv_isolation_type_snp()) {
> +	if (hv_isolation_type_snp() || hv_root_partition) {
>  		hv_cpu->synic_event_page =
>  			memremap(siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT,
>  				 HV_HYP_PAGE_SIZE, MEMREMAP_WB);
> --
> 2.25.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ