lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Fri, 13 Aug 2021 19:31:08 +0000
From:   Michael Kelley <mikelley@...rosoft.com>
To:     Tianyu Lan <ltykernel@...il.com>,
        KY Srinivasan <kys@...rosoft.com>,
        Haiyang Zhang <haiyangz@...rosoft.com>,
        Stephen Hemminger <sthemmin@...rosoft.com>,
        "wei.liu@...nel.org" <wei.liu@...nel.org>,
        Dexuan Cui <decui@...rosoft.com>,
        "tglx@...utronix.de" <tglx@...utronix.de>,
        "mingo@...hat.com" <mingo@...hat.com>,
        "bp@...en8.de" <bp@...en8.de>, "x86@...nel.org" <x86@...nel.org>,
        "hpa@...or.com" <hpa@...or.com>,
        "dave.hansen@...ux.intel.com" <dave.hansen@...ux.intel.com>,
        "luto@...nel.org" <luto@...nel.org>,
        "peterz@...radead.org" <peterz@...radead.org>,
        "konrad.wilk@...cle.com" <konrad.wilk@...cle.com>,
        "boris.ostrovsky@...cle.com" <boris.ostrovsky@...cle.com>,
        "jgross@...e.com" <jgross@...e.com>,
        "sstabellini@...nel.org" <sstabellini@...nel.org>,
        "joro@...tes.org" <joro@...tes.org>,
        "will@...nel.org" <will@...nel.org>,
        "davem@...emloft.net" <davem@...emloft.net>,
        "kuba@...nel.org" <kuba@...nel.org>,
        "jejb@...ux.ibm.com" <jejb@...ux.ibm.com>,
        "martin.petersen@...cle.com" <martin.petersen@...cle.com>,
        "arnd@...db.de" <arnd@...db.de>, "hch@....de" <hch@....de>,
        "m.szyprowski@...sung.com" <m.szyprowski@...sung.com>,
        "robin.murphy@....com" <robin.murphy@....com>,
        "thomas.lendacky@....com" <thomas.lendacky@....com>,
        "brijesh.singh@....com" <brijesh.singh@....com>,
        "ardb@...nel.org" <ardb@...nel.org>,
        Tianyu Lan <Tianyu.Lan@...rosoft.com>,
        "pgonda@...gle.com" <pgonda@...gle.com>,
        "martin.b.radev@...il.com" <martin.b.radev@...il.com>,
        "akpm@...ux-foundation.org" <akpm@...ux-foundation.org>,
        "kirill.shutemov@...ux.intel.com" <kirill.shutemov@...ux.intel.com>,
        "rppt@...nel.org" <rppt@...nel.org>,
        "sfr@...b.auug.org.au" <sfr@...b.auug.org.au>,
        "saravanand@...com" <saravanand@...com>,
        "krish.sadhukhan@...cle.com" <krish.sadhukhan@...cle.com>,
        "aneesh.kumar@...ux.ibm.com" <aneesh.kumar@...ux.ibm.com>,
        "xen-devel@...ts.xenproject.org" <xen-devel@...ts.xenproject.org>,
        "rientjes@...gle.com" <rientjes@...gle.com>,
        "hannes@...xchg.org" <hannes@...xchg.org>,
        "tj@...nel.org" <tj@...nel.org>
CC:     "iommu@...ts.linux-foundation.org" <iommu@...ts.linux-foundation.org>,
        "linux-arch@...r.kernel.org" <linux-arch@...r.kernel.org>,
        "linux-hyperv@...r.kernel.org" <linux-hyperv@...r.kernel.org>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
        "linux-scsi@...r.kernel.org" <linux-scsi@...r.kernel.org>,
        "netdev@...r.kernel.org" <netdev@...r.kernel.org>,
        vkuznets <vkuznets@...hat.com>,
        "parri.andrea@...il.com" <parri.andrea@...il.com>,
        "dave.hansen@...el.com" <dave.hansen@...el.com>
Subject: RE: [PATCH V3 05/13] HV: Add Write/Read MSR registers via ghcb page

From: Tianyu Lan <ltykernel@...il.com> Sent: Monday, August 9, 2021 10:56 AM
> Subject: [PATCH V3 05/13] HV: Add Write/Read MSR registers via ghcb page

See previous comments about tag in the Subject line.

> Hyper-V provides GHCB protocol to write Synthetic Interrupt
> Controller MSR registers in Isolation VM with AMD SEV SNP
> and these registers are emulated by hypervisor directly.
> Hyper-V requires to write SINTx MSR registers twice. First
> writes MSR via GHCB page to communicate with hypervisor
> and then writes wrmsr instruction to talk with paravisor
> which runs in VMPL0. Guest OS ID MSR also needs to be set
> via GHCB.
> 
> Signed-off-by: Tianyu Lan <Tianyu.Lan@...rosoft.com>
> ---
> Change since v1:
>          * Introduce sev_es_ghcb_hv_call_simple() and share code
>            between SEV and Hyper-V code.
> ---
>  arch/x86/hyperv/hv_init.c       |  33 ++-------
>  arch/x86/hyperv/ivm.c           | 110 +++++++++++++++++++++++++++++
>  arch/x86/include/asm/mshyperv.h |  78 +++++++++++++++++++-
>  arch/x86/include/asm/sev.h      |   3 +
>  arch/x86/kernel/cpu/mshyperv.c  |   3 +
>  arch/x86/kernel/sev-shared.c    |  63 ++++++++++-------
>  drivers/hv/hv.c                 | 121 ++++++++++++++++++++++----------
>  include/asm-generic/mshyperv.h  |  12 +++-
>  8 files changed, 329 insertions(+), 94 deletions(-)
> 
> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> index b3683083208a..ab0b33f621e7 100644
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -423,7 +423,7 @@ void __init hyperv_init(void)
>  		goto clean_guest_os_id;
> 
>  	if (hv_isolation_type_snp()) {
> -		ms_hyperv.ghcb_base = alloc_percpu(void *);
> +		ms_hyperv.ghcb_base = alloc_percpu(union hv_ghcb __percpu *);

union hv_ghcb isn't defined.  It is not added until patch 6 of the series.

>  		if (!ms_hyperv.ghcb_base)
>  			goto clean_guest_os_id;
> 
> @@ -432,6 +432,9 @@ void __init hyperv_init(void)
>  			ms_hyperv.ghcb_base = NULL;
>  			goto clean_guest_os_id;
>  		}
> +
> +		/* Hyper-V requires to write guest os id via ghcb in SNP IVM. */
> +		hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, guest_id);
>  	}
> 
>  	rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
> @@ -523,6 +526,7 @@ void hyperv_cleanup(void)
> 
>  	/* Reset our OS id */
>  	wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
> +	hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, 0);
> 
>  	/*
>  	 * Reset hypercall page reference before reset the page,
> @@ -596,30 +600,3 @@ bool hv_is_hyperv_initialized(void)
>  	return hypercall_msr.enable;
>  }
>  EXPORT_SYMBOL_GPL(hv_is_hyperv_initialized);
> -
> -enum hv_isolation_type hv_get_isolation_type(void)
> -{
> -	if (!(ms_hyperv.priv_high & HV_ISOLATION))
> -		return HV_ISOLATION_TYPE_NONE;
> -	return FIELD_GET(HV_ISOLATION_TYPE, ms_hyperv.isolation_config_b);
> -}
> -EXPORT_SYMBOL_GPL(hv_get_isolation_type);
> -
> -bool hv_is_isolation_supported(void)
> -{
> -	if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
> -		return 0;
> -
> -	if (!hypervisor_is_type(X86_HYPER_MS_HYPERV))
> -		return 0;
> -
> -	return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE;
> -}
> -
> -DEFINE_STATIC_KEY_FALSE(isolation_type_snp);
> -
> -bool hv_isolation_type_snp(void)
> -{
> -	return static_branch_unlikely(&isolation_type_snp);
> -}
> -EXPORT_SYMBOL_GPL(hv_isolation_type_snp);
> diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
> index 8c905ffdba7f..ec0e5c259740 100644
> --- a/arch/x86/hyperv/ivm.c
> +++ b/arch/x86/hyperv/ivm.c
> @@ -6,6 +6,8 @@
>   *  Tianyu Lan <Tianyu.Lan@...rosoft.com>
>   */
> 
> +#include <linux/types.h>
> +#include <linux/bitfield.h>
>  #include <linux/hyperv.h>
>  #include <linux/types.h>
>  #include <linux/bitfield.h>
> @@ -13,6 +15,114 @@
>  #include <asm/io.h>
>  #include <asm/mshyperv.h>
> 
> +void hv_ghcb_msr_write(u64 msr, u64 value)
> +{
> +	union hv_ghcb *hv_ghcb;
> +	void **ghcb_base;
> +	unsigned long flags;
> +
> +	if (!ms_hyperv.ghcb_base)
> +		return;
> +
> +	WARN_ON(in_nmi());
> +
> +	local_irq_save(flags);
> +	ghcb_base = (void **)this_cpu_ptr(ms_hyperv.ghcb_base);
> +	hv_ghcb = (union hv_ghcb *)*ghcb_base;
> +	if (!hv_ghcb) {
> +		local_irq_restore(flags);
> +		return;
> +	}
> +
> +	ghcb_set_rcx(&hv_ghcb->ghcb, msr);
> +	ghcb_set_rax(&hv_ghcb->ghcb, lower_32_bits(value));
> +	ghcb_set_rdx(&hv_ghcb->ghcb, value >> 32);

Having used lower_32_bits() in the previous line, perhaps use
upper_32_bits() here?

> +
> +	if (sev_es_ghcb_hv_call_simple(&hv_ghcb->ghcb, SVM_EXIT_MSR, 1, 0))
> +		pr_warn("Fail to write msr via ghcb %llx.\n", msr);
> +
> +	local_irq_restore(flags);
> +}
> +
> +void hv_ghcb_msr_read(u64 msr, u64 *value)
> +{
> +	union hv_ghcb *hv_ghcb;
> +	void **ghcb_base;
> +	unsigned long flags;
> +
> +	if (!ms_hyperv.ghcb_base)
> +		return;
> +
> +	WARN_ON(in_nmi());
> +
> +	local_irq_save(flags);
> +	ghcb_base = (void **)this_cpu_ptr(ms_hyperv.ghcb_base);
> +	hv_ghcb = (union hv_ghcb *)*ghcb_base;
> +	if (!hv_ghcb) {
> +		local_irq_restore(flags);
> +		return;
> +	}
> +
> +	ghcb_set_rcx(&hv_ghcb->ghcb, msr);
> +	if (sev_es_ghcb_hv_call_simple(&hv_ghcb->ghcb, SVM_EXIT_MSR, 0, 0))
> +		pr_warn("Fail to read msr via ghcb %llx.\n", msr);
> +	else
> +		*value = (u64)lower_32_bits(hv_ghcb->ghcb.save.rax)
> +			| ((u64)lower_32_bits(hv_ghcb->ghcb.save.rdx) << 32);
> +	local_irq_restore(flags);
> +}
> +
> +void hv_sint_rdmsrl_ghcb(u64 msr, u64 *value)
> +{
> +	hv_ghcb_msr_read(msr, value);
> +}
> +EXPORT_SYMBOL_GPL(hv_sint_rdmsrl_ghcb);
> +
> +void hv_sint_wrmsrl_ghcb(u64 msr, u64 value)
> +{
> +	hv_ghcb_msr_write(msr, value);
> +
> +	/* Write proxy bit vua wrmsrl instruction. */

s/vua/via/

> +	if (msr >= HV_X64_MSR_SINT0 && msr <= HV_X64_MSR_SINT15)
> +		wrmsrl(msr, value | 1 << 20);
> +}
> +EXPORT_SYMBOL_GPL(hv_sint_wrmsrl_ghcb);
> +
> +void hv_signal_eom_ghcb(void)
> +{
> +	hv_sint_wrmsrl_ghcb(HV_X64_MSR_EOM, 0);
> +}
> +EXPORT_SYMBOL_GPL(hv_signal_eom_ghcb);

Is there a reason for this to be a function instead of a #define?
Seemingly parallel calls such as  hv_set_synic_state_ghcb()
are #defines.

> +
> +enum hv_isolation_type hv_get_isolation_type(void)
> +{
> +	if (!(ms_hyperv.priv_high & HV_ISOLATION))
> +		return HV_ISOLATION_TYPE_NONE;
> +	return FIELD_GET(HV_ISOLATION_TYPE, ms_hyperv.isolation_config_b);
> +}
> +EXPORT_SYMBOL_GPL(hv_get_isolation_type);
> +
> +/*
> + * hv_is_isolation_supported - Check system runs in the Hyper-V
> + * isolation VM.
> + */
> +bool hv_is_isolation_supported(void)
> +{
> +	return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE;
> +}
> +
> +DEFINE_STATIC_KEY_FALSE(isolation_type_snp);
> +
> +/*
> + * hv_isolation_type_snp - Check system runs in the AMD SEV-SNP based
> + * isolation VM.
> + */
> +bool hv_isolation_type_snp(void)
> +{
> +	return static_branch_unlikely(&isolation_type_snp);
> +}
> +EXPORT_SYMBOL_GPL(hv_isolation_type_snp);
> +

hv_isolation_type_snp() is implemented here in a file under arch/x86,
but it is called from architecture independent code in drivers/hv, so it
needs to do the right thing on ARM64 as well as x86.  For an example,
see the handling of hv_is_isolation_supported() in the latest linux-next
tree.

>  /*
>   * hv_mark_gpa_visibility - Set pages visible to host via hvcall.
>   *
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index 87a386fa97f7..730985676ea3 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -30,6 +30,63 @@ static inline u64 hv_get_register(unsigned int reg)
>  	return value;
>  }
> 
> +#define hv_get_sint_reg(val, reg) {		\
> +	if (hv_isolation_type_snp())		\
> +		hv_get_##reg##_ghcb(&val);	\
> +	else					\
> +		rdmsrl(HV_X64_MSR_##reg, val);	\
> +	}
> +
> +#define hv_set_sint_reg(val, reg) {		\
> +	if (hv_isolation_type_snp())		\
> +		hv_set_##reg##_ghcb(val);	\
> +	else					\
> +		wrmsrl(HV_X64_MSR_##reg, val);	\
> +	}
> +
> +
> +#define hv_get_simp(val) hv_get_sint_reg(val, SIMP)
> +#define hv_get_siefp(val) hv_get_sint_reg(val, SIEFP)
> +
> +#define hv_set_simp(val) hv_set_sint_reg(val, SIMP)
> +#define hv_set_siefp(val) hv_set_sint_reg(val, SIEFP)
> +
> +#define hv_get_synic_state(val) {			\
> +	if (hv_isolation_type_snp())			\
> +		hv_get_synic_state_ghcb(&val);		\
> +	else						\
> +		rdmsrl(HV_X64_MSR_SCONTROL, val);	\

Many of these registers that exist on x86 and ARM64 architectures
have new names without the "X64_MSR" portion.  For
example, include/asm-generic/hyperv-tlfs.h defines
HV_REGISTER_SCONTROL.  The x86-specific version of 
hyperv-tlfs.h currently has a #define for HV_X64_MSR_SCONTROL,
but we would like to get rid of these temporary aliases.
So prefer to use HV_REGISTER_SCONTROL.

Same comment applies several places in this code for other
similar registers.

> +	}
> +#define hv_set_synic_state(val) {			\
> +	if (hv_isolation_type_snp())			\
> +		hv_set_synic_state_ghcb(val);		\
> +	else						\
> +		wrmsrl(HV_X64_MSR_SCONTROL, val);	\
> +	}
> +
> +#define hv_get_vp_index(index) rdmsrl(HV_X64_MSR_VP_INDEX, index)
> +
> +#define hv_signal_eom() {			 \
> +	if (hv_isolation_type_snp() &&		 \
> +	    old_msg_type != HVMSG_TIMER_EXPIRED) \

Why is a test of the old_msg_type embedded in this macro?
And given that old_msg_type isn't a parameter of the
macro, its use is really weird.

> +		hv_signal_eom_ghcb();		 \
> +	else					 \
> +		wrmsrl(HV_X64_MSR_EOM, 0);	 \
> +	}
> +
> +#define hv_get_synint_state(int_num, val) {		\
> +	if (hv_isolation_type_snp())			\0
> +		hv_get_synint_state_ghcb(int_num, &val);\
> +	else						\
> +		rdmsrl(HV_X64_MSR_SINT0 + int_num, val);\
> +	}
> +#define hv_set_synint_state(int_num, val) {		\
> +	if (hv_isolation_type_snp())			\
> +		hv_set_synint_state_ghcb(int_num, val);	\
> +	else						\
> +		wrmsrl(HV_X64_MSR_SINT0 + int_num, val);\
> +	}
> +
>  #define hv_get_raw_timer() rdtsc_ordered()
> 
>  void hyperv_vector_handler(struct pt_regs *regs);
> @@ -193,6 +250,25 @@ int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry);
>  int hv_mark_gpa_visibility(u16 count, const u64 pfn[],
>  			   enum hv_mem_host_visibility visibility);
>  int hv_set_mem_host_visibility(unsigned long addr, int numpages, bool visible);
> +void hv_sint_wrmsrl_ghcb(u64 msr, u64 value);
> +void hv_sint_rdmsrl_ghcb(u64 msr, u64 *value);
> +void hv_signal_eom_ghcb(void);
> +void hv_ghcb_msr_write(u64 msr, u64 value);
> +void hv_ghcb_msr_read(u64 msr, u64 *value);
> +
> +#define hv_get_synint_state_ghcb(int_num, val)			\
> +	hv_sint_rdmsrl_ghcb(HV_X64_MSR_SINT0 + int_num, val)
> +#define hv_set_synint_state_ghcb(int_num, val) \
> +	hv_sint_wrmsrl_ghcb(HV_X64_MSR_SINT0 + int_num, val)
> +
> +#define hv_get_SIMP_ghcb(val) hv_sint_rdmsrl_ghcb(HV_X64_MSR_SIMP, val)
> +#define hv_set_SIMP_ghcb(val) hv_sint_wrmsrl_ghcb(HV_X64_MSR_SIMP, val)
> +
> +#define hv_get_SIEFP_ghcb(val) hv_sint_rdmsrl_ghcb(HV_X64_MSR_SIEFP, val)
> +#define hv_set_SIEFP_ghcb(val) hv_sint_wrmsrl_ghcb(HV_X64_MSR_SIEFP, val)
> +
> +#define hv_get_synic_state_ghcb(val) hv_sint_rdmsrl_ghcb(HV_X64_MSR_SCONTROL, val)
> +#define hv_set_synic_state_ghcb(val) hv_sint_wrmsrl_ghcb(HV_X64_MSR_SCONTROL, val)
>  #else /* CONFIG_HYPERV */
>  static inline void hyperv_init(void) {}
>  static inline void hyperv_setup_mmu_ops(void) {}
> @@ -209,9 +285,9 @@ static inline int hyperv_flush_guest_mapping_range(u64 as,
>  {
>  	return -1;
>  }
> +static inline void hv_signal_eom_ghcb(void) { };
>  #endif /* CONFIG_HYPERV */
> 
> -
>  #include <asm-generic/mshyperv.h>
> 
>  #endif
> diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
> index fa5cd05d3b5b..81beb2a8031b 100644
> --- a/arch/x86/include/asm/sev.h
> +++ b/arch/x86/include/asm/sev.h
> @@ -81,6 +81,9 @@ static __always_inline void sev_es_nmi_complete(void)
>  		__sev_es_nmi_complete();
>  }
>  extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd);
> +extern enum es_result sev_es_ghcb_hv_call_simple(struct ghcb *ghcb,
> +				   u64 exit_code, u64 exit_info_1,
> +				   u64 exit_info_2);
>  #else
>  static inline void sev_es_ist_enter(struct pt_regs *regs) { }
>  static inline void sev_es_ist_exit(void) { }
> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
> index 2b7f396ef1a5..3633f871ac1e 100644
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -318,6 +318,9 @@ static void __init ms_hyperv_init_platform(void)
> 
>  		pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n",
>  			ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b);
> +
> +		if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP)
> +			static_branch_enable(&isolation_type_snp);
>  	}
> 
>  	if (hv_max_functions_eax >= HYPERV_CPUID_NESTED_FEATURES) {
> diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c
> index 9f90f460a28c..dd7f37de640b 100644
> --- a/arch/x86/kernel/sev-shared.c
> +++ b/arch/x86/kernel/sev-shared.c
> @@ -94,10 +94,9 @@ static void vc_finish_insn(struct es_em_ctxt *ctxt)
>  	ctxt->regs->ip += ctxt->insn.length;
>  }
> 
> -static enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
> -					  struct es_em_ctxt *ctxt,
> -					  u64 exit_code, u64 exit_info_1,
> -					  u64 exit_info_2)
> +enum es_result sev_es_ghcb_hv_call_simple(struct ghcb *ghcb,
> +				   u64 exit_code, u64 exit_info_1,
> +				   u64 exit_info_2)
>  {
>  	enum es_result ret;
> 
> @@ -109,29 +108,45 @@ static enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
>  	ghcb_set_sw_exit_info_1(ghcb, exit_info_1);
>  	ghcb_set_sw_exit_info_2(ghcb, exit_info_2);
> 
> -	sev_es_wr_ghcb_msr(__pa(ghcb));
>  	VMGEXIT();
> 
> -	if ((ghcb->save.sw_exit_info_1 & 0xffffffff) == 1) {
> -		u64 info = ghcb->save.sw_exit_info_2;
> -		unsigned long v;
> -
> -		info = ghcb->save.sw_exit_info_2;
> -		v = info & SVM_EVTINJ_VEC_MASK;
> -
> -		/* Check if exception information from hypervisor is sane. */
> -		if ((info & SVM_EVTINJ_VALID) &&
> -		    ((v == X86_TRAP_GP) || (v == X86_TRAP_UD)) &&
> -		    ((info & SVM_EVTINJ_TYPE_MASK) == SVM_EVTINJ_TYPE_EXEPT)) {
> -			ctxt->fi.vector = v;
> -			if (info & SVM_EVTINJ_VALID_ERR)
> -				ctxt->fi.error_code = info >> 32;
> -			ret = ES_EXCEPTION;
> -		} else {
> -			ret = ES_VMM_ERROR;
> -		}
> -	} else {
> +	if ((ghcb->save.sw_exit_info_1 & 0xffffffff) == 1)
> +		ret = ES_VMM_ERROR;
> +	else
>  		ret = ES_OK;
> +
> +	return ret;
> +}
> +
> +static enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
> +				   struct es_em_ctxt *ctxt,
> +				   u64 exit_code, u64 exit_info_1,
> +				   u64 exit_info_2)
> +{
> +	unsigned long v;
> +	enum es_result ret;
> +	u64 info;
> +
> +	sev_es_wr_ghcb_msr(__pa(ghcb));
> +
> +	ret = sev_es_ghcb_hv_call_simple(ghcb, exit_code, exit_info_1,
> +					 exit_info_2);
> +	if (ret == ES_OK)
> +		return ret;
> +
> +	info = ghcb->save.sw_exit_info_2;
> +	v = info & SVM_EVTINJ_VEC_MASK;
> +
> +	/* Check if exception information from hypervisor is sane. */
> +	if ((info & SVM_EVTINJ_VALID) &&
> +	    ((v == X86_TRAP_GP) || (v == X86_TRAP_UD)) &&
> +	    ((info & SVM_EVTINJ_TYPE_MASK) == SVM_EVTINJ_TYPE_EXEPT)) {
> +		ctxt->fi.vector = v;
> +		if (info & SVM_EVTINJ_VALID_ERR)
> +			ctxt->fi.error_code = info >> 32;
> +		ret = ES_EXCEPTION;
> +	} else {
> +		ret = ES_VMM_ERROR;
>  	}
> 
>  	return ret;
> diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
> index e83507f49676..59f7173c4d9f 100644
> --- a/drivers/hv/hv.c
> +++ b/drivers/hv/hv.c
> @@ -8,6 +8,7 @@
>   */
>  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> 
> +#include <linux/io.h>
>  #include <linux/kernel.h>
>  #include <linux/mm.h>
>  #include <linux/slab.h>
> @@ -136,17 +137,24 @@ int hv_synic_alloc(void)
>  		tasklet_init(&hv_cpu->msg_dpc,
>  			     vmbus_on_msg_dpc, (unsigned long) hv_cpu);
> 
> -		hv_cpu->synic_message_page =
> -			(void *)get_zeroed_page(GFP_ATOMIC);
> -		if (hv_cpu->synic_message_page == NULL) {
> -			pr_err("Unable to allocate SYNIC message page\n");
> -			goto err;
> -		}
> +		/*
> +		 * Synic message and event pages are allocated by paravisor.
> +		 * Skip these pages allocation here.
> +		 */
> +		if (!hv_isolation_type_snp()) {
> +			hv_cpu->synic_message_page =
> +				(void *)get_zeroed_page(GFP_ATOMIC);
> +			if (hv_cpu->synic_message_page == NULL) {
> +				pr_err("Unable to allocate SYNIC message page\n");
> +				goto err;
> +			}
> 
> -		hv_cpu->synic_event_page = (void *)get_zeroed_page(GFP_ATOMIC);
> -		if (hv_cpu->synic_event_page == NULL) {
> -			pr_err("Unable to allocate SYNIC event page\n");
> -			goto err;
> +			hv_cpu->synic_event_page =
> +				(void *)get_zeroed_page(GFP_ATOMIC);
> +			if (hv_cpu->synic_event_page == NULL) {
> +				pr_err("Unable to allocate SYNIC event page\n");
> +				goto err;
> +			}
>  		}
> 
>  		hv_cpu->post_msg_page = (void *)get_zeroed_page(GFP_ATOMIC);
> @@ -173,10 +181,17 @@ void hv_synic_free(void)
>  	for_each_present_cpu(cpu) {
>  		struct hv_per_cpu_context *hv_cpu
>  			= per_cpu_ptr(hv_context.cpu_context, cpu);
> +		free_page((unsigned long)hv_cpu->post_msg_page);
> +
> +		/*
> +		 * Synic message and event pages are allocated by paravisor.
> +		 * Skip free these pages here.
> +		 */
> +		if (hv_isolation_type_snp())
> +			continue;
> 
>  		free_page((unsigned long)hv_cpu->synic_event_page);
>  		free_page((unsigned long)hv_cpu->synic_message_page);
> -		free_page((unsigned long)hv_cpu->post_msg_page);

You could skip making these changes to hv_synic_free().  If the message
and event pages aren't allocated, the pointers will be NULL and
free_page() will happily do nothing.

>  	}
> 
>  	kfree(hv_context.hv_numa_map);
> @@ -199,26 +214,43 @@ void hv_synic_enable_regs(unsigned int cpu)
>  	union hv_synic_scontrol sctrl;
> 
>  	/* Setup the Synic's message page */
> -	simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
> +	hv_get_simp(simp.as_uint64);

This code is intended to be architecture independent and builds for
x86 and for ARM64.  Changing the use of hv_get_register() and hv_set_register()
will fail badly when built for ARM64.   I haven't completely thought through
what the best solution might be, but the current set of mappings from
hv_get_simp() down to hv_ghcb_msr_read() isn't going to work on ARM64.

Is it possible to hide all the x86-side complexity in the implementation of
hv_get_register()?   Certain MSRs would have to be special-cased when
SNP isolation is enabled, but that might be easier than trying to no-op out
the ghcb machinery on the ARM64 side. 

>  	simp.simp_enabled = 1;
> -	simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page)
> -		>> HV_HYP_PAGE_SHIFT;
> 
> -	hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);
> +	if (hv_isolation_type_snp()) {
> +		hv_cpu->synic_message_page
> +			= memremap(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT,
> +				   HV_HYP_PAGE_SIZE, MEMREMAP_WB);
> +		if (!hv_cpu->synic_message_page)
> +			pr_err("Fail to map syinc message page.\n");
> +	} else {
> +		simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page)
> +			>> HV_HYP_PAGE_SHIFT;
> +	}
> +
> +	hv_set_simp(simp.as_uint64);
> 
>  	/* Setup the Synic's event page */
> -	siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
> +	hv_get_siefp(siefp.as_uint64);
>  	siefp.siefp_enabled = 1;
> -	siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page)
> -		>> HV_HYP_PAGE_SHIFT;
> 
> -	hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
> +	if (hv_isolation_type_snp()) {
> +		hv_cpu->synic_event_page =
> +			memremap(siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT,
> +				 HV_HYP_PAGE_SIZE, MEMREMAP_WB);
> +
> +		if (!hv_cpu->synic_event_page)
> +			pr_err("Fail to map syinc event page.\n");
> +	} else {
> +		siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page)
> +			>> HV_HYP_PAGE_SHIFT;
> +	}
> +	hv_set_siefp(siefp.as_uint64);
> 
>  	/* Setup the shared SINT. */
>  	if (vmbus_irq != -1)
>  		enable_percpu_irq(vmbus_irq, 0);
> -	shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
> -					VMBUS_MESSAGE_SINT);
> +	hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
> 
>  	shared_sint.vector = vmbus_interrupt;
>  	shared_sint.masked = false;
> @@ -233,14 +265,12 @@ void hv_synic_enable_regs(unsigned int cpu)
>  #else
>  	shared_sint.auto_eoi = 0;
>  #endif
> -	hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
> -				shared_sint.as_uint64);
> +	hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
> 
>  	/* Enable the global synic bit */
> -	sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
> +	hv_get_synic_state(sctrl.as_uint64);
>  	sctrl.enable = 1;
> -
> -	hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
> +	hv_set_synic_state(sctrl.as_uint64);
>  }
> 
>  int hv_synic_init(unsigned int cpu)
> @@ -257,37 +287,50 @@ int hv_synic_init(unsigned int cpu)
>   */
>  void hv_synic_disable_regs(unsigned int cpu)
>  {
> +	struct hv_per_cpu_context *hv_cpu
> +		= per_cpu_ptr(hv_context.cpu_context, cpu);
>  	union hv_synic_sint shared_sint;
>  	union hv_synic_simp simp;
>  	union hv_synic_siefp siefp;
>  	union hv_synic_scontrol sctrl;
> 
> -	shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
> -					VMBUS_MESSAGE_SINT);
> -
> +	hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
>  	shared_sint.masked = 1;
> +	hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
> +
> 
>  	/* Need to correctly cleanup in the case of SMP!!! */
>  	/* Disable the interrupt */
> -	hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
> -				shared_sint.as_uint64);
> +	hv_get_simp(simp.as_uint64);
> 
> -	simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
> +	/*
> +	 * In Isolation VM, sim and sief pages are allocated by
> +	 * paravisor. These pages also will be used by kdump
> +	 * kernel. So just reset enable bit here and keep page
> +	 * addresses.
> +	 */
>  	simp.simp_enabled = 0;
> -	simp.base_simp_gpa = 0;
> +	if (hv_isolation_type_snp())
> +		memunmap(hv_cpu->synic_message_page);
> +	else
> +		simp.base_simp_gpa = 0;
> 
> -	hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);
> +	hv_set_simp(simp.as_uint64);
> 
> -	siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
> +	hv_get_siefp(siefp.as_uint64);
>  	siefp.siefp_enabled = 0;
> -	siefp.base_siefp_gpa = 0;
> 
> -	hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
> +	if (hv_isolation_type_snp())
> +		memunmap(hv_cpu->synic_event_page);
> +	else
> +		siefp.base_siefp_gpa = 0;
> +
> +	hv_set_siefp(siefp.as_uint64);
> 
>  	/* Disable the global synic bit */
> -	sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
> +	hv_get_synic_state(sctrl.as_uint64);
>  	sctrl.enable = 0;
> -	hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
> +	hv_set_synic_state(sctrl.as_uint64);
> 
>  	if (vmbus_irq != -1)
>  		disable_percpu_irq(vmbus_irq);
> diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
> index 079988ed45b9..90dac369a2dc 100644
> --- a/include/asm-generic/mshyperv.h
> +++ b/include/asm-generic/mshyperv.h
> @@ -23,9 +23,16 @@
>  #include <linux/bitops.h>
>  #include <linux/cpumask.h>
>  #include <linux/nmi.h>
> +#include <asm/svm.h>
> +#include <asm/sev.h>
>  #include <asm/ptrace.h>
> +#include <asm/mshyperv.h>
>  #include <asm/hyperv-tlfs.h>
> 
> +union hv_ghcb {
> +	struct ghcb ghcb;
> +} __packed __aligned(PAGE_SIZE);
> +
>  struct ms_hyperv_info {
>  	u32 features;
>  	u32 priv_high;
> @@ -45,7 +52,7 @@ struct ms_hyperv_info {
>  			u32 Reserved12 : 20;
>  		};
>  	};
> -	void  __percpu **ghcb_base;
> +	union hv_ghcb __percpu **ghcb_base;
>  	u64 shared_gpa_boundary;
>  };
>  extern struct ms_hyperv_info ms_hyperv;
> @@ -55,6 +62,7 @@ extern void  __percpu  **hyperv_pcpu_output_arg;
> 
>  extern u64 hv_do_hypercall(u64 control, void *inputaddr, void *outputaddr);
>  extern u64 hv_do_fast_hypercall8(u16 control, u64 input8);
> +extern bool hv_isolation_type_snp(void);
> 
>  /* Helper functions that provide a consistent pattern for checking Hyper-V hypercall status. */
>  static inline int hv_result(u64 status)
> @@ -149,7 +157,7 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type)
>  		 * possibly deliver another msg from the
>  		 * hypervisor
>  		 */
> -		hv_set_register(HV_REGISTER_EOM, 0);
> +		hv_signal_eom();
>  	}
>  }
> 
> --
> 2.25.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ