Message-ID: <Y3aDTvglaSfhG8Tg@google.com>
Date: Thu, 17 Nov 2022 18:54:06 +0000
From: Sean Christopherson <seanjc@...gle.com>
To: Maxim Levitsky <mlevitsk@...hat.com>
Cc: kvm@...r.kernel.org, Paolo Bonzini <pbonzini@...hat.com>,
Ingo Molnar <mingo@...hat.com>,
"H. Peter Anvin" <hpa@...or.com>,
Dave Hansen <dave.hansen@...ux.intel.com>,
linux-kernel@...r.kernel.org,
Peter Zijlstra <peterz@...radead.org>,
Thomas Gleixner <tglx@...utronix.de>,
Sandipan Das <sandipan.das@....com>,
Daniel Sneddon <daniel.sneddon@...ux.intel.com>,
Jing Liu <jing2.liu@...el.com>,
Josh Poimboeuf <jpoimboe@...nel.org>,
Wyes Karny <wyes.karny@....com>,
Borislav Petkov <bp@...en8.de>,
Babu Moger <babu.moger@....com>,
Pawan Gupta <pawan.kumar.gupta@...ux.intel.com>,
Jim Mattson <jmattson@...gle.com>, x86@...nel.org,
Santosh Shukla <santosh.shukla@....com>
Subject: Re: [PATCH 07/13] KVM: SVM: Add VNMI support in get/set_nmi_mask
On Thu, Nov 17, 2022, Maxim Levitsky wrote:
> From: Santosh Shukla <santosh.shukla@....com>
>
> VMCB intr_ctrl bit12 (V_NMI_MASK) is set by the processor when handling
> an NMI in the guest and is cleared after the NMI is handled. Treat
> V_NMI_MASK as read-only in the hypervisor, except for the SMM case,
> where the hypervisor needs to set V_NMI_MASK before entering SMM mode
> and clear it after leaving.
>
> Add an API (get_vnmi_vmcb) to return the correct vmcb for L1 or L2.
>
> Maxim:
> - made set_vnmi_mask/clear_vnmi_mask/is_vnmi_mask_set warn if called
>   without vNMI enabled
> - clear IRET intercept in svm_set_nmi_mask even with vNMI
>
> Signed-off-by: Santosh Shukla <santosh.shukla@....com>
> Signed-off-by: Maxim Levitsky <mlevitsk@...hat.com>
> ---
> arch/x86/kvm/svm/svm.c | 18 ++++++++++++++-
> arch/x86/kvm/svm/svm.h | 52 ++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 69 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 08a7b2a0a29f3a..c16f68f6c4f7d7 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -3618,13 +3618,29 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
>
>  static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
>  {
> -	return !!(vcpu->arch.hflags & HF_NMI_MASK);
> +	struct vcpu_svm *svm = to_svm(vcpu);
> +
> +	if (is_vnmi_enabled(svm))
> +		return is_vnmi_mask_set(svm);
> +	else
> +		return !!(vcpu->arch.hflags & HF_NMI_MASK);
>  }
>
>  static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
>  {
>  	struct vcpu_svm *svm = to_svm(vcpu);
>
> +	if (is_vnmi_enabled(svm)) {
> +		if (masked)
> +			set_vnmi_mask(svm);
I believe not setting INTERCEPT_IRET is correct, but only because the existing
code is unnecessary. And this all very subtly relies on KVM_REQ_EVENT being set
and/or KVM already being in kvm_check_and_inject_events().
When NMIs become unblocked, INTERCEPT_IRET can be cleared, but KVM should also
pend KVM_REQ_EVENT. AFAICT, that doesn't happen when this is called via the
emulator. Ah, because em_iret() only handles RM for Intel's restricted guest
crap. I.e. it "works" only because it never happens. All other flows set
KVM_REQ_EVENT when toggling NMI blocking, e.g. the RSM path of kvm_smm_changed().
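If the emulator path ever does become reachable, the fix would presumably be to
pend the event in x86.c's emulator_set_nmi_mask(), e.g. (completely untested,
purely illustrative):

static void emulator_set_nmi_mask(struct x86_emulate_ctxt *ctxt, bool masked)
{
	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);

	static_call(kvm_x86_set_nmi_mask)(vcpu, masked);

	/* Illustrative only: re-evaluate pending events once NMIs unblock. */
	if (!masked)
		kvm_make_request(KVM_REQ_EVENT, vcpu);
}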
And when NMIs become blocked, there's no need to force INTERCEPT_IRET in this
code because kvm_check_and_inject_events() will request an NMI window and set the
intercept if necessary, and all paths that set NMI blocking are guaranteed to
reach kvm_check_and_inject_events() before entering the guest:
1. RSM => kvm_smm_changed() sets KVM_REQ_EVENT
2. enter_smm() is only called from within kvm_check_and_inject_events(),
before pending NMIs are processed (yay priority)
3. emulator_set_nmi_mask() never blocks NMIs, only does the half-baked IRET emulation
4. kvm_vcpu_ioctl_x86_set_vcpu_events() sets KVM_REQ_EVENT
So, can you add a prep patch to drop the forced INTERCEPT_IRET? That way the
logic for vNMI and !vNMI is the same.
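E.g. the prep patch could simply drop the forced interception (untested sketch,
assuming nothing else depends on forcing the intercept when masking):

static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (masked) {
		vcpu->arch.hflags |= HF_NMI_MASK;
	} else {
		vcpu->arch.hflags &= ~HF_NMI_MASK;
		if (!sev_es_guest(vcpu->kvm))
			svm_clr_intercept(svm, INTERCEPT_IRET);
	}
}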
> +		else {
> +			clear_vnmi_mask(svm);
This is the only code that sets/clears the vNMI mask, so rather than have set/clear
helpers, what about a single helper to do the dirty work? (See
svm_set_or_clear_vnmi_mask() in the sketch at the end of this mail.)
> +			if (!sev_es_guest(vcpu->kvm))
> +				svm_clr_intercept(svm, INTERCEPT_IRET);
> +		}
> +		return;
> +	}
> +
>  	if (masked) {
>  		vcpu->arch.hflags |= HF_NMI_MASK;
>  		if (!sev_es_guest(vcpu->kvm))
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index f5383104d00580..bf7f4851dee204 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -35,6 +35,7 @@ extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
>  extern bool npt_enabled;
>  extern int vgif;
>  extern bool intercept_smi;
> +extern bool vnmi;
>
>  enum avic_modes {
>  	AVIC_MODE_NONE = 0,
> @@ -531,6 +532,57 @@ static inline bool is_x2apic_msrpm_offset(u32 offset)
>  	       (msr < (APIC_BASE_MSR + 0x100));
>  }
>
> +static inline struct vmcb *get_vnmi_vmcb(struct vcpu_svm *svm)
> +{
> +	if (!vnmi)
> +		return NULL;
> +
> +	if (is_guest_mode(&svm->vcpu))
> +		return svm->nested.vmcb02.ptr;
> +	else
> +		return svm->vmcb01.ptr;
> +}
> +
> +static inline bool is_vnmi_enabled(struct vcpu_svm *svm)
> +{
> +	struct vmcb *vmcb = get_vnmi_vmcb(svm);
> +
> +	if (vmcb)
> +		return !!(vmcb->control.int_ctl & V_NMI_ENABLE);
> +	else
> +		return false;
Maybe just this?
	return vmcb && (vmcb->control.int_ctl & V_NMI_ENABLE);
Or if an inner helper is added:
	return vmcb && __is_vnmi_enabled(vmcb);
> +}
> +
> +static inline bool is_vnmi_mask_set(struct vcpu_svm *svm)
> +{
> +	struct vmcb *vmcb = get_vnmi_vmcb(svm);
> +
> +	if (WARN_ON_ONCE(!vmcb))
Rather than WARN, add an inner __is_vnmi_enabled() that takes the vnmi_vmcb.
Actually, if you do that, the test/set/clear helpers can go away entirely (see
the sketch at the end of this mail).
> +		return false;
> +
> +	return !!(vmcb->control.int_ctl & V_NMI_MASK);
> +}
> +
> +static inline void set_vnmi_mask(struct vcpu_svm *svm)
> +{
> +	struct vmcb *vmcb = get_vnmi_vmcb(svm);
> +
> +	if (WARN_ON_ONCE(!vmcb))
> +		return;
> +
> +	vmcb->control.int_ctl |= V_NMI_MASK;
> +}
> +
> +static inline void clear_vnmi_mask(struct vcpu_svm *svm)
> +{
> +	struct vmcb *vmcb = get_vnmi_vmcb(svm);
> +
> +	if (WARN_ON_ONCE(!vmcb))
> +		return;
> +
> +	vmcb->control.int_ctl &= ~V_NMI_MASK;
> +}
These helpers can all go in svm.c. There are no users outside of svm.c, and
unless I'm misunderstanding how nested works, there should never be outside users.
E.g. with HF_NMI_MASK => svm->nmi_masked, the end result can be something like:
static bool __is_vnmi_enabled(struct vmcb *vmcb)
{
	return !!(vmcb->control.int_ctl & V_NMI_ENABLE);
}

static bool is_vnmi_enabled(struct vcpu_svm *svm)
{
	struct vmcb *vmcb = get_vnmi_vmcb(svm);

	return vmcb && __is_vnmi_enabled(vmcb);
}

static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	struct vmcb *vmcb = get_vnmi_vmcb(svm);

	if (vmcb && __is_vnmi_enabled(vmcb))
		return !!(vmcb->control.int_ctl & V_NMI_MASK);
	else
		return svm->nmi_masked;
}

static void svm_set_or_clear_vnmi_mask(struct vmcb *vmcb, bool set)
{
	if (set)
		vmcb->control.int_ctl |= V_NMI_MASK;
	else
		vmcb->control.int_ctl &= ~V_NMI_MASK;
}

static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	struct vmcb *vmcb = get_vnmi_vmcb(svm);

	if (vmcb && __is_vnmi_enabled(vmcb))
		svm_set_or_clear_vnmi_mask(vmcb, masked);
	else
		svm->nmi_masked = masked;

	if (!masked)
		svm_disable_iret_interception(svm);
}
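Where svm_disable_iret_interception() is assumed to be a small helper wrapping
the existing clearing logic, i.e. something like (name and helper purely
hypothetical):

static void svm_disable_iret_interception(struct vcpu_svm *svm)
{
	/* IRET can't be intercepted for SEV-ES guests. */
	if (!sev_es_guest(svm->vcpu.kvm))
		svm_clr_intercept(svm, INTERCEPT_IRET);
}

That way the vNMI and !vNMI paths share the exact same IRET handling.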