linux-kernel - Re: [PATCH 07/11] KVM: x86: Disable SMM for TDX

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <YY6stGpsmZawyRy5@google.com>
Date:   Fri, 12 Nov 2021 18:04:36 +0000
From:   Sean Christopherson <seanjc@...gle.com>
To:     Xiaoyao Li <xiaoyao.li@...el.com>
Cc:     Paolo Bonzini <pbonzini@...hat.com>,
        Vitaly Kuznetsov <vkuznets@...hat.com>,
        Wanpeng Li <wanpengli@...cent.com>,
        Jim Mattson <jmattson@...gle.com>,
        Joerg Roedel <joro@...tes.org>, erdemaktas@...gle.com,
        Connor Kuehl <ckuehl@...hat.com>, x86@...nel.org,
        linux-kernel@...r.kernel.org, kvm@...r.kernel.org,
        isaku.yamahata@...el.com, Kai Huang <kai.huang@...el.com>
Subject: Re: [PATCH 07/11] KVM: x86: Disable SMM for TDX

On Fri, Nov 12, 2021, Xiaoyao Li wrote:
> SMM is not supported for TDX VM, nor can KVM emulate it for TDX VM.
> 
> Signed-off-by: Xiaoyao Li <xiaoyao.li@...el.com>
> ---
>  arch/x86/kvm/irq_comm.c | 2 ++
>  arch/x86/kvm/x86.c      | 6 ++++++
>  arch/x86/kvm/x86.h      | 5 +++++
>  3 files changed, 13 insertions(+)
> 
> diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
> index f9f643e31893..705fc0dc0272 100644
> --- a/arch/x86/kvm/irq_comm.c
> +++ b/arch/x86/kvm/irq_comm.c
> @@ -128,6 +128,8 @@ static inline bool kvm_msi_route_invalid(struct kvm *kvm,
>  			       .data = e->msi.data };
>  	return  (kvm_eoi_intercept_disallowed(kvm) &&
>  		 msg.arch_data.is_level) ||
> +		(kvm_smm_unsupported(kvm) &&
> +		 msg.arch_data.delivery_mode == APIC_DELIVERY_MODE_SMI) ||

This patch neglects to disallow SMI via IPI.  Ditto for INIT+SIPI in the next
patch.  And no small part of me thinks we shouldn't even bother handling the
delivery mode in the MSI routing.  If we reject MSI configuration, then to be
consistent we should also technically reject guest attempts to configure LVT
entries.  Sadly, KVM doesn't handle any of that stuff correctly as there are
assumptions left and right about how the guest will configure things like LVTT,
but from an architctural perspective it is legal to configure LVT0, LVT1, LVTT,
etc... to send SMI, NMI, INIT, etc...

The kvm_eoi_intercept_disallowed() part is a little different, since KVM can
deliver the interrupt, it just can handle the backend correctly.  Dropping an
event on the floor is a bit gross, but on the other hand I really don't want to
sign up for a game of whack-a-mole for all the paths that can get to
__apic_accept_irq().

E.g. I'm thinking:

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 76fb00921203..33364d3e4d02 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1112,6 +1112,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
                break;

        case APIC_DM_SMI:
+               if (kvm_smi_disallowed(vcpu->kvm))
+                       break;
+
                result = 1;
                kvm_make_request(KVM_REQ_SMI, vcpu);
                kvm_vcpu_kick(vcpu);
@@ -1124,6 +1127,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
                break;

        case APIC_DM_INIT:
+               if (kvm_init_disallowed(vcpu->kvm))
+                       break;
+
                if (!trig_mode || level) {
                        result = 1;
                        /* assumes that there are only KVM_APIC_INIT/SIPI */
@@ -1134,6 +1140,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
                break;

        case APIC_DM_STARTUP:
+               if (kvm_sipi_disallowed(vcpu->kvm))
+                       break;
+
                result = 1;
                apic->sipi_vector = vector;
                /* make sure sipi_vector is visible for the receiver */


>  		(kvm->arch.x2apic_format && (msg.address_hi & 0xff));
>  }
>  
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 113ed9aa5c82..1f3cc2a2d844 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -4132,6 +4132,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>  			r |= KVM_X86_DISABLE_EXITS_MWAIT;
>  		break;
>  	case KVM_CAP_X86_SMM:
> +		if (kvm && kvm_smm_unsupported(kvm))
> +			break;
> +
>  		/* SMBASE is usually relocated above 1M on modern chipsets,
>  		 * and SMM handlers might indeed rely on 4G segment limits,
>  		 * so do not report SMM to be available if real mode is
> @@ -4500,6 +4503,9 @@ static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
>  
>  static int kvm_vcpu_ioctl_smi(struct kvm_vcpu *vcpu)
>  {
> +	if (kvm_smm_unsupported(vcpu->kvm))
> +		return -EINVAL;
> +
>  	kvm_make_request(KVM_REQ_SMI, vcpu);
>  
>  	return 0;
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 65c8c77e507b..ab7c91ca2478 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -456,6 +456,11 @@ static __always_inline bool kvm_eoi_intercept_disallowed(struct kvm *kvm)
>  	return kvm->arch.vm_type == KVM_X86_TDX_VM;
>  }
>  
> +static __always_inline bool kvm_smm_unsupported(struct kvm *kvm)

This should be "kvm_smi_disallowed" to be consistent with the other helpers.  Also,
why are these all __always_inline?  Generally speaking, __always_inline should
really only be used if there is a hard dependency on the function being inlined.
I would be extremely surprised if it actually changed anything in this case, but
it's odd and unnecessary.

> +{
> +	return kvm->arch.vm_type == KVM_X86_TDX_VM;

There really needs to be a helper for this:

static inline bool is_tdx_guest(struct kvm *kvm*)
{
	return kvm->arch.vm_type == KVM_X86_TDX_VM;
}

And I think we should bite the bullet and expose SEV-ES status in x86.  Ideally,
we would not have had to do that, but TDX and SEV diverge just enough that a single
guest_state_protected doesn't suffice :-(  Whining aside, exposing TDX in x86 but
not SEV-ES will create a weird split where some things are handled in common x86
and others are deferred to vendor code.

And I think it would make sense to tie the "smi disallowed" part to whether or
not KVM can emulate an instruction, because that's really the issue.  E.g.

static inline bool kvm_smi_disallowed(struct kvm *kvm)
{
	/* SMM requires emulation in KVM. */
	return __kvm_can_emulate_instruction(kvm);
}


And then the existing kvm_x86_ops.can_emulation_instruction() can be folded into
a helper that checks both the "can this VM emulating _anything_" as well as the
"can this specific instruction be emulated".

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 21bb81710e0f..7af4393ccecd 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4465,12 +4465,6 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int i
        bool smep, smap, is_user;
        unsigned long cr4;

-       /*
-        * When the guest is an SEV-ES guest, emulation is not possible.
-        */
-       if (sev_es_guest(vcpu->kvm))
-               return false;
-
        /*
         * Detect and workaround Errata 1096 Fam_17h_00_0Fh.
         *
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9a0440e22ede..c34f653e2546 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6717,6 +6717,18 @@ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val,
 }
 EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);

+static bool __kvm_can_emulate_instruction(struct kvm *kvm)
+{
+       return !is_sev_guest(kvm) && !is_tdx_guest(kvm);
+}
+
+static bool kvm_can_emulate_instruction(struct kvm_vcpu *vcpu,
+                                       void *insn, int insn_len)
+{
+       return __kvm_can_emulate_instruction(vcpu->kvm) &&
+              static_call(kvm_x86_can_emulate_instruction)(vcpu, NULL, 0);
+}
+
 int handle_ud(struct kvm_vcpu *vcpu)
 {
        static const char kvm_emulate_prefix[] = { __KVM_EMULATE_PREFIX };
@@ -6724,7 +6736,7 @@ int handle_ud(struct kvm_vcpu *vcpu)
        char sig[5]; /* ud2; .ascii "kvm" */
        struct x86_exception e;

-       if (unlikely(!static_call(kvm_x86_can_emulate_instruction)(vcpu, NULL, 0)))
+       if (unlikely(!kvm_can_emulate_instruction(vcpu, NULL, 0)))
                return 1;

        if (force_emulation_prefix &&
@@ -8071,7 +8083,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
        bool writeback = true;
        bool write_fault_to_spt;

-       if (unlikely(!static_call(kvm_x86_can_emulate_instruction)(vcpu, insn, insn_len)))
+       if (unlikely(!kvm_can_emulate_instruction(vcpu, insn, insn_len)))
                return 1;

        vcpu->arch.l1tf_flush_l1d = true;