lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <92e19d74-447f-19e0-d9ec-8a3f12f04927@intel.com>
Date:   Wed, 7 Jun 2023 07:24:23 -0700
From:   Dave Hansen <dave.hansen@...el.com>
To:     Kai Huang <kai.huang@...el.com>, linux-kernel@...r.kernel.org,
        kvm@...r.kernel.org
Cc:     linux-mm@...ck.org, kirill.shutemov@...ux.intel.com,
        tony.luck@...el.com, peterz@...radead.org, tglx@...utronix.de,
        seanjc@...gle.com, pbonzini@...hat.com, david@...hat.com,
        dan.j.williams@...el.com, rafael.j.wysocki@...el.com,
        ying.huang@...el.com, reinette.chatre@...el.com,
        len.brown@...el.com, ak@...ux.intel.com, isaku.yamahata@...el.com,
        chao.gao@...el.com, sathyanarayanan.kuppuswamy@...ux.intel.com,
        bagasdotme@...il.com, sagis@...gle.com, imammedo@...hat.com
Subject: Re: [PATCH v11 05/20] x86/virt/tdx: Add SEAMCALL infrastructure

On 6/4/23 07:27, Kai Huang wrote:
> TDX introduces a new CPU mode: Secure Arbitration Mode (SEAM).  This
> mode runs only the TDX module itself or other code to load the TDX
> module.
> 
> The host kernel communicates with SEAM software via a new SEAMCALL
> instruction.  This is conceptually similar to a guest->host hypercall,
> except it is made from the host to SEAM software instead.  The TDX
> module establishes a new SEAMCALL ABI which allows the host to
> initialize the module and to manage VMs.
> 
> Add infrastructure to make SEAMCALLs.  The SEAMCALL ABI is very similar
> to the TDCALL ABI and leverages much TDCALL infrastructure.
> 
> SEAMCALL instruction causes #GP when TDX isn't BIOS enabled, and #UD
> when CPU is not in VMX operation.  Currently, only KVM code mocks with

"mocks"?  Did you mean "mucks"?

> VMX enabling, and KVM is the only user of TDX.  This implementation
> chooses to make KVM itself responsible for enabling VMX before using
> TDX and let the rest of the kernel stay blissfully unaware of VMX.
> 
> The current TDX_MODULE_CALL macro handles neither #GP nor #UD.  The
> kernel would hit Oops if SEAMCALL were mistakenly made w/o enabling VMX
> first.  Architecturally, there is no CPU flag to check whether the CPU
> is in VMX operation.  Also, if a BIOS were buggy, it could still report
> valid TDX private KeyIDs when TDX actually couldn't be enabled.

I'm not sure this is a great justification.  If the BIOS is lying to the
OS, we _should_ oops.

How else can this happen other than silly kernel bugs.  It's OK to oops
in the face of silly kernel bugs.

...
> diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
> index 4dfe2e794411..b489b5b9de5d 100644
> --- a/arch/x86/include/asm/tdx.h
> +++ b/arch/x86/include/asm/tdx.h
> @@ -8,6 +8,8 @@
>  #include <asm/ptrace.h>
>  #include <asm/shared/tdx.h>
>  
> +#include <asm/trapnr.h>
> +
>  /*
>   * SW-defined error codes.
>   *
> @@ -18,6 +20,9 @@
>  #define TDX_SW_ERROR			(TDX_ERROR | GENMASK_ULL(47, 40))
>  #define TDX_SEAMCALL_VMFAILINVALID	(TDX_SW_ERROR | _UL(0xFFFF0000))
>  
> +#define TDX_SEAMCALL_GP			(TDX_SW_ERROR | X86_TRAP_GP)
> +#define TDX_SEAMCALL_UD			(TDX_SW_ERROR | X86_TRAP_UD)
> +
>  #ifndef __ASSEMBLY__
>  
>  /* TDX supported page sizes from the TDX module ABI. */
> diff --git a/arch/x86/virt/vmx/tdx/Makefile b/arch/x86/virt/vmx/tdx/Makefile
> index 93ca8b73e1f1..38d534f2c113 100644
> --- a/arch/x86/virt/vmx/tdx/Makefile
> +++ b/arch/x86/virt/vmx/tdx/Makefile
> @@ -1,2 +1,2 @@
>  # SPDX-License-Identifier: GPL-2.0-only
> -obj-y += tdx.o
> +obj-y += tdx.o seamcall.o
> diff --git a/arch/x86/virt/vmx/tdx/seamcall.S b/arch/x86/virt/vmx/tdx/seamcall.S
> new file mode 100644
> index 000000000000..f81be6b9c133
> --- /dev/null
> +++ b/arch/x86/virt/vmx/tdx/seamcall.S
> @@ -0,0 +1,52 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#include <linux/linkage.h>
> +#include <asm/frame.h>
> +
> +#include "tdxcall.S"
> +
> +/*
> + * __seamcall() - Host-side interface functions to SEAM software module
> + *		  (the P-SEAMLDR or the TDX module).
> + *
> + * Transform function call register arguments into the SEAMCALL register
> + * ABI.  Return TDX_SEAMCALL_VMFAILINVALID if the SEAMCALL itself fails,
> + * or the completion status of the SEAMCALL leaf function.  Additional
> + * output operands are saved in @out (if it is provided by the caller).
> + *
> + *-------------------------------------------------------------------------
> + * SEAMCALL ABI:
> + *-------------------------------------------------------------------------
> + * Input Registers:
> + *
> + * RAX                 - SEAMCALL Leaf number.
> + * RCX,RDX,R8-R9       - SEAMCALL Leaf specific input registers.
> + *
> + * Output Registers:
> + *
> + * RAX                 - SEAMCALL completion status code.
> + * RCX,RDX,R8-R11      - SEAMCALL Leaf specific output registers.
> + *
> + *-------------------------------------------------------------------------
> + *
> + * __seamcall() function ABI:
> + *
> + * @fn  (RDI)          - SEAMCALL Leaf number, moved to RAX
> + * @rcx (RSI)          - Input parameter 1, moved to RCX
> + * @rdx (RDX)          - Input parameter 2, moved to RDX
> + * @r8  (RCX)          - Input parameter 3, moved to R8
> + * @r9  (R8)           - Input parameter 4, moved to R9
> + *
> + * @out (R9)           - struct tdx_module_output pointer
> + *			 stored temporarily in R12 (not
> + *			 used by the P-SEAMLDR or the TDX
> + *			 module). It can be NULL.
> + *
> + * Return (via RAX) the completion status of the SEAMCALL, or
> + * TDX_SEAMCALL_VMFAILINVALID.
> + */
> +SYM_FUNC_START(__seamcall)
> +	FRAME_BEGIN
> +	TDX_MODULE_CALL host=1
> +	FRAME_END
> +	RET
> +SYM_FUNC_END(__seamcall)
> diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
> index 2d91e7120c90..e82713dd5d54 100644
> --- a/arch/x86/virt/vmx/tdx/tdx.c
> +++ b/arch/x86/virt/vmx/tdx/tdx.c
> @@ -12,14 +12,70 @@
>  #include <linux/init.h>
>  #include <linux/errno.h>
>  #include <linux/printk.h>
> +#include <linux/smp.h>
>  #include <asm/msr-index.h>
>  #include <asm/msr.h>
>  #include <asm/tdx.h>
> +#include "tdx.h"
>  
>  static u32 tdx_global_keyid __ro_after_init;
>  static u32 tdx_guest_keyid_start __ro_after_init;
>  static u32 tdx_nr_guest_keyids __ro_after_init;
>  
> +/*
> + * Wrapper of __seamcall() to convert SEAMCALL leaf function error code
> + * to kernel error code.  @seamcall_ret and @out contain the SEAMCALL
> + * leaf function return code and the additional output respectively if
> + * not NULL.
> + */
> +static int __always_unused seamcall(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9,
> +				    u64 *seamcall_ret,
> +				    struct tdx_module_output *out)
> +{
> +	int cpu, ret = 0;
> +	u64 sret;
> +
> +	/* Need a stable CPU id for printing error message */
> +	cpu = get_cpu();
> +
> +	sret = __seamcall(fn, rcx, rdx, r8, r9, out);
> +
> +	/* Save SEAMCALL return code if the caller wants it */
> +	if (seamcall_ret)
> +		*seamcall_ret = sret;
> +
> +	/* SEAMCALL was successful */
> +	if (!sret)
> +		goto out;
> +
> +	switch (sret) {
> +	case TDX_SEAMCALL_GP:
> +		pr_err_once("[firmware bug]: TDX is not enabled by BIOS.\n");
> +		ret = -ENODEV;
> +		break;
> +	case TDX_SEAMCALL_VMFAILINVALID:
> +		pr_err_once("TDX module is not loaded.\n");
> +		ret = -ENODEV;
> +		break;
> +	case TDX_SEAMCALL_UD:
> +		pr_err_once("SEAMCALL failed: CPU %d is not in VMX operation.\n",
> +				cpu);
> +		ret = -EINVAL;
> +		break;
> +	default:
> +		pr_err_once("SEAMCALL failed: CPU %d: leaf %llu, error 0x%llx.\n",
> +				cpu, fn, sret);
> +		if (out)
> +			pr_err_once("additional output: rcx 0x%llx, rdx 0x%llx, r8 0x%llx, r9 0x%llx, r10 0x%llx, r11 0x%llx.\n",
> +					out->rcx, out->rdx, out->r8,
> +					out->r9, out->r10, out->r11);
> +		ret = -EIO;
> +	}
> +out:
> +	put_cpu();
> +	return ret;
> +}

This fails to distinguish two very different things:

 * A SEAMCALL error
and
 * A SEAMCALL *failure*

"Errors" are normal.  Hypercalls can return errors and so can SEAMCALLs.
 No biggie.

But SEAMCALL failures are another matter.  They mean that something
really fundamental is *BROKEN*.

Just saying "SEAMCALL was successful" is a bit ambiguous for me.

> diff --git a/arch/x86/virt/vmx/tdx/tdx.h b/arch/x86/virt/vmx/tdx/tdx.h
> new file mode 100644
> index 000000000000..48ad1a1ba737
> --- /dev/null
> +++ b/arch/x86/virt/vmx/tdx/tdx.h
> @@ -0,0 +1,10 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _X86_VIRT_TDX_H
> +#define _X86_VIRT_TDX_H
> +
> +#include <linux/types.h>
> +
> +struct tdx_module_output;
> +u64 __seamcall(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9,
> +	       struct tdx_module_output *out);
> +#endif
> diff --git a/arch/x86/virt/vmx/tdx/tdxcall.S b/arch/x86/virt/vmx/tdx/tdxcall.S
> index 49a54356ae99..757b0c34be10 100644
> --- a/arch/x86/virt/vmx/tdx/tdxcall.S
> +++ b/arch/x86/virt/vmx/tdx/tdxcall.S
> @@ -1,6 +1,7 @@
>  /* SPDX-License-Identifier: GPL-2.0 */
>  #include <asm/asm-offsets.h>
>  #include <asm/tdx.h>
> +#include <asm/asm.h>
>  
>  /*
>   * TDCALL and SEAMCALL are supported in Binutils >= 2.36.
> @@ -45,6 +46,7 @@
>  	/* Leave input param 2 in RDX */
>  
>  	.if \host
> +1:
>  	seamcall
>  	/*
>  	 * SEAMCALL instruction is essentially a VMExit from VMX root
> @@ -57,10 +59,23 @@
>  	 * This value will never be used as actual SEAMCALL error code as
>  	 * it is from the Reserved status code class.
>  	 */
> -	jnc .Lno_vmfailinvalid
> +	jnc .Lseamcall_out
>  	mov $TDX_SEAMCALL_VMFAILINVALID, %rax
> -.Lno_vmfailinvalid:
> +	jmp .Lseamcall_out
> +2:
> +	/*
> +	 * SEAMCALL caused #GP or #UD.  By reaching here %eax contains
> +	 * the trap number.  Convert the trap number to the TDX error
> +	 * code by setting TDX_SW_ERROR to the high 32-bits of %rax.
> +	 *
> +	 * Note cannot OR TDX_SW_ERROR directly to %rax as OR instruction
> +	 * only accepts 32-bit immediate at most.
> +	 */
> +	mov $TDX_SW_ERROR, %r12
> +	orq %r12, %rax

I think the justification for doing the #UD/#GP handling is a bit weak.
In the end, it gets us a nicer error message.  Is that error message
*REALLY* needed?  Or is an oops OK in the very rare circumstance that
the BIOS is totally buggy?

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ