lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230628211132.GS38236@hirez.programming.kicks-ass.net>
Date:   Wed, 28 Jun 2023 23:11:32 +0200
From:   Peter Zijlstra <peterz@...radead.org>
To:     Kai Huang <kai.huang@...el.com>
Cc:     linux-kernel@...r.kernel.org, kvm@...r.kernel.org,
        linux-mm@...ck.org, x86@...nel.org, dave.hansen@...el.com,
        kirill.shutemov@...ux.intel.com, tony.luck@...el.com,
        tglx@...utronix.de, bp@...en8.de, mingo@...hat.com, hpa@...or.com,
        seanjc@...gle.com, pbonzini@...hat.com, david@...hat.com,
        dan.j.williams@...el.com, rafael.j.wysocki@...el.com,
        ashok.raj@...el.com, reinette.chatre@...el.com,
        len.brown@...el.com, ak@...ux.intel.com, isaku.yamahata@...el.com,
        ying.huang@...el.com, chao.gao@...el.com,
        sathyanarayanan.kuppuswamy@...ux.intel.com, nik.borisov@...e.com,
        bagasdotme@...il.com, sagis@...gle.com, imammedo@...hat.com
Subject: Re: [PATCH v12 20/22] x86/virt/tdx: Allow SEAMCALL to handle #UD and
 #GP

On Wed, Jun 28, 2023 at 10:38:23PM +0200, Peter Zijlstra wrote:
> On Wed, Jun 28, 2023 at 05:29:01PM +0200, Peter Zijlstra wrote:
> > On Tue, Jun 27, 2023 at 02:12:50AM +1200, Kai Huang wrote:
> > > diff --git a/arch/x86/virt/vmx/tdx/tdxcall.S b/arch/x86/virt/vmx/tdx/tdxcall.S
> > > index 49a54356ae99..757b0c34be10 100644
> > > --- a/arch/x86/virt/vmx/tdx/tdxcall.S
> > > +++ b/arch/x86/virt/vmx/tdx/tdxcall.S
> > > @@ -1,6 +1,7 @@
> > >  /* SPDX-License-Identifier: GPL-2.0 */
> > >  #include <asm/asm-offsets.h>
> > >  #include <asm/tdx.h>
> > > +#include <asm/asm.h>
> > >  
> > >  /*
> > >   * TDCALL and SEAMCALL are supported in Binutils >= 2.36.
> > > @@ -45,6 +46,7 @@
> > >  	/* Leave input param 2 in RDX */
> > >  
> > >  	.if \host
> > > +1:
> > >  	seamcall
> > 
> > So what registers are actually clobbered by SEAMCALL ? There's a
> > distinct lack of it in SDM Vol.2 instruction list :-(
> 
> With the exception of the abomination that is TDH.VP.ENTER all SEAMCALLs
> seem to be limited to the set presented here (c,d,8,9,10,11) and all
> other registers should be available.
> 
> Can we please make that a hard requirement, SEAMCALL must not use
> registers outside this? We can hardly program to random future
> extentions; we need hard ABI guarantees here.
> 
> That also means we should be able to use si,di for the cmovc below.
> 
> Kirill, back when we did __tdx_hypercall() we got bp removed as a valid
> register, the 1.0 spec still lists that, and it is also listed in
> TDH.VP.ENTER, I'm assuming it will be removed there too?
> 
> bp must not be used -- it violates the pre-existing calling convention.

How's this then? Utterly untested. Not been near a compiler even.

--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -109,10 +109,26 @@ EXPORT_SYMBOL_GPL(tdx_kvm_hypercall);
  * should only be used for calls that have no legitimate reason to fail
  * or where the kernel can not survive the call failing.
  */
-static inline void tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9,
-				   struct tdx_module_output *out)
+static inline void _tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9)
 {
-	if (__tdx_module_call(fn, rcx, rdx, r8, r9, out))
+	struct tdx_module_args args = {
+		.rcx = rcx,
+		.rdx = rdx,
+		.r8  = r8,
+		.r9  = r9,
+	};
+	return __tdx_module_call(fn, &args);
+}
+
+static inline void tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9)
+{
+	if (_tdx_module_call(fn, rcx, rdx, r8, r9))
+		panic("TDCALL %lld failed (Buggy TDX module!)\n", fn);
+}
+
+static inline void tdx_module_call_ret(u64 fn, struct tdx_module_args *args)
+{
+	if (__tdx_module_call(fn, args))
 		panic("TDCALL %lld failed (Buggy TDX module!)\n", fn);
 }
 
@@ -134,9 +150,9 @@ int tdx_mcall_get_report0(u8 *reportdata
 {
 	u64 ret;
 
-	ret = __tdx_module_call(TDX_GET_REPORT, virt_to_phys(tdreport),
-				virt_to_phys(reportdata), TDREPORT_SUBTYPE_0,
-				0, NULL);
+	ret = _tdx_module_call(TDX_GET_REPORT,
+			       virt_to_phys(tdreport), virt_to_phys(reportdata),
+			       TDREPORT_SUBTYPE_0, 0);
 	if (ret) {
 		if (TDCALL_RETURN_CODE(ret) == TDCALL_INVALID_OPERAND)
 			return -EINVAL;
@@ -184,7 +200,7 @@ static void __noreturn tdx_panic(const c
 
 static void tdx_parse_tdinfo(u64 *cc_mask)
 {
-	struct tdx_module_output out;
+	struct tdx_module_args args = {};
 	unsigned int gpa_width;
 	u64 td_attr;
 
@@ -195,7 +211,7 @@ static void tdx_parse_tdinfo(u64 *cc_mas
 	 * Guest-Host-Communication Interface (GHCI), section 2.4.2 TDCALL
 	 * [TDG.VP.INFO].
 	 */
-	tdx_module_call(TDX_GET_INFO, 0, 0, 0, 0, &out);
+	tdx_module_call_ret(TDX_GET_INFO, &args);
 
 	/*
 	 * The highest bit of a guest physical address is the "sharing" bit.
@@ -204,7 +220,7 @@ static void tdx_parse_tdinfo(u64 *cc_mas
 	 * The GPA width that comes out of this call is critical. TDX guests
 	 * can not meaningfully run without it.
 	 */
-	gpa_width = out.rcx & GENMASK(5, 0);
+	gpa_width = args.rcx & GENMASK(5, 0);
 	*cc_mask = BIT_ULL(gpa_width - 1);
 
 	/*
@@ -212,7 +228,7 @@ static void tdx_parse_tdinfo(u64 *cc_mas
 	 * memory.  Ensure that no #VE will be delivered for accesses to
 	 * TD-private memory.  Only VMM-shared memory (MMIO) will #VE.
 	 */
-	td_attr = out.rdx;
+	td_attr = args.rdx;
 	if (!(td_attr & ATTR_SEPT_VE_DISABLE)) {
 		const char *msg = "TD misconfiguration: SEPT_VE_DISABLE attribute must be set.";
 
@@ -620,7 +636,7 @@ __init bool tdx_early_handle_ve(struct p
 
 void tdx_get_ve_info(struct ve_info *ve)
 {
-	struct tdx_module_output out;
+	struct tdx_module_args args = {};
 
 	/*
 	 * Called during #VE handling to retrieve the #VE info from the
@@ -637,15 +653,15 @@ void tdx_get_ve_info(struct ve_info *ve)
 	 * Note, the TDX module treats virtual NMIs as inhibited if the #VE
 	 * valid flag is set. It means that NMI=>#VE will not result in a #DF.
 	 */
-	tdx_module_call(TDX_GET_VEINFO, 0, 0, 0, 0, &out);
+	tdx_module_call_ret(TDX_GET_VEINFO, &args);
 
 	/* Transfer the output parameters */
-	ve->exit_reason = out.rcx;
-	ve->exit_qual   = out.rdx;
-	ve->gla         = out.r8;
-	ve->gpa         = out.r9;
-	ve->instr_len   = lower_32_bits(out.r10);
-	ve->instr_info  = upper_32_bits(out.r10);
+	ve->exit_reason = args.rcx;
+	ve->exit_qual   = args.rdx;
+	ve->gla         = args.r8;
+	ve->gpa         = args.r9;
+	ve->instr_len   = lower_32_bits(args.r10);
+	ve->instr_info  = upper_32_bits(args.r10);
 }
 
 /*
@@ -779,7 +795,7 @@ static bool try_accept_one(phys_addr_t *
 	}
 
 	tdcall_rcx = *start | page_size;
-	if (__tdx_module_call(TDX_ACCEPT_PAGE, tdcall_rcx, 0, 0, 0, NULL))
+	if (_tdx_module_call(TDX_ACCEPT_PAGE, tdcall_rcx, 0, 0, 0))
 		return false;
 
 	*start += accept_size;
@@ -857,7 +873,7 @@ void __init tdx_early_init(void)
 	cc_set_mask(cc_mask);
 
 	/* Kernel does not use NOTIFY_ENABLES and does not need random #VEs */
-	tdx_module_call(TDX_WR, 0, TDCS_NOTIFY_ENABLES, 0, -1ULL, NULL);
+	tdx_module_call(TDX_WR, 0, TDCS_NOTIFY_ENABLES, 0, -1ULL);
 
 	/*
 	 * All bits above GPA width are reserved and kernel treats shared bit
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -37,7 +37,7 @@
  *
  * This is a software only structure and not part of the TDX module/VMM ABI.
  */
-struct tdx_module_output {
+struct tdx_module_args {
 	u64 rcx;
 	u64 rdx;
 	u64 r8;
@@ -67,8 +67,8 @@ struct ve_info {
 void __init tdx_early_init(void);
 
 /* Used to communicate with the TDX module */
-u64 __tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9,
-		      struct tdx_module_output *out);
+u64 __tdx_module_call(u64 fn, struct tdx_module_args *args);
+u64 __tdx_module_call_ret(u64 fn, struct tdx_module_args *args);
 
 void tdx_get_ve_info(struct ve_info *ve);
 
--- a/arch/x86/virt/vmx/tdx/tdxcall.S
+++ b/arch/x86/virt/vmx/tdx/tdxcall.S
@@ -17,37 +17,44 @@
  *            TDX module and hypercalls to the VMM.
  * SEAMCALL - used by TDX hosts to make requests to the
  *            TDX module.
+ *
+ *-------------------------------------------------------------------------
+ * TDCALL/SEAMCALL ABI:
+ *-------------------------------------------------------------------------
+ * Input Registers:
+ *
+ * RAX                 - TDCALL Leaf number.
+ * RCX,RDX,R8-R9       - TDCALL Leaf specific input registers.
+ *
+ * Output Registers:
+ *
+ * RAX                 - TDCALL instruction error code.
+ * RCX,RDX,R8-R11      - TDCALL Leaf specific output registers.
+ *
+ *-------------------------------------------------------------------------
+ *
+ * __tdx_module_call() function ABI:
+ *
+ * @fn   (RDI)         - TDCALL Leaf ID,    moved to RAX
+ * @regs (RSI)         - struct tdx_regs pointer
+ *
+ * Return status of TDCALL via RAX.
  */
-.macro TDX_MODULE_CALL host:req
-	/*
-	 * R12 will be used as temporary storage for struct tdx_module_output
-	 * pointer. Since R12-R15 registers are not used by TDCALL/SEAMCALL
-	 * services supported by this function, it can be reused.
-	 */
+.macro TDX_MODULE_CALL host:req ret:req
+	FRAME_BEGIN
 
-	/* Callee saved, so preserve it */
-	push %r12
+	mov	%rdi, %rax
+	mov	$TDX_SEAMCALL_VMFAILINVALID, %rdi
 
-	/*
-	 * Push output pointer to stack.
-	 * After the operation, it will be fetched into R12 register.
-	 */
-	push %r9
+	mov	TDX_MODULE_rcx(%rsi), %rcx
+	mov	TDX_MODULE_rdx(%rsi), %rdx
+	mov	TDX_MODULE_r8(%rsi),  %r8
+	mov	TDX_MODULE_r9(%rsi),  %r9
+//	mov	TDX_MODULE_r10(%rsi), %r10
+//	mov	TDX_MODULE_r11(%rsi), %r11
 
-	/* Mangle function call ABI into TDCALL/SEAMCALL ABI: */
-	/* Move Leaf ID to RAX */
-	mov %rdi, %rax
-	/* Move input 4 to R9 */
-	mov %r8,  %r9
-	/* Move input 3 to R8 */
-	mov %rcx, %r8
-	/* Move input 1 to RCX */
-	mov %rsi, %rcx
-	/* Leave input param 2 in RDX */
-
-	.if \host
-1:
-	seamcall
+.if \host
+1:	seamcall
 	/*
 	 * SEAMCALL instruction is essentially a VMExit from VMX root
 	 * mode to SEAM VMX root mode.  VMfailInvalid (CF=1) indicates
@@ -59,53 +66,30 @@
 	 * This value will never be used as actual SEAMCALL error code as
 	 * it is from the Reserved status code class.
 	 */
-	jnc .Lseamcall_out
-	mov $TDX_SEAMCALL_VMFAILINVALID, %rax
-	jmp .Lseamcall_out
+	cmovc	%rdi, %rax
 2:
-	/*
-	 * SEAMCALL caused #GP or #UD.  By reaching here %eax contains
-	 * the trap number.  Convert the trap number to the TDX error
-	 * code by setting TDX_SW_ERROR to the high 32-bits of %rax.
-	 *
-	 * Note cannot OR TDX_SW_ERROR directly to %rax as OR instruction
-	 * only accepts 32-bit immediate at most.
-	 */
-	mov $TDX_SW_ERROR, %r12
-	orq %r12, %rax
-
-	_ASM_EXTABLE_FAULT(1b, 2b)
-.Lseamcall_out:
-	.else
+.else
 	tdcall
-	.endif
-
-	/*
-	 * Fetch output pointer from stack to R12 (It is used
-	 * as temporary storage)
-	 */
-	pop %r12
+.endif
 
-	/*
-	 * Since this macro can be invoked with NULL as an output pointer,
-	 * check if caller provided an output struct before storing output
-	 * registers.
-	 *
-	 * Update output registers, even if the call failed (RAX != 0).
-	 * Other registers may contain details of the failure.
-	 */
-	test %r12, %r12
-	jz .Lno_output_struct
+.if \ret
+	movq %rcx, TDX_MODULE_rcx(%rsi)
+	movq %rdx, TDX_MODULE_rdx(%rsi)
+	movq %r8,  TDX_MODULE_r8(%rsi)
+	movq %r9,  TDX_MODULE_r9(%rsi)
+	movq %r10, TDX_MODULE_r10(%rsi)
+	movq %r11, TDX_MODULE_r11(%rsi)
+.endif
+
+	FRAME_END
+	RET
+
+.if \host
+3:
+	mov	$TDX_SW_ERROR, %rdi
+	or	%rdi, %rax
+	jmp 2b
 
-	/* Copy result registers to output struct: */
-	movq %rcx, TDX_MODULE_rcx(%r12)
-	movq %rdx, TDX_MODULE_rdx(%r12)
-	movq %r8,  TDX_MODULE_r8(%r12)
-	movq %r9,  TDX_MODULE_r9(%r12)
-	movq %r10, TDX_MODULE_r10(%r12)
-	movq %r11, TDX_MODULE_r11(%r12)
-
-.Lno_output_struct:
-	/* Restore the state of R12 register */
-	pop %r12
+	_ASM_EXTABLE_FAULT(1b, 3b)
+.endif
 .endm

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ