[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <819bd98b-2a60-4107-8e13-41f1e4c706b1@linux.intel.com>
Date: Wed, 17 Sep 2025 16:45:46 +0800
From: Binbin Wu <binbin.wu@...ux.intel.com>
To: Sean Christopherson <seanjc@...gle.com>
Cc: Paolo Bonzini <pbonzini@...hat.com>, kvm@...r.kernel.org,
linux-kernel@...r.kernel.org, Tom Lendacky <thomas.lendacky@....com>,
Mathias Krause <minipli@...ecurity.net>, John Allen <john.allen@....com>,
Rick Edgecombe <rick.p.edgecombe@...el.com>, Chao Gao <chao.gao@...el.com>,
Maxim Levitsky <mlevitsk@...hat.com>, Xiaoyao Li <xiaoyao.li@...el.com>,
Zhang Yi Z <yi.z.zhang@...ux.intel.com>
Subject: Re: [PATCH v15 18/41] KVM: x86: Don't emulate instructions affected
by CET features
On 9/13/2025 7:22 AM, Sean Christopherson wrote:
> From: Yang Weijiang <weijiang.yang@...el.com>
>
> Don't emulate branch instructions, e.g. CALL/RET/JMP etc., that are
> affected by Shadow Stacks and/or Indirect Branch Tracking when said
> features are enabled in the guest, as fully emulating CET would require
> significant complexity for no practical benefit (KVM shouldn't need to
> emulate branch instructions on modern hosts). Simply doing nothing isn't
> an option as that would allow a malicious entity to subvert CET
> protections via the emulator.
>
> Note! On far transfers, do NOT consult the current privilege level and
> instead treat SHSTK/IBT as being enabled if they're enabled for User *or*
> Supervisor mode. On inter-privilege level far transfers, SHSTK and IBT
> can be in play for the target privilege level, i.e. checking the current
> privilege could get a false negative, and KVM doesn't know the target
> privilege level until emulation gets under way.
Regarding the emulator: there is also the VMX exit reason EXIT_REASON_TASK_SWITCH.
That VM-Exit reaches the emulator via the following call path:
EXIT_REASON_TASK_SWITCH
handle_task_switch
kvm_task_switch
emulator_task_switch
According to the SDM, Vol. 3, chapter "Task Management", section "Executing a Task":
"If shadow stack is enabled, then the SSP of the task is located at the 4 bytes
at offset 104 in the 32-bit TSS and is used by the processor to establish the
SSP when a task switch occurs from a task associated with this TSS. Note that
the processor does not write the SSP of the task initiating the task switch to
the TSS of that task, and instead the SSP of the previous task is pushed onto
the shadow stack of the new task."
This task-switch case is not covered by this patch, although using CET in 32-bit
guests should be a corner case.
>
> Suggested-by: Chao Gao <chao.gao@...el.com>
> Signed-off-by: Yang Weijiang <weijiang.yang@...el.com>
> Cc: Mathias Krause <minipli@...ecurity.net>
> Cc: John Allen <john.allen@....com>
> Cc: Rick Edgecombe <rick.p.edgecombe@...el.com>
> Signed-off-by: Chao Gao <chao.gao@...el.com>
> Co-developed-by: Sean Christopherson <seanjc@...gle.com>
> Signed-off-by: Sean Christopherson <seanjc@...gle.com>
> ---
> arch/x86/kvm/emulate.c | 58 ++++++++++++++++++++++++++++++++++--------
> 1 file changed, 47 insertions(+), 11 deletions(-)
>
> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
> index 542d3664afa3..e4be54a677b0 100644
> --- a/arch/x86/kvm/emulate.c
> +++ b/arch/x86/kvm/emulate.c
> @@ -178,6 +178,8 @@
> #define IncSP ((u64)1 << 54) /* SP is incremented before ModRM calc */
> #define TwoMemOp ((u64)1 << 55) /* Instruction has two memory operand */
> #define IsBranch ((u64)1 << 56) /* Instruction is considered a branch. */
> +#define ShadowStack ((u64)1 << 57) /* Instruction protected by Shadow Stack. */
> +#define IndirBrnTrk ((u64)1 << 58) /* Instruction protected by IBT. */
>
> #define DstXacc (DstAccLo | SrcAccHi | SrcWrite)
>
> @@ -4068,9 +4070,9 @@ static const struct opcode group4[] = {
> static const struct opcode group5[] = {
> F(DstMem | SrcNone | Lock, em_inc),
> F(DstMem | SrcNone | Lock, em_dec),
> - I(SrcMem | NearBranch | IsBranch, em_call_near_abs),
> - I(SrcMemFAddr | ImplicitOps | IsBranch, em_call_far),
> - I(SrcMem | NearBranch | IsBranch, em_jmp_abs),
> + I(SrcMem | NearBranch | IsBranch | ShadowStack | IndirBrnTrk, em_call_near_abs),
> + I(SrcMemFAddr | ImplicitOps | IsBranch | ShadowStack | IndirBrnTrk, em_call_far),
> + I(SrcMem | NearBranch | IsBranch | IndirBrnTrk, em_jmp_abs),
> I(SrcMemFAddr | ImplicitOps | IsBranch, em_jmp_far),
> I(SrcMem | Stack | TwoMemOp, em_push), D(Undefined),
> };
> @@ -4332,11 +4334,11 @@ static const struct opcode opcode_table[256] = {
> /* 0xC8 - 0xCF */
> I(Stack | SrcImmU16 | Src2ImmByte | IsBranch, em_enter),
> I(Stack | IsBranch, em_leave),
> - I(ImplicitOps | SrcImmU16 | IsBranch, em_ret_far_imm),
> - I(ImplicitOps | IsBranch, em_ret_far),
> - D(ImplicitOps | IsBranch), DI(SrcImmByte | IsBranch, intn),
> + I(ImplicitOps | SrcImmU16 | IsBranch | ShadowStack, em_ret_far_imm),
> + I(ImplicitOps | IsBranch | ShadowStack, em_ret_far),
> + D(ImplicitOps | IsBranch), DI(SrcImmByte | IsBranch | ShadowStack, intn),
> D(ImplicitOps | No64 | IsBranch),
> - II(ImplicitOps | IsBranch, em_iret, iret),
> + II(ImplicitOps | IsBranch | ShadowStack, em_iret, iret),
> /* 0xD0 - 0xD7 */
> G(Src2One | ByteOp, group2), G(Src2One, group2),
> G(Src2CL | ByteOp, group2), G(Src2CL, group2),
> @@ -4352,7 +4354,7 @@ static const struct opcode opcode_table[256] = {
> I2bvIP(SrcImmUByte | DstAcc, em_in, in, check_perm_in),
> I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
> /* 0xE8 - 0xEF */
> - I(SrcImm | NearBranch | IsBranch, em_call),
> + I(SrcImm | NearBranch | IsBranch | ShadowStack, em_call),
> D(SrcImm | ImplicitOps | NearBranch | IsBranch),
> I(SrcImmFAddr | No64 | IsBranch, em_jmp_far),
> D(SrcImmByte | ImplicitOps | NearBranch | IsBranch),
> @@ -4371,7 +4373,7 @@ static const struct opcode opcode_table[256] = {
> static const struct opcode twobyte_table[256] = {
> /* 0x00 - 0x0F */
> G(0, group6), GD(0, &group7), N, N,
> - N, I(ImplicitOps | EmulateOnUD | IsBranch, em_syscall),
> + N, I(ImplicitOps | EmulateOnUD | IsBranch | ShadowStack | IndirBrnTrk, em_syscall),
> II(ImplicitOps | Priv, em_clts, clts), N,
> DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
> N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
> @@ -4402,8 +4404,8 @@ static const struct opcode twobyte_table[256] = {
> IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
> II(ImplicitOps | Priv, em_rdmsr, rdmsr),
> IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
> - I(ImplicitOps | EmulateOnUD | IsBranch, em_sysenter),
> - I(ImplicitOps | Priv | EmulateOnUD | IsBranch, em_sysexit),
> + I(ImplicitOps | EmulateOnUD | IsBranch | ShadowStack | IndirBrnTrk, em_sysenter),
> + I(ImplicitOps | Priv | EmulateOnUD | IsBranch | ShadowStack, em_sysexit),
> N, N,
> N, N, N, N, N, N, N, N,
> /* 0x40 - 0x4F */
> @@ -4941,6 +4943,40 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int
> if (ctxt->d == 0)
> return EMULATION_FAILED;
>
> + /*
> + * Reject emulation if KVM might need to emulate shadow stack updates
> + * and/or indirect branch tracking enforcement, which the emulator
> + * doesn't support.
> + */
> + if (opcode.flags & (ShadowStack | IndirBrnTrk) &&
> + ctxt->ops->get_cr(ctxt, 4) & X86_CR4_CET) {
> + u64 u_cet = 0, s_cet = 0;
> +
> + /*
> + * Check both User and Supervisor on far transfers as inter-
> + * privilege level transfers are impacted by CET at the target
> + * privilege levels, and that is not known at this time. The
> + * expectation is that the guest will not require emulation
> + * of any CET-affected instructions at any privilege level.
> + */
> + if (!(opcode.flags & NearBranch))
> + u_cet = s_cet = CET_SHSTK_EN | CET_ENDBR_EN;
> + else if (ctxt->ops->cpl(ctxt) == 3)
> + u_cet = CET_SHSTK_EN | CET_ENDBR_EN;
> + else
> + s_cet = CET_SHSTK_EN | CET_ENDBR_EN;
> +
> + if ((u_cet && ctxt->ops->get_msr(ctxt, MSR_IA32_U_CET, &u_cet)) ||
> + (s_cet && ctxt->ops->get_msr(ctxt, MSR_IA32_S_CET, &s_cet)))
> + return EMULATION_FAILED;
> +
> + if ((u_cet | s_cet) & CET_SHSTK_EN && opcode.flags & ShadowStack)
> + return EMULATION_FAILED;
> +
> + if ((u_cet | s_cet) & CET_ENDBR_EN && opcode.flags & IndirBrnTrk)
> + return EMULATION_FAILED;
> + }
> +
> ctxt->execute = opcode.u.execute;
>
> if (unlikely(emulation_type & EMULTYPE_TRAP_UD) &&
Powered by blists - more mailing lists