[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <6596E9D7-E0B9-4AEA-BC39-2A637B401DC1@zytor.com>
Date: Wed, 22 Oct 2025 18:20:43 -0700
From: Xin Li <xin@...or.com>
To: linux-kernel@...r.kernel.org, kvm@...r.kernel.org,
linux-doc@...r.kernel.org, Dave Hansen <dave.hansen@...ux.intel.com>
Cc: pbonzini@...hat.com, seanjc@...gle.com, corbet@....net, tglx@...utronix.de,
mingo@...hat.com, bp@...en8.de, x86@...nel.org, hpa@...or.com,
luto@...nel.org, peterz@...radead.org, andrew.cooper3@...rix.com,
chao.gao@...el.com, hch@...radead.org
Subject: Re: [PATCH v8 05/21] x86/cea: Export API for per-CPU exception stacks for KVM
> Convert the __this_cpu_ist_{top,bottom}_va() macros into proper functions,
> and export __this_cpu_ist_top_va() to allow KVM to retrieve the top of the
> per-CPU exception stack.
>
> FRED introduced new fields in the host-state area of the VMCS for stack
> levels 1->3 (HOST_IA32_FRED_RSP[123]), each respectively corresponding to
> per-CPU exception stacks for #DB, NMI and #DF. KVM must populate these
> fields each time a vCPU is loaded onto a CPU.
>
> To simplify access to the exception stacks in struct cea_exception_stacks,
> a union is used to create an array alias, enabling array-style indexing of
> the stack entries.
>
> Signed-off-by: Xin Li (Intel) <xin@...or.com>
Dave, can you please help to review patches 4 and 5?
Thanks!
Xin
> ---
>
> Change in v7:
> * Remove Suggested-bys (Dave Hansen).
> * Move rename code in a separate patch (Dave Hansen).
> * Access cea_exception_stacks using array indexing (Dave Hansen).
> * Use BUILD_BUG_ON(ESTACK_DF != 0) to ensure the starting index is 0
> (Dave Hansen).
>
> Change in v5:
> * Export accessor instead of data (Christoph Hellwig).
> * Add TB from Xuelian Guo.
>
> Change in v4:
> * Rewrite the change log and add comments to the export (Dave Hansen).
> ---
> arch/x86/include/asm/cpu_entry_area.h | 51 +++++++++++++--------------
> arch/x86/mm/cpu_entry_area.c | 25 +++++++++++++
> 2 files changed, 50 insertions(+), 26 deletions(-)
>
> diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
> index d0f884c28178..58cd71144e5e 100644
> --- a/arch/x86/include/asm/cpu_entry_area.h
> +++ b/arch/x86/include/asm/cpu_entry_area.h
> @@ -16,6 +16,19 @@
> #define VC_EXCEPTION_STKSZ 0
> #endif
>
> +/*
> + * The exception stack ordering in [cea_]exception_stacks
> + */
> +enum exception_stack_ordering {
> + ESTACK_DF,
> + ESTACK_NMI,
> + ESTACK_DB,
> + ESTACK_MCE,
> + ESTACK_VC,
> + ESTACK_VC2,
> + N_EXCEPTION_STACKS
> +};
> +
> /* Macro to enforce the same ordering and stack sizes */
> #define ESTACKS_MEMBERS(guardsize, optional_stack_size) \
> char ESTACK_DF_stack_guard[guardsize]; \
> @@ -39,37 +52,29 @@ struct exception_stacks {
>
> /* The effective cpu entry area mapping with guard pages. */
> struct cea_exception_stacks {
> - ESTACKS_MEMBERS(PAGE_SIZE, EXCEPTION_STKSZ)
> -};
> -
> -/*
> - * The exception stack ordering in [cea_]exception_stacks
> - */
> -enum exception_stack_ordering {
> - ESTACK_DF,
> - ESTACK_NMI,
> - ESTACK_DB,
> - ESTACK_MCE,
> - ESTACK_VC,
> - ESTACK_VC2,
> - N_EXCEPTION_STACKS
> + union{
> + struct {
> + ESTACKS_MEMBERS(PAGE_SIZE, EXCEPTION_STKSZ)
> + };
> + struct {
> + char stack_guard[PAGE_SIZE];
> + char stack[EXCEPTION_STKSZ];
> + } event_stacks[N_EXCEPTION_STACKS];
> + };
> };
>
> #define CEA_ESTACK_SIZE(st) \
> sizeof(((struct cea_exception_stacks *)0)->st## _stack)
>
> -#define CEA_ESTACK_BOT(ceastp, st) \
> - ((unsigned long)&(ceastp)->st## _stack)
> -
> -#define CEA_ESTACK_TOP(ceastp, st) \
> - (CEA_ESTACK_BOT(ceastp, st) + CEA_ESTACK_SIZE(st))
> -
> #define CEA_ESTACK_OFFS(st) \
> offsetof(struct cea_exception_stacks, st## _stack)
>
> #define CEA_ESTACK_PAGES \
> (sizeof(struct cea_exception_stacks) / PAGE_SIZE)
>
> +extern unsigned long __this_cpu_ist_top_va(enum exception_stack_ordering stack);
> +extern unsigned long __this_cpu_ist_bottom_va(enum exception_stack_ordering stack);
> +
> #endif
>
> #ifdef CONFIG_X86_32
> @@ -144,10 +149,4 @@ static __always_inline struct entry_stack *cpu_entry_stack(int cpu)
> return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
> }
>
> -#define __this_cpu_ist_top_va(name) \
> - CEA_ESTACK_TOP(__this_cpu_read(cea_exception_stacks), name)
> -
> -#define __this_cpu_ist_bottom_va(name) \
> - CEA_ESTACK_BOT(__this_cpu_read(cea_exception_stacks), name)
> -
> #endif
> diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
> index 9fa371af8abc..595c2e03ddd5 100644
> --- a/arch/x86/mm/cpu_entry_area.c
> +++ b/arch/x86/mm/cpu_entry_area.c
> @@ -18,6 +18,31 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage)
> static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks);
> DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks);
>
> +/*
> + * FRED introduced new fields in the host-state area of the VMCS for
> + * stack levels 1->3 (HOST_IA32_FRED_RSP[123]), each respectively
> + * corresponding to per CPU stacks for #DB, NMI and #DF. KVM must
> + * populate these each time a vCPU is loaded onto a CPU.
> + *
> + * Called from entry code, so must be noinstr.
> + */
> +noinstr unsigned long __this_cpu_ist_bottom_va(enum exception_stack_ordering stack)
> +{
> + struct cea_exception_stacks *s;
> +
> + BUILD_BUG_ON(ESTACK_DF != 0);
> +
> + s = __this_cpu_read(cea_exception_stacks);
> +
> + return (unsigned long)&s->event_stacks[stack].stack;
> +}
> +
> +noinstr unsigned long __this_cpu_ist_top_va(enum exception_stack_ordering stack)
> +{
> + return __this_cpu_ist_bottom_va(stack) + EXCEPTION_STKSZ;
> +}
> +EXPORT_SYMBOL(__this_cpu_ist_top_va);
> +
> static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, _cea_offset);
>
> static __always_inline unsigned int cea_offset(unsigned int cpu)
> --
> 2.51.0
>
>
Powered by blists - more mailing lists