[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <3d579a8c2558391ff6e33e7b45527a83aa67c7f5.camel@intel.com>
Date: Fri, 25 Jul 2025 17:06:17 +0000
From: "Edgecombe, Rick P" <rick.p.edgecombe@...el.com>
To: "masahiroy@...nel.org" <masahiroy@...nel.org>, "rppt@...nel.org"
<rppt@...nel.org>, "lorenzo.stoakes@...cle.com" <lorenzo.stoakes@...cle.com>,
"justinstitt@...gle.com" <justinstitt@...gle.com>,
"nick.desaulniers+lkml@...il.com" <nick.desaulniers+lkml@...il.com>,
"david@...hat.com" <david@...hat.com>, "debug@...osinc.com"
<debug@...osinc.com>, "vbabka@...e.cz" <vbabka@...e.cz>, "morbo@...gle.com"
<morbo@...gle.com>, "palmer@...belt.com" <palmer@...belt.com>,
"akpm@...ux-foundation.org" <akpm@...ux-foundation.org>,
"Liam.Howlett@...cle.com" <Liam.Howlett@...cle.com>,
"nicolas.schier@...ux.dev" <nicolas.schier@...ux.dev>, "surenb@...gle.com"
<surenb@...gle.com>, "monk.chiang@...ive.com" <monk.chiang@...ive.com>,
"nathan@...nel.org" <nathan@...nel.org>, "kito.cheng@...ive.com"
<kito.cheng@...ive.com>, "paul.walmsley@...ive.com"
<paul.walmsley@...ive.com>, "aou@...s.berkeley.edu" <aou@...s.berkeley.edu>,
"mhocko@...e.com" <mhocko@...e.com>, "alex@...ti.fr" <alex@...ti.fr>
CC: "andrew@...ive.com" <andrew@...ive.com>, "samitolvanen@...gle.com"
<samitolvanen@...gle.com>, "cleger@...osinc.com" <cleger@...osinc.com>,
"llvm@...ts.linux.dev" <llvm@...ts.linux.dev>, "linux-kernel@...r.kernel.org"
<linux-kernel@...r.kernel.org>, "bjorn@...osinc.com" <bjorn@...osinc.com>,
"fweimer@...hat.com" <fweimer@...hat.com>,
"heinrich.schuchardt@...onical.com" <heinrich.schuchardt@...onical.com>,
"linux-mm@...ck.org" <linux-mm@...ck.org>, "conor.dooley@...rochip.com"
<conor.dooley@...rochip.com>, "ved@...osinc.com" <ved@...osinc.com>,
"samuel.holland@...ive.com" <samuel.holland@...ive.com>,
"charlie@...osinc.com" <charlie@...osinc.com>, "jeffreyalaw@...il.com"
<jeffreyalaw@...il.com>, "linux-kbuild@...r.kernel.org"
<linux-kbuild@...r.kernel.org>, "ajones@...tanamicro.com"
<ajones@...tanamicro.com>, "apatel@...tanamicro.com"
<apatel@...tanamicro.com>, "linux-riscv@...ts.infradead.org"
<linux-riscv@...ts.infradead.org>, "broonie@...nel.org" <broonie@...nel.org>
Subject: Re: [PATCH 10/11] scs: generic scs code updated to leverage hw
assisted shadow stack
On Thu, 2025-07-24 at 16:37 -0700, Deepak Gupta wrote:
> If shadow stack have memory protections from underlying cpu, use those
> protections. arches can define PAGE_KERNEL_SHADOWSTACK to vmalloc such shadow
> stack pages. Hw assisted shadow stack pages grow downwards like regular
> stack. Clang based software shadow call stack grows low to high address.
> Thus this patch addresses some of those needs due to opposite direction
> of shadow stack. Furthermore, hw shadow stack can't be memset because memset
> uses normal stores. Lastly to store magic word at base of shadow stack, arch
> specific shadow stack store has to be performed.
>
> Signed-off-by: Deepak Gupta <debug@...osinc.com>
> ---
> include/linux/scs.h | 26 +++++++++++++++++++++++++-
> kernel/scs.c | 38 +++++++++++++++++++++++++++++++++++---
> 2 files changed, 60 insertions(+), 4 deletions(-)
>
> diff --git a/include/linux/scs.h b/include/linux/scs.h
> index 4ab5bdc898cf..6ceee07c2d1a 100644
> --- a/include/linux/scs.h
> +++ b/include/linux/scs.h
> @@ -12,6 +12,7 @@
> #include <linux/poison.h>
> #include <linux/sched.h>
> #include <linux/sizes.h>
> +#include <asm/scs.h>
>
> #ifdef CONFIG_SHADOW_CALL_STACK
>
> @@ -37,22 +38,45 @@ static inline void scs_task_reset(struct task_struct *tsk)
> * Reset the shadow stack to the base address in case the task
> * is reused.
> */
> +#ifdef CONFIG_ARCH_HAS_KERNEL_SHADOW_STACK
> + task_scs_sp(tsk) = task_scs(tsk) + SCS_SIZE;
> +#else
> task_scs_sp(tsk) = task_scs(tsk);
> +#endif
> }
>
> static inline unsigned long *__scs_magic(void *s)
> {
> +#ifdef CONFIG_ARCH_HAS_KERNEL_SHADOW_STACK
> + return (unsigned long *)(s);
> +#else
> return (unsigned long *)(s + SCS_SIZE) - 1;
> +#endif
> }
>
> static inline bool task_scs_end_corrupted(struct task_struct *tsk)
> {
> unsigned long *magic = __scs_magic(task_scs(tsk));
> - unsigned long sz = task_scs_sp(tsk) - task_scs(tsk);
> + unsigned long sz;
> +
> +#ifdef CONFIG_ARCH_HAS_KERNEL_SHADOW_STACK
> + sz = (task_scs(tsk) + SCS_SIZE) - task_scs_sp(tsk);
> +#else
> + sz = task_scs_sp(tsk) - task_scs(tsk);
> +#endif
>
> return sz >= SCS_SIZE - 1 || READ_ONCE_NOCHECK(*magic) != SCS_END_MAGIC;
> }
>
> +static inline void __scs_store_magic(unsigned long *s, unsigned long magic_val)
> +{
> +#ifdef CONFIG_ARCH_HAS_KERNEL_SHADOW_STACK
> + arch_scs_store(s, magic_val);
> +#else
> + *__scs_magic(s) = magic_val;
> +#endif
> +}
> +
> DECLARE_STATIC_KEY_FALSE(dynamic_scs_enabled);
>
> static inline bool scs_is_dynamic(void)
> diff --git a/kernel/scs.c b/kernel/scs.c
> index d7809affe740..5910c0a8eabd 100644
> --- a/kernel/scs.c
> +++ b/kernel/scs.c
> @@ -11,6 +11,7 @@
> #include <linux/scs.h>
> #include <linux/vmalloc.h>
> #include <linux/vmstat.h>
> +#include <asm-generic/set_memory.h>
>
> #ifdef CONFIG_DYNAMIC_SCS
> DEFINE_STATIC_KEY_FALSE(dynamic_scs_enabled);
> @@ -32,19 +33,31 @@ static void *__scs_alloc(int node)
> {
> int i;
> void *s;
> + pgprot_t prot = PAGE_KERNEL;
> +
> +#ifdef CONFIG_ARCH_HAS_KERNEL_SHADOW_STACK
> + prot = PAGE_KERNEL_SHADOWSTACK;
> +#endif
>
> for (i = 0; i < NR_CACHED_SCS; i++) {
> s = this_cpu_xchg(scs_cache[i], NULL);
> if (s) {
> s = kasan_unpoison_vmalloc(s, SCS_SIZE,
> KASAN_VMALLOC_PROT_NORMAL);
> +/*
> + * If software shadow stack, its safe to memset. Else memset is not
> + * possible on hw protected shadow stack. memset constitutes stores and
> + * stores to shadow stack memory are disallowed and will fault.
> + */
> +#ifndef CONFIG_ARCH_HAS_KERNEL_SHADOW_STACK
> memset(s, 0, SCS_SIZE);
> +#endif
> goto out;
> }
> }
>
> s = __vmalloc_node_range(SCS_SIZE, 1, VMALLOC_START, VMALLOC_END,
> - GFP_SCS, PAGE_KERNEL, 0, node,
> + GFP_SCS, prot, 0, node,
> __builtin_return_address(0));
This doesn't update the direct map alias I think. Do you want to protect it?
>
> out:
> @@ -59,7 +72,7 @@ void *scs_alloc(int node)
> if (!s)
> return NULL;
>
> - *__scs_magic(s) = SCS_END_MAGIC;
> + __scs_store_magic(__scs_magic(s), SCS_END_MAGIC);
>
> /*
> * Poison the allocation to catch unintentional accesses to
> @@ -87,6 +100,16 @@ void scs_free(void *s)
> return;
>
> kasan_unpoison_vmalloc(s, SCS_SIZE, KASAN_VMALLOC_PROT_NORMAL);
> + /*
> + * Hardware protected shadow stack is not writeable by regular stores
> + * Thus adding this back to free list will raise faults by vmalloc
> + * It needs to be writeable again. It's good sanity as well because
> + * then it can't be inadvertently accesses and if done, it will fault.
> + */
> +#ifdef CONFIG_ARCH_HAS_KERNEL_SHADOW_STACK
> + set_memory_rw((unsigned long)s, (SCS_SIZE/PAGE_SIZE));
Above you don't update the direct map permissions. So I don't think you need
this. vmalloc should flush the permissioned mapping before re-using it with the
lazy cleanup scheme.
> +#endif
> +
I was thinking someday when we get to this for CET we would protect the direct
map, and so would need some pool of shadow stacks because flushing the TLB for
every thread alloc/free would likely be too impactful.
> vfree_atomic(s);
> }
>
> @@ -96,6 +119,9 @@ static int scs_cleanup(unsigned int cpu)
> void **cache = per_cpu_ptr(scs_cache, cpu);
>
> for (i = 0; i < NR_CACHED_SCS; i++) {
Oh! There is a cache, but the size is only 2.
> +#ifdef CONFIG_ARCH_HAS_KERNEL_SHADOW_STACK
> + set_memory_rw((unsigned long)cache[i], (SCS_SIZE/PAGE_SIZE));
> +#endif
> vfree(cache[i]);
> cache[i] = NULL;
> }
> @@ -122,7 +148,13 @@ int scs_prepare(struct task_struct *tsk, int node)
> if (!s)
> return -ENOMEM;
>
> - task_scs(tsk) = task_scs_sp(tsk) = s;
> + task_scs(tsk) = s;
> +#ifdef CONFIG_ARCH_HAS_KERNEL_SHADOW_STACK
> + task_scs_sp(tsk) = s + SCS_SIZE;
> +#else
> + task_scs_sp(tsk) = s;
> +#endif
> +
> return 0;
> }
>
>
Powered by blists - more mailing lists