[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <d0e2d1a5-110a-40be-8662-7c78afe22446@ghiti.fr>
Date: Sun, 12 May 2024 18:50:18 +0200
From: Alexandre Ghiti <alex@...ti.fr>
To: Deepak Gupta <debug@...osinc.com>, paul.walmsley@...ive.com,
rick.p.edgecombe@...el.com, broonie@...nel.org, Szabolcs.Nagy@....com,
kito.cheng@...ive.com, keescook@...omium.org, ajones@...tanamicro.com,
conor.dooley@...rochip.com, cleger@...osinc.com, atishp@...shpatra.org,
bjorn@...osinc.com, alexghiti@...osinc.com, samuel.holland@...ive.com,
conor@...nel.org
Cc: linux-doc@...r.kernel.org, linux-riscv@...ts.infradead.org,
linux-kernel@...r.kernel.org, devicetree@...r.kernel.org,
linux-mm@...ck.org, linux-arch@...r.kernel.org,
linux-kselftest@...r.kernel.org, corbet@....net, palmer@...belt.com,
aou@...s.berkeley.edu, robh+dt@...nel.org,
krzysztof.kozlowski+dt@...aro.org, oleg@...hat.com,
akpm@...ux-foundation.org, arnd@...db.de, ebiederm@...ssion.com,
Liam.Howlett@...cle.com, vbabka@...e.cz, lstoakes@...il.com,
shuah@...nel.org, brauner@...nel.org, andy.chiu@...ive.com,
jerry.shih@...ive.com, hankuan.chen@...ive.com, greentime.hu@...ive.com,
evan@...osinc.com, xiao.w.wang@...el.com, charlie@...osinc.com,
apatel@...tanamicro.com, mchitale@...tanamicro.com,
dbarboza@...tanamicro.com, sameo@...osinc.com, shikemeng@...weicloud.com,
willy@...radead.org, vincent.chen@...ive.com, guoren@...nel.org,
samitolvanen@...gle.com, songshuaishuai@...ylab.org, gerg@...nel.org,
heiko@...ech.de, bhe@...hat.com, jeeheng.sia@...rfivetech.com,
cyy@...self.name, maskray@...gle.com, ancientmodern4@...il.com,
mathis.salmen@...sal.de, cuiyunhui@...edance.com, bgray@...ux.ibm.com,
mpe@...erman.id.au, baruch@...s.co.il, alx@...nel.org, david@...hat.com,
catalin.marinas@....com, revest@...omium.org, josh@...htriplett.org,
shr@...kernel.io, deller@....de, omosnace@...hat.com, ojeda@...nel.org,
jhubbard@...dia.com
Subject: Re: [PATCH v3 14/29] riscv/mm: Implement map_shadow_stack() syscall
On 04/04/2024 01:35, Deepak Gupta wrote:
> As discussed extensively in the changelog for the addition of this
> syscall on x86 ("x86/shstk: Introduce map_shadow_stack syscall") the
> existing mmap() and madvise() syscalls do not map entirely well onto the
> security requirements for shadow stack memory since they lead to windows
> where memory is allocated but not yet protected or stacks which are not
> properly and safely initialised. Instead a new syscall map_shadow_stack()
> has been defined which allocates and initialises a shadow stack page.
>
> This patch implements this syscall for riscv. riscv doesn't require token
> to be setup by kernel because user mode can do that by itself. However to
> provide compatibility and portability with other architectues, user mode
> can specify token set flag.
>
> Signed-off-by: Deepak Gupta <debug@...osinc.com>
> ---
> arch/riscv/kernel/Makefile | 2 +
> arch/riscv/kernel/usercfi.c | 149 ++++++++++++++++++++++++++++++++
> include/uapi/asm-generic/mman.h | 1 +
> 3 files changed, 152 insertions(+)
> create mode 100644 arch/riscv/kernel/usercfi.c
>
> diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
> index 604d6bf7e476..3bec82f4e94c 100644
> --- a/arch/riscv/kernel/Makefile
> +++ b/arch/riscv/kernel/Makefile
> @@ -107,3 +107,5 @@ obj-$(CONFIG_COMPAT) += compat_vdso/
>
> obj-$(CONFIG_64BIT) += pi/
> obj-$(CONFIG_ACPI) += acpi.o
> +
> +obj-$(CONFIG_RISCV_USER_CFI) += usercfi.o
> diff --git a/arch/riscv/kernel/usercfi.c b/arch/riscv/kernel/usercfi.c
> new file mode 100644
> index 000000000000..c4ed0d4e33d6
> --- /dev/null
> +++ b/arch/riscv/kernel/usercfi.c
> @@ -0,0 +1,149 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2024 Rivos, Inc.
> + * Deepak Gupta <debug@...osinc.com>
> + */
> +
> +#include <linux/sched.h>
> +#include <linux/bitops.h>
> +#include <linux/types.h>
> +#include <linux/mm.h>
> +#include <linux/mman.h>
> +#include <linux/uaccess.h>
> +#include <linux/sizes.h>
> +#include <linux/user.h>
> +#include <linux/syscalls.h>
> +#include <linux/prctl.h>
> +#include <asm/csr.h>
> +#include <asm/usercfi.h>
> +
> +#define SHSTK_ENTRY_SIZE sizeof(void *)
> +
> +/*
> + * Writes on shadow stack can either be `sspush` or `ssamoswap`. `sspush` can happen
> + * implicitly on current shadow stack pointed to by CSR_SSP. `ssamoswap` takes pointer to
> + * shadow stack. To keep it simple, we plan to use `ssamoswap` to perform writes on shadow
> + * stack.
> + */
> +static noinline unsigned long amo_user_shstk(unsigned long *addr, unsigned long val)
> +{
> + /*
> + * Since shadow stack is supported only in 64bit configuration,
> + * ssamoswap.d is used below.
> * * CONFIG_RISCV_USER_CFI is dependent
> + * on 64BIT and compile of this file is dependent on CONFIG_RISCV_USER_CFI
> + * In case ssamoswap faults, return -1.
To me, this part of the comment is not needed.
> + * Never expect -1 on shadow stack. Expect return addresses and zero
In that case, should we BUG() instead?
> + */
> + unsigned long swap = -1;
> +
> + __enable_user_access();
> + asm goto(
> + ".option push\n"
> + ".option arch, +zicfiss\n"
> + "1: ssamoswap.d %[swap], %[val], %[addr]\n"
> + _ASM_EXTABLE(1b, %l[fault])
> + RISCV_ACQUIRE_BARRIER
> + ".option pop\n"
> + : [swap] "=r" (swap), [addr] "+A" (*addr)
> + : [val] "r" (val)
> + : "memory"
> + : fault
> + );
> + __disable_user_access();
> + return swap;
> +fault:
> + __disable_user_access();
> + return -1;
> +}
> +
> +/*
> + * Create a restore token on the shadow stack. A token is always XLEN wide
> + * and aligned to XLEN.
> + */
> +static int create_rstor_token(unsigned long ssp, unsigned long *token_addr)
> +{
> + unsigned long addr;
> +
> + /* Token must be aligned */
> + if (!IS_ALIGNED(ssp, SHSTK_ENTRY_SIZE))
> + return -EINVAL;
> +
> + /* On RISC-V we're constructing token to be function of address itself */
> + addr = ssp - SHSTK_ENTRY_SIZE;
> +
> + if (amo_user_shstk((unsigned long __user *)addr, (unsigned long) ssp) == -1)
> + return -EFAULT;
> +
> + if (token_addr)
> + *token_addr = addr;
> +
> + return 0;
> +}
> +
> +static unsigned long allocate_shadow_stack(unsigned long addr, unsigned long size,
> + unsigned long token_offset,
> + bool set_tok)
> +{
> + int flags = MAP_ANONYMOUS | MAP_PRIVATE;
> + struct mm_struct *mm = current->mm;
> + unsigned long populate, tok_loc = 0;
> +
> + if (addr)
> + flags |= MAP_FIXED_NOREPLACE;
> +
> + mmap_write_lock(mm);
> + addr = do_mmap(NULL, addr, size, PROT_READ, flags,
Hmmm why do you map the shadow stack as PROT_READ here?
> + VM_SHADOW_STACK | VM_WRITE, 0, &populate, NULL);
> + mmap_write_unlock(mm);
> +
> + if (!set_tok || IS_ERR_VALUE(addr))
> + goto out;
> +
> + if (create_rstor_token(addr + token_offset, &tok_loc)) {
> + vm_munmap(addr, size);
> + return -EINVAL;
> + }
> +
> + addr = tok_loc;
> +
> +out:
> + return addr;
> +}
> +
> +SYSCALL_DEFINE3(map_shadow_stack, unsigned long, addr, unsigned long, size, unsigned int, flags)
> +{
> + bool set_tok = flags & SHADOW_STACK_SET_TOKEN;
> + unsigned long aligned_size = 0;
> +
> + if (!cpu_supports_shadow_stack())
> + return -EOPNOTSUPP;
> +
> + /* Anything other than set token should result in invalid param */
> + if (flags & ~SHADOW_STACK_SET_TOKEN)
> + return -EINVAL;
> +
> + /*
> + * Unlike other architectures, on RISC-V, SSP pointer is held in CSR_SSP and is available
> + * CSR in all modes. CSR accesses are performed using 12bit index programmed in instruction
> + * itself. This provides static property on register programming and writes to CSR can't
> + * be unintentional from programmer's perspective. As long as programmer has guarded areas
> + * which perform writes to CSR_SSP properly, shadow stack pivoting is not possible. Since
> + * CSR_SSP is writeable by user mode, it itself can setup a shadow stack token subsequent
> + * to allocation. Although in order to provide portablity with other architecture (because
> + * `map_shadow_stack` is arch agnostic syscall), RISC-V will follow expectation of a token
> + * flag in flags and if provided in flags, setup a token at the base.
> + */
> +
> + /* If there isn't space for a token */
> + if (set_tok && size < SHSTK_ENTRY_SIZE)
> + return -ENOSPC;
> +
> + if (addr && (addr % PAGE_SIZE))
I would use:
if (addr && (addr & (PAGE_SIZE - 1))
> + return -EINVAL;
> +
> + aligned_size = PAGE_ALIGN(size);
> + if (aligned_size < size)
> + return -EOVERFLOW;
> +
> + return allocate_shadow_stack(addr, aligned_size, size, set_tok);
> +}
> diff --git a/include/uapi/asm-generic/mman.h b/include/uapi/asm-generic/mman.h
> index 57e8195d0b53..0c0ac6214de6 100644
> --- a/include/uapi/asm-generic/mman.h
> +++ b/include/uapi/asm-generic/mman.h
> @@ -19,4 +19,5 @@
> #define MCL_FUTURE 2 /* lock all future mappings */
> #define MCL_ONFAULT 4 /* lock all pages that are faulted in */
>
> +#define SHADOW_STACK_SET_TOKEN (1ULL << 0) /* Set up a restore token in the shadow stack */
> #endif /* __ASM_GENERIC_MMAN_H */
Don't we need to advertise this new syscall to the man pages?
Powered by blists - more mailing lists