[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <202502061320.07B459A@keescook>
Date: Thu, 6 Feb 2025 13:20:45 -0800
From: Kees Cook <kees@...nel.org>
To: Eyal Birger <eyal.birger@...il.com>
Cc: luto@...capital.net, wad@...omium.org, oleg@...hat.com,
mhiramat@...nel.org, andrii@...nel.org, jolsa@...nel.org,
alexei.starovoitov@...il.com, olsajiri@...il.com, cyphar@...har.com,
songliubraving@...com, yhs@...com, john.fastabend@...il.com,
peterz@...radead.org, tglx@...utronix.de, bp@...en8.de,
daniel@...earbox.net, ast@...nel.org, andrii.nakryiko@...il.com,
rostedt@...dmis.org, rafi@....io, shmulik.ladkani@...il.com,
bpf@...r.kernel.org, linux-api@...r.kernel.org,
linux-trace-kernel@...r.kernel.org, x86@...nel.org,
linux-kernel@...r.kernel.org, stable@...r.kernel.org
Subject: Re: [PATCH v3 1/2] seccomp: passthrough uretprobe systemcall without
filtering
On Sun, Feb 02, 2025 at 08:29:20AM -0800, Eyal Birger wrote:
> When attaching uretprobes to processes running inside docker, the attached
> process is segfaulted when encountering the retprobe.
>
> The reason is that now that uretprobe is a system call the default seccomp
> filters in docker block it as they only allow a specific set of known
> syscalls. This is true for other userspace applications which use seccomp
> to control their syscall surface.
>
> Since uretprobe is a "kernel implementation detail" system call which is
> not used by userspace application code directly, it is impractical and
> there's very little point in forcing all userspace applications to
> explicitly allow it in order to avoid crashing tracked processes.
>
> Pass this systemcall through seccomp without depending on configuration.
>
> Note: uretprobe isn't supported in i386 and __NR_ia32_rt_tgsigqueueinfo
> uses the same number as __NR_uretprobe so the syscall isn't forced in the
> compat bitmap.
>
> Fixes: ff474a78cef5 ("uprobe: Add uretprobe syscall to speed up return probe")
> Reported-by: Rafael Buchbinder <rafi@....io>
> Link: https://lore.kernel.org/lkml/CAHsH6Gs3Eh8DFU0wq58c_LF8A4_+o6z456J7BidmcVY2AqOnHQ@mail.gmail.com/
> Link: https://lore.kernel.org/lkml/20250121182939.33d05470@gandalf.local.home/T/#me2676c378eff2d6a33f3054fed4a5f3afa64e65b
> Link: https://lore.kernel.org/lkml/20250128145806.1849977-1-eyal.birger@gmail.com/
> Cc: stable@...r.kernel.org
> Signed-off-by: Eyal Birger <eyal.birger@...il.com>
> ---
> v3: no change - deferring 32bit compat handling as there aren't plans to
> support this syscall in compat mode.
> v2: use action_cache bitmap and mode1 array to check the syscall
> ---
> kernel/seccomp.c | 24 +++++++++++++++++++++---
> 1 file changed, 21 insertions(+), 3 deletions(-)
>
> diff --git a/kernel/seccomp.c b/kernel/seccomp.c
> index f59381c4a2ff..09b6f8e6db51 100644
> --- a/kernel/seccomp.c
> +++ b/kernel/seccomp.c
> @@ -734,13 +734,13 @@ seccomp_prepare_user_filter(const char __user *user_filter)
>
> #ifdef SECCOMP_ARCH_NATIVE
> /**
> - * seccomp_is_const_allow - check if filter is constant allow with given data
> + * seccomp_is_filter_const_allow - check if filter is constant allow with given data
> * @fprog: The BPF programs
> * @sd: The seccomp data to check against, only syscall number and arch
> * number are considered constant.
> */
> -static bool seccomp_is_const_allow(struct sock_fprog_kern *fprog,
> - struct seccomp_data *sd)
> +static bool seccomp_is_filter_const_allow(struct sock_fprog_kern *fprog,
> + struct seccomp_data *sd)
> {
> unsigned int reg_value = 0;
> unsigned int pc;
> @@ -812,6 +812,21 @@ static bool seccomp_is_const_allow(struct sock_fprog_kern *fprog,
> return false;
> }
>
> +static bool seccomp_is_const_allow(struct sock_fprog_kern *fprog,
> + struct seccomp_data *sd)
> +{
> +#ifdef __NR_uretprobe
> + if (sd->nr == __NR_uretprobe
> +#ifdef SECCOMP_ARCH_COMPAT
> + && sd->arch != SECCOMP_ARCH_COMPAT
> +#endif
> + )
> + return true;
> +#endif
> +
> + return seccomp_is_filter_const_allow(fprog, sd);
> +}
> +
> static void seccomp_cache_prepare_bitmap(struct seccomp_filter *sfilter,
> void *bitmap, const void *bitmap_prev,
> size_t bitmap_size, int arch)
I minimized the above to:
@@ -749,6 +749,15 @@ static bool seccomp_is_const_allow(struct sock_fprog_kern *fprog,
if (WARN_ON_ONCE(!fprog))
return false;
+ /* Our single exception to filtering. */
+#ifdef __NR_uretprobe
+#ifdef SECCOMP_ARCH_COMPAT
+ if (sd->arch == SECCOMP_ARCH_NATIVE)
+#endif
+ if (sd->nr == __NR_uretprobe)
+ return true;
+#endif
+
for (pc = 0; pc < fprog->len; pc++) {
struct sock_filter *insn = &fprog->filter[pc];
u16 code = insn->code;
> @@ -1023,6 +1038,9 @@ static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
> */
> static const int mode1_syscalls[] = {
> __NR_seccomp_read, __NR_seccomp_write, __NR_seccomp_exit, __NR_seccomp_sigreturn,
> +#ifdef __NR_uretprobe
> + __NR_uretprobe,
> +#endif
> -1, /* negative terminated */
> };
>
> --
> 2.43.0
>
-Kees
--
Kees Cook
Powered by blists - more mailing lists