[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <Zji3U131RJtQDdA_@krava>
Date: Mon, 6 May 2024 12:56:19 +0200
From: Jiri Olsa <olsajiri@...il.com>
To: "Edgecombe, Rick P" <rick.p.edgecombe@...el.com>
Cc: "olsajiri@...il.com" <olsajiri@...il.com>,
"songliubraving@...com" <songliubraving@...com>,
"luto@...nel.org" <luto@...nel.org>,
"mhiramat@...nel.org" <mhiramat@...nel.org>,
"andrii@...nel.org" <andrii@...nel.org>,
"debug@...osinc.com" <debug@...osinc.com>,
"john.fastabend@...il.com" <john.fastabend@...il.com>,
"linux-api@...r.kernel.org" <linux-api@...r.kernel.org>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
"mingo@...hat.com" <mingo@...hat.com>,
"rostedt@...dmis.org" <rostedt@...dmis.org>,
"ast@...nel.org" <ast@...nel.org>,
"tglx@...utronix.de" <tglx@...utronix.de>,
"linux-man@...r.kernel.org" <linux-man@...r.kernel.org>,
"oleg@...hat.com" <oleg@...hat.com>, "yhs@...com" <yhs@...com>,
"daniel@...earbox.net" <daniel@...earbox.net>,
"peterz@...radead.org" <peterz@...radead.org>,
"linux-trace-kernel@...r.kernel.org" <linux-trace-kernel@...r.kernel.org>,
"bp@...en8.de" <bp@...en8.de>,
"bpf@...r.kernel.org" <bpf@...r.kernel.org>,
"x86@...nel.org" <x86@...nel.org>,
"broonie@...nel.org" <broonie@...nel.org>
Subject: Re: [PATCHv4 bpf-next 2/7] uprobe: Add uretprobe syscall to speed up
return probe
On Fri, May 03, 2024 at 08:35:24PM +0000, Edgecombe, Rick P wrote:
> On Fri, 2024-05-03 at 22:17 +0200, Jiri Olsa wrote:
> > when uretprobe is created, kernel overwrites the return address on user
> > stack to point to user space trampoline, so the setup is in kernel hands
>
> I mean for uprobes in general. I'm didn't have any specific ideas in mind, but
> in general when we give the kernel more abilities around shadow stack we have to
> think if attackers could use it to work around shadow stack protections.
>
> >
> > with the hack below on top of this patchset I'm no longer seeing shadow
> > stack app crash on uretprobe.. I'll try to polish it and send out next
> > week, any suggestions are welcome ;-)
>
> Thanks. Some comments below.
>
> >
> > thanks,
> > jirka
> >
> >
> > ---
> > diff --git a/arch/x86/include/asm/shstk.h b/arch/x86/include/asm/shstk.h
> > index 42fee8959df7..d374305a6851 100644
> > --- a/arch/x86/include/asm/shstk.h
> > +++ b/arch/x86/include/asm/shstk.h
> > @@ -21,6 +21,8 @@ unsigned long shstk_alloc_thread_stack(struct task_struct
> > *p, unsigned long clon
> > void shstk_free(struct task_struct *p);
> > int setup_signal_shadow_stack(struct ksignal *ksig);
> > int restore_signal_shadow_stack(void);
> > +void uprobe_change_stack(unsigned long addr);
> > +void uprobe_push_stack(unsigned long addr);
>
> Maybe name them:
> shstk_update_last_frame();
> shstk_push_frame();
ok
>
>
> > #else
> > static inline long shstk_prctl(struct task_struct *task, int option,
> > unsigned long arg2) { return -EINVAL; }
> > diff --git a/arch/x86/kernel/shstk.c b/arch/x86/kernel/shstk.c
> > index 59e15dd8d0f8..804c446231d9 100644
> > --- a/arch/x86/kernel/shstk.c
> > +++ b/arch/x86/kernel/shstk.c
> > @@ -577,3 +577,24 @@ long shstk_prctl(struct task_struct *task, int option,
> > unsigned long arg2)
> > return wrss_control(true);
> > return -EINVAL;
> > }
> > +
> > +void uprobe_change_stack(unsigned long addr)
> > +{
> > + unsigned long ssp;
>
> Probably want something like:
>
> if (!features_enabled(ARCH_SHSTK_SHSTK))
> return;
ok
>
> So this doesn't try the below if shadow stack is disabled.
>
> > +
> > + ssp = get_user_shstk_addr();
> > + write_user_shstk_64((u64 __user *)ssp, (u64)addr);
> > +}
>
> Can we know that there was a valid return address just before this point on the
> stack? Or could it be a sigframe or something?
when uprobe hijack the return address it assumes it's on the top of the stack,
so it's saved and replaced with address of the user space trampoline
>
> > +
> > +void uprobe_push_stack(unsigned long addr)
> > +{
> > + unsigned long ssp;
>
> if (!features_enabled(ARCH_SHSTK_SHSTK))
> return;
>
> > +
> > + ssp = get_user_shstk_addr();
> > + ssp -= SS_FRAME_SIZE;
> > + write_user_shstk_64((u64 __user *)ssp, (u64)addr);
> > +
> > + fpregs_lock_and_load();
> > + wrmsrl(MSR_IA32_PL3_SSP, ssp);
> > + fpregs_unlock();
> > +}
> > diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
> > index 81e6ee95784d..259457838020 100644
> > --- a/arch/x86/kernel/uprobes.c
> > +++ b/arch/x86/kernel/uprobes.c
> > @@ -416,6 +416,7 @@ SYSCALL_DEFINE0(uretprobe)
> > regs->r11 = regs->flags;
> > regs->cx = regs->ip;
> >
> > + uprobe_push_stack(r11_cx_ax[2]);
>
> I'm concerned this could be used to push arbitrary frames to the shadow stack.
> Couldn't an attacker do a jump to the point that calls this syscall? Maybe this
> is what peterz was raising.
of course never say never, but here's my reasoning why I think it's ok
the page with the syscall trampoline is mapped in user space and can be
found in procfs maps file under '[uprobes]' name
the syscall can be called only from this trampoline, if it's called from
anywhere else the calling process receives SIGILL
now if you run the uretprobe syscall without any pending uretprobe for
the task it will receive SIGILL before it gets to the point of pushing
address on the shadow stack
and to configure the uretprobe you need to have CAP_PERFMON or CAP_SYS_ADMIN
if you'd actually managed to get the pending uretprobe instance, the shadow
stack entry is going to be used/pop-ed right away in the trampoline with
the ret instruction
and as I mentioned above it's ensured that the syscall is returning to the
trampoline and it can't be called from any other place
>
> > return regs->ax;
> >
> > sigill:
> > @@ -1191,8 +1192,10 @@ arch_uretprobe_hijack_return_addr(unsigned long
> > trampoline_vaddr, struct pt_regs
> > return orig_ret_vaddr;
> >
> > nleft = copy_to_user((void __user *)regs->sp, &trampoline_vaddr,
> > rasize);
> > - if (likely(!nleft))
> > + if (likely(!nleft)) {
> > + uprobe_change_stack(trampoline_vaddr);
> > return orig_ret_vaddr;
> > + }
> >
> > if (nleft != rasize) {
> > pr_err("return address clobbered: pid=%d, %%sp=%#lx,
> > %%ip=%#lx\n",
>
I'll try to add uprobe test under tools/testing/selftests/x86/test_shadow_stack.c
and send that and change below as part of new version
thanks for the comments,
jirka
---
diff --git a/arch/x86/include/asm/shstk.h b/arch/x86/include/asm/shstk.h
index 42fee8959df7..2e1ddcf98242 100644
--- a/arch/x86/include/asm/shstk.h
+++ b/arch/x86/include/asm/shstk.h
@@ -21,6 +21,8 @@ unsigned long shstk_alloc_thread_stack(struct task_struct *p, unsigned long clon
void shstk_free(struct task_struct *p);
int setup_signal_shadow_stack(struct ksignal *ksig);
int restore_signal_shadow_stack(void);
+int shstk_update_last_frame(unsigned long val);
+int shstk_push_frame(unsigned long val);
#else
static inline long shstk_prctl(struct task_struct *task, int option,
unsigned long arg2) { return -EINVAL; }
@@ -31,6 +33,8 @@ static inline unsigned long shstk_alloc_thread_stack(struct task_struct *p,
static inline void shstk_free(struct task_struct *p) {}
static inline int setup_signal_shadow_stack(struct ksignal *ksig) { return 0; }
static inline int restore_signal_shadow_stack(void) { return 0; }
+static inline int shstk_update_last_frame(unsigned long val) { return 0; }
+static inline int shstk_push_frame(unsigned long val) { return 0; }
#endif /* CONFIG_X86_USER_SHADOW_STACK */
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/kernel/shstk.c b/arch/x86/kernel/shstk.c
index 59e15dd8d0f8..66434dfde52e 100644
--- a/arch/x86/kernel/shstk.c
+++ b/arch/x86/kernel/shstk.c
@@ -577,3 +577,32 @@ long shstk_prctl(struct task_struct *task, int option, unsigned long arg2)
return wrss_control(true);
return -EINVAL;
}
+
+int shstk_update_last_frame(unsigned long val)
+{
+ unsigned long ssp;
+
+ if (!features_enabled(ARCH_SHSTK_SHSTK))
+ return 0;
+
+ ssp = get_user_shstk_addr();
+ return write_user_shstk_64((u64 __user *)ssp, (u64)val);
+}
+
+int shstk_push_frame(unsigned long val)
+{
+ unsigned long ssp;
+
+ if (!features_enabled(ARCH_SHSTK_SHSTK))
+ return 0;
+
+ ssp = get_user_shstk_addr();
+ ssp -= SS_FRAME_SIZE;
+ if (write_user_shstk_64((u64 __user *)ssp, (u64)val))
+ return -EFAULT;
+
+ fpregs_lock_and_load();
+ wrmsrl(MSR_IA32_PL3_SSP, ssp);
+ fpregs_unlock();
+ return 0;
+}
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 81e6ee95784d..ae6c3458a675 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -406,6 +406,11 @@ SYSCALL_DEFINE0(uretprobe)
* trampoline's ret instruction
*/
r11_cx_ax[2] = regs->ip;
+
+ /* make the shadow stack follow that */
+ if (shstk_push_frame(regs->ip))
+ goto sigill;
+
regs->ip = ip;
err = copy_to_user((void __user *)regs->sp, r11_cx_ax, sizeof(r11_cx_ax));
@@ -1191,8 +1196,13 @@ arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs
return orig_ret_vaddr;
nleft = copy_to_user((void __user *)regs->sp, &trampoline_vaddr, rasize);
- if (likely(!nleft))
+ if (likely(!nleft)) {
+ if (shstk_update_last_frame(trampoline_vaddr)) {
+ force_sig(SIGSEGV);
+ return -1;
+ }
return orig_ret_vaddr;
+ }
if (nleft != rasize) {
pr_err("return address clobbered: pid=%d, %%sp=%#lx, %%ip=%#lx\n",
Powered by blists - more mailing lists