[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CALOAHbC6UpfFOOibdDiC7xFc5YFUgZnk3MZ=3Ny6we=AcrNbew@mail.gmail.com>
Date: Wed, 28 Jun 2023 15:16:47 +0800
From: Yafang Shao <laoar.shao@...il.com>
To: Ze Gao <zegao2021@...il.com>
Cc: Steven Rostedt <rostedt@...dmis.org>,
Masami Hiramatsu <mhiramat@...nel.org>,
Albert Ou <aou@...s.berkeley.edu>,
Alexander Gordeev <agordeev@...ux.ibm.com>,
Alexei Starovoitov <ast@...nel.org>,
Borislav Petkov <bp@...en8.de>,
Christian Borntraeger <borntraeger@...ux.ibm.com>,
Dave Hansen <dave.hansen@...ux.intel.com>,
Heiko Carstens <hca@...ux.ibm.com>,
"H. Peter Anvin" <hpa@...or.com>, Ingo Molnar <mingo@...hat.com>,
Palmer Dabbelt <palmer@...belt.com>,
Paul Walmsley <paul.walmsley@...ive.com>,
Sven Schnelle <svens@...ux.ibm.com>,
Thomas Gleixner <tglx@...utronix.de>,
Vasily Gorbik <gor@...ux.ibm.com>, x86@...nel.org,
bpf@...r.kernel.org, linux-kernel@...r.kernel.org,
linux-riscv@...ts.infradead.org, linux-s390@...r.kernel.org,
linux-trace-kernel@...r.kernel.org,
Conor Dooley <conor@...nel.org>, Jiri Olsa <jolsa@...nel.org>,
Yonghong Song <yhs@...com>, Ze Gao <zegao@...cent.com>
Subject: Re: [PATCH v3 2/4] fprobe: make fprobe_kprobe_handler recursion free
On Wed, May 17, 2023 at 11:45 AM Ze Gao <zegao2021@...il.com> wrote:
>
> The current implementation calls kprobe-related functions before doing
> the ftrace recursion check in fprobe_kprobe_handler, which opens the door
> to a kernel crash due to stack recursion if preempt_count_{add, sub}
> is traceable in kprobe_busy_{begin, end}.
>
> Without this patch, things go like this (quoted from Steven):
> "
> fprobe_kprobe_handler() {
> kprobe_busy_begin() {
> preempt_disable() {
> preempt_count_add() { <-- trace
> fprobe_kprobe_handler() {
> [ wash, rinse, repeat, CRASH!!! ]
> "
>
> By refactoring the common part out of fprobe_kprobe_handler and
> fprobe_handler and calling ftrace recursion detection at the very beginning,
> the whole fprobe_kprobe_handler is free from recursion.
>
> Signed-off-by: Ze Gao <zegao@...cent.com>
> Acked-by: Masami Hiramatsu (Google) <mhiramat@...nel.org>
> Link: https://lore.kernel.org/linux-trace-kernel/20230516071830.8190-3-zegao@tencent.com
> ---
> kernel/trace/fprobe.c | 59 ++++++++++++++++++++++++++++++++-----------
> 1 file changed, 44 insertions(+), 15 deletions(-)
>
> diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
> index 9abb3905bc8e..097c740799ba 100644
> --- a/kernel/trace/fprobe.c
> +++ b/kernel/trace/fprobe.c
> @@ -20,30 +20,22 @@ struct fprobe_rethook_node {
> char data[];
> };
>
> -static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
> - struct ftrace_ops *ops, struct ftrace_regs *fregs)
> +static inline void __fprobe_handler(unsigned long ip, unsigned long
> + parent_ip, struct ftrace_ops *ops, struct ftrace_regs *fregs)
> {
> struct fprobe_rethook_node *fpr;
> struct rethook_node *rh = NULL;
> struct fprobe *fp;
> void *entry_data = NULL;
> - int bit, ret;
> + int ret;
>
> fp = container_of(ops, struct fprobe, ops);
> - if (fprobe_disabled(fp))
> - return;
> -
> - bit = ftrace_test_recursion_trylock(ip, parent_ip);
> - if (bit < 0) {
> - fp->nmissed++;
> - return;
> - }
>
> if (fp->exit_handler) {
> rh = rethook_try_get(fp->rethook);
> if (!rh) {
> fp->nmissed++;
> - goto out;
> + return;
> }
> fpr = container_of(rh, struct fprobe_rethook_node, node);
> fpr->entry_ip = ip;
> @@ -61,23 +53,60 @@ static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
> else
> rethook_hook(rh, ftrace_get_regs(fregs), true);
> }
> -out:
> +}
> +
> +static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
> + struct ftrace_ops *ops, struct ftrace_regs *fregs)
> +{
> + struct fprobe *fp;
> + int bit;
> +
> + fp = container_of(ops, struct fprobe, ops);
> + if (fprobe_disabled(fp))
> + return;
> +
> + /* recursion detection has to go before any traceable function and
> + * all functions before this point should be marked as notrace
> + */
> + bit = ftrace_test_recursion_trylock(ip, parent_ip);
> + if (bit < 0) {
> + fp->nmissed++;
> + return;
> + }
> + __fprobe_handler(ip, parent_ip, ops, fregs);
> ftrace_test_recursion_unlock(bit);
> +
> }
> NOKPROBE_SYMBOL(fprobe_handler);
>
> static void fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip,
> struct ftrace_ops *ops, struct ftrace_regs *fregs)
> {
> - struct fprobe *fp = container_of(ops, struct fprobe, ops);
> + struct fprobe *fp;
> + int bit;
> +
> + fp = container_of(ops, struct fprobe, ops);
> + if (fprobe_disabled(fp))
> + return;
> +
> + /* recursion detection has to go before any traceable function and
> + * all functions called before this point should be marked as notrace
> + */
> + bit = ftrace_test_recursion_trylock(ip, parent_ip);
> + if (bit < 0) {
> + fp->nmissed++;
> + return;
> + }
>
> if (unlikely(kprobe_running())) {
> fp->nmissed++;
I have just looked through this patchset. Just out of curiosity:
shouldn't we call ftrace_test_recursion_unlock(bit) here?
We have already taken the recursion lock successfully, so why do we return without releasing it?
> return;
> }
> +
> kprobe_busy_begin();
> - fprobe_handler(ip, parent_ip, ops, fregs);
> + __fprobe_handler(ip, parent_ip, ops, fregs);
> kprobe_busy_end();
> + ftrace_test_recursion_unlock(bit);
> }
>
> static void fprobe_exit_handler(struct rethook_node *rh, void *data,
> --
> 2.40.1
>
>
--
Regards
Yafang
Powered by blists - more mailing lists