Message-Id: <20251023075239.12ec605954eda85c198f1e20@kernel.org>
Date: Thu, 23 Oct 2025 07:52:39 +0900
From: Masami Hiramatsu (Google) <mhiramat@...nel.org>
To: Menglong Dong <menglong8.dong@...il.com>
Cc: rostedt@...dmis.org, mathieu.desnoyers@...icios.com,
jiang.biao@...ux.dev, linux-kernel@...r.kernel.org,
linux-trace-kernel@...r.kernel.org
Subject: Re: [PATCH v4 1/2] tracing: fprobe: optimization for entry only case
On Wed, 15 Oct 2025 16:32:37 +0800
Menglong Dong <menglong8.dong@...il.com> wrote:
> Currently, fgraph is used for fprobe even if we only need to trace the
> function entry. However, ftrace performs better than fgraph, so we can
> use ftrace_ops for this case.
>
> With this change, the performance of kprobe-multi increases from 54M/s
> to 69M/s. Before this commit:
>
> $ ./benchs/run_bench_trigger.sh kprobe-multi
> kprobe-multi : 54.663 ± 0.493M/s
>
> After this commit:
>
> $ ./benchs/run_bench_trigger.sh kprobe-multi
> kprobe-multi : 69.447 ± 0.143M/s
>
> Mitigations are disabled during the benchmark runs above.
>
Looks good to me. Thanks!
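
One note for readers following the thread: the fast path added here only
covers fprobes that set entry_handler and leave exit_handler NULL, which
is what fprobe_is_ftrace() keys on below. A minimal sketch of such a
user, assuming the API declared in include/linux/fprobe.h (the probed
symbol and all the demo_* names are purely illustrative):

  #include <linux/module.h>
  #include <linux/fprobe.h>

  /* Entry-only handler: since no exit_handler is set, this fprobe is
   * attached via the ftrace_ops path added by this patch rather than
   * through fgraph. */
  static int demo_entry(struct fprobe *fp, unsigned long entry_ip,
			unsigned long ret_ip, struct ftrace_regs *fregs,
			void *entry_data)
  {
	pr_info("hit %pS\n", (void *)entry_ip);
	return 0;
  }

  static struct fprobe demo_fprobe = {
	.entry_handler	= demo_entry,
	/* .exit_handler deliberately left NULL */
  };

  static int __init demo_init(void)
  {
	/* "kernel_clone" is just an example filter pattern */
	return register_fprobe(&demo_fprobe, "kernel_clone", NULL);
  }

  static void __exit demo_exit(void)
  {
	unregister_fprobe(&demo_fprobe);
  }

  module_init(demo_init);
  module_exit(demo_exit);
  MODULE_LICENSE("GPL");

Anything that also sets exit_handler keeps going through the fgraph
path, since ftrace_ops alone cannot hook the function return.
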
> Signed-off-by: Menglong Dong <dongml2@...natelecom.cn>
> ---
> v4:
> - fallback to fgraph if FTRACE_OPS_FL_SAVE_REGS not supported
>
> v3:
> - add some comment to the rcu_read_lock() in fprobe_ftrace_entry()
>
> v2:
> - add some document for fprobe_fgraph_entry as Masami suggested
> - merge the rename of fprobe_entry into current patch
> - use ftrace_test_recursion_trylock() in fprobe_ftrace_entry()
> ---
> kernel/trace/fprobe.c | 128 +++++++++++++++++++++++++++++++++++++++---
> 1 file changed, 119 insertions(+), 9 deletions(-)
>
> diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
> index 99d83c08b9e2..ecd623eef68b 100644
> --- a/kernel/trace/fprobe.c
> +++ b/kernel/trace/fprobe.c
> @@ -254,8 +254,106 @@ static inline int __fprobe_kprobe_handler(unsigned long ip, unsigned long parent
> return ret;
> }
>
> -static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
> - struct ftrace_regs *fregs)
> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> +/* ftrace_ops callback, this processes fprobes which have only entry_handler. */
> +static void fprobe_ftrace_entry(unsigned long ip, unsigned long parent_ip,
> + struct ftrace_ops *ops, struct ftrace_regs *fregs)
> +{
> + struct fprobe_hlist_node *node;
> + struct rhlist_head *head, *pos;
> + struct fprobe *fp;
> + int bit;
> +
> + bit = ftrace_test_recursion_trylock(ip, parent_ip);
> + if (bit < 0)
> + return;
> +
> + /*
> + * ftrace_test_recursion_trylock() disables preemption, but
> + * rhltable_lookup() checks whether rcu_read_lock() is held.
> + * So we take rcu_read_lock() here.
> + */
> + rcu_read_lock();
> + head = rhltable_lookup(&fprobe_ip_table, &ip, fprobe_rht_params);
> +
> + rhl_for_each_entry_rcu(node, pos, head, hlist) {
> + if (node->addr != ip)
> + break;
> + fp = READ_ONCE(node->fp);
> + if (unlikely(!fp || fprobe_disabled(fp) || fp->exit_handler))
> + continue;
> +
> + if (fprobe_shared_with_kprobes(fp))
> + __fprobe_kprobe_handler(ip, parent_ip, fp, fregs, NULL);
> + else
> + __fprobe_handler(ip, parent_ip, fp, fregs, NULL);
> + }
> + rcu_read_unlock();
> + ftrace_test_recursion_unlock(bit);
> +}
> +NOKPROBE_SYMBOL(fprobe_ftrace_entry);
> +
> +static struct ftrace_ops fprobe_ftrace_ops = {
> + .func = fprobe_ftrace_entry,
> + .flags = FTRACE_OPS_FL_SAVE_REGS,
> +};
> +static int fprobe_ftrace_active;
> +
> +static int fprobe_ftrace_add_ips(unsigned long *addrs, int num)
> +{
> + int ret;
> +
> + lockdep_assert_held(&fprobe_mutex);
> +
> + ret = ftrace_set_filter_ips(&fprobe_ftrace_ops, addrs, num, 0, 0);
> + if (ret)
> + return ret;
> +
> + if (!fprobe_ftrace_active) {
> + ret = register_ftrace_function(&fprobe_ftrace_ops);
> + if (ret) {
> + ftrace_free_filter(&fprobe_ftrace_ops);
> + return ret;
> + }
> + }
> + fprobe_ftrace_active++;
> + return 0;
> +}
> +
> +static void fprobe_ftrace_remove_ips(unsigned long *addrs, int num)
> +{
> + lockdep_assert_held(&fprobe_mutex);
> +
> + fprobe_ftrace_active--;
> + if (!fprobe_ftrace_active)
> + unregister_ftrace_function(&fprobe_ftrace_ops);
> + if (num)
> + ftrace_set_filter_ips(&fprobe_ftrace_ops, addrs, num, 1, 0);
> +}
> +
> +static bool fprobe_is_ftrace(struct fprobe *fp)
> +{
> + return !fp->exit_handler;
> +}
> +#else
> +static int fprobe_ftrace_add_ips(unsigned long *addrs, int num)
> +{
> + return -ENOENT;
> +}
> +
> +static void fprobe_ftrace_remove_ips(unsigned long *addrs, int num)
> +{
> +}
> +
> +static bool fprobe_is_ftrace(struct fprobe *fp)
> +{
> + return false;
> +}
> +#endif
> +
> +/* fgraph_ops callback, this processes fprobes which have exit_handler. */
> +static int fprobe_fgraph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
> + struct ftrace_regs *fregs)
> {
> unsigned long *fgraph_data = NULL;
> unsigned long func = trace->func;
> @@ -292,7 +390,7 @@ static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
> if (node->addr != func)
> continue;
> fp = READ_ONCE(node->fp);
> - if (fp && !fprobe_disabled(fp))
> + if (fp && !fprobe_disabled(fp) && !fprobe_is_ftrace(fp))
> fp->nmissed++;
> }
> return 0;
> @@ -312,7 +410,7 @@ static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
> if (node->addr != func)
> continue;
> fp = READ_ONCE(node->fp);
> - if (!fp || fprobe_disabled(fp))
> + if (unlikely(!fp || fprobe_disabled(fp) || fprobe_is_ftrace(fp)))
> continue;
>
> data_size = fp->entry_data_size;
> @@ -340,7 +438,7 @@ static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
> /* If any exit_handler is set, data must be used. */
> return used != 0;
> }
> -NOKPROBE_SYMBOL(fprobe_entry);
> +NOKPROBE_SYMBOL(fprobe_fgraph_entry);
>
> static void fprobe_return(struct ftrace_graph_ret *trace,
> struct fgraph_ops *gops,
> @@ -379,7 +477,7 @@ static void fprobe_return(struct ftrace_graph_ret *trace,
> NOKPROBE_SYMBOL(fprobe_return);
>
> static struct fgraph_ops fprobe_graph_ops = {
> - .entryfunc = fprobe_entry,
> + .entryfunc = fprobe_fgraph_entry,
> .retfunc = fprobe_return,
> };
> static int fprobe_graph_active;
> @@ -498,9 +596,14 @@ static int fprobe_module_callback(struct notifier_block *nb,
> } while (node == ERR_PTR(-EAGAIN));
> rhashtable_walk_exit(&iter);
>
> - if (alist.index > 0)
> + if (alist.index > 0) {
> ftrace_set_filter_ips(&fprobe_graph_ops.ops,
> alist.addrs, alist.index, 1, 0);
> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> + ftrace_set_filter_ips(&fprobe_ftrace_ops,
> + alist.addrs, alist.index, 1, 0);
> +#endif
> + }
> mutex_unlock(&fprobe_mutex);
>
> kfree(alist.addrs);
> @@ -733,7 +836,11 @@ int register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num)
> mutex_lock(&fprobe_mutex);
>
> hlist_array = fp->hlist_array;
> - ret = fprobe_graph_add_ips(addrs, num);
> + if (fprobe_is_ftrace(fp))
> + ret = fprobe_ftrace_add_ips(addrs, num);
> + else
> + ret = fprobe_graph_add_ips(addrs, num);
> +
> if (!ret) {
> add_fprobe_hash(fp);
> for (i = 0; i < hlist_array->size; i++) {
> @@ -829,7 +936,10 @@ int unregister_fprobe(struct fprobe *fp)
> }
> del_fprobe_hash(fp);
>
> - fprobe_graph_remove_ips(addrs, count);
> + if (fprobe_is_ftrace(fp))
> + fprobe_ftrace_remove_ips(addrs, count);
> + else
> + fprobe_graph_remove_ips(addrs, count);
>
> kfree_rcu(hlist_array, rcu);
> fp->hlist_array = NULL;
> --
> 2.51.0
>
--
Masami Hiramatsu (Google) <mhiramat@...nel.org>