[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251013015209.30949-2-dongml2@chinatelecom.cn>
Date: Mon, 13 Oct 2025 09:52:08 +0800
From: Menglong Dong <menglong8.dong@...il.com>
To: mhiramat@...nel.org
Cc: rostedt@...dmis.org,
mathieu.desnoyers@...icios.com,
jiang.biao@...ux.dev,
linux-kernel@...r.kernel.org,
linux-trace-kernel@...r.kernel.org
Subject: [PATCH v3 1/2] tracing: fprobe: optimization for entry only case
For now, fgraph is used for the fprobe, even if we need trace the entry
only. However, the performance of ftrace is better than fgraph, and we
can use ftrace_ops for this case.
Then performance of kprobe-multi increases from 54M to 69M. Before this
commit:
$ ./benchs/run_bench_trigger.sh kprobe-multi
kprobe-multi : 54.663 ± 0.493M/s
After this commit:
$ ./benchs/run_bench_trigger.sh kprobe-multi
kprobe-multi : 69.447 ± 0.143M/s
Mitigation is disable during the bench testing above.
Signed-off-by: Menglong Dong <dongml2@...natelecom.cn>
---
v3:
- add some comment to the rcu_read_lock() in fprobe_ftrace_entry()
v2:
- add some document for fprobe_fgraph_entry as Masami suggested
- merge the rename of fprobe_entry into current patch
- use ftrace_test_recursion_trylock() in fprobe_ftrace_entry()
---
kernel/trace/fprobe.c | 109 +++++++++++++++++++++++++++++++++++++-----
1 file changed, 98 insertions(+), 11 deletions(-)
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index 99d83c08b9e2..18319c22cec4 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -254,8 +254,9 @@ static inline int __fprobe_kprobe_handler(unsigned long ip, unsigned long parent
return ret;
}
-static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
- struct ftrace_regs *fregs)
+/* fgraph_ops callback, this processes fprobes which have exit_handler. */
+static int fprobe_fgraph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
{
unsigned long *fgraph_data = NULL;
unsigned long func = trace->func;
@@ -292,7 +293,7 @@ static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
if (node->addr != func)
continue;
fp = READ_ONCE(node->fp);
- if (fp && !fprobe_disabled(fp))
+ if (fp && !fprobe_disabled(fp) && fp->exit_handler)
fp->nmissed++;
}
return 0;
@@ -312,11 +313,11 @@ static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
if (node->addr != func)
continue;
fp = READ_ONCE(node->fp);
- if (!fp || fprobe_disabled(fp))
+ if (unlikely(!fp || fprobe_disabled(fp) || !fp->exit_handler))
continue;
data_size = fp->entry_data_size;
- if (data_size && fp->exit_handler)
+ if (data_size)
data = fgraph_data + used + FPROBE_HEADER_SIZE_IN_LONG;
else
data = NULL;
@@ -327,7 +328,7 @@ static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
ret = __fprobe_handler(func, ret_ip, fp, fregs, data);
/* If entry_handler returns !0, nmissed is not counted but skips exit_handler. */
- if (!ret && fp->exit_handler) {
+ if (!ret) {
int size_words = SIZE_IN_LONG(data_size);
if (write_fprobe_header(&fgraph_data[used], fp, size_words))
@@ -340,7 +341,7 @@ static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
/* If any exit_handler is set, data must be used. */
return used != 0;
}
-NOKPROBE_SYMBOL(fprobe_entry);
+NOKPROBE_SYMBOL(fprobe_fgraph_entry);
static void fprobe_return(struct ftrace_graph_ret *trace,
struct fgraph_ops *gops,
@@ -379,11 +380,87 @@ static void fprobe_return(struct ftrace_graph_ret *trace,
NOKPROBE_SYMBOL(fprobe_return);
static struct fgraph_ops fprobe_graph_ops = {
- .entryfunc = fprobe_entry,
+ .entryfunc = fprobe_fgraph_entry,
.retfunc = fprobe_return,
};
static int fprobe_graph_active;
+/* ftrace_ops callback, this processes fprobes which have only entry_handler. */
+static void fprobe_ftrace_entry(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *ops, struct ftrace_regs *fregs)
+{
+ struct fprobe_hlist_node *node;
+ struct rhlist_head *head, *pos;
+ struct fprobe *fp;
+ int bit;
+
+ bit = ftrace_test_recursion_trylock(ip, parent_ip);
+ if (bit < 0)
+ return;
+
+ /*
+ * ftrace_test_recursion_trylock() disables preemption, but
+ * rhltable_lookup() checks whether rcu_read_lcok is held.
+ * So we take rcu_read_lock() here.
+ */
+ rcu_read_lock();
+ head = rhltable_lookup(&fprobe_ip_table, &ip, fprobe_rht_params);
+
+ rhl_for_each_entry_rcu(node, pos, head, hlist) {
+ if (node->addr != ip)
+ break;
+ fp = READ_ONCE(node->fp);
+ if (unlikely(!fp || fprobe_disabled(fp) || fp->exit_handler))
+ continue;
+
+ if (fprobe_shared_with_kprobes(fp))
+ __fprobe_kprobe_handler(ip, parent_ip, fp, fregs, NULL);
+ else
+ __fprobe_handler(ip, parent_ip, fp, fregs, NULL);
+ }
+ rcu_read_unlock();
+ ftrace_test_recursion_unlock(bit);
+}
+NOKPROBE_SYMBOL(fprobe_ftrace_entry);
+
+static struct ftrace_ops fprobe_ftrace_ops = {
+ .func = fprobe_ftrace_entry,
+ .flags = FTRACE_OPS_FL_SAVE_REGS,
+};
+static int fprobe_ftrace_active;
+
+static int fprobe_ftrace_add_ips(unsigned long *addrs, int num)
+{
+ int ret;
+
+ lockdep_assert_held(&fprobe_mutex);
+
+ ret = ftrace_set_filter_ips(&fprobe_ftrace_ops, addrs, num, 0, 0);
+ if (ret)
+ return ret;
+
+ if (!fprobe_ftrace_active) {
+ ret = register_ftrace_function(&fprobe_ftrace_ops);
+ if (ret) {
+ ftrace_free_filter(&fprobe_ftrace_ops);
+ return ret;
+ }
+ }
+ fprobe_ftrace_active++;
+ return 0;
+}
+
+static void fprobe_ftrace_remove_ips(unsigned long *addrs, int num)
+{
+ lockdep_assert_held(&fprobe_mutex);
+
+ fprobe_ftrace_active--;
+ if (!fprobe_ftrace_active)
+ unregister_ftrace_function(&fprobe_ftrace_ops);
+ if (num)
+ ftrace_set_filter_ips(&fprobe_ftrace_ops, addrs, num, 1, 0);
+}
+
/* Add @addrs to the ftrace filter and register fgraph if needed. */
static int fprobe_graph_add_ips(unsigned long *addrs, int num)
{
@@ -498,9 +575,12 @@ static int fprobe_module_callback(struct notifier_block *nb,
} while (node == ERR_PTR(-EAGAIN));
rhashtable_walk_exit(&iter);
- if (alist.index > 0)
+ if (alist.index > 0) {
ftrace_set_filter_ips(&fprobe_graph_ops.ops,
alist.addrs, alist.index, 1, 0);
+ ftrace_set_filter_ips(&fprobe_ftrace_ops,
+ alist.addrs, alist.index, 1, 0);
+ }
mutex_unlock(&fprobe_mutex);
kfree(alist.addrs);
@@ -733,7 +813,11 @@ int register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num)
mutex_lock(&fprobe_mutex);
hlist_array = fp->hlist_array;
- ret = fprobe_graph_add_ips(addrs, num);
+ if (fp->exit_handler)
+ ret = fprobe_graph_add_ips(addrs, num);
+ else
+ ret = fprobe_ftrace_add_ips(addrs, num);
+
if (!ret) {
add_fprobe_hash(fp);
for (i = 0; i < hlist_array->size; i++) {
@@ -829,7 +913,10 @@ int unregister_fprobe(struct fprobe *fp)
}
del_fprobe_hash(fp);
- fprobe_graph_remove_ips(addrs, count);
+ if (fp->exit_handler)
+ fprobe_graph_remove_ips(addrs, count);
+ else
+ fprobe_ftrace_remove_ips(addrs, count);
kfree_rcu(hlist_array, rcu);
fp->hlist_array = NULL;
--
2.51.0
Powered by blists - more mailing lists