lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <5014869.GXAFRqVoOG@7940hx>
Date: Wed, 24 Sep 2025 09:33:16 +0800
From: menglong.dong@...ux.dev
To: Masami Hiramatsu <mhiramat@...nel.org>
Cc: rostedt@...dmis.org, mathieu.desnoyers@...icios.com,
 linux-kernel@...r.kernel.org, linux-trace-kernel@...r.kernel.org,
 bpf@...r.kernel.org
Subject: Re: [PATCH 2/2] tracing: fprobe: optimization for entry only case

On 2025/9/24 08:23 Masami Hiramatsu <mhiramat@...nel.org> wrote:
> Hi Menglong,
> 
> Please add a cover letter if you make a series of patches.
> 
> On Tue, 23 Sep 2025 17:20:01 +0800
> Menglong Dong <menglong8.dong@...il.com> wrote:
> 
> > For now, fgraph is used for the fprobe, even if we need to trace the
> > entry only. However, the performance of ftrace is better than fgraph,
> > and we can use ftrace_ops for this case.
> > 
> > Then the performance of kprobe-multi increases from 54M to 69M. Before
> > this commit:
> > 
> >   $ ./benchs/run_bench_trigger.sh kprobe-multi
> >   kprobe-multi   :   54.663 ± 0.493M/s
> > 
> > After this commit:
> > 
> >   $ ./benchs/run_bench_trigger.sh kprobe-multi
> >   kprobe-multi   :   69.447 ± 0.143M/s
> > 
> > Mitigations were disabled during the benchmark testing above.
> 
> Hmm, indeed. If it is used only for entry, it can use ftrace.
> 
> Also, please merge [1/2] and [2/2]. [1/2] is meaningless (and does
> nothing) without this change. Moreover, it changes the same file.
> 
> You can split the patch if "that cleanup is meaningful independently"
> or "that changes different subsystem/component (thus you need an Ack
> from another maintainer)".

OK, I see now :)

> 
> But basically looks good to me. Just have some nits.
> 
> > 
> > Signed-off-by: Menglong Dong <dongml2@...natelecom.cn>
> > ---
> >  kernel/trace/fprobe.c | 88 +++++++++++++++++++++++++++++++++++++++----
> >  1 file changed, 81 insertions(+), 7 deletions(-)
> > 
> > diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
> > index 1785fba367c9..de4ae075548d 100644
> > --- a/kernel/trace/fprobe.c
> > +++ b/kernel/trace/fprobe.c
> > @@ -292,7 +292,7 @@ static int fprobe_fgraph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops
> >  				if (node->addr != func)
> >  					continue;
> >  				fp = READ_ONCE(node->fp);
> > -				if (fp && !fprobe_disabled(fp))
> > +				if (fp && !fprobe_disabled(fp) && fp->exit_handler)
> >  					fp->nmissed++;
> >  			}
> >  			return 0;
> > @@ -312,11 +312,11 @@ static int fprobe_fgraph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops
> >  		if (node->addr != func)
> >  			continue;
> >  		fp = READ_ONCE(node->fp);
> > -		if (!fp || fprobe_disabled(fp))
> > +		if (unlikely(!fp || fprobe_disabled(fp) || !fp->exit_handler))
> >  			continue;
> >  
> >  		data_size = fp->entry_data_size;
> > -		if (data_size && fp->exit_handler)
> > +		if (data_size)
> >  			data = fgraph_data + used + FPROBE_HEADER_SIZE_IN_LONG;
> >  		else
> >  			data = NULL;
> > @@ -327,7 +327,7 @@ static int fprobe_fgraph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops
> >  			ret = __fprobe_handler(func, ret_ip, fp, fregs, data);
> >  
> >  		/* If entry_handler returns !0, nmissed is not counted but skips exit_handler. */
> > -		if (!ret && fp->exit_handler) {
> > +		if (!ret) {
> >  			int size_words = SIZE_IN_LONG(data_size);
> >  
> >  			if (write_fprobe_header(&fgraph_data[used], fp, size_words))
> > @@ -384,6 +384,70 @@ static struct fgraph_ops fprobe_graph_ops = {
> >  };
> >  static int fprobe_graph_active;
> >  
> 
> > +/* ftrace_ops backend (entry-only) */
>                  ^ callback ?

ACK

> 
> Also, add similar comments on top of fprobe_fgraph_entry. 
> 
> /* fgraph_ops callback, this processes fprobes which have exit_handler. */

ACK

> 
> > +static void fprobe_ftrace_entry(unsigned long ip, unsigned long parent_ip,
> > +	struct ftrace_ops *ops, struct ftrace_regs *fregs)
> > +{
> > +	struct fprobe_hlist_node *node;
> > +	struct rhlist_head *head, *pos;
> > +	struct fprobe *fp;
> > +
> > +	guard(rcu)();
> > +	head = rhltable_lookup(&fprobe_ip_table, &ip, fprobe_rht_params);
> > +
> > +	rhl_for_each_entry_rcu(node, pos, head, hlist) {
> > +		if (node->addr != ip)
> > +			break;
> > +		fp = READ_ONCE(node->fp);
> > +		if (unlikely(!fp || fprobe_disabled(fp) || fp->exit_handler))
> > +			continue;
> > +		/* entry-only path: no exit_handler nor per-call data */
> > +		if (fprobe_shared_with_kprobes(fp))
> > +			__fprobe_kprobe_handler(ip, parent_ip, fp, fregs, NULL);
> > +		else
> > +			__fprobe_handler(ip, parent_ip, fp, fregs, NULL);
> > +	}
> > +}
> > +NOKPROBE_SYMBOL(fprobe_ftrace_entry);
> 
> OK.
> 
> > +
> > +static struct ftrace_ops fprobe_ftrace_ops = {
> > +	.func	= fprobe_ftrace_entry,
> > +	.flags	= FTRACE_OPS_FL_SAVE_REGS,
> 
> [OT] I just wonder we can have FTRACE_OPS_FL_SAVE_FTRACE_REGS instead.

I'll give it a try.

Thanks!
Menglong Dong

> 
> > +};
> > +static int fprobe_ftrace_active;
> > +
> > +static int fprobe_ftrace_add_ips(unsigned long *addrs, int num)
> > +{
> > +	int ret;
> > +
> > +	lockdep_assert_held(&fprobe_mutex);
> > +
> > +	ret = ftrace_set_filter_ips(&fprobe_ftrace_ops, addrs, num, 0, 0);
> > +	if (ret)
> > +		return ret;
> > +
> > +	if (!fprobe_ftrace_active) {
> > +		ret = register_ftrace_function(&fprobe_ftrace_ops);
> > +		if (ret) {
> > +			ftrace_free_filter(&fprobe_ftrace_ops);
> > +			return ret;
> > +		}
> > +	}
> > +	fprobe_ftrace_active++;
> > +	return 0;
> > +}
> > +
> > +static void fprobe_ftrace_remove_ips(unsigned long *addrs, int num)
> > +{
> > +	lockdep_assert_held(&fprobe_mutex);
> > +
> > +	fprobe_ftrace_active--;
> > +	if (!fprobe_ftrace_active)
> > +		unregister_ftrace_function(&fprobe_ftrace_ops);
> > +	if (num)
> > +		ftrace_set_filter_ips(&fprobe_ftrace_ops, addrs, num, 1, 0);
> > +}
> > +
> >  /* Add @addrs to the ftrace filter and register fgraph if needed. */
> >  static int fprobe_graph_add_ips(unsigned long *addrs, int num)
> >  {
> > @@ -500,9 +564,12 @@ static int fprobe_module_callback(struct notifier_block *nb,
> >  	} while (node == ERR_PTR(-EAGAIN));
> >  	rhashtable_walk_exit(&iter);
> >  
> > -	if (alist.index < alist.size && alist.index > 0)
> > +	if (alist.index < alist.size && alist.index > 0) {
> 
> Oops, here is my bug. Let me fix it.
> 
> Thank you,
> 
> >  		ftrace_set_filter_ips(&fprobe_graph_ops.ops,
> >  				      alist.addrs, alist.index, 1, 0);
> > +		ftrace_set_filter_ips(&fprobe_ftrace_ops,
> > +				      alist.addrs, alist.index, 1, 0);
> > +	}
> >  	mutex_unlock(&fprobe_mutex);
> >  
> >  	kfree(alist.addrs);
> > @@ -735,7 +802,11 @@ int register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num)
> >  	mutex_lock(&fprobe_mutex);
> >  
> >  	hlist_array = fp->hlist_array;
> > -	ret = fprobe_graph_add_ips(addrs, num);
> > +	if (fp->exit_handler)
> > +		ret = fprobe_graph_add_ips(addrs, num);
> > +	else
> > +		ret = fprobe_ftrace_add_ips(addrs, num);
> > +
> >  	if (!ret) {
> >  		add_fprobe_hash(fp);
> >  		for (i = 0; i < hlist_array->size; i++) {
> > @@ -831,7 +902,10 @@ int unregister_fprobe(struct fprobe *fp)
> >  	}
> >  	del_fprobe_hash(fp);
> >  
> > -	fprobe_graph_remove_ips(addrs, count);
> > +	if (fp->exit_handler)
> > +		fprobe_graph_remove_ips(addrs, count);
> > +	else
> > +		fprobe_ftrace_remove_ips(addrs, count);
> >  
> >  	kfree_rcu(hlist_array, rcu);
> >  	fp->hlist_array = NULL;
> > -- 
> > 2.51.0
> > 
> 
> 
> -- 
> Masami Hiramatsu (Google) <mhiramat@...nel.org>
> 
> 





Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ