[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <0ee99715-7877-4d94-9ae5-5b0e23f34aae@paulmck-laptop>
Date: Mon, 9 Jun 2025 10:39:52 -0700
From: "Paul E. McKenney" <paulmck@...nel.org>
To: Steven Rostedt <rostedt@...dmis.org>
Cc: LKML <linux-kernel@...r.kernel.org>,
Linux Trace Kernel <linux-trace-kernel@...r.kernel.org>,
Masami Hiramatsu <mhiramat@...nel.org>,
Mathieu Desnoyers <mathieu.desnoyers@...icios.com>
Subject: Re: [PATCH] tracing: Use queue_rcu_work() to free filters
On Mon, Jun 09, 2025 at 01:17:32PM -0400, Steven Rostedt wrote:
> From: Steven Rostedt <rostedt@...dmis.org>
>
> Freeing of filters requires to wait for both an RCU grace period as well as
> a RCU task trace wait period after they have been detached from their
> lists. The trace task period can be quite large so the freeing of the
> filters was moved to use the call_rcu*() routines. The problem with that is
> that the callback functions of call_rcu*() is done from a soft irq and can
> cause latencies if the callback takes a bit of time.
>
> The filters are freed per event in a system and the syscalls system
> contains an event per system call, which can be over 700 events. Freeing 700
> filters in a bottom half is undesirable.
>
> Instead, move the freeing to use queue_rcu_work() which is done in task
> context.
>
> Link: https://lore.kernel.org/all/9a2f0cd0-1561-4206-8966-f93ccd25927f@paulmck-laptop/
>
> Fixes: a9d0aab5eb33 ("tracing: Fix regression of filter waiting a long time on RCU synchronization")
> Suggested-by: "Paul E. McKenney" <paulmck@...nel.org>
> Signed-off-by: Steven Rostedt (Google) <rostedt@...dmis.org>
Thank you, and looks good to me.
Reviewed-by: Paul E. McKenney <paulmck@...nel.org>
> ---
> Note, I added a Fixes tag but not a stable tag as this is a nice-to-have
> but doesn't hit the level of critical fix to backport or add to an rc
> release. If someone wants to back port it, feel free.
>
> kernel/trace/trace_events_filter.c | 28 ++++++++++++++++++++--------
> 1 file changed, 20 insertions(+), 8 deletions(-)
>
> diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
> index ea8b364b6818..b6fe8167ef01 100644
> --- a/kernel/trace/trace_events_filter.c
> +++ b/kernel/trace/trace_events_filter.c
> @@ -1344,13 +1344,14 @@ struct filter_list {
>
> struct filter_head {
> struct list_head list;
> - struct rcu_head rcu;
> + union {
> + struct rcu_head rcu;
> + struct rcu_work rwork;
> + };
> };
>
> -
> -static void free_filter_list(struct rcu_head *rhp)
> +static void free_filter_list(struct filter_head *filter_list)
> {
> - struct filter_head *filter_list = container_of(rhp, struct filter_head, rcu);
> struct filter_list *filter_item, *tmp;
>
> list_for_each_entry_safe(filter_item, tmp, &filter_list->list, list) {
> @@ -1361,9 +1362,20 @@ static void free_filter_list(struct rcu_head *rhp)
> kfree(filter_list);
> }
>
> +static void free_filter_list_work(struct work_struct *work)
> +{
> + struct filter_head *filter_list;
> +
> + filter_list = container_of(to_rcu_work(work), struct filter_head, rwork);
> + free_filter_list(filter_list);
> +}
> +
> static void free_filter_list_tasks(struct rcu_head *rhp)
> {
> - call_rcu(rhp, free_filter_list);
> + struct filter_head *filter_list = container_of(rhp, struct filter_head, rcu);
> +
> + INIT_RCU_WORK(&filter_list->rwork, free_filter_list_work);
> + queue_rcu_work(system_wq, &filter_list->rwork);
> }
>
> /*
> @@ -1462,7 +1474,7 @@ static void filter_free_subsystem_filters(struct trace_subsystem_dir *dir,
> tracepoint_synchronize_unregister();
>
> if (head)
> - free_filter_list(&head->rcu);
> + free_filter_list(head);
>
> list_for_each_entry(file, &tr->events, list) {
> if (file->system != dir || !file->filter)
> @@ -2307,7 +2319,7 @@ static int process_system_preds(struct trace_subsystem_dir *dir,
> return 0;
> fail:
> /* No call succeeded */
> - free_filter_list(&filter_list->rcu);
> + free_filter_list(filter_list);
> parse_error(pe, FILT_ERR_BAD_SUBSYS_FILTER, 0);
> return -EINVAL;
> fail_mem:
> @@ -2317,7 +2329,7 @@ static int process_system_preds(struct trace_subsystem_dir *dir,
> if (!fail)
> delay_free_filter(filter_list);
> else
> - free_filter_list(&filter_list->rcu);
> + free_filter_list(filter_list);
>
> return -ENOMEM;
> }
> --
> 2.47.2
>
Powered by blists - more mailing lists