Message-ID: <20230517193549.GA401@W11-BEAU-MD.localdomain>
Date: Wed, 17 May 2023 12:36:02 -0700
From: Beau Belgrave <beaub@...ux.microsoft.com>
To: Linus Torvalds <torvalds@...ux-foundation.org>
Cc: Steven Rostedt <rostedt@...dmis.org>,
Alexei Starovoitov <alexei.starovoitov@...il.com>,
Masami Hiramatsu <mhiramat@...nel.org>,
LKML <linux-kernel@...r.kernel.org>,
linux-trace-kernel@...r.kernel.org,
Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>,
Andrii Nakryiko <andrii@...nel.org>, bpf <bpf@...r.kernel.org>,
David Vernet <void@...ifault.com>, dthaler@...rosoft.com,
brauner@...nel.org, hch@...radead.org
Subject: Re: [PATCH] tracing/user_events: Run BPF program if attached
On Wed, May 17, 2023 at 12:26:44PM -0700, Linus Torvalds wrote:
> On Wed, May 17, 2023 at 12:08 PM Beau Belgrave
> <beaub@...ux.microsoft.com> wrote:
> >
> > user_event_mm_dup() puts a new mm into the global list before the
> > enablers list is fully populated.
>
> Then that simply needs to be fixed.
>
Agreed.
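
To spell out the race: the mm was published inside user_event_mm_create()
(the list_add_rcu() plus setting t->user_event_mm) before the dup loop had
copied the enablers over. Roughly (a simplified sketch from memory, not
the exact code):

	mm = user_event_mm_create(t);	/* publish happens in here */

	rcu_read_lock();

	list_for_each_entry_rcu(enabler, &old_mm->enablers, link)
		user_event_enabler_dup(enabler, mm);	/* grows mm->enablers */

	rcu_read_unlock();

	/* Anyone walking user_event_mms between the publish and this
	 * point sees a half-built enablers list.
	 */
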
> user_event_mm_dup() should not add the mm into the global list until
> it is *done*.
>
> Because if it makes that list visible to others in a half-way state,
> then it needs to use the proper locking and use event_mutex.
>
> You can't say "this is so critical that we can't take a lock" and then
> use that as an excuse to simply write buggy code.
>
Didn't mean that, I just have more work to do than just the RCU walks.
I will fix these up.
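
For the walks, the plan is the usual reader-side pattern (a sketch only,
not the final fix):

	rcu_read_lock();

	list_for_each_entry_rcu(mm, &user_event_mms, link) {
		/* Read-only access; take a refcount before using mm
		 * outside of the RCU read section.
		 */
	}

	rcu_read_unlock();
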
> Either take the lock in user_event_mm_dup(), or make sure that the
> data structures are all completely local so that no lock is necessary.
>
> Here's a COMPLETELY UNTESTED patch that just separates out the notion
> of "allocate" and "attach".
>
> NOTE NOTE NOTE! I am *not* claiming this patch works. It builds for
> me. It looks right. It seems like it's the right thing to do. But it
> might have some issues.
>
> With this, the newly dup'ed list is attached to the process only after
> it is done, so nobody can see the list being built up.
>
> Also note that this does NOT fix the incorrect RCU walks.
>
> Linus
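
The split looks right to me. With it, both call sites end up with the
same shape, where attach is the single publish point (condensed from the
diff below):

	mm = user_event_mm_alloc(t);

	if (!mm)
		goto error;

	/* ... fully populate mm while it is still private ... */

	user_event_mm_attach(mm, t);	/* first point of global visibility */
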
Thanks for the patch and feedback!
-Beau
> kernel/trace/trace_events_user.c | 29 ++++++++++++++++++-----------
> 1 file changed, 18 insertions(+), 11 deletions(-)
>
> diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
> index b1ecd7677642..b2aecbfbbd24 100644
> --- a/kernel/trace/trace_events_user.c
> +++ b/kernel/trace/trace_events_user.c
> @@ -538,10 +538,9 @@ static struct user_event_mm *user_event_mm_get_all(struct user_event *user)
> return found;
> }
>
> -static struct user_event_mm *user_event_mm_create(struct task_struct *t)
> +static struct user_event_mm *user_event_mm_alloc(struct task_struct *t)
> {
> struct user_event_mm *user_mm;
> - unsigned long flags;
>
> user_mm = kzalloc(sizeof(*user_mm), GFP_KERNEL_ACCOUNT);
>
> @@ -553,12 +552,6 @@ static struct user_event_mm *user_event_mm_create(struct task_struct *t)
> refcount_set(&user_mm->refcnt, 1);
> refcount_set(&user_mm->tasks, 1);
>
> - spin_lock_irqsave(&user_event_mms_lock, flags);
> - list_add_rcu(&user_mm->link, &user_event_mms);
> - spin_unlock_irqrestore(&user_event_mms_lock, flags);
> -
> - t->user_event_mm = user_mm;
> -
> /*
> * The lifetime of the memory descriptor can slightly outlast
> * the task lifetime if a ref to the user_event_mm is taken
> @@ -572,6 +565,17 @@ static struct user_event_mm *user_event_mm_create(struct task_struct *t)
> return user_mm;
> }
>
> +static void user_event_mm_attach(struct user_event_mm *user_mm, struct task_struct *t)
> +{
> + unsigned long flags;
> +
> + spin_lock_irqsave(&user_event_mms_lock, flags);
> + list_add_rcu(&user_mm->link, &user_event_mms);
> + spin_unlock_irqrestore(&user_event_mms_lock, flags);
> +
> + t->user_event_mm = user_mm;
> +}
> +
> static struct user_event_mm *current_user_event_mm(void)
> {
> struct user_event_mm *user_mm = current->user_event_mm;
> @@ -579,10 +583,12 @@ static struct user_event_mm *current_user_event_mm(void)
> if (user_mm)
> goto inc;
>
> - user_mm = user_event_mm_create(current);
> + user_mm = user_event_mm_alloc(current);
>
> if (!user_mm)
> goto error;
> +
> + user_event_mm_attach(user_mm, current);
> inc:
> refcount_inc(&user_mm->refcnt);
> error:
> @@ -670,7 +676,7 @@ void user_event_mm_remove(struct task_struct *t)
>
> void user_event_mm_dup(struct task_struct *t, struct user_event_mm *old_mm)
> {
> - struct user_event_mm *mm = user_event_mm_create(t);
> + struct user_event_mm *mm = user_event_mm_alloc(t);
> struct user_event_enabler *enabler;
>
> if (!mm)
> @@ -684,10 +690,11 @@ void user_event_mm_dup(struct task_struct *t, struct user_event_mm *old_mm)
>
> rcu_read_unlock();
>
> + user_event_mm_attach(mm, t);
> return;
> error:
> rcu_read_unlock();
> - user_event_mm_remove(t);
> + user_event_mm_destroy(mm);
> }
>
> static bool current_user_event_enabler_exists(unsigned long uaddr,
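
One detail worth calling out in the error path above: since attach now
only happens after the enabler loop succeeds, the failure case frees the
still-private mm directly via user_event_mm_destroy(mm) instead of going
through user_event_mm_remove(t), which would have expected the mm to
already be attached to the task.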