[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAEf4Bzb3xjTH7Qh8c_j95jEr4fNxBgG11a0sCe4hoF9chwUtYg@mail.gmail.com>
Date: Fri, 18 Oct 2024 11:22:00 -0700
From: Andrii Nakryiko <andrii.nakryiko@...il.com>
To: Peter Zijlstra <peterz@...radead.org>
Cc: Andrii Nakryiko <andrii@...nel.org>, linux-trace-kernel@...r.kernel.org, oleg@...hat.com,
rostedt@...dmis.org, mhiramat@...nel.org, mingo@...nel.org,
bpf@...r.kernel.org, linux-kernel@...r.kernel.org, jolsa@...nel.org,
paulmck@...nel.org
Subject: Re: [PATCH v2 tip/perf/core 1/2] uprobes: allow put_uprobe() from
non-sleepable softirq context
On Fri, Oct 18, 2024 at 1:26 AM Peter Zijlstra <peterz@...radead.org> wrote:
>
> On Mon, Oct 07, 2024 at 05:25:55PM -0700, Andrii Nakryiko wrote:
> > Currently put_uprobe() might trigger mutex_lock()/mutex_unlock(), which
> > makes it unsuitable to be called from more restricted context like softirq.
>
> This is delayed_uprobe_lock, right?
Not just delated_uprobe_lock, there is also uprobes_treelock (I forgot
to update the commit message to mention that). Oleg had concerns (see
[0]) with that being taken from the timer thread, so I just moved all
of the locking into deferred work callback.
[0] https://lore.kernel.org/linux-trace-kernel/20240915144910.GA27726@redhat.com/
>
> So can't we do something like so instead?
I'll need to look at this more thoroughly (and hopefully Oleg will get
a chance as well), dropping lock from delayed_ref_ctr_inc() is a bit
scary, but might be ok.
But generally speaking, what's your concern with doing deferred work
in put_uprobe()? It's not a hot path by any means, worst case we'll
have maybe thousands of uprobes attached/detached.
>
> ---
> kernel/events/uprobes.c | 40 +++++++++++++++++++++++-----------------
> 1 file changed, 23 insertions(+), 17 deletions(-)
>
> diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
> index 2a0059464383..d17a9046de35 100644
> --- a/kernel/events/uprobes.c
> +++ b/kernel/events/uprobes.c
> @@ -83,9 +83,11 @@ struct delayed_uprobe {
> struct list_head list;
> struct uprobe *uprobe;
> struct mm_struct *mm;
> + struct rcu_head rcu;
> };
>
> -static DEFINE_MUTEX(delayed_uprobe_lock);
> +/* XXX global state; use per mm list instead ? */
> +static DEFINE_SPINLOCK(delayed_uprobe_lock);
> static LIST_HEAD(delayed_uprobe_list);
>
> /*
> @@ -289,9 +291,11 @@ delayed_uprobe_check(struct uprobe *uprobe, struct mm_struct *mm)
> {
> struct delayed_uprobe *du;
>
> - list_for_each_entry(du, &delayed_uprobe_list, list)
> + guard(rcu)();
> + list_for_each_entry_rcu(du, &delayed_uprobe_list, list) {
> if (du->uprobe == uprobe && du->mm == mm)
> return du;
> + }
> return NULL;
> }
>
> @@ -308,7 +312,8 @@ static int delayed_uprobe_add(struct uprobe *uprobe, struct mm_struct *mm)
>
> du->uprobe = uprobe;
> du->mm = mm;
> - list_add(&du->list, &delayed_uprobe_list);
> + scoped_guard(spinlock, &delayed_uprobe_lock)
> + list_add_rcu(&du->list, &delayed_uprobe_list);
> return 0;
> }
>
> @@ -316,19 +321,21 @@ static void delayed_uprobe_delete(struct delayed_uprobe *du)
> {
> if (WARN_ON(!du))
> return;
> - list_del(&du->list);
> - kfree(du);
> + scoped_guard(spinlock, &delayed_uprobe_lock)
> + list_del(&du->list);
> + kfree_rcu(du, rcu);
> }
>
> static void delayed_uprobe_remove(struct uprobe *uprobe, struct mm_struct *mm)
> {
> - struct list_head *pos, *q;
> struct delayed_uprobe *du;
> + struct list_head *pos;
>
> if (!uprobe && !mm)
> return;
>
> - list_for_each_safe(pos, q, &delayed_uprobe_list) {
> + guard(rcu)();
> + list_for_each_rcu(pos, &delayed_uprobe_list) {
> du = list_entry(pos, struct delayed_uprobe, list);
>
> if (uprobe && du->uprobe != uprobe)
> @@ -434,12 +441,10 @@ static int update_ref_ctr(struct uprobe *uprobe, struct mm_struct *mm,
> return ret;
> }
>
> - mutex_lock(&delayed_uprobe_lock);
> if (d > 0)
> ret = delayed_uprobe_add(uprobe, mm);
> else
> delayed_uprobe_remove(uprobe, mm);
> - mutex_unlock(&delayed_uprobe_lock);
>
> return ret;
> }
> @@ -645,9 +650,7 @@ static void put_uprobe(struct uprobe *uprobe)
> * gets called, we don't get a chance to remove uprobe from
> * delayed_uprobe_list from remove_breakpoint(). Do it here.
> */
> - mutex_lock(&delayed_uprobe_lock);
> delayed_uprobe_remove(uprobe, NULL);
> - mutex_unlock(&delayed_uprobe_lock);
>
> call_rcu_tasks_trace(&uprobe->rcu, uprobe_free_rcu);
> }
> @@ -1350,13 +1353,18 @@ static void build_probe_list(struct inode *inode,
> /* @vma contains reference counter, not the probed instruction. */
> static int delayed_ref_ctr_inc(struct vm_area_struct *vma)
> {
> - struct list_head *pos, *q;
> struct delayed_uprobe *du;
> + struct list_head *pos;
> unsigned long vaddr;
> int ret = 0, err = 0;
>
> - mutex_lock(&delayed_uprobe_lock);
> - list_for_each_safe(pos, q, &delayed_uprobe_list) {
> + /*
> + * delayed_uprobe_list is added to when the ref_ctr is not mapped
> + * and is consulted (this function) when adding maps. And since
> + * mmap_lock serializes these, it is not possible miss an entry.
> + */
> + guard(rcu)();
> + list_for_each_rcu(pos, &delayed_uprobe_list) {
> du = list_entry(pos, struct delayed_uprobe, list);
>
> if (du->mm != vma->vm_mm ||
> @@ -1370,9 +1378,9 @@ static int delayed_ref_ctr_inc(struct vm_area_struct *vma)
> if (!err)
> err = ret;
> }
> +
> delayed_uprobe_delete(du);
> }
> - mutex_unlock(&delayed_uprobe_lock);
> return err;
> }
>
> @@ -1596,9 +1604,7 @@ void uprobe_clear_state(struct mm_struct *mm)
> {
> struct xol_area *area = mm->uprobes_state.xol_area;
>
> - mutex_lock(&delayed_uprobe_lock);
> delayed_uprobe_remove(NULL, mm);
> - mutex_unlock(&delayed_uprobe_lock);
>
> if (!area)
> return;
Powered by blists - more mailing lists