[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAEf4Bzb3mCWK5St51bRDnQ1b-aTj=2w6bi6MkZydW48s=R+CCA@mail.gmail.com>
Date: Thu, 29 Aug 2024 16:31:18 -0700
From: Andrii Nakryiko <andrii.nakryiko@...il.com>
To: Jiri Olsa <olsajiri@...il.com>
Cc: Andrii Nakryiko <andrii@...nel.org>, linux-trace-kernel@...r.kernel.org,
peterz@...radead.org, oleg@...hat.com, rostedt@...dmis.org,
mhiramat@...nel.org, bpf@...r.kernel.org, linux-kernel@...r.kernel.org,
paulmck@...nel.org, willy@...radead.org, surenb@...gle.com,
akpm@...ux-foundation.org, linux-mm@...ck.org
Subject: Re: [PATCH v4 4/8] uprobes: traverse uprobe's consumer list locklessly
under SRCU protection
On Thu, Aug 29, 2024 at 4:10 PM Jiri Olsa <olsajiri@...il.com> wrote:
>
> On Thu, Aug 29, 2024 at 11:37:37AM -0700, Andrii Nakryiko wrote:
> > uprobe->register_rwsem is one of a few big bottlenecks to scalability of
> > uprobes, so we need to get rid of it to improve uprobe performance and
> > multi-CPU scalability.
> >
> > First, we turn uprobe's consumer list into a typical doubly-linked list
> > and utilize existing RCU-aware helpers for traversing such lists, as
> > well as for adding and removing elements from them.
> >
> > For entry uprobes we already have SRCU protection active since before
> > uprobe lookup. For uretprobe we keep refcount, guaranteeing that uprobe
> > won't go away from under us, but we add SRCU protection around consumer
> > list traversal.
> >
> > Lastly, to keep handler_chain()'s UPROBE_HANDLER_REMOVE handling simple,
> > we remember whether any removal was requested during handler calls, but
> > then we double-check the decision under a proper register_rwsem using
> > consumers' filter callbacks. Handler removal is very rare, so this extra
> > lock won't hurt performance, overall, but we also avoid the need for any
> > extra protection (e.g., seqcount locks).
> >
> > Signed-off-by: Andrii Nakryiko <andrii@...nel.org>
> > ---
> > include/linux/uprobes.h | 2 +-
> > kernel/events/uprobes.c | 104 +++++++++++++++++++++++-----------------
> > 2 files changed, 62 insertions(+), 44 deletions(-)
> >
> > diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
> > index 9cf0dce62e4c..29c935b0d504 100644
> > --- a/include/linux/uprobes.h
> > +++ b/include/linux/uprobes.h
> > @@ -35,7 +35,7 @@ struct uprobe_consumer {
> > struct pt_regs *regs);
> > bool (*filter)(struct uprobe_consumer *self, struct mm_struct *mm);
> >
> > - struct uprobe_consumer *next;
> > + struct list_head cons_node;
> > };
> >
> > #ifdef CONFIG_UPROBES
> > diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
> > index 8bdcdc6901b2..97e58d160647 100644
> > --- a/kernel/events/uprobes.c
> > +++ b/kernel/events/uprobes.c
> > @@ -59,7 +59,7 @@ struct uprobe {
> > struct rw_semaphore register_rwsem;
> > struct rw_semaphore consumer_rwsem;
> > struct list_head pending_list;
> > - struct uprobe_consumer *consumers;
> > + struct list_head consumers;
> > struct inode *inode; /* Also hold a ref to inode */
> > struct rcu_head rcu;
> > loff_t offset;
> > @@ -783,6 +783,7 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset,
> > uprobe->inode = inode;
> > uprobe->offset = offset;
> > uprobe->ref_ctr_offset = ref_ctr_offset;
> > + INIT_LIST_HEAD(&uprobe->consumers);
> > init_rwsem(&uprobe->register_rwsem);
> > init_rwsem(&uprobe->consumer_rwsem);
> > RB_CLEAR_NODE(&uprobe->rb_node);
> > @@ -808,32 +809,19 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset,
> > static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
> > {
> > down_write(&uprobe->consumer_rwsem);
> > - uc->next = uprobe->consumers;
> > - uprobe->consumers = uc;
> > + list_add_rcu(&uc->cons_node, &uprobe->consumers);
> > up_write(&uprobe->consumer_rwsem);
> > }
> >
> > /*
> > * For uprobe @uprobe, delete the consumer @uc.
> > - * Return true if the @uc is deleted successfully
> > - * or return false.
> > + * Should never be called with consumer that's not part of @uprobe->consumers.
> > */
> > -static bool consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc)
> > +static void consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc)
> > {
> > - struct uprobe_consumer **con;
> > - bool ret = false;
> > -
> > down_write(&uprobe->consumer_rwsem);
> > - for (con = &uprobe->consumers; *con; con = &(*con)->next) {
> > - if (*con == uc) {
> > - *con = uc->next;
> > - ret = true;
> > - break;
> > - }
> > - }
> > + list_del_rcu(&uc->cons_node);
> > up_write(&uprobe->consumer_rwsem);
> > -
> > - return ret;
> > }
> >
> > static int __copy_insn(struct address_space *mapping, struct file *filp,
> > @@ -929,7 +917,8 @@ static bool filter_chain(struct uprobe *uprobe, struct mm_struct *mm)
> > bool ret = false;
> >
> > down_read(&uprobe->consumer_rwsem);
> > - for (uc = uprobe->consumers; uc; uc = uc->next) {
> > + list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node,
> > + srcu_read_lock_held(&uprobes_srcu)) {
> > ret = consumer_filter(uc, mm);
> > if (ret)
> > break;
> > @@ -1125,18 +1114,29 @@ void uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc)
> > int err;
> >
> > down_write(&uprobe->register_rwsem);
> > - if (WARN_ON(!consumer_del(uprobe, uc))) {
> > - err = -ENOENT;
> > - } else {
> > - err = register_for_each_vma(uprobe, NULL);
> > - /* TODO : cant unregister? schedule a worker thread */
> > - if (unlikely(err))
> > - uprobe_warn(current, "unregister, leaking uprobe");
> > - }
> > + consumer_del(uprobe, uc);
> > + err = register_for_each_vma(uprobe, NULL);
> > up_write(&uprobe->register_rwsem);
> >
> > - if (!err)
> > - put_uprobe(uprobe);
> > + /* TODO : cant unregister? schedule a worker thread */
> > + if (unlikely(err)) {
> > + uprobe_warn(current, "unregister, leaking uprobe");
> > + goto out_sync;
> > + }
> > +
> > + put_uprobe(uprobe);
> > +
> > +out_sync:
> > + /*
> > + * Now that handler_chain() and handle_uretprobe_chain() iterate over
> > + * uprobe->consumers list under RCU protection without holding
> > + * uprobe->register_rwsem, we need to wait for RCU grace period to
> > + * make sure that we can't call into just unregistered
> > + * uprobe_consumer's callbacks anymore. If we don't do that, fast and
> > + * unlucky enough caller can free consumer's memory and cause
> > + * handler_chain() or handle_uretprobe_chain() to do an use-after-free.
> > + */
> > + synchronize_srcu(&uprobes_srcu);
> > }
> > EXPORT_SYMBOL_GPL(uprobe_unregister);
> >
> > @@ -1214,13 +1214,20 @@ EXPORT_SYMBOL_GPL(uprobe_register);
> > int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool add)
> > {
> > struct uprobe_consumer *con;
> > - int ret = -ENOENT;
> > + int ret = -ENOENT, srcu_idx;
> >
> > down_write(&uprobe->register_rwsem);
> > - for (con = uprobe->consumers; con && con != uc ; con = con->next)
> > - ;
> > - if (con)
> > - ret = register_for_each_vma(uprobe, add ? uc : NULL);
> > +
> > + srcu_idx = srcu_read_lock(&uprobes_srcu);
> > + list_for_each_entry_srcu(con, &uprobe->consumers, cons_node,
> > + srcu_read_lock_held(&uprobes_srcu)) {
> > + if (con == uc) {
> > + ret = register_for_each_vma(uprobe, add ? uc : NULL);
> > + break;
> > + }
> > + }
> > + srcu_read_unlock(&uprobes_srcu, srcu_idx);
> > +
> > up_write(&uprobe->register_rwsem);
> >
> > return ret;
> > @@ -2085,10 +2092,12 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
> > struct uprobe_consumer *uc;
> > int remove = UPROBE_HANDLER_REMOVE;
> > bool need_prep = false; /* prepare return uprobe, when needed */
> > + bool has_consumers = false;
> >
> > - down_read(&uprobe->register_rwsem);
> > current->utask->auprobe = &uprobe->arch;
> > - for (uc = uprobe->consumers; uc; uc = uc->next) {
> > +
> > + list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node,
> > + srcu_read_lock_held(&uprobes_srcu)) {
> > int rc = 0;
> >
> > if (uc->handler) {
> > @@ -2101,17 +2110,24 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
> > need_prep = true;
> >
> > remove &= rc;
> > + has_consumers = true;
> > }
> > current->utask->auprobe = NULL;
> >
> > if (need_prep && !remove)
> > prepare_uretprobe(uprobe, regs); /* put bp at return */
> >
> > - if (remove && uprobe->consumers) {
> > - WARN_ON(!uprobe_is_active(uprobe));
> > - unapply_uprobe(uprobe, current->mm);
> > + if (remove && has_consumers) {
> > + down_read(&uprobe->register_rwsem);
> > +
> > + /* re-check that removal is still required, this time under lock */
> > + if (!filter_chain(uprobe, current->mm)) {
>
> sorry for late question, but I do not follow this change..
>
> at this point we got 1 as handler's return value from all the uprobe's consumers,
> why do we need to call filter_chain in here.. IIUC this will likely skip over
> the removal?
>
Because we don't hold register_rwsem, we are now racing with
registration. So while every consumer we saw when iterating over the
consumer list may have requested removal, a parallel CPU can add
another consumer that needs this uprobe+PID combination. So if we
don't double-check, we risk ending up with a consumer that will not
be triggered for the desired process.
Does that make sense? Given that removal is rare, it's OK to take the
lock if we *suspect* removal, and then check authoritatively again
under the lock.
> with single uprobe_multi consumer:
>
> handler_chain
> uprobe_multi_link_handler
> uprobe_prog_run
> bpf_prog returns 1
>
> remove = 1
>
> if (remove && has_consumers) {
>
> filter_chain - uprobe_multi_link_filter returns true.. so the uprobe stays?
>
> maybe I just need to write test for it ;-)
>
> thanks,
> jirka
>
>
> > + WARN_ON(!uprobe_is_active(uprobe));
> > + unapply_uprobe(uprobe, current->mm);
> > + }
> > +
> > + up_read(&uprobe->register_rwsem);
> > }
> > - up_read(&uprobe->register_rwsem);
> > }
> >
> > static void
> > @@ -2119,13 +2135,15 @@ handle_uretprobe_chain(struct return_instance *ri, struct pt_regs *regs)
> > {
> > struct uprobe *uprobe = ri->uprobe;
> > struct uprobe_consumer *uc;
> > + int srcu_idx;
> >
> > - down_read(&uprobe->register_rwsem);
> > - for (uc = uprobe->consumers; uc; uc = uc->next) {
> > + srcu_idx = srcu_read_lock(&uprobes_srcu);
> > + list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node,
> > + srcu_read_lock_held(&uprobes_srcu)) {
> > if (uc->ret_handler)
> > uc->ret_handler(uc, ri->func, regs);
> > }
> > - up_read(&uprobe->register_rwsem);
> > + srcu_read_unlock(&uprobes_srcu, srcu_idx);
> > }
> >
> > static struct return_instance *find_next_ret_chain(struct return_instance *ri)
> > --
> > 2.43.5
> >
Powered by blists - more mailing lists