lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAEf4BzZaZGE7Kb+AZkN0eTH+0ny-_0WUxKT7ydDzAfEwP8cKVg@mail.gmail.com>
Date: Fri, 18 Oct 2024 11:22:09 -0700
From: Andrii Nakryiko <andrii.nakryiko@...il.com>
To: Peter Zijlstra <peterz@...radead.org>
Cc: Andrii Nakryiko <andrii@...nel.org>, linux-trace-kernel@...r.kernel.org, oleg@...hat.com, 
	rostedt@...dmis.org, mhiramat@...nel.org, mingo@...nel.org, 
	bpf@...r.kernel.org, linux-kernel@...r.kernel.org, jolsa@...nel.org, 
	paulmck@...nel.org
Subject: Re: [PATCH v2 tip/perf/core 2/2] uprobes: SRCU-protect uretprobe
 lifetime (with timeout)

On Fri, Oct 18, 2024 at 3:16 AM Peter Zijlstra <peterz@...radead.org> wrote:
>
> On Mon, Oct 07, 2024 at 05:25:56PM -0700, Andrii Nakryiko wrote:
>

[...]

> > +
> > +             /* We lost the race, undo refcount bump (if it ever happened) */
> > +             if (uprobe)
> > +                     put_uprobe(uprobe);
> > +             /*
> > +              * Even if hprobe_consume() or another hprobe_expire() wins
> > +              * the state update race and unlocks SRCU from under us, we
> > +              * still have a guarantee that underyling uprobe won't be
> > +              * freed due to ongoing caller's SRCU lock region, so we can
> > +              * return it regardless. The caller then can attempt its own
> > +              * try_get_uprobe() to preserve the instance, if necessary.
> > +              * This is used in dup_utask().
> > +              */
> > +             return uprobe;
> > +     }
> > +     default:
> > +             WARN(1, "unknown hprobe state %d", hstate);
> > +             return NULL;
> > +     }
> > +}
>
> So... after a few readings I think I'm mostly okay with this. But I got
> annoyed by the whole HPROBE_STABLE with uprobe=NULL weirdness. Also,
> that data_race() usage is weird, what is that about?

People keep saying that evil KCSAN will come after me if I don't add
data_race() for values that can change under me, so I add it to make
it explicit that it's fine. But I can of course just drop data_race(),
as it has no bearing on correctness.

>
> And then there's the case where we end up doing:
>
>   try_get_uprobe()
>   put_uprobe()
>   try_get_uprobe()
>
> in the dup path. Yes, it's unlikely, but gah.
>
>
> So how about something like this?

Yep, it makes sense to start with HPROBE_GONE if it's already NULL, no
problem. I'll roll those changes in.

I'm fine with the `bool get` flag as well. Will incorporate all that
into the next revision, thanks!

The only problem I can see is in the assumption that `srcu_idx < 0` is
never going to be returned by srcu_read_lock(). Paul says that it can
only be 0 or 1, but it's not codified as part of a contract. So until
we change that, probably safer to pass an extra bool specifying
whether srcu_idx is valid or not, is that OK?

(and I assume you want me to drop verbose comments for various states, right?)



>
> ---
> diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
> index 06ec41c75c45..efb4f5ee6212 100644
> --- a/kernel/events/uprobes.c
> +++ b/kernel/events/uprobes.c
> @@ -657,20 +657,19 @@ static void put_uprobe(struct uprobe *uprobe)
>         call_srcu(&uretprobes_srcu, &uprobe->rcu, uprobe_free_srcu);
>  }
>
> -/* Initialize hprobe as SRCU-protected "leased" uprobe */
> -static void hprobe_init_leased(struct hprobe *hprobe, struct uprobe *uprobe, int srcu_idx)
> +static void hprobe_init(struct hprobe *hprobe, struct uprobe *uprobe, int srcu_idx)
>  {
> -       hprobe->state = HPROBE_LEASED;
> -       hprobe->uprobe = uprobe;
> -       hprobe->srcu_idx = srcu_idx;
> -}
> +       enum hprobe_state state = HPROBE_GONE;
>
> -/* Initialize hprobe as refcounted ("stable") uprobe (uprobe can be NULL). */
> -static void hprobe_init_stable(struct hprobe *hprobe, struct uprobe *uprobe)
> -{
> -       hprobe->state = HPROBE_STABLE;
> +       if (uprobe) {
> +               state = HPROBE_LEASED;
> +               if (srcu_idx < 0)
> +                       state = HPROBE_STABLE;
> +       }
> +
> +       hprobe->state = state;
>         hprobe->uprobe = uprobe;
> -       hprobe->srcu_idx = -1;
> +       hprobe->srcu_idx = srcu_idx;
>  }
>
>  /*
> @@ -713,8 +712,7 @@ static void hprobe_finalize(struct hprobe *hprobe, enum hprobe_state hstate)
>                 __srcu_read_unlock(&uretprobes_srcu, hprobe->srcu_idx);
>                 break;
>         case HPROBE_STABLE:
> -               if (hprobe->uprobe)
> -                       put_uprobe(hprobe->uprobe);
> +               put_uprobe(hprobe->uprobe);
>                 break;
>         case HPROBE_GONE:
>         case HPROBE_CONSUMED:
> @@ -739,8 +737,9 @@ static void hprobe_finalize(struct hprobe *hprobe, enum hprobe_state hstate)
>   * refcount, so caller has to attempt try_get_uprobe(), if it needs to
>   * preserve uprobe beyond current SRCU lock region. See dup_utask().
>   */
> -static struct uprobe* hprobe_expire(struct hprobe *hprobe)
> +static struct uprobe *hprobe_expire(struct hprobe *hprobe, bool get)
>  {
> +       struct uprobe *uprobe = NULL;
>         enum hprobe_state hstate;
>
>         /*
> @@ -749,25 +748,18 @@ static struct uprobe* hprobe_expire(struct hprobe *hprobe)
>          */
>         lockdep_assert(rcu_read_lock_held() && srcu_read_lock_held(&uretprobes_srcu));
>
> -       hstate = data_race(READ_ONCE(hprobe->state));
> +       hstate = READ_ONCE(hprobe->state);
>         switch (hstate) {
>         case HPROBE_STABLE:
> -               /* uprobe is properly refcounted, return it */
> -               return hprobe->uprobe;
> +               uprobe = hprobe->uprobe;
> +               break;
> +
>         case HPROBE_GONE:
> -               /*
> -                * SRCU was unlocked earlier and we didn't manage to take
> -                * uprobe refcnt, so it's effectively NULL
> -                */
> -               return NULL;
>         case HPROBE_CONSUMED:
> -               /*
> -                * uprobe was consumed, so it's effectively NULL as far as
> -                * uretprobe processing logic is concerned
> -                */
> -               return NULL;
> -       case HPROBE_LEASED: {
> -               struct uprobe *uprobe = try_get_uprobe(hprobe->uprobe);
> +               break;
> +
> +       case HPROBE_LEASED:
> +               uprobe = try_get_uprobe(hprobe->uprobe);
>                 /*
>                  * Try to switch hprobe state, guarding against
>                  * hprobe_consume() or another hprobe_expire() racing with us.
> @@ -778,27 +770,26 @@ static struct uprobe* hprobe_expire(struct hprobe *hprobe)
>                 if (try_cmpxchg(&hprobe->state, &hstate, uprobe ? HPROBE_STABLE : HPROBE_GONE)) {
>                         /* We won the race, we are the ones to unlock SRCU */
>                         __srcu_read_unlock(&uretprobes_srcu, hprobe->srcu_idx);
> -                       return uprobe;
> +                       break;
>                 }
>
>                 /* We lost the race, undo refcount bump (if it ever happened) */
> -               if (uprobe)
> +               if (uprobe && !get) {
>                         put_uprobe(uprobe);
> -               /*
> -                * Even if hprobe_consume() or another hprobe_expire() wins
> -                * the state update race and unlocks SRCU from under us, we
> -                * still have a guarantee that underyling uprobe won't be
> -                * freed due to ongoing caller's SRCU lock region, so we can
> -                * return it regardless. The caller then can attempt its own
> -                * try_get_uprobe() to preserve the instance, if necessary.
> -                * This is used in dup_utask().
> -                */
> +                       uprobe = NULL;
> +               }
> +
>                 return uprobe;
> -       }
> +
>         default:
>                 WARN(1, "unknown hprobe state %d", hstate);
>                 return NULL;
>         }
> +
> +       if (uprobe && get)
> +               return try_get_uprobe(uprobe);
> +
> +       return uprobe;
>  }
>
>  static __always_inline
> @@ -1920,9 +1911,8 @@ static void ri_timer(struct timer_list *timer)
>         /* RCU protects return_instance from freeing. */
>         guard(rcu)();
>
> -       for_each_ret_instance_rcu(ri, utask->return_instances) {
> -               hprobe_expire(&ri->hprobe);
> -       }
> +       for_each_ret_instance_rcu(ri, utask->return_instances)
> +               hprobe_expire(&ri->hprobe, false);
>  }
>
>  static struct uprobe_task *alloc_utask(void)
> @@ -1975,10 +1965,7 @@ static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask)
>
>                 *n = *o;
>
> -               /* see hprobe_expire() comments */
> -               uprobe = hprobe_expire(&o->hprobe);
> -               if (uprobe) /* refcount bump for new utask */
> -                       uprobe = try_get_uprobe(uprobe);
> +               uprobe = hprobe_expire(&o->hprobe, true);
>
>                 /*
>                  * New utask will have stable properly refcounted uprobe or
> @@ -1986,7 +1973,7 @@ static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask)
>                  * need to preserve full set of return_instances for proper
>                  * uretprobe handling and nesting in forked task.
>                  */
> -               hprobe_init_stable(&n->hprobe, uprobe);
> +               hprobe_init(&n->hprobe, uprobe, -1);
>
>                 n->next = NULL;
>                 rcu_assign_pointer(*p, n);
> @@ -2131,7 +2118,7 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
>
>         utask->depth++;
>
> -       hprobe_init_leased(&ri->hprobe, uprobe, srcu_idx);
> +       hprobe_init(&ri->hprobe, uprobe, srcu_idx);
>         ri->next = utask->return_instances;
>         rcu_assign_pointer(utask->return_instances, ri);
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ