[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20241018101647.GA36494@noisy.programming.kicks-ass.net>
Date: Fri, 18 Oct 2024 12:16:47 +0200
From: Peter Zijlstra <peterz@...radead.org>
To: Andrii Nakryiko <andrii@...nel.org>
Cc: linux-trace-kernel@...r.kernel.org, oleg@...hat.com,
rostedt@...dmis.org, mhiramat@...nel.org, mingo@...nel.org,
bpf@...r.kernel.org, linux-kernel@...r.kernel.org, jolsa@...nel.org,
paulmck@...nel.org
Subject: Re: [PATCH v2 tip/perf/core 2/2] uprobes: SRCU-protect uretprobe
lifetime (with timeout)
On Mon, Oct 07, 2024 at 05:25:56PM -0700, Andrii Nakryiko wrote:
> +/* Initialize hprobe as SRCU-protected "leased" uprobe */
> +static void hprobe_init_leased(struct hprobe *hprobe, struct uprobe *uprobe, int srcu_idx)
> +{
> + hprobe->state = HPROBE_LEASED;
> + hprobe->uprobe = uprobe;
> + hprobe->srcu_idx = srcu_idx;
> +}
> +
> +/* Initialize hprobe as refcounted ("stable") uprobe (uprobe can be NULL). */
> +static void hprobe_init_stable(struct hprobe *hprobe, struct uprobe *uprobe)
> +{
> + hprobe->state = HPROBE_STABLE;
> + hprobe->uprobe = uprobe;
> + hprobe->srcu_idx = -1;
> +}
> +
> +/*
> + * hprobe_consume() fetches hprobe's underlying uprobe and detects whether
> + * uprobe is SRCU protected or is refcounted. hprobe_consume() can be
> + * used only once for a given hprobe.
> + *
> + * Caller has to call hprobe_finalize() and pass previous hprobe_state, so
> + * that hprobe_finalize() can perform SRCU unlock or put uprobe, whichever
> + * is appropriate.
> + */
> +static inline struct uprobe *hprobe_consume(struct hprobe *hprobe, enum hprobe_state *hstate)
> +{
> + enum hprobe_state state;
> +
> + *hstate = xchg(&hprobe->state, HPROBE_CONSUMED);
> + switch (*hstate) {
> + case HPROBE_LEASED:
> + case HPROBE_STABLE:
> + return hprobe->uprobe;
> + case HPROBE_GONE:
> + return NULL; /* couldn't refcnt uprobe, it's effectively NULL */
> + case HPROBE_CONSUMED:
> + return NULL; /* uprobe was finalized already, do nothing */
> + default:
> + WARN(1, "hprobe invalid state %d", *hstate);
> + return NULL;
> + }
> +}
> +
> +/*
> + * Reset hprobe state and, if hprobe was LEASED, release SRCU lock.
> + * hprobe_finalize() can only be used from current context after
> + * hprobe_consume() call (which determines uprobe and hstate value).
> + */
> +static void hprobe_finalize(struct hprobe *hprobe, enum hprobe_state hstate)
> +{
> + switch (hstate) {
> + case HPROBE_LEASED:
> + __srcu_read_unlock(&uretprobes_srcu, hprobe->srcu_idx);
> + break;
> + case HPROBE_STABLE:
> + if (hprobe->uprobe)
> + put_uprobe(hprobe->uprobe);
> + break;
> + case HPROBE_GONE:
> + case HPROBE_CONSUMED:
> + break;
> + default:
> + WARN(1, "hprobe invalid state %d", hstate);
> + break;
> + }
> +}
> +
> +/*
> + * Attempt to switch (atomically) uprobe from being SRCU protected (LEASED)
> + * to refcounted (STABLE) state. Competes with hprobe_consume(); only one of
> + * them can win the race to perform SRCU unlocking. Whoever wins must perform
> + * SRCU unlock.
> + *
> + * Returns underlying valid uprobe or NULL, if there was no underlying uprobe
> + * to begin with or we failed to bump its refcount and it's going away.
> + *
> + * Returned non-NULL uprobe can be still safely used within an ongoing SRCU
> + * locked region. It's not guaranteed that returned uprobe has a positive
> + * refcount, so caller has to attempt try_get_uprobe(), if it needs to
> + * preserve uprobe beyond current SRCU lock region. See dup_utask().
> + */
> +static struct uprobe* hprobe_expire(struct hprobe *hprobe)
> +{
> + enum hprobe_state hstate;
> +
> + /*
> + * return_instance's hprobe is protected by RCU.
> + * Underlying uprobe is itself protected from reuse by SRCU.
> + */
> + lockdep_assert(rcu_read_lock_held() && srcu_read_lock_held(&uretprobes_srcu));
> +
> + hstate = data_race(READ_ONCE(hprobe->state));
> + switch (hstate) {
> + case HPROBE_STABLE:
> + /* uprobe is properly refcounted, return it */
> + return hprobe->uprobe;
> + case HPROBE_GONE:
> + /*
> + * SRCU was unlocked earlier and we didn't manage to take
> + * uprobe refcnt, so it's effectively NULL
> + */
> + return NULL;
> + case HPROBE_CONSUMED:
> + /*
> + * uprobe was consumed, so it's effectively NULL as far as
> + * uretprobe processing logic is concerned
> + */
> + return NULL;
> + case HPROBE_LEASED: {
> + struct uprobe *uprobe = try_get_uprobe(hprobe->uprobe);
> + /*
> + * Try to switch hprobe state, guarding against
> + * hprobe_consume() or another hprobe_expire() racing with us.
> + * Note, if we failed to get uprobe refcount, we use special
> + * HPROBE_GONE state to signal that hprobe->uprobe shouldn't
> + * be used as it will be freed after SRCU is unlocked.
> + */
> + if (try_cmpxchg(&hprobe->state, &hstate, uprobe ? HPROBE_STABLE : HPROBE_GONE)) {
> + /* We won the race, we are the ones to unlock SRCU */
> + __srcu_read_unlock(&uretprobes_srcu, hprobe->srcu_idx);
> + return uprobe;
> + }
> +
> + /* We lost the race, undo refcount bump (if it ever happened) */
> + if (uprobe)
> + put_uprobe(uprobe);
> + /*
> + * Even if hprobe_consume() or another hprobe_expire() wins
> + * the state update race and unlocks SRCU from under us, we
> + * still have a guarantee that underlying uprobe won't be
> + * freed due to ongoing caller's SRCU lock region, so we can
> + * return it regardless. The caller then can attempt its own
> + * try_get_uprobe() to preserve the instance, if necessary.
> + * This is used in dup_utask().
> + */
> + return uprobe;
> + }
> + default:
> + WARN(1, "unknown hprobe state %d", hstate);
> + return NULL;
> + }
> +}
So... after a few readings I think I'm mostly okay with this. But I got
annoyed by the whole HPROBE_STABLE with uprobe=NULL weirdness. Also,
that data_race() usage is weird, what is that about?
And then there's the case where we end up doing:
try_get_uprobe()
put_uprobe()
try_get_uprobe()
in the dup path. Yes, it's unlikely, but gah.
So how about something like this?
---
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 06ec41c75c45..efb4f5ee6212 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -657,20 +657,19 @@ static void put_uprobe(struct uprobe *uprobe)
call_srcu(&uretprobes_srcu, &uprobe->rcu, uprobe_free_srcu);
}
-/* Initialize hprobe as SRCU-protected "leased" uprobe */
-static void hprobe_init_leased(struct hprobe *hprobe, struct uprobe *uprobe, int srcu_idx)
+static void hprobe_init(struct hprobe *hprobe, struct uprobe *uprobe, int srcu_idx)
{
- hprobe->state = HPROBE_LEASED;
- hprobe->uprobe = uprobe;
- hprobe->srcu_idx = srcu_idx;
-}
+ enum hprobe_state state = HPROBE_GONE;
-/* Initialize hprobe as refcounted ("stable") uprobe (uprobe can be NULL). */
-static void hprobe_init_stable(struct hprobe *hprobe, struct uprobe *uprobe)
-{
- hprobe->state = HPROBE_STABLE;
+ if (uprobe) {
+ state = HPROBE_LEASED;
+ if (srcu_idx < 0)
+ state = HPROBE_STABLE;
+ }
+
+ hprobe->state = state;
hprobe->uprobe = uprobe;
- hprobe->srcu_idx = -1;
+ hprobe->srcu_idx = srcu_idx;
}
/*
@@ -713,8 +712,7 @@ static void hprobe_finalize(struct hprobe *hprobe, enum hprobe_state hstate)
__srcu_read_unlock(&uretprobes_srcu, hprobe->srcu_idx);
break;
case HPROBE_STABLE:
- if (hprobe->uprobe)
- put_uprobe(hprobe->uprobe);
+ put_uprobe(hprobe->uprobe);
break;
case HPROBE_GONE:
case HPROBE_CONSUMED:
@@ -739,8 +737,9 @@ static void hprobe_finalize(struct hprobe *hprobe, enum hprobe_state hstate)
* refcount, so caller has to attempt try_get_uprobe(), if it needs to
* preserve uprobe beyond current SRCU lock region. See dup_utask().
*/
-static struct uprobe* hprobe_expire(struct hprobe *hprobe)
+static struct uprobe *hprobe_expire(struct hprobe *hprobe, bool get)
{
+ struct uprobe *uprobe = NULL;
enum hprobe_state hstate;
/*
@@ -749,25 +748,18 @@ static struct uprobe* hprobe_expire(struct hprobe *hprobe)
*/
lockdep_assert(rcu_read_lock_held() && srcu_read_lock_held(&uretprobes_srcu));
- hstate = data_race(READ_ONCE(hprobe->state));
+ hstate = READ_ONCE(hprobe->state);
switch (hstate) {
case HPROBE_STABLE:
- /* uprobe is properly refcounted, return it */
- return hprobe->uprobe;
+ uprobe = hprobe->uprobe;
+ break;
+
case HPROBE_GONE:
- /*
- * SRCU was unlocked earlier and we didn't manage to take
- * uprobe refcnt, so it's effectively NULL
- */
- return NULL;
case HPROBE_CONSUMED:
- /*
- * uprobe was consumed, so it's effectively NULL as far as
- * uretprobe processing logic is concerned
- */
- return NULL;
- case HPROBE_LEASED: {
- struct uprobe *uprobe = try_get_uprobe(hprobe->uprobe);
+ break;
+
+ case HPROBE_LEASED:
+ uprobe = try_get_uprobe(hprobe->uprobe);
/*
* Try to switch hprobe state, guarding against
* hprobe_consume() or another hprobe_expire() racing with us.
@@ -778,27 +770,26 @@ static struct uprobe* hprobe_expire(struct hprobe *hprobe)
if (try_cmpxchg(&hprobe->state, &hstate, uprobe ? HPROBE_STABLE : HPROBE_GONE)) {
/* We won the race, we are the ones to unlock SRCU */
__srcu_read_unlock(&uretprobes_srcu, hprobe->srcu_idx);
- return uprobe;
+ break;
}
/* We lost the race, undo refcount bump (if it ever happened) */
- if (uprobe)
+ if (uprobe && !get) {
put_uprobe(uprobe);
- /*
- * Even if hprobe_consume() or another hprobe_expire() wins
- * the state update race and unlocks SRCU from under us, we
- * still have a guarantee that underyling uprobe won't be
- * freed due to ongoing caller's SRCU lock region, so we can
- * return it regardless. The caller then can attempt its own
- * try_get_uprobe() to preserve the instance, if necessary.
- * This is used in dup_utask().
- */
+ uprobe = NULL;
+ }
+
return uprobe;
- }
+
default:
WARN(1, "unknown hprobe state %d", hstate);
return NULL;
}
+
+ if (uprobe && get)
+ return try_get_uprobe(uprobe);
+
+ return uprobe;
}
static __always_inline
@@ -1920,9 +1911,8 @@ static void ri_timer(struct timer_list *timer)
/* RCU protects return_instance from freeing. */
guard(rcu)();
- for_each_ret_instance_rcu(ri, utask->return_instances) {
- hprobe_expire(&ri->hprobe);
- }
+ for_each_ret_instance_rcu(ri, utask->return_instances)
+ hprobe_expire(&ri->hprobe, false);
}
static struct uprobe_task *alloc_utask(void)
@@ -1975,10 +1965,7 @@ static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask)
*n = *o;
- /* see hprobe_expire() comments */
- uprobe = hprobe_expire(&o->hprobe);
- if (uprobe) /* refcount bump for new utask */
- uprobe = try_get_uprobe(uprobe);
+ uprobe = hprobe_expire(&o->hprobe, true);
/*
* New utask will have stable properly refcounted uprobe or
@@ -1986,7 +1973,7 @@ static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask)
* need to preserve full set of return_instances for proper
* uretprobe handling and nesting in forked task.
*/
- hprobe_init_stable(&n->hprobe, uprobe);
+ hprobe_init(&n->hprobe, uprobe, -1);
n->next = NULL;
rcu_assign_pointer(*p, n);
@@ -2131,7 +2118,7 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
utask->depth++;
- hprobe_init_leased(&ri->hprobe, uprobe, srcu_idx);
+ hprobe_init(&ri->hprobe, uprobe, srcu_idx);
ri->next = utask->return_instances;
rcu_assign_pointer(utask->return_instances, ri);
Powered by blists - more mailing lists