[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20210727150613.20886-3-mathieu.desnoyers@efficios.com>
Date: Tue, 27 Jul 2021 11:06:12 -0400
From: Mathieu Desnoyers <mathieu.desnoyers@...icios.com>
To: Steven Rostedt <rostedt@...dmis.org>,
"Paul E. McKenney" <paulmck@...nel.org>,
Peter Zijlstra <peterz@...radead.org>,
Ingo Molnar <mingo@...hat.com>,
Andrew Morton <akpm@...ux-foundation.org>,
Stefan Metzmacher <metze@...ba.org>
Cc: linux-kernel@...r.kernel.org,
Mathieu Desnoyers <mathieu.desnoyers@...icios.com>
Subject: [RFC PATCH 2/3] Fix: tracepoint: static call function vs data state mismatch
On a 1->0->1 callbacks transition, there is an issue with the new
callback using the old callback's data.
Considering __DO_TRACE_CALL:
do { \
struct tracepoint_func *it_func_ptr; \
void *__data; \
it_func_ptr = \
rcu_dereference_raw((&__tracepoint_##name)->funcs); \
if (it_func_ptr) { \
__data = (it_func_ptr)->data; \
----> [ delayed here on one CPU (e.g. vcpu preempted by the host) ]
static_call(tp_func_##name)(__data, args); \
} \
} while (0)
It has loaded the tp->funcs of the old callback, so it will try to use the old
data. This can be fixed by adding a RCU sync anywhere in the 1->0->1
transition chain.
On a N->2->1 transition, we need an rcu-sync because you may have a
sequence of 3->2->1 (or 1->2->1) where the element 0 data is unchanged
between 2->1, but was changed from 3->2 (or from 1->2), which may be
observed by the static call. This can be fixed by adding an
unconditional RCU sync in transition 2->1.
A follow up fix will introduce a more lightweight scheme based on RCU
get_state and cond_sync.
[ Build tested only. ]
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@...icios.com>
---
kernel/tracepoint.c | 100 +++++++++++++++++++++++++++++++++++---------
1 file changed, 80 insertions(+), 20 deletions(-)
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 133b6454b287..a85e7dc8b490 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -15,6 +15,13 @@
#include <linux/sched/task.h>
#include <linux/static_key.h>
+enum tp_func_state {
+ TP_FUNC_0,
+ TP_FUNC_1,
+ TP_FUNC_2,
+ TP_FUNC_N,
+};
+
extern tracepoint_ptr_t __start___tracepoints_ptrs[];
extern tracepoint_ptr_t __stop___tracepoints_ptrs[];
@@ -246,26 +253,29 @@ static void *func_remove(struct tracepoint_func **funcs,
return old;
}
-static void tracepoint_update_call(struct tracepoint *tp, struct tracepoint_func *tp_funcs, bool sync)
+/*
+ * Count the number of functions (enum tp_func_state) in a tp_funcs array.
+ */
+static enum tp_func_state nr_func_state(const struct tracepoint_func *tp_funcs)
+{
+ if (!tp_funcs)
+ return TP_FUNC_0;
+ if (!tp_funcs[1].func)
+ return TP_FUNC_1;
+ if (!tp_funcs[2].func)
+ return TP_FUNC_2;
+ return TP_FUNC_N; /* 3 or more */
+}
+
+static void tracepoint_update_call(struct tracepoint *tp, struct tracepoint_func *tp_funcs)
{
void *func = tp->iterator;
/* Synthetic events do not have static call sites */
if (!tp->static_call_key)
return;
-
- if (!tp_funcs[1].func) {
+ if (nr_func_state(tp_funcs) == TP_FUNC_1)
func = tp_funcs[0].func;
- /*
- * If going from the iterator back to a single caller,
- * we need to synchronize with __DO_TRACE to make sure
- * that the data passed to the callback is the one that
- * belongs to that callback.
- */
- if (sync)
- tracepoint_synchronize_unregister();
- }
-
__static_call_update(tp->static_call_key, tp->static_call_tramp, func);
}
@@ -299,9 +309,31 @@ static int tracepoint_add_func(struct tracepoint *tp,
* a pointer to it. This array is referenced by __DO_TRACE from
* include/linux/tracepoint.h using rcu_dereference_sched().
*/
- tracepoint_update_call(tp, tp_funcs, false);
- rcu_assign_pointer(tp->funcs, tp_funcs);
- static_key_enable(&tp->key);
+ switch (nr_func_state(tp_funcs)) {
+ case TP_FUNC_1: /* 0->1 */
+ /* Set static call to first function */
+ tracepoint_update_call(tp, tp_funcs);
+ /* Both iterator and static call handle NULL tp->funcs */
+ rcu_assign_pointer(tp->funcs, tp_funcs);
+ static_key_enable(&tp->key);
+ break;
+ case TP_FUNC_2: /* 1->2 */
+ /* Set iterator static call */
+ tracepoint_update_call(tp, tp_funcs);
+ /*
+ * Iterator callback installed before updating tp->funcs.
+ * Requires ordering between RCU assign/dereference and
+ * static call update/call.
+ */
+ rcu_assign_pointer(tp->funcs, tp_funcs);
+ break;
+ case TP_FUNC_N: /* N->N+1 (N>1) */
+ rcu_assign_pointer(tp->funcs, tp_funcs);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
release_probes(old);
return 0;
@@ -328,17 +360,45 @@ static int tracepoint_remove_func(struct tracepoint *tp,
/* Failed allocating new tp_funcs, replaced func with stub */
return 0;
- if (!tp_funcs) {
+ switch (nr_func_state(tp_funcs)) {
+ case TP_FUNC_0: /* 1->0 */
/* Removed last function */
if (tp->unregfunc && static_key_enabled(&tp->key))
tp->unregfunc();
static_key_disable(&tp->key);
+ /* Set iterator static call */
+ tracepoint_update_call(tp, tp_funcs);
+ /* Both iterator and static call handle NULL tp->funcs */
+ rcu_assign_pointer(tp->funcs, NULL);
+ /*
+ * Make sure new func never uses old data after a 1->0->1
+ * transition sequence.
+ * Considering that transition 0->1 is the common case
+ * and don't have rcu-sync, issue rcu-sync after
+ * transition 1->0 to break that sequence by waiting for
+ * readers to be quiescent.
+ */
+ tracepoint_synchronize_unregister();
+ break;
+ case TP_FUNC_1: /* 2->1 */
rcu_assign_pointer(tp->funcs, tp_funcs);
- } else {
+ /*
+ * On 2->1 transition, RCU sync is needed before setting
+ * static call to first callback, because the observer
+ * may have loaded any prior tp->funcs after the last one
+ * associated with an rcu-sync.
+ */
+ tracepoint_synchronize_unregister();
+ /* Set static call to first function */
+ tracepoint_update_call(tp, tp_funcs);
+ break;
+ case TP_FUNC_N: /* N->N-1 (N>2) */
rcu_assign_pointer(tp->funcs, tp_funcs);
- tracepoint_update_call(tp, tp_funcs,
- tp_funcs[0].data != old[0].data);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
}
release_probes(old);
return 0;
--
2.20.1
Powered by blists - more mailing lists