Message-ID: <20250514084314.57976-7-gmonaco@redhat.com>
Date: Wed, 14 May 2025 10:43:08 +0200
From: Gabriele Monaco <gmonaco@...hat.com>
To: linux-kernel@...r.kernel.org,
Ingo Molnar <mingo@...hat.com>,
Peter Zijlstra <peterz@...radead.org>,
Steven Rostedt <rostedt@...dmis.org>,
Masami Hiramatsu <mhiramat@...nel.org>,
linux-trace-kernel@...r.kernel.org
Cc: Gabriele Monaco <gmonaco@...hat.com>,
Nam Cao <namcao@...utronix.de>,
Tomas Glozar <tglozar@...hat.com>,
Juri Lelli <jlelli@...hat.com>
Subject: [RFC PATCH v2 06/12] sched: Adapt sched tracepoints for RV task model

Add the following tracepoints (a minimal consumer sketch follows the
list):

* sched_set_need_resched(tsk, cpu, tif)
    Called when the need resched [lazy] flag is set for a task
* sched_switch_vain(preempt, tsk, tsk_state)
    Called when the previous task is selected to run again during
    __schedule, i.e. prev == next == tsk: no real context switch
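
For illustration only (not part of this patch), the sketch below shows
how a hypothetical module could attach probes to the two new
tracepoints. All names in it are made up, and it assumes the new
tracepoints are exported with EXPORT_TRACEPOINT_SYMBOL as
sched_set_state_tp already is:

#include <linux/module.h>
#include <linux/atomic.h>
#include <linux/sched.h>
#include <trace/events/sched.h>

static atomic64_t nr_need_resched;
static atomic64_t nr_vain_switches;

/* Mirrors TP_PROTO(struct task_struct *tsk, int cpu, int tif) */
static void probe_need_resched(void *data, struct task_struct *tsk,
                               int cpu, int tif)
{
        atomic64_inc(&nr_need_resched);
}

/* Mirrors TP_PROTO(bool preempt, struct task_struct *tsk, unsigned int prev_state) */
static void probe_switch_vain(void *data, bool preempt,
                              struct task_struct *tsk,
                              unsigned int prev_state)
{
        atomic64_inc(&nr_vain_switches);
}

static int __init tp_sketch_init(void)
{
        int ret;

        ret = register_trace_sched_set_need_resched_tp(probe_need_resched, NULL);
        if (ret)
                return ret;
        ret = register_trace_sched_switch_vain_tp(probe_switch_vain, NULL);
        if (ret)
                unregister_trace_sched_set_need_resched_tp(probe_need_resched, NULL);
        return ret;
}

static void __exit tp_sketch_exit(void)
{
        unregister_trace_sched_switch_vain_tp(probe_switch_vain, NULL);
        unregister_trace_sched_set_need_resched_tp(probe_need_resched, NULL);
        tracepoint_synchronize_unregister();
        pr_info("need_resched: %lld, vain switches: %lld\n",
                (long long)atomic64_read(&nr_need_resched),
                (long long)atomic64_read(&nr_vain_switches));
}

module_init(tp_sketch_init);
module_exit(tp_sketch_exit);
MODULE_LICENSE("GPL");

The probe prototypes simply mirror the TP_PROTO() of each tracepoint
with the usual leading void *data argument; counters are used instead
of printk() because the probes can fire with the rq lock held.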

Add a new parameter to the sched_set_state tracepoint to identify
whether the state change was requested explicitly by the task or
happened implicitly because a signal was found pending while
scheduling. We now also trace from try_to_block_task in case a signal
was pending and the task is set back to runnable, as in the probe
sketch below.
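
Again just a sketch with made-up names, a probe registered via
register_trace_sched_set_state_tp() (registration as in the previous
example) could now tell these signal-induced wakeups apart:

static atomic64_t nr_signal_wakeups;

/* Mirrors the updated TP_PROTO(struct task_struct *tsk, int state, bool from_signal) */
static void probe_set_state(void *data, struct task_struct *tsk,
                            int state, bool from_signal)
{
        /* Blocking was aborted: a pending signal put tsk back to runnable */
        if (from_signal && state == TASK_RUNNING)
                atomic64_inc(&nr_signal_wakeups);
}
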
These tracepoints are useful to describe the Linux task model and are
adapted from the patches by Daniel Bristot de Oliveira
(https://bristot.me/linux-task-model/).
Signed-off-by: Gabriele Monaco <gmonaco@...hat.com>
---
include/linux/sched.h | 7 ++++++-
include/trace/events/sched.h | 17 +++++++++++++++--
kernel/sched/core.c | 10 +++++++++-
3 files changed, 30 insertions(+), 4 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 04f808ab8888..4d9da32330bc 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -340,9 +340,11 @@ extern void io_schedule_finish(int token);
extern long io_schedule_timeout(long timeout);
extern void io_schedule(void);
-/* wrapper function to trace from this header file */
+/* wrapper functions to trace from this header file */
DECLARE_TRACEPOINT(sched_set_state_tp);
extern void __trace_set_current_state(int state_value);
+DECLARE_TRACEPOINT(sched_set_need_resched_tp);
+extern void __trace_set_need_resched(struct task_struct *curr, int tif);
/**
* struct prev_cputime - snapshot of system and user cputime
@@ -2065,6 +2067,9 @@ static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag)
static inline void set_tsk_need_resched(struct task_struct *tsk)
{
+ if (tracepoint_enabled(sched_set_need_resched_tp) &&
+ !test_tsk_thread_flag(tsk, TIF_NEED_RESCHED))
+ __trace_set_need_resched(tsk, TIF_NEED_RESCHED);
set_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
}
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 2390818b139b..158b9c504fab 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -889,11 +889,24 @@ DECLARE_TRACE(sched_exit_tp,
TP_PROTO(bool is_switch, unsigned long ip),
TP_ARGS(is_switch, ip));
+/*
+ * Tracepoint called when setting the state of a task;
+ * this tracepoint is guaranteed to be called from the waking context of the
+ * task setting the state.
+ */
DECLARE_TRACE_CONDITION(sched_set_state_tp,
- TP_PROTO(struct task_struct *tsk, int state),
- TP_ARGS(tsk, state),
+ TP_PROTO(struct task_struct *tsk, int state, bool from_signal),
+ TP_ARGS(tsk, state, from_signal),
TP_CONDITION(!!(tsk->__state) != !!state));
+DECLARE_TRACE(sched_set_need_resched_tp,
+ TP_PROTO(struct task_struct *tsk, int cpu, int tif),
+ TP_ARGS(tsk, cpu, tif));
+
+DECLARE_TRACE(sched_switch_vain_tp,
+ TP_PROTO(bool preempt, struct task_struct *tsk, unsigned int prev_state),
+ TP_ARGS(preempt, tsk, prev_state));
+
#endif /* _TRACE_SCHED_H */
/* This part must be outside protection */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5f844bae1a14..89e81fc7f393 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -494,7 +494,7 @@ EXPORT_TRACEPOINT_SYMBOL(sched_set_state_tp);
/* Call via the helper macro trace_set_current_state. */
void __trace_set_current_state(int state_value)
{
- trace_sched_set_state_tp(current, state_value);
+ trace_sched_set_state_tp(current, state_value, false);
}
EXPORT_SYMBOL(__trace_set_current_state);
@@ -1109,6 +1109,7 @@ static void __resched_curr(struct rq *rq, int tif)
cpu = cpu_of(rq);
+ trace_sched_set_need_resched_tp(curr, cpu, tif);
if (cpu == smp_processor_id()) {
set_ti_thread_flag(cti, tif);
if (tif == TIF_NEED_RESCHED)
@@ -1124,6 +1125,11 @@ static void __resched_curr(struct rq *rq, int tif)
}
}
+void __trace_set_need_resched(struct task_struct *curr, int tif)
+{
+ trace_sched_set_need_resched_tp(curr, smp_processor_id(), tif);
+}
+
void resched_curr(struct rq *rq)
{
__resched_curr(rq, TIF_NEED_RESCHED);
@@ -6587,6 +6593,7 @@ static bool try_to_block_task(struct rq *rq, struct task_struct *p,
int flags = DEQUEUE_NOCLOCK;
if (signal_pending_state(task_state, p)) {
+ trace_sched_set_state_tp(p, TASK_RUNNING, true);
WRITE_ONCE(p->__state, TASK_RUNNING);
*task_state_p = TASK_RUNNING;
return false;
@@ -6779,6 +6786,7 @@ static void __sched notrace __schedule(int sched_mode)
rq = context_switch(rq, prev, next, &rf);
} else {
rq_unpin_lock(rq, &rf);
+ trace_sched_switch_vain_tp(preempt, prev, prev_state);
__balance_callbacks(rq);
raw_spin_rq_unlock_irq(rq);
}
--
2.49.0