[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250715071434.22508-13-gmonaco@redhat.com>
Date: Tue, 15 Jul 2025 09:14:29 +0200
From: Gabriele Monaco <gmonaco@...hat.com>
To: linux-kernel@...r.kernel.org,
Ingo Molnar <mingo@...hat.com>,
Peter Zijlstra <peterz@...radead.org>,
Steven Rostedt <rostedt@...dmis.org>,
Masami Hiramatsu <mhiramat@...nel.org>,
linux-trace-kernel@...r.kernel.org
Cc: Gabriele Monaco <gmonaco@...hat.com>,
Nam Cao <namcao@...utronix.de>,
Tomas Glozar <tglozar@...hat.com>,
Juri Lelli <jlelli@...hat.com>,
Clark Williams <williams@...hat.com>,
John Kacur <jkacur@...hat.com>
Subject: [PATCH v3 12/17] sched: Adapt sched tracepoints for RV task model
Add the following tracepoints:
* sched_set_need_resched(tsk, cpu, tif)
Called when the need resched [lazy] flag is set on a task
* sched_switch_vain(preempt, tsk, tsk_state)
Called when the same task is selected again during __schedule,
i.e. prev == next == tsk: no real context switch takes place
Add a new parameter to sched_set_state to identify whether the state
change was due to an explicit call or to a signal pending while scheduling.
We now also trace from try_to_block_task when a signal is pending
and the task is therefore set back to TASK_RUNNING instead of blocking.
Also adapt all monitors using sched_set_state to avoid breaking the build.
These tracepoints are useful to describe the Linux task model and are
adapted from the patches by Daniel Bristot de Oliveira
(https://bristot.me/linux-task-model/).
Signed-off-by: Gabriele Monaco <gmonaco@...hat.com>
---
include/linux/sched.h | 7 ++++++-
include/trace/events/sched.h | 17 +++++++++++++++--
kernel/sched/core.c | 10 +++++++++-
kernel/trace/rv/monitors/sco/sco.c | 3 ++-
kernel/trace/rv/monitors/sleep/sleep.c | 3 ++-
kernel/trace/rv/monitors/snroc/snroc.c | 3 ++-
6 files changed, 36 insertions(+), 7 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7bce4c7ae3b4f..19ab4597c97d3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -339,9 +339,11 @@ extern void io_schedule_finish(int token);
extern long io_schedule_timeout(long timeout);
extern void io_schedule(void);
-/* wrapper function to trace from this header file */
+/* wrapper functions to trace from this header file */
DECLARE_TRACEPOINT(sched_set_state_tp);
extern void __trace_set_current_state(int state_value);
+DECLARE_TRACEPOINT(sched_set_need_resched_tp);
+extern void __trace_set_need_resched(struct task_struct *curr, int tif);
/**
* struct prev_cputime - snapshot of system and user cputime
@@ -2059,6 +2061,9 @@ static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag)
static inline void set_tsk_need_resched(struct task_struct *tsk)
{
+ if (tracepoint_enabled(sched_set_need_resched_tp) &&
+ !test_tsk_thread_flag(tsk, TIF_NEED_RESCHED))
+ __trace_set_need_resched(tsk, TIF_NEED_RESCHED);
set_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
}
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 4e6b2910cec3f..c9dec6d38ad2d 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -889,11 +889,24 @@ DECLARE_TRACE(sched_exit,
TP_PROTO(bool is_switch, unsigned long ip),
TP_ARGS(is_switch, ip));
+/*
+ * Tracepoint called when setting the state of a task;
+ * this tracepoint is guaranteed to be called from the waking context of the
+ * task setting the state.
+ */
DECLARE_TRACE_CONDITION(sched_set_state,
- TP_PROTO(struct task_struct *tsk, int state),
- TP_ARGS(tsk, state),
+ TP_PROTO(struct task_struct *tsk, int state, bool from_signal),
+ TP_ARGS(tsk, state, from_signal),
TP_CONDITION(!!(tsk->__state) != !!state));
+DECLARE_TRACE(sched_set_need_resched,
+ TP_PROTO(struct task_struct *tsk, int cpu, int tif),
+ TP_ARGS(tsk, cpu, tif));
+
+DECLARE_TRACE(sched_switch_vain,
+ TP_PROTO(bool preempt, struct task_struct *tsk, unsigned int prev_state),
+ TP_ARGS(preempt, tsk, prev_state));
+
#endif /* _TRACE_SCHED_H */
/* This part must be outside protection */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 81c6df746df17..6cb70e6f7fa17 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -495,7 +495,7 @@ EXPORT_TRACEPOINT_SYMBOL(sched_set_state_tp);
/* Call via the helper macro trace_set_current_state. */
void __trace_set_current_state(int state_value)
{
- trace_sched_set_state_tp(current, state_value);
+ trace_sched_set_state_tp(current, state_value, false);
}
EXPORT_SYMBOL(__trace_set_current_state);
@@ -1110,6 +1110,7 @@ static void __resched_curr(struct rq *rq, int tif)
cpu = cpu_of(rq);
+ trace_sched_set_need_resched_tp(curr, cpu, tif);
if (cpu == smp_processor_id()) {
set_ti_thread_flag(cti, tif);
if (tif == TIF_NEED_RESCHED)
@@ -1125,6 +1126,11 @@ static void __resched_curr(struct rq *rq, int tif)
}
}
+void __trace_set_need_resched(struct task_struct *curr, int tif)
+{
+ trace_sched_set_need_resched_tp(curr, smp_processor_id(), tif);
+}
+
void resched_curr(struct rq *rq)
{
__resched_curr(rq, TIF_NEED_RESCHED);
@@ -6592,6 +6598,7 @@ static bool try_to_block_task(struct rq *rq, struct task_struct *p,
int flags = DEQUEUE_NOCLOCK;
if (signal_pending_state(task_state, p)) {
+ trace_sched_set_state_tp(p, TASK_RUNNING, true);
WRITE_ONCE(p->__state, TASK_RUNNING);
*task_state_p = TASK_RUNNING;
return false;
@@ -6786,6 +6793,7 @@ static void __sched notrace __schedule(int sched_mode)
rq = context_switch(rq, prev, next, &rf);
} else {
rq_unpin_lock(rq, &rf);
+ trace_sched_switch_vain_tp(preempt, prev, prev_state);
__balance_callbacks(rq);
raw_spin_rq_unlock_irq(rq);
}
diff --git a/kernel/trace/rv/monitors/sco/sco.c b/kernel/trace/rv/monitors/sco/sco.c
index 66f4639d46ac4..c9206aa12c319 100644
--- a/kernel/trace/rv/monitors/sco/sco.c
+++ b/kernel/trace/rv/monitors/sco/sco.c
@@ -19,7 +19,8 @@
static struct rv_monitor rv_sco;
DECLARE_DA_MON_PER_CPU(sco, unsigned char);
-static void handle_sched_set_state(void *data, struct task_struct *tsk, int state)
+static void handle_sched_set_state(void *data, struct task_struct *tsk,
+ int state, bool from_signal)
{
da_handle_start_event_sco(sched_set_state_sco);
}
diff --git a/kernel/trace/rv/monitors/sleep/sleep.c b/kernel/trace/rv/monitors/sleep/sleep.c
index eea447b069071..5103a98818c53 100644
--- a/kernel/trace/rv/monitors/sleep/sleep.c
+++ b/kernel/trace/rv/monitors/sleep/sleep.c
@@ -82,7 +82,8 @@ static void ltl_atoms_init(struct task_struct *task, struct ltl_monitor *mon, bo
}
-static void handle_sched_set_state(void *data, struct task_struct *task, int state)
+static void handle_sched_set_state(void *data, struct task_struct *task,
+ int state, bool from_signal)
{
if (state & TASK_INTERRUPTIBLE)
ltl_atom_pulse(task, LTL_SLEEP, true);
diff --git a/kernel/trace/rv/monitors/snroc/snroc.c b/kernel/trace/rv/monitors/snroc/snroc.c
index 540e686e699f4..2651f589d1554 100644
--- a/kernel/trace/rv/monitors/snroc/snroc.c
+++ b/kernel/trace/rv/monitors/snroc/snroc.c
@@ -19,7 +19,8 @@
static struct rv_monitor rv_snroc;
DECLARE_DA_MON_PER_TASK(snroc, unsigned char);
-static void handle_sched_set_state(void *data, struct task_struct *tsk, int state)
+static void handle_sched_set_state(void *data, struct task_struct *tsk,
+ int state, bool from_signal)
{
da_handle_event_snroc(tsk, sched_set_state_snroc);
}
--
2.50.1
Powered by blists - more mailing lists