__trace_sched_switch_state() is the last remaining PREEMPT_ACTIVE user.
Move trace_sched_switch() from prepare_task_switch() to __schedule() and
propagate the @preempt argument.

Reviewed-by: Thomas Gleixner
Reviewed-by: Steven Rostedt
Signed-off-by: Peter Zijlstra (Intel)
---
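Note for out-of-tree sched_switch users: every probe grows a second
'bool preempt' argument ahead of @prev and @next. A minimal sketch of a
converted probe (the name my_probe_sched_switch is made up for
illustration; it is not part of this patch):

static void
my_probe_sched_switch(void *ignore, bool preempt,
		      struct task_struct *prev, struct task_struct *next)
{
	/*
	 * @preempt is true when @prev was preempted: a preempted task
	 * is still runnable, whatever prev->state happens to say.
	 */
}

Registration itself is unchanged:

	register_trace_sched_switch(my_probe_sched_switch, NULL);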
 include/trace/events/sched.h      | 22 +++++++++-------------
 kernel/sched/core.c               |  2 +-
 kernel/trace/ftrace.c             |  2 +-
 kernel/trace/trace_sched_switch.c |  3 ++-
 kernel/trace/trace_sched_wakeup.c |  2 +-
 5 files changed, 14 insertions(+), 17 deletions(-)

--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -104,22 +104,17 @@ DEFINE_EVENT(sched_wakeup_template, sche
 	     TP_ARGS(p));
 
 #ifdef CREATE_TRACE_POINTS
-static inline long __trace_sched_switch_state(struct task_struct *p)
+static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
 {
-	long state = p->state;
-
-#ifdef CONFIG_PREEMPT
 #ifdef CONFIG_SCHED_DEBUG
 	BUG_ON(p != current);
 #endif /* CONFIG_SCHED_DEBUG */
+
 	/*
-	 * For all intents and purposes a preempted task is a running task.
+	 * Preemption ignores task state, therefore preempted tasks are always
+	 * RUNNING (we will not have dequeued if state != RUNNING).
 	 */
-	if (preempt_count() & PREEMPT_ACTIVE)
-		state = TASK_RUNNING | TASK_STATE_MAX;
-#endif /* CONFIG_PREEMPT */
-
-	return state;
+	return preempt ? TASK_RUNNING | TASK_STATE_MAX : p->state;
 }
 #endif /* CREATE_TRACE_POINTS */
 
@@ -128,10 +123,11 @@ static inline long __trace_sched_switch_
  */
 TRACE_EVENT(sched_switch,
 
-	TP_PROTO(struct task_struct *prev,
+	TP_PROTO(bool preempt,
+		 struct task_struct *prev,
 		 struct task_struct *next),
 
-	TP_ARGS(prev, next),
+	TP_ARGS(preempt, prev, next),
 
 	TP_STRUCT__entry(
 		__array(	char,	prev_comm,	TASK_COMM_LEN	)
@@ -147,7 +143,7 @@ TRACE_EVENT(sched_switch,
 		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
 		__entry->prev_pid	= prev->pid;
 		__entry->prev_prio	= prev->prio;
-		__entry->prev_state	= __trace_sched_switch_state(prev);
+		__entry->prev_state	= __trace_sched_switch_state(preempt, prev);
 		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
 		__entry->next_pid	= next->pid;
 		__entry->next_prio	= next->prio;
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2470,7 +2470,6 @@ static inline void
 prepare_task_switch(struct rq *rq, struct task_struct *prev,
 		    struct task_struct *next)
 {
-	trace_sched_switch(prev, next);
 	sched_info_switch(rq, prev, next);
 	perf_event_task_sched_out(prev, next);
 	fire_sched_out_preempt_notifiers(prev, next);
@@ -3121,6 +3120,7 @@ static void __sched __schedule(bool pree
 		rq->curr = next;
 		++*switch_count;
 
+		trace_sched_switch(preempt, prev, next);
 		rq = context_switch(rq, prev, next); /* unlocks the rq */
 		cpu = cpu_of(rq);
 	} else {
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -5697,7 +5697,7 @@ static int alloc_retstack_tasklist(struc
 }
 
 static void
-ftrace_graph_probe_sched_switch(void *ignore,
+ftrace_graph_probe_sched_switch(void *ignore, bool preempt,
 			struct task_struct *prev, struct task_struct *next)
 {
 	unsigned long long timestamp;
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -16,7 +16,8 @@ static int sched_ref;
 static DEFINE_MUTEX(sched_register_mutex);
 
 static void
-probe_sched_switch(void *ignore, struct task_struct *prev, struct task_struct *next)
+probe_sched_switch(void *ignore, bool preempt,
+		   struct task_struct *prev, struct task_struct *next)
 {
 	if (unlikely(!sched_ref))
 		return;
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -420,7 +420,7 @@ tracing_sched_wakeup_trace(struct trace_
 }
 
 static void notrace
-probe_wakeup_sched_switch(void *ignore,
+probe_wakeup_sched_switch(void *ignore, bool preempt,
 	struct task_struct *prev, struct task_struct *next)
 {
 	struct trace_array_cpu *data;
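A note on the resulting trace encoding: a preempted @prev is still
reported as TASK_RUNNING, with the TASK_STATE_MAX bit set on top, which
the sched_switch output renders as the '+' suffix after the state
letter. A consumer could test for it with something like the below
(hypothetical helper, assuming TASK_STATE_MAX as defined in
<linux/sched.h>):

static inline bool prev_was_preempted(long prev_state)
{
	/* TASK_STATE_MAX marks a task preempted while TASK_RUNNING */
	return prev_state & TASK_STATE_MAX;
}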