[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20170412160432.481ee5fb@gandalf.local.home>
Date: Wed, 12 Apr 2017 22:56:07 -0400
From: Steven Rostedt <rostedt@...dmis.org>
To: Mel Gorman <mgorman@...e.de>
Cc: LKML <linux-kernel@...r.kernel.org>,
Peter Zijlstra <peterz@...radead.org>,
Ingo Molnar <mingo@...nel.org>,
Matt Fleming <matt@...eblueprint.co.uk>,
Srikar Dronamraju <srikar@...ux.vnet.ibm.com>
Subject: [PATCH v2] sched: Enabled schedstat when schedstat tracepoints are
enabled
From: Steven Rostedt (VMware) <rostedt@...dmis.org>
During my tests, I see this in my dmesg:
"Scheduler tracepoints stat_sleep, stat_iowait, stat_blocked and
stat_runtime require the kernel parameter schedstats=enabled or
kernel.sched_schedstats=1"
And found the commit:
cb2517653fc ("sched/debug: Make schedstats a runtime tunable that is
disabled by default")
Which states:
"For tracepoints, there is a simple warning as it's not safe to activate
schedstats in the context when it's known the tracepoint may be wanted
but is unavailable."
I'm assuming that Mel did not know about the TRACE_EVENT_FN() and
DEFINE_EVENT_FN() that allow for callbacks for tracepoints as they are
enabled and disabled. I do not see any reason for not enabling
schedstat when one of its tracepoints are enabled.
The state of schedstat is saved when the first tracepoint is enabled,
and that state is put back when the tracepoints are disabled.
Signed-off-by: Steven Rostedt (VMware) <rostedt@...dmis.org>
---
Changes since v1:
o Added #ifdef CONFIG_TRACING around schedstat_tracepoint functions
(thanks to the kbuild zero day bot for finding this)
o Added return 0 to schedstat_tracepoint_reg()
Index: linux-trace.git/include/linux/sched.h
===================================================================
--- linux-trace.git.orig/include/linux/sched.h
+++ linux-trace.git/include/linux/sched.h
@@ -1456,6 +1456,14 @@ static inline int test_tsk_need_resched(
return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
}
+#if defined(CONFIG_TRACING) && defined(CONFIG_SCHEDSTATS)
+int schedstat_tracepoint_reg(void);
+void schedstat_tracepoint_unreg(void);
+#else
+static inline int schedstat_tracepoint_reg(void) { return 0; }
+static inline void schedstat_tracepoint_unreg(void) { }
+#endif
+
/*
* cond_resched() and cond_resched_lock(): latency reduction via
* explicit rescheduling in places that are safe. The return
Index: linux-trace.git/include/trace/events/sched.h
===================================================================
--- linux-trace.git.orig/include/trace/events/sched.h
+++ linux-trace.git/include/trace/events/sched.h
@@ -346,32 +346,36 @@ DECLARE_EVENT_CLASS(sched_stat_template,
* Tracepoint for accounting wait time (time the task is runnable
* but not actually running due to scheduler contention).
*/
-DEFINE_EVENT(sched_stat_template, sched_stat_wait,
+DEFINE_EVENT_FN(sched_stat_template, sched_stat_wait,
TP_PROTO(struct task_struct *tsk, u64 delay),
- TP_ARGS(tsk, delay));
+ TP_ARGS(tsk, delay), schedstat_tracepoint_reg,
+ schedstat_tracepoint_unreg);
/*
* Tracepoint for accounting sleep time (time the task is not runnable,
* including iowait, see below).
*/
-DEFINE_EVENT(sched_stat_template, sched_stat_sleep,
+DEFINE_EVENT_FN(sched_stat_template, sched_stat_sleep,
TP_PROTO(struct task_struct *tsk, u64 delay),
- TP_ARGS(tsk, delay));
+ TP_ARGS(tsk, delay), schedstat_tracepoint_reg,
+ schedstat_tracepoint_unreg);
/*
* Tracepoint for accounting iowait time (time the task is not runnable
* due to waiting on IO to complete).
*/
-DEFINE_EVENT(sched_stat_template, sched_stat_iowait,
+DEFINE_EVENT_FN(sched_stat_template, sched_stat_iowait,
TP_PROTO(struct task_struct *tsk, u64 delay),
- TP_ARGS(tsk, delay));
+ TP_ARGS(tsk, delay), schedstat_tracepoint_reg,
+ schedstat_tracepoint_unreg);
/*
* Tracepoint for accounting blocked time (time the task is in uninterruptible).
*/
-DEFINE_EVENT(sched_stat_template, sched_stat_blocked,
+DEFINE_EVENT_FN(sched_stat_template, sched_stat_blocked,
TP_PROTO(struct task_struct *tsk, u64 delay),
- TP_ARGS(tsk, delay));
+ TP_ARGS(tsk, delay), schedstat_tracepoint_reg,
+ schedstat_tracepoint_unreg);
/*
* Tracepoint for accounting runtime (time the task is executing
@@ -403,9 +407,10 @@ DECLARE_EVENT_CLASS(sched_stat_runtime,
(unsigned long long)__entry->vruntime)
);
-DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime,
+DEFINE_EVENT_FN(sched_stat_runtime, sched_stat_runtime,
TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
- TP_ARGS(tsk, runtime, vruntime));
+ TP_ARGS(tsk, runtime, vruntime), schedstat_tracepoint_reg,
+ schedstat_tracepoint_unreg);
/*
* Tracepoint for showing priority inheritance modifying a tasks
Index: linux-trace.git/kernel/sched/core.c
===================================================================
--- linux-trace.git.orig/kernel/sched/core.c
+++ linux-trace.git/kernel/sched/core.c
@@ -2275,6 +2275,37 @@ static void set_schedstats(bool enabled)
static_branch_disable(&sched_schedstats);
}
+#ifdef CONFIG_TRACING
+static int schedstat_tracepoint_ref;
+static bool schedstat_save_state;
+/*
+ * schedstat_tracepoint_reg() and unreg() are called by the tracepoint
+ * regfunc/unregfunc functions. They are protected by the tracepoint mutex.
+ * See kernel/tracepoint.c:tracepoint_add_func().
+ *
+ * The modifications to schedstat_tracepoint_ref and schedstat_save_state
+ * are only done under that mutex, and do not need further protection.
+ */
+int schedstat_tracepoint_reg(void)
+{
+ if (!schedstat_tracepoint_ref) {
+ schedstat_save_state = schedstat_enabled();
+ if (!schedstat_save_state)
+ set_schedstats(true);
+ }
+ schedstat_tracepoint_ref++;
+ return 0;
+}
+
+void schedstat_tracepoint_unreg(void)
+{
+ schedstat_tracepoint_ref--;
+ if (schedstat_tracepoint_ref || schedstat_save_state)
+ return;
+ set_schedstats(false);
+}
+#endif
+
void force_schedstat_enabled(void)
{
if (!schedstat_enabled()) {
Index: linux-trace.git/kernel/sched/fair.c
===================================================================
--- linux-trace.git.orig/kernel/sched/fair.c
+++ linux-trace.git/kernel/sched/fair.c
@@ -3526,27 +3526,6 @@ place_entity(struct cfs_rq *cfs_rq, stru
static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
-static inline void check_schedstat_required(void)
-{
-#ifdef CONFIG_SCHEDSTATS
- if (schedstat_enabled())
- return;
-
- /* Force schedstat enabled if a dependent tracepoint is active */
- if (trace_sched_stat_wait_enabled() ||
- trace_sched_stat_sleep_enabled() ||
- trace_sched_stat_iowait_enabled() ||
- trace_sched_stat_blocked_enabled() ||
- trace_sched_stat_runtime_enabled()) {
- printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, "
- "stat_blocked and stat_runtime require the "
- "kernel parameter schedstats=enabled or "
- "kernel.sched_schedstats=1\n");
- }
-#endif
-}
-
-
/*
* MIGRATION
*
@@ -3617,7 +3596,6 @@ enqueue_entity(struct cfs_rq *cfs_rq, st
if (flags & ENQUEUE_WAKEUP)
place_entity(cfs_rq, se, 0);
- check_schedstat_required();
update_stats_enqueue(cfs_rq, se, flags);
check_spread(cfs_rq, se);
if (!curr)
Powered by blists - more mailing lists