Message-ID: <20170412160432.481ee5fb@gandalf.local.home>
Date:   Wed, 12 Apr 2017 16:04:32 -0400
From:   Steven Rostedt <rostedt@...dmis.org>
To:     Mel Gorman <mgorman@...e.de>
Cc:     LKML <linux-kernel@...r.kernel.org>,
        Peter Zijlstra <peterz@...radead.org>,
        Ingo Molnar <mingo@...nel.org>,
        Matt Fleming <matt@...eblueprint.co.uk>,
        Srikar Dronamraju <srikar@...ux.vnet.ibm.com>
Subject: [PATCH] sched: Enable schedstat when schedstat tracepoints are enabled

From: Steven Rostedt (VMware) <rostedt@...dmis.org>

During my tests, I see this in my dmesg:

"Scheduler tracepoints stat_sleep, stat_iowait, stat_blocked and
stat_runtime require the kernel parameter schedstats=enabled or kernel.sched_schedstats=1"

Tracking it down, I found the commit:

cb2517653fc ("sched/debug: Make schedstats a runtime tunable that is disabled by default")

Which states:

"For tracepoints, there is a simple warning as it's not safe to activate
 schedstats in the context when it's known the tracepoint may be wanted
 but is unavailable."

I'm assuming that Mel did not know about TRACE_EVENT_FN() and
DEFINE_EVENT_FN(), which allow callbacks to be attached to tracepoints
as they are enabled and disabled. I do not see any reason not to enable
schedstat when one of its tracepoints is enabled.
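
For reference, DEFINE_EVENT_FN() takes the same arguments as
DEFINE_EVENT(), plus a register and an unregister callback that are
invoked when the tracepoint is switched on and off. Roughly (the names
here are only placeholders):

    DEFINE_EVENT_FN(template, name,
                    TP_PROTO(proto), TP_ARGS(args),
                    reg_fn, unreg_fn);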

The state of schedstat is saved when the first of these tracepoints is
enabled, and restored when the last of them is disabled.
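
To see the effect (a quick check, assuming tracefs is mounted at
/sys/kernel/tracing; on older setups use /sys/kernel/debug/tracing):

  cat /proc/sys/kernel/sched_schedstats    # 0 if schedstats is off
  echo 1 > /sys/kernel/tracing/events/sched/sched_stat_runtime/enable
  cat /proc/sys/kernel/sched_schedstats    # now 1
  echo 0 > /sys/kernel/tracing/events/sched/sched_stat_runtime/enable
  cat /proc/sys/kernel/sched_schedstats    # back to 0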

Signed-off-by: Steven Rostedt (VMware) <rostedt@...dmis.org>
---
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d67eee8..691ab93f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1456,6 +1456,14 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
 	return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
 }
 
+#if defined(CONFIG_TRACING) && defined(CONFIG_SCHEDSTATS)
+int schedstat_tracepoint_reg(void);
+void schedstat_tracepoint_unreg(void);
+#else
+static inline int schedstat_tracepoint_reg(void) { return 0; }
+static inline void schedstat_tracepoint_unreg(void) { }
+#endif
+
 /*
  * cond_resched() and cond_resched_lock(): latency reduction via
  * explicit rescheduling in places that are safe. The return
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 9e3ef6c..ab8fa69 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -346,32 +346,36 @@ DECLARE_EVENT_CLASS(sched_stat_template,
  * Tracepoint for accounting wait time (time the task is runnable
  * but not actually running due to scheduler contention).
  */
-DEFINE_EVENT(sched_stat_template, sched_stat_wait,
+DEFINE_EVENT_FN(sched_stat_template, sched_stat_wait,
 	     TP_PROTO(struct task_struct *tsk, u64 delay),
-	     TP_ARGS(tsk, delay));
+	     TP_ARGS(tsk, delay), schedstat_tracepoint_reg,
+	     schedstat_tracepoint_unreg);
 
 /*
  * Tracepoint for accounting sleep time (time the task is not runnable,
  * including iowait, see below).
  */
-DEFINE_EVENT(sched_stat_template, sched_stat_sleep,
+DEFINE_EVENT_FN(sched_stat_template, sched_stat_sleep,
 	     TP_PROTO(struct task_struct *tsk, u64 delay),
-	     TP_ARGS(tsk, delay));
+	     TP_ARGS(tsk, delay), schedstat_tracepoint_reg,
+	     schedstat_tracepoint_unreg);
 
 /*
  * Tracepoint for accounting iowait time (time the task is not runnable
  * due to waiting on IO to complete).
  */
-DEFINE_EVENT(sched_stat_template, sched_stat_iowait,
+DEFINE_EVENT_FN(sched_stat_template, sched_stat_iowait,
 	     TP_PROTO(struct task_struct *tsk, u64 delay),
-	     TP_ARGS(tsk, delay));
+	     TP_ARGS(tsk, delay), schedstat_tracepoint_reg,
+	     schedstat_tracepoint_unreg);
 
 /*
  * Tracepoint for accounting blocked time (time the task is in uninterruptible).
  */
-DEFINE_EVENT(sched_stat_template, sched_stat_blocked,
+DEFINE_EVENT_FN(sched_stat_template, sched_stat_blocked,
 	     TP_PROTO(struct task_struct *tsk, u64 delay),
-	     TP_ARGS(tsk, delay));
+	     TP_ARGS(tsk, delay), schedstat_tracepoint_reg,
+	     schedstat_tracepoint_unreg);
 
 /*
  * Tracepoint for accounting runtime (time the task is executing
@@ -403,9 +407,10 @@ DECLARE_EVENT_CLASS(sched_stat_runtime,
 			(unsigned long long)__entry->vruntime)
 );
 
-DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime,
+DEFINE_EVENT_FN(sched_stat_runtime, sched_stat_runtime,
 	     TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
-	     TP_ARGS(tsk, runtime, vruntime));
+	     TP_ARGS(tsk, runtime, vruntime), schedstat_tracepoint_reg,
+	     schedstat_tracepoint_unreg);
 
 /*
  * Tracepoint for showing priority inheritance modifying a tasks
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3b31fc0..95e9ce9 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2266,6 +2266,8 @@ int sysctl_numa_balancing(struct ctl_table *table, int write,
 
 DEFINE_STATIC_KEY_FALSE(sched_schedstats);
 static bool __initdata __sched_schedstats = false;
+static int schedstat_tracepoint_ref;
+static bool schedstat_save_state;
 
 static void set_schedstats(bool enabled)
 {
@@ -2275,6 +2277,33 @@ static void set_schedstats(bool enabled)
 		static_branch_disable(&sched_schedstats);
 }
 
+/*
+ * schedstat_tracepoint_reg() and unreg() are called by the tracepoint
+ * regfunc/unregfunc functions. They are protected by the tracepoint mutex.
+ *     See kernel/tracepoint.c:tracepoint_add_func().
+ *
+ * The modifications to schedstat_tracepoint_ref and schedstat_save_state
+ * are only done under that mutex, and do not need further protection.
+ */
+int schedstat_tracepoint_reg(void)
+{
+	if (!schedstat_tracepoint_ref) {
+		schedstat_save_state = schedstat_enabled();
+		if (!schedstat_save_state)
+			set_schedstats(true);
+	}
+	schedstat_tracepoint_ref++;
+	return 0;
+}
+
+void schedstat_tracepoint_unreg(void)
+{
+	schedstat_tracepoint_ref--;
+	if (schedstat_tracepoint_ref || schedstat_save_state)
+		return;
+	set_schedstats(false);
+}
+
 void force_schedstat_enabled(void)
 {
 	if (!schedstat_enabled()) {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index dea1389..d6665f6 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3526,27 +3526,6 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 
 static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
 
-static inline void check_schedstat_required(void)
-{
-#ifdef CONFIG_SCHEDSTATS
-	if (schedstat_enabled())
-		return;
-
-	/* Force schedstat enabled if a dependent tracepoint is active */
-	if (trace_sched_stat_wait_enabled()    ||
-			trace_sched_stat_sleep_enabled()   ||
-			trace_sched_stat_iowait_enabled()  ||
-			trace_sched_stat_blocked_enabled() ||
-			trace_sched_stat_runtime_enabled())  {
-		printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, "
-			     "stat_blocked and stat_runtime require the "
-			     "kernel parameter schedstats=enabled or "
-			     "kernel.sched_schedstats=1\n");
-	}
-#endif
-}
-
-
 /*
  * MIGRATION
  *
@@ -3617,7 +3596,6 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	if (flags & ENQUEUE_WAKEUP)
 		place_entity(cfs_rq, se, 0);
 
-	check_schedstat_required();
 	update_stats_enqueue(cfs_rq, se, flags);
 	check_spread(cfs_rq, se);
 	if (!curr)
