lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date:   Wed, 12 Apr 2017 22:56:07 -0400
From:   Steven Rostedt <rostedt@...dmis.org>
To:     Mel Gorman <mgorman@...e.de>
Cc:     LKML <linux-kernel@...r.kernel.org>,
        Peter Zijlstra <peterz@...radead.org>,
        Ingo Molnar <mingo@...nel.org>,
        Matt Fleming <matt@...eblueprint.co.uk>,
        Srikar Dronamraju <srikar@...ux.vnet.ibm.com>
Subject: [PATCH v2] sched: Enabled schedstat when schedstat tracepoints are
 enabled

From: Steven Rostedt (VMware) <rostedt@...dmis.org>

During my tests, I see this in my dmesg:

"Scheduler tracepoints stat_sleep, stat_iowait, stat_blocked and
stat_runtime require the kernel parameter schedstats=enabled or
kernel.sched_schedstats=1"

And found the commit:

cb2517653fc ("sched/debug: Make schedstats a runtime tunable that is
disabled by default")

Which states:

"For tracepoints, there is a simple warning as it's not safe to activate
 schedstats in the context when it's known the tracepoint may be wanted
 but is unavailable."

I'm assuming that Mel did not know about the TRACE_EVENT_FN() and
DEFINE_EVENT_FN() that allow for callbacks for tracepoints as they are
enabled and disabled. I do not see any reason for not enabling
schedstat when one of its tracepoints are enabled.

The state of schedstat is saved when the first tracepoint is enabled,
and that state is put back when the tracepoints are disabled.

Signed-off-by: Steven Rostedt (VMware) <rostedt@...dmis.org>
---
Changes since v1:

  o Added #ifdef CONFIG_TRACING around schedstat_tracepoint functions
    (thanks to the kbuild zero day bot for finding this)

  o Added return 0 to schedstat_tracepoint_reg()

Index: linux-trace.git/include/linux/sched.h
===================================================================
--- linux-trace.git.orig/include/linux/sched.h
+++ linux-trace.git/include/linux/sched.h
@@ -1456,6 +1456,14 @@ static inline int test_tsk_need_resched(
 	return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
 }
 
+#if defined(CONFIG_TRACING) && defined(CONFIG_SCHEDSTATS)
+int schedstat_tracepoint_reg(void);
+void schedstat_tracepoint_unreg(void);
+#else
+static inline int schedstat_tracepoint_reg(void) { return 0; }
+static inline void schedstat_tracepoint_unreg(void) { }
+#endif
+
 /*
  * cond_resched() and cond_resched_lock(): latency reduction via
  * explicit rescheduling in places that are safe. The return
Index: linux-trace.git/include/trace/events/sched.h
===================================================================
--- linux-trace.git.orig/include/trace/events/sched.h
+++ linux-trace.git/include/trace/events/sched.h
@@ -346,32 +346,36 @@ DECLARE_EVENT_CLASS(sched_stat_template,
  * Tracepoint for accounting wait time (time the task is runnable
  * but not actually running due to scheduler contention).
  */
-DEFINE_EVENT(sched_stat_template, sched_stat_wait,
+DEFINE_EVENT_FN(sched_stat_template, sched_stat_wait,
 	     TP_PROTO(struct task_struct *tsk, u64 delay),
-	     TP_ARGS(tsk, delay));
+	     TP_ARGS(tsk, delay), schedstat_tracepoint_reg,
+	     schedstat_tracepoint_unreg);
 
 /*
  * Tracepoint for accounting sleep time (time the task is not runnable,
  * including iowait, see below).
  */
-DEFINE_EVENT(sched_stat_template, sched_stat_sleep,
+DEFINE_EVENT_FN(sched_stat_template, sched_stat_sleep,
 	     TP_PROTO(struct task_struct *tsk, u64 delay),
-	     TP_ARGS(tsk, delay));
+	     TP_ARGS(tsk, delay), schedstat_tracepoint_reg,
+	     schedstat_tracepoint_unreg);
 
 /*
  * Tracepoint for accounting iowait time (time the task is not runnable
  * due to waiting on IO to complete).
  */
-DEFINE_EVENT(sched_stat_template, sched_stat_iowait,
+DEFINE_EVENT_FN(sched_stat_template, sched_stat_iowait,
 	     TP_PROTO(struct task_struct *tsk, u64 delay),
-	     TP_ARGS(tsk, delay));
+	     TP_ARGS(tsk, delay), schedstat_tracepoint_reg,
+	     schedstat_tracepoint_unreg);
 
 /*
  * Tracepoint for accounting blocked time (time the task is in uninterruptible).
  */
-DEFINE_EVENT(sched_stat_template, sched_stat_blocked,
+DEFINE_EVENT_FN(sched_stat_template, sched_stat_blocked,
 	     TP_PROTO(struct task_struct *tsk, u64 delay),
-	     TP_ARGS(tsk, delay));
+	     TP_ARGS(tsk, delay), schedstat_tracepoint_reg,
+	     schedstat_tracepoint_unreg);
 
 /*
  * Tracepoint for accounting runtime (time the task is executing
@@ -403,9 +407,10 @@ DECLARE_EVENT_CLASS(sched_stat_runtime,
 			(unsigned long long)__entry->vruntime)
 );
 
-DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime,
+DEFINE_EVENT_FN(sched_stat_runtime, sched_stat_runtime,
 	     TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
-	     TP_ARGS(tsk, runtime, vruntime));
+	     TP_ARGS(tsk, runtime, vruntime), schedstat_tracepoint_reg,
+	     schedstat_tracepoint_unreg);
 
 /*
  * Tracepoint for showing priority inheritance modifying a tasks
Index: linux-trace.git/kernel/sched/core.c
===================================================================
--- linux-trace.git.orig/kernel/sched/core.c
+++ linux-trace.git/kernel/sched/core.c
@@ -2275,6 +2275,37 @@ static void set_schedstats(bool enabled)
 		static_branch_disable(&sched_schedstats);
 }
 
+#ifdef CONFIG_TRACING
+static int schedstat_tracepoint_ref;
+static bool schedstat_save_state;
+/*
+ * schedstat_tracepoint_reg() and unreg() are called by the tracepoint
+ * regfunc/unregfunc functions. They are protected by the tracepoint mutex.
+ *     See kernel/tracepoint.c:tracepoint_add_func().
+ *
+ * The modifications to schedstat_tracepoint_ref and schedstat_save_state
+ * are only done under that mutex, and do not need further protection.
+ */
+int schedstat_tracepoint_reg(void)
+{
+	if (!schedstat_tracepoint_ref) {
+		schedstat_save_state = schedstat_enabled();
+		if (!schedstat_save_state)
+			set_schedstats(true);
+	}
+	schedstat_tracepoint_ref++;
+	return 0;
+}
+
+void schedstat_tracepoint_unreg(void)
+{
+	schedstat_tracepoint_ref--;
+	if (schedstat_tracepoint_ref || schedstat_save_state)
+		return;
+	set_schedstats(false);
+}
+#endif
+
 void force_schedstat_enabled(void)
 {
 	if (!schedstat_enabled()) {
Index: linux-trace.git/kernel/sched/fair.c
===================================================================
--- linux-trace.git.orig/kernel/sched/fair.c
+++ linux-trace.git/kernel/sched/fair.c
@@ -3526,27 +3526,6 @@ place_entity(struct cfs_rq *cfs_rq, stru
 
 static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
 
-static inline void check_schedstat_required(void)
-{
-#ifdef CONFIG_SCHEDSTATS
-	if (schedstat_enabled())
-		return;
-
-	/* Force schedstat enabled if a dependent tracepoint is active */
-	if (trace_sched_stat_wait_enabled()    ||
-			trace_sched_stat_sleep_enabled()   ||
-			trace_sched_stat_iowait_enabled()  ||
-			trace_sched_stat_blocked_enabled() ||
-			trace_sched_stat_runtime_enabled())  {
-		printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, "
-			     "stat_blocked and stat_runtime require the "
-			     "kernel parameter schedstats=enabled or "
-			     "kernel.sched_schedstats=1\n");
-	}
-#endif
-}
-
-
 /*
  * MIGRATION
  *
@@ -3617,7 +3596,6 @@ enqueue_entity(struct cfs_rq *cfs_rq, st
 	if (flags & ENQUEUE_WAKEUP)
 		place_entity(cfs_rq, se, 0);
 
-	check_schedstat_required();
 	update_stats_enqueue(cfs_rq, se, flags);
 	check_spread(cfs_rq, se);
 	if (!curr)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ