lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Message-ID: <alpine.DEB.2.20.1710011150330.3874@nanos>
Date:   Sun, 1 Oct 2017 11:54:33 +0200 (CEST)
From:   Thomas Gleixner <tglx@...utronix.de>
To:     Linus Torvalds <torvalds@...ux-foundation.org>
cc:     LKML <linux-kernel@...r.kernel.org>,
        Ingo Molnar <mingo@...nel.org>,
        "H. Peter Anvin" <hpa@...or.com>
Subject: [GIT pull] scheduler fixes for 4.14

Linus,

please pull the latest sched-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git sched-urgent-for-linus

The scheduler pull request comes with the following updates:

  - Prevent a divide by zero issue by validating the input value of
    sysctl_sched_time_avg

  - Make task state printing consistent all over the place and have
    explicit state characters for IDLE and PARKED so they wont be displayed
    as 'D' state which confuses tools.

Thanks,

	tglx

------------------>
Ethan Zhao (1):
      sched/sysctl: Check user input value of sysctl_sched_time_avg

Peter Zijlstra (8):
      sched/debug: Implement consistent task-state printing
      sched/debug: Convert TASK_state to hex
      sched/debug: Remove unused variable
      sched/tracing: Fix trace_sched_switch task-state printing
      sched/tracing: Use common task-state helpers
      sched/debug: Add explicit TASK_IDLE printing
      sched/debug: Ignore TASK_IDLE for SysRq-W
      sched/debug: Add explicit TASK_PARKED printing


 fs/proc/array.c                   | 35 +++++++++------------
 include/linux/sched.h             | 64 ++++++++++++++++++++++++---------------
 include/trace/events/sched.h      | 19 +++++++-----
 kernel/sched/core.c               | 24 ++++++++++++++-
 kernel/sched/debug.c              |  2 --
 kernel/sysctl.c                   |  3 +-
 kernel/trace/trace_output.c       | 21 ++++---------
 kernel/trace/trace_sched_wakeup.c |  8 ++---
 8 files changed, 102 insertions(+), 74 deletions(-)

diff --git a/fs/proc/array.c b/fs/proc/array.c
index 525157ca25cb..77a8eacbe032 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -119,30 +119,25 @@ static inline void task_name(struct seq_file *m, struct task_struct *p)
  * simple bit tests.
  */
 static const char * const task_state_array[] = {
-	"R (running)",		/*   0 */
-	"S (sleeping)",		/*   1 */
-	"D (disk sleep)",	/*   2 */
-	"T (stopped)",		/*   4 */
-	"t (tracing stop)",	/*   8 */
-	"X (dead)",		/*  16 */
-	"Z (zombie)",		/*  32 */
+
+	/* states in TASK_REPORT: */
+	"R (running)",		/* 0x00 */
+	"S (sleeping)",		/* 0x01 */
+	"D (disk sleep)",	/* 0x02 */
+	"T (stopped)",		/* 0x04 */
+	"t (tracing stop)",	/* 0x08 */
+	"X (dead)",		/* 0x10 */
+	"Z (zombie)",		/* 0x20 */
+	"P (parked)",		/* 0x40 */
+
+	/* states beyond TASK_REPORT: */
+	"I (idle)",		/* 0x80 */
 };
 
 static inline const char *get_task_state(struct task_struct *tsk)
 {
-	unsigned int state = (tsk->state | tsk->exit_state) & TASK_REPORT;
-
-	/*
-	 * Parked tasks do not run; they sit in __kthread_parkme().
-	 * Without this check, we would report them as running, which is
-	 * clearly wrong, so we report them as sleeping instead.
-	 */
-	if (tsk->state == TASK_PARKED)
-		state = TASK_INTERRUPTIBLE;
-
-	BUILD_BUG_ON(1 + ilog2(TASK_REPORT) != ARRAY_SIZE(task_state_array)-1);
-
-	return task_state_array[fls(state)];
+	BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != ARRAY_SIZE(task_state_array));
+	return task_state_array[__get_task_state(tsk)];
 }
 
 static inline int get_task_umask(struct task_struct *tsk)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 92fb8dd5a9e4..26a7df4e558c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -65,25 +65,23 @@ struct task_group;
  */
 
 /* Used in tsk->state: */
-#define TASK_RUNNING			0
-#define TASK_INTERRUPTIBLE		1
-#define TASK_UNINTERRUPTIBLE		2
-#define __TASK_STOPPED			4
-#define __TASK_TRACED			8
+#define TASK_RUNNING			0x0000
+#define TASK_INTERRUPTIBLE		0x0001
+#define TASK_UNINTERRUPTIBLE		0x0002
+#define __TASK_STOPPED			0x0004
+#define __TASK_TRACED			0x0008
 /* Used in tsk->exit_state: */
-#define EXIT_DEAD			16
-#define EXIT_ZOMBIE			32
+#define EXIT_DEAD			0x0010
+#define EXIT_ZOMBIE			0x0020
 #define EXIT_TRACE			(EXIT_ZOMBIE | EXIT_DEAD)
 /* Used in tsk->state again: */
-#define TASK_DEAD			64
-#define TASK_WAKEKILL			128
-#define TASK_WAKING			256
-#define TASK_PARKED			512
-#define TASK_NOLOAD			1024
-#define TASK_NEW			2048
-#define TASK_STATE_MAX			4096
-
-#define TASK_STATE_TO_CHAR_STR		"RSDTtXZxKWPNn"
+#define TASK_PARKED			0x0040
+#define TASK_DEAD			0x0080
+#define TASK_WAKEKILL			0x0100
+#define TASK_WAKING			0x0200
+#define TASK_NOLOAD			0x0400
+#define TASK_NEW			0x0800
+#define TASK_STATE_MAX			0x1000
 
 /* Convenience macros for the sake of set_current_state: */
 #define TASK_KILLABLE			(TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
@@ -99,7 +97,8 @@ struct task_group;
 /* get_task_state(): */
 #define TASK_REPORT			(TASK_RUNNING | TASK_INTERRUPTIBLE | \
 					 TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
-					 __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD)
+					 __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE | \
+					 TASK_PARKED)
 
 #define task_is_traced(task)		((task->state & __TASK_TRACED) != 0)
 
@@ -1243,17 +1242,34 @@ static inline pid_t task_pgrp_nr(struct task_struct *tsk)
 	return task_pgrp_nr_ns(tsk, &init_pid_ns);
 }
 
-static inline char task_state_to_char(struct task_struct *task)
+#define TASK_REPORT_IDLE	(TASK_REPORT + 1)
+#define TASK_REPORT_MAX		(TASK_REPORT_IDLE << 1)
+
+static inline unsigned int __get_task_state(struct task_struct *tsk)
+{
+	unsigned int tsk_state = READ_ONCE(tsk->state);
+	unsigned int state = (tsk_state | tsk->exit_state) & TASK_REPORT;
+
+	BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX);
+
+	if (tsk_state == TASK_IDLE)
+		state = TASK_REPORT_IDLE;
+
+	return fls(state);
+}
+
+static inline char __task_state_to_char(unsigned int state)
 {
-	const char stat_nam[] = TASK_STATE_TO_CHAR_STR;
-	unsigned long state = task->state;
+	static const char state_char[] = "RSDTtXZPI";
 
-	state = state ? __ffs(state) + 1 : 0;
+	BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != sizeof(state_char) - 1);
 
-	/* Make sure the string lines up properly with the number of task states: */
-	BUILD_BUG_ON(sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1);
+	return state_char[state];
+}
 
-	return state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?';
+static inline char task_state_to_char(struct task_struct *tsk)
+{
+	return __task_state_to_char(__get_task_state(tsk));
 }
 
 /**
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index ae1409ffe99a..3c8b7f625670 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -114,7 +114,10 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct *
 	 * Preemption ignores task state, therefore preempted tasks are always
 	 * RUNNING (we will not have dequeued if state != RUNNING).
 	 */
-	return preempt ? TASK_RUNNING | TASK_STATE_MAX : p->state;
+	if (preempt)
+		return TASK_STATE_MAX;
+
+	return __get_task_state(p);
 }
 #endif /* CREATE_TRACE_POINTS */
 
@@ -152,12 +155,14 @@ TRACE_EVENT(sched_switch,
 
 	TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d",
 		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
-		__entry->prev_state & (TASK_STATE_MAX-1) ?
-		  __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|",
-				{ 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
-				{ 16, "Z" }, { 32, "X" }, { 64, "x" },
-				{ 128, "K" }, { 256, "W" }, { 512, "P" },
-				{ 1024, "N" }) : "R",
+
+		(__entry->prev_state & (TASK_REPORT_MAX - 1)) ?
+		  __print_flags(__entry->prev_state & (TASK_REPORT_MAX - 1), "|",
+				{ 0x01, "S" }, { 0x02, "D" }, { 0x04, "T" },
+				{ 0x08, "t" }, { 0x10, "X" }, { 0x20, "Z" },
+				{ 0x40, "P" }, { 0x80, "I" }) :
+		  "R",
+
 		__entry->prev_state & TASK_STATE_MAX ? "+" : "",
 		__entry->next_comm, __entry->next_pid, __entry->next_prio)
 );
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 18a6966567da..d17c5da523a0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5166,6 +5166,28 @@ void sched_show_task(struct task_struct *p)
 	put_task_stack(p);
 }
 
+static inline bool
+state_filter_match(unsigned long state_filter, struct task_struct *p)
+{
+	/* no filter, everything matches */
+	if (!state_filter)
+		return true;
+
+	/* filter, but doesn't match */
+	if (!(p->state & state_filter))
+		return false;
+
+	/*
+	 * When looking for TASK_UNINTERRUPTIBLE skip TASK_IDLE (allows
+	 * TASK_KILLABLE).
+	 */
+	if (state_filter == TASK_UNINTERRUPTIBLE && p->state == TASK_IDLE)
+		return false;
+
+	return true;
+}
+
+
 void show_state_filter(unsigned long state_filter)
 {
 	struct task_struct *g, *p;
@@ -5188,7 +5210,7 @@ void show_state_filter(unsigned long state_filter)
 		 */
 		touch_nmi_watchdog();
 		touch_all_softlockup_watchdogs();
-		if (!state_filter || (p->state & state_filter))
+		if (state_filter_match(state_filter, p))
 			sched_show_task(p);
 	}
 
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 01217fb5a5de..2f93e4a2d9f6 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -466,8 +466,6 @@ static char *task_group_path(struct task_group *tg)
 }
 #endif
 
-static const char stat_nam[] = TASK_STATE_TO_CHAR_STR;
-
 static void
 print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 {
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 6648fbbb8157..423554ad3610 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -367,7 +367,8 @@ static struct ctl_table kern_table[] = {
 		.data		= &sysctl_sched_time_avg,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one,
 	},
 #ifdef CONFIG_SCHEDSTATS
 	{
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index bac629af2285..c738e764e2a5 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -656,15 +656,6 @@ int trace_print_lat_context(struct trace_iterator *iter)
 	return !trace_seq_has_overflowed(s);
 }
 
-static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
-
-static int task_state_char(unsigned long state)
-{
-	int bit = state ? __ffs(state) + 1 : 0;
-
-	return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?';
-}
-
 /**
  * ftrace_find_event - find a registered event
  * @type: the type of event to look for
@@ -930,8 +921,8 @@ static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter,
 
 	trace_assign_type(field, iter->ent);
 
-	T = task_state_char(field->next_state);
-	S = task_state_char(field->prev_state);
+	T = __task_state_to_char(field->next_state);
+	S = __task_state_to_char(field->prev_state);
 	trace_find_cmdline(field->next_pid, comm);
 	trace_seq_printf(&iter->seq,
 			 " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
@@ -966,8 +957,8 @@ static int trace_ctxwake_raw(struct trace_iterator *iter, char S)
 	trace_assign_type(field, iter->ent);
 
 	if (!S)
-		S = task_state_char(field->prev_state);
-	T = task_state_char(field->next_state);
+		S = __task_state_to_char(field->prev_state);
+	T = __task_state_to_char(field->next_state);
 	trace_seq_printf(&iter->seq, "%d %d %c %d %d %d %c\n",
 			 field->prev_pid,
 			 field->prev_prio,
@@ -1002,8 +993,8 @@ static int trace_ctxwake_hex(struct trace_iterator *iter, char S)
 	trace_assign_type(field, iter->ent);
 
 	if (!S)
-		S = task_state_char(field->prev_state);
-	T = task_state_char(field->next_state);
+		S = __task_state_to_char(field->prev_state);
+	T = __task_state_to_char(field->next_state);
 
 	SEQ_PUT_HEX_FIELD(s, field->prev_pid);
 	SEQ_PUT_HEX_FIELD(s, field->prev_prio);
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index ddec53b67646..0c331978b1a6 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -397,10 +397,10 @@ tracing_sched_switch_trace(struct trace_array *tr,
 	entry	= ring_buffer_event_data(event);
 	entry->prev_pid			= prev->pid;
 	entry->prev_prio		= prev->prio;
-	entry->prev_state		= prev->state;
+	entry->prev_state		= __get_task_state(prev);
 	entry->next_pid			= next->pid;
 	entry->next_prio		= next->prio;
-	entry->next_state		= next->state;
+	entry->next_state		= __get_task_state(next);
 	entry->next_cpu	= task_cpu(next);
 
 	if (!call_filter_check_discard(call, entry, buffer, event))
@@ -425,10 +425,10 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
 	entry	= ring_buffer_event_data(event);
 	entry->prev_pid			= curr->pid;
 	entry->prev_prio		= curr->prio;
-	entry->prev_state		= curr->state;
+	entry->prev_state		= __get_task_state(curr);
 	entry->next_pid			= wakee->pid;
 	entry->next_prio		= wakee->prio;
-	entry->next_state		= wakee->state;
+	entry->next_state		= __get_task_state(wakee);
 	entry->next_cpu			= task_cpu(wakee);
 
 	if (!call_filter_check_discard(call, entry, buffer, event))

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ