Message-Id: <1407797345-28227-15-git-send-email-paulmck@linux.vnet.ibm.com>
Date:	Mon, 11 Aug 2014 15:49:04 -0700
From:	"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>
To:	linux-kernel@...r.kernel.org
Cc:	mingo@...nel.org, laijs@...fujitsu.com, dipankar@...ibm.com,
	akpm@...ux-foundation.org, mathieu.desnoyers@...icios.com,
	josh@...htriplett.org, tglx@...utronix.de, peterz@...radead.org,
	rostedt@...dmis.org, dhowells@...hat.com, edumazet@...gle.com,
	dvhart@...ux.intel.com, fweisbec@...il.com, oleg@...hat.com,
	bobby.prani@...il.com,
	"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>
Subject: [PATCH v5 tip/core/rcu 15/16] rcu: Make RCU-tasks wait for idle tasks

From: "Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>

Because idle-task code may need to be patched, RCU-tasks needs to wait
for idle tasks to schedule.  This commit therefore treats a currently
running idle task as a holdout and forces it through a context switch
by briefly migrating the RCU-tasks kthread onto that idle task's CPU.
CPU hotplug is excluded while idle tasks are being handled in order to
avoid sending IPIs to offline CPUs.
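
To make the forcing step concrete, here is a minimal sketch of that
migrate-and-return trick, condensed from the diff below.  The helper
name rcu_tasks_kick_idle_cpu() is made up for illustration; cpu_rq(),
cpu_curr(), cpumask_of(), and set_cpus_allowed_ptr() are the existing
kernel facilities the patch itself uses, and locking, holdout-list
handling, and reporting are all omitted:

	/* Sketch only: force the idle task on @cpu through a context switch. */
	static void rcu_tasks_kick_idle_cpu(int cpu)
	{
		struct task_struct *idle = cpu_rq(cpu)->idle;

		if (cpu_curr(cpu) != idle)
			return;	/* Idle task not currently running. */

		/*
		 * Migrating ourselves onto @cpu forces whatever is running
		 * there (here, the idle task) to schedule, which is exactly
		 * the voluntary context switch that RCU-tasks waits for.
		 */
		set_cpus_allowed_ptr(current, cpumask_of(cpu));
		set_cpus_allowed_ptr(current, cpu_online_mask);
	}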

Note that checking for changes in the dyntick-idle counters is tempting,
but wrong.  The reason it is wrong is that an interrupt or NMI can
increment these counters without necessarily allowing the idle task to
make any forward progress.
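
For illustration, the rejected check would look roughly like the sketch
below.  The snapshot field and the rcu_tasks_dynticks_snap() helper are
hypothetical stand-ins for sampling the per-CPU dyntick-idle counter;
the point is only that the comparison can succeed for the wrong reason:

	/*
	 * Rejected approach (sketch): snapshot the dyntick-idle counter
	 * when the idle task is marked as a holdout ...
	 */
	t->rcu_tasks_dynticks_snap = rcu_tasks_dynticks_snap(cpu);

	/*
	 * ... and later declare the idle task quiescent if the counter
	 * has changed.  This is WRONG: an interrupt or NMI entering and
	 * leaving the idle CPU also increments the counter, even though
	 * the idle task itself never went through a context switch.
	 */
	if (rcu_tasks_dynticks_snap(cpu) != t->rcu_tasks_dynticks_snap)
		goto not_holdout;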

Signed-off-by: Paul E. McKenney <paulmck@...ux.vnet.ibm.com>
---
 kernel/rcu/update.c | 65 ++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 54 insertions(+), 11 deletions(-)

diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 2ae6fb8752d4..9ea2a26487c5 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -48,6 +48,7 @@
 #include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/kthread.h>
+#include "../sched/sched.h" /* cpu_rq()->idle */
 
 #define CREATE_TRACE_POINTS
 
@@ -464,15 +465,33 @@ EXPORT_SYMBOL_GPL(rcu_barrier_tasks);
 static void check_holdout_task(struct task_struct *t,
 			       bool needreport, bool *firstreport)
 {
-	if (!ACCESS_ONCE(t->rcu_tasks_holdout) ||
-	    t->rcu_tasks_nvcsw != ACCESS_ONCE(t->nvcsw) ||
-	    !ACCESS_ONCE(t->on_rq) ||
-	    (IS_ENABLED(CONFIG_NO_HZ_FULL) &&
-	     !is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) {
-		ACCESS_ONCE(t->rcu_tasks_holdout) = 0;
-		list_del_init(&t->rcu_tasks_holdout_list);
-		put_task_struct(t);
-		return;
+	if (!ACCESS_ONCE(t->rcu_tasks_holdout))
+		goto not_holdout; /* Other detection of non-holdout status. */
+	if (t->rcu_tasks_nvcsw != ACCESS_ONCE(t->nvcsw))
+		goto not_holdout; /* Voluntary context switch. */
+	if (!ACCESS_ONCE(t->on_rq))
+		goto not_holdout; /* Not on runqueue. */
+	if (IS_ENABLED(CONFIG_NO_HZ_FULL) &&
+	    !is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)
+		goto not_holdout; /* NO_HZ_FULL userspace execution. */
+	if (is_idle_task(t)) {
+		int cpu;
+
+		cpu = task_cpu(t);
+		if (cpu >= 0 && cpu_curr(cpu) != t)
+			goto not_holdout; /* Idle task not running. */
+
+		if (cpu >= 0) {
+			/*
+			 * We must schedule on the idle CPU.  Note that
+			 * checking for changes in dyntick-idle counters
+			 * is not sufficient, as an interrupt or NMI can
+			 * change these counters without guaranteeing that
+			 * the underlying idle task has made progress.
+			 */
+			set_cpus_allowed_ptr(current, cpumask_of(cpu));
+			set_cpus_allowed_ptr(current, cpu_online_mask);
+		}
 	}
 	if (!needreport)
 		return;
@@ -481,11 +500,17 @@ static void check_holdout_task(struct task_struct *t,
 		*firstreport = false;
 	}
 	sched_show_task(t);
+	return;
+not_holdout:
+	ACCESS_ONCE(t->rcu_tasks_holdout) = 0;
+	list_del_init(&t->rcu_tasks_holdout_list);
+	put_task_struct(t);
 }
 
 /* RCU-tasks kthread that detects grace periods and invokes callbacks. */
 static int __noreturn rcu_tasks_kthread(void *arg)
 {
+	int cpu;
 	unsigned long flags;
 	struct task_struct *g, *t;
 	unsigned long lastreport;
@@ -546,8 +571,7 @@ static int __noreturn rcu_tasks_kthread(void *arg)
 		 */
 		rcu_read_lock();
 		for_each_process_thread(g, t) {
-			if (t != current && ACCESS_ONCE(t->on_rq) &&
-			    !is_idle_task(t)) {
+			if (t != current && ACCESS_ONCE(t->on_rq)) {
 				get_task_struct(t);
 				t->rcu_tasks_nvcsw = ACCESS_ONCE(t->nvcsw);
 				ACCESS_ONCE(t->rcu_tasks_holdout) = 1;
@@ -558,6 +582,24 @@ static int __noreturn rcu_tasks_kthread(void *arg)
 		rcu_read_unlock();
 
 		/*
+		 * Next, queue up any currently running idle tasks.
+		 * Exclude CPU hotplug during the time we are working
+		 * with idle tasks, as it is considered bad form to
+		 * send IPIs to offline CPUs.
+		 */
+		get_online_cpus();
+		for_each_online_cpu(cpu) {
+			t = cpu_rq(cpu)->idle;
+			if (t == cpu_curr(cpu)) {
+				get_task_struct(t);
+				t->rcu_tasks_nvcsw = ACCESS_ONCE(t->nvcsw);
+				ACCESS_ONCE(t->rcu_tasks_holdout) = 1;
+				list_add(&t->rcu_tasks_holdout_list,
+					 &rcu_tasks_holdouts);
+			}
+		}
+
+		/*
 		 * Wait for tasks that are in the process of exiting.
 		 * This does only part of the job, ensuring that all
 		 * tasks that were previously exiting reach the point
@@ -592,6 +634,7 @@ static int __noreturn rcu_tasks_kthread(void *arg)
 				cond_resched();
 			}
 		}
+		put_online_cpus();
 
 		/*
 		 * Because ->on_rq and ->nvcsw are not guaranteed
-- 
1.8.1.5

