linux-kernel - Re: [RFC][PATCH 12/13] stop

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20150623130826.GG18673@twins.programming.kicks-ass.net>
Date:	Tue, 23 Jun 2015 15:08:26 +0200
From:	Peter Zijlstra <peterz@...radead.org>
To:	Oleg Nesterov <oleg@...hat.com>
Cc:	paulmck@...ux.vnet.ibm.com, tj@...nel.org, mingo@...hat.com,
	linux-kernel@...r.kernel.org, der.herr@...r.at, dave@...olabs.net,
	riel@...hat.com, viro@...IV.linux.org.uk,
	torvalds@...ux-foundation.org
Subject: Re: [RFC][PATCH 12/13] stop_machine: Remove lglock

On Tue, Jun 23, 2015 at 01:20:41PM +0200, Peter Zijlstra wrote:
> Paul, why does this use stop_machine anyway? I seemed to remember you
> sending resched IPIs around.
> 
> The rcu_sched_qs() thing would set passed_quiesce, which you can then
> collect to gauge progress.
> 
> Shooting IPIs around is bad enough, but running a full blown
> stop_machine is really blunt and heavy.

Is there anything obviously amiss with the below? It does stop_one_cpu()
in a loop instead of the multi cpu stop_machine and is therefore much
friendlier (albeit still heavier than bare resched IPIs) since the CPUs
do not have to go an sync up.

After all, all we're really interested in is that each CPUs has
scheduled at least once, we do not care about the cross cpu syncup.

---
 include/linux/stop_machine.h |  7 ----
 kernel/rcu/tree.c            | 99 +++++---------------------------------------
 kernel/stop_machine.c        | 30 --------------
 3 files changed, 10 insertions(+), 126 deletions(-)

diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index d2abbdb8c6aa..f992da7ee492 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -32,7 +32,6 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
 void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
 			 struct cpu_stop_work *work_buf);
 int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg);
-int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg);
 
 #else	/* CONFIG_SMP */
 
@@ -83,12 +82,6 @@ static inline int stop_cpus(const struct cpumask *cpumask,
 	return -ENOENT;
 }
 
-static inline int try_stop_cpus(const struct cpumask *cpumask,
-				cpu_stop_fn_t fn, void *arg)
-{
-	return stop_cpus(cpumask, fn, arg);
-}
-
 #endif	/* CONFIG_SMP */
 
 /*
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index add042926a66..4a8cde155dce 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3257,7 +3257,7 @@ static int synchronize_sched_expedited_cpu_stop(void *data)
 {
 	/*
 	 * There must be a full memory barrier on each affected CPU
-	 * between the time that try_stop_cpus() is called and the
+	 * between the time that stop_one_cpu() is called and the
 	 * time that it returns.
 	 *
 	 * In the current initial implementation of cpu_stop, the
@@ -3291,25 +3291,12 @@ static int synchronize_sched_expedited_cpu_stop(void *data)
  * grace period.  We are then done, so we use atomic_cmpxchg() to
  * update sync_sched_expedited_done to match our snapshot -- but
  * only if someone else has not already advanced past our snapshot.
- *
- * On the other hand, if try_stop_cpus() fails, we check the value
- * of sync_sched_expedited_done.  If it has advanced past our
- * initial snapshot, then someone else must have forced a grace period
- * some time after we took our snapshot.  In this case, our work is
- * done for us, and we can simply return.  Otherwise, we try again,
- * but keep our initial snapshot for purposes of checking for someone
- * doing our work for us.
- *
- * If we fail too many times in a row, we fall back to synchronize_sched().
  */
 void synchronize_sched_expedited(void)
 {
-	cpumask_var_t cm;
-	bool cma = false;
-	int cpu;
-	long firstsnap, s, snap;
-	int trycount = 0;
 	struct rcu_state *rsp = &rcu_sched_state;
+	long s, snap;
+	int cpu;
 
 	/*
 	 * If we are in danger of counter wrap, just do synchronize_sched().
@@ -3332,7 +3319,6 @@ void synchronize_sched_expedited(void)
 	 * full memory barrier.
 	 */
 	snap = atomic_long_inc_return(&rsp->expedited_start);
-	firstsnap = snap;
 	if (!try_get_online_cpus()) {
 		/* CPU hotplug operation in flight, fall back to normal GP. */
 		wait_rcu_gp(call_rcu_sched);
@@ -3341,82 +3327,17 @@ void synchronize_sched_expedited(void)
 	}
 	WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id()));
 
-	/* Offline CPUs, idle CPUs, and any CPU we run on are quiescent. */
-	cma = zalloc_cpumask_var(&cm, GFP_KERNEL);
-	if (cma) {
-		cpumask_copy(cm, cpu_online_mask);
-		cpumask_clear_cpu(raw_smp_processor_id(), cm);
-		for_each_cpu(cpu, cm) {
-			struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
-
-			if (!(atomic_add_return(0, &rdtp->dynticks) & 0x1))
-				cpumask_clear_cpu(cpu, cm);
-		}
-		if (cpumask_weight(cm) == 0)
-			goto all_cpus_idle;
-	}
-
-	/*
-	 * Each pass through the following loop attempts to force a
-	 * context switch on each CPU.
-	 */
-	while (try_stop_cpus(cma ? cm : cpu_online_mask,
-			     synchronize_sched_expedited_cpu_stop,
-			     NULL) == -EAGAIN) {
-		put_online_cpus();
-		atomic_long_inc(&rsp->expedited_tryfail);
-
-		/* Check to see if someone else did our work for us. */
-		s = atomic_long_read(&rsp->expedited_done);
-		if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) {
-			/* ensure test happens before caller kfree */
-			smp_mb__before_atomic(); /* ^^^ */
-			atomic_long_inc(&rsp->expedited_workdone1);
-			free_cpumask_var(cm);
-			return;
-		}
-
-		/* No joy, try again later.  Or just synchronize_sched(). */
-		if (trycount++ < 10) {
-			udelay(trycount * num_online_cpus());
-		} else {
-			wait_rcu_gp(call_rcu_sched);
-			atomic_long_inc(&rsp->expedited_normal);
-			free_cpumask_var(cm);
-			return;
-		}
+	for_each_online_cpu(cpu) {
+		struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
 
-		/* Recheck to see if someone else did our work for us. */
-		s = atomic_long_read(&rsp->expedited_done);
-		if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) {
-			/* ensure test happens before caller kfree */
-			smp_mb__before_atomic(); /* ^^^ */
-			atomic_long_inc(&rsp->expedited_workdone2);
-			free_cpumask_var(cm);
-			return;
-		}
+		/* Offline CPUs, idle CPUs, and any CPU we run on are quiescent. */
+		if (!(atomic_add_return(0, &rdtp->dynticks) & 0x1))
+			continue;
 
-		/*
-		 * Refetching sync_sched_expedited_started allows later
-		 * callers to piggyback on our grace period.  We retry
-		 * after they started, so our grace period works for them,
-		 * and they started after our first try, so their grace
-		 * period works for us.
-		 */
-		if (!try_get_online_cpus()) {
-			/* CPU hotplug operation in flight, use normal GP. */
-			wait_rcu_gp(call_rcu_sched);
-			atomic_long_inc(&rsp->expedited_normal);
-			free_cpumask_var(cm);
-			return;
-		}
-		snap = atomic_long_read(&rsp->expedited_start);
-		smp_mb(); /* ensure read is before try_stop_cpus(). */
+		stop_one_cpu(cpu, synchronize_sched_expedited_cpu_stop, NULL);
 	}
-	atomic_long_inc(&rsp->expedited_stoppedcpus);
 
-all_cpus_idle:
-	free_cpumask_var(cm);
+	atomic_long_inc(&rsp->expedited_stoppedcpus);
 
 	/*
 	 * Everyone up to our most recent fetch is covered by our grace
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index fd643d8c4b42..b1329a213503 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -371,36 +371,6 @@ int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
 	return ret;
 }
 
-/**
- * try_stop_cpus - try to stop multiple cpus
- * @cpumask: cpus to stop
- * @fn: function to execute
- * @arg: argument to @fn
- *
- * Identical to stop_cpus() except that it fails with -EAGAIN if
- * someone else is already using the facility.
- *
- * CONTEXT:
- * Might sleep.
- *
- * RETURNS:
- * -EAGAIN if someone else is already stopping cpus, -ENOENT if
- * @fn(@arg) was not executed at all because all cpus in @cpumask were
- * offline; otherwise, 0 if all executions of @fn returned 0, any non
- * zero return value if any returned non zero.
- */
-int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
-{
-	int ret;
-
-	/* static works are used, process one request at a time */
-	if (!mutex_trylock(&stop_cpus_mutex))
-		return -EAGAIN;
-	ret = __stop_cpus(cpumask, fn, arg);
-	mutex_unlock(&stop_cpus_mutex);
-	return ret;
-}
-
 static int cpu_stop_should_run(unsigned int cpu)
 {
 	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/