lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1437292973.3505.83.camel@gmail.com>
Date:	Sun, 19 Jul 2015 10:02:53 +0200
From:	Mike Galbraith <umgwanakikbuti@...il.com>
To:	Frederic Weisbecker <fweisbec@...il.com>
Cc:	Tejun Heo <tj@...nel.org>,
	Daniel Bristot de Oliveira <bristot@...hat.com>,
	LKML <linux-kernel@...r.kernel.org>,
	Lai Jiangshan <jiangshanlai@...il.com>,
	Rik van Riel <riel@...hat.com>,
	"Luis Claudio R. Goncalves" <lclaudio@...g.org>
Subject: Re: [RFC] workqueue: avoiding unbounded wq on isolated CPUs by
 default

On Sat, 2015-07-18 at 15:36 +0200, Frederic Weisbecker wrote:

> But we can't leave it half-way like it is currently with everything preset on
> top of nohz: rcu nocb mask, watchdog mask, cpu_isolation_map and exclude workqueue.

To automate or not aside...

WRT wq_unbound_cpumask, it's very nice to have, but anyone watching their
box should notice generic, allegedly unbound work landing on the bound
system_wq; thus the quiet zone isn't protected from these work items.

For example, my little perturbation measurement proggy emits a stat line
periodically, which leads to tty_schedule_flip() -> schedule_work() thus
it perturbs itself seemingly needlessly.  Lord knows how many other ways
there are to do the same.

The hack below is not intended to be anything remotely resembling a
proper answer to that problem; it's my box encouraging me to ask the
question by surviving (modulo destroy, where redirecting is a bad idea).

Why do we do nothing about these allegedly unbound work items?

---
 include/linux/sched.h |    2 ++
 kernel/workqueue.c    |   24 ++++++++++++++++++++++--
 2 files changed, 24 insertions(+), 2 deletions(-)

--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1430,6 +1430,8 @@ struct task_struct {
 	unsigned sched_contributes_to_load:1;
 	unsigned sched_migrated:1;
 
+	unsigned work_redirect_disable:1;
+
 #ifdef CONFIG_MEMCG_KMEM
 	unsigned memcg_kmem_skip_account:1;
 #endif
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1294,6 +1294,21 @@ static bool is_chained_work(struct workq
 	return worker && worker->current_pwq->wq == wq;
 }
 
+static struct workqueue_struct *
+redirect_generic_unbound_work(int cpu, struct workqueue_struct *wq)
+{
+	if (cpu != WORK_CPU_UNBOUND || wq != system_wq)
+		return wq;
+	if (current->work_redirect_disable)
+		return wq;
+	if (cpumask_test_cpu(raw_smp_processor_id(), wq_unbound_cpumask))
+		return wq;
+	if (wq->flags & __WQ_DRAINING || system_unbound_wq->flags & __WQ_DRAINING)
+		return wq;
+
+	return system_unbound_wq;
+}
+
 static void __queue_work(int cpu, struct workqueue_struct *wq,
 			 struct work_struct *work)
 {
@@ -1317,6 +1332,7 @@ static void __queue_work(int cpu, struct
 	if (unlikely(wq->flags & __WQ_DRAINING) &&
 	    WARN_ON_ONCE(!is_chained_work(wq)))
 		return;
+	wq = redirect_generic_unbound_work(req_cpu, wq);
 retry:
 	if (req_cpu == WORK_CPU_UNBOUND)
 		cpu = raw_smp_processor_id();
@@ -3926,6 +3942,8 @@ void destroy_workqueue(struct workqueue_
 	struct pool_workqueue *pwq;
 	int node;
 
+	current->work_redirect_disable = 1;
+
 	/* drain it before proceeding with destruction */
 	drain_workqueue(wq);
 
@@ -3937,7 +3955,7 @@ void destroy_workqueue(struct workqueue_
 		for (i = 0; i < WORK_NR_COLORS; i++) {
 			if (WARN_ON(pwq->nr_in_flight[i])) {
 				mutex_unlock(&wq->mutex);
-				return;
+				goto out;
 			}
 		}
 
@@ -3945,7 +3963,7 @@ void destroy_workqueue(struct workqueue_
 		    WARN_ON(pwq->nr_active) ||
 		    WARN_ON(!list_empty(&pwq->delayed_works))) {
 			mutex_unlock(&wq->mutex);
-			return;
+			goto out;
 		}
 	}
 	mutex_unlock(&wq->mutex);
@@ -3991,6 +4009,8 @@ void destroy_workqueue(struct workqueue_
 		wq->dfl_pwq = NULL;
 		put_pwq_unlocked(pwq);
 	}
+out:
+	current->work_redirect_disable = 0;
 }
 EXPORT_SYMBOL_GPL(destroy_workqueue);
 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ