lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1394815131-17271-4-git-send-email-fweisbec@gmail.com>
Date:	Fri, 14 Mar 2014 17:38:51 +0100
From:	Frederic Weisbecker <fweisbec@...il.com>
To:	LKML <linux-kernel@...r.kernel.org>
Cc:	Frederic Weisbecker <fweisbec@...il.com>,
	Christoph Lameter <cl@...ux.com>,
	Kevin Hilman <khilman@...aro.org>,
	Mike Galbraith <bitbucket@...ine.de>,
	"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>,
	Tejun Heo <tj@...nel.org>,
	Viresh Kumar <viresh.kumar@...aro.org>
Subject: [PATCH 3/3] workqueue: Add anon workqueue sysfs hierarchy

We call "anon workqueues" the set of unbound workqueues that don't
carry the WQ_SYSFS flag.

They are a problem nowadays because people who work on CPU isolation
(HPC, Real time, etc...) want to be able to migrate all the unbound
workqueues away to a single CPU. This control is possible through sysfs
but only with WQ_SYSFS workqueues.

Now we need to deal with the other unbound workqueues. There are two
possible solutions:

1) Implement a sysfs directory for each unbound !WQ_SYSFS workqueue. This
could be done with a specific Kconfig to make sure that these workqueues
won't be considered as a stable ABI. But we all know that all distros
will enable this Kconfig symbol and that a warning in the Kconfig help
text won't protect against anything.

2) Implement a single sysfs directory containing only the cpumask file
to control the affinity of all the !WQ_SYSFS workqueues.

This patch implements the second solution.

Two issues I have seen though:

* This triggers the following warning in apply_workqueue_attrs():

	/* creating multiple pwqs breaks ordering guarantee */
	if (WARN_ON((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs)))
		return -EINVAL;

  I haven't yet checked into the details.

* wq_calc_node_cpumask() says that if NUMA affinity is not enabled,
  cpumask is always used, which suggests that if NUMA affinity is enabled
  the cpumask may be ignored?

Cc: Christoph Lameter <cl@...ux.com>
Cc: Kevin Hilman <khilman@...aro.org>
Cc: Mike Galbraith <bitbucket@...ine.de>
Cc: Paul E. McKenney <paulmck@...ux.vnet.ibm.com>
Cc: Tejun Heo <tj@...nel.org>
Cc: Viresh Kumar <viresh.kumar@...aro.org>
Not-Yet-Signed-off-by: Frederic Weisbecker <fweisbec@...il.com>
---
 kernel/workqueue.c | 114 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 112 insertions(+), 2 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index ad8f727..aabee1f 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -289,7 +289,8 @@ static bool wq_numa_enabled;		/* unbound NUMA affinity enabled */
 static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;
 
 static DEFINE_MUTEX(wq_pool_mutex);	/* protects pools and workqueues list */
-static DEFINE_MUTEX(wq_unbound_mutex);	/* protects list of unbound workqueues */
+/* protects list of unbound workqueues and wq_anon_cpumask */
+static DEFINE_MUTEX(wq_unbound_mutex);
 static DEFINE_SPINLOCK(wq_mayday_lock);	/* protects wq->maydays list */
 
 static LIST_HEAD(workqueues);		/* PL: list of all workqueues */
@@ -3311,13 +3312,122 @@ static struct device_attribute wq_sysfs_unbound_attrs[] = {
 	__ATTR_NULL,
 };
 
+/* cpumask reported for the anon wqs in sysfs. Protected by wq_unbound_mutex */
+static cpumask_t wq_anon_cpumask;
+static ssize_t wq_anon_cpumask_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	int len;	/* bytes written to @buf so far */
+
+	mutex_lock(&wq_unbound_mutex);	/* guards wq_anon_cpumask */
+	len = cpumask_scnprintf(buf, PAGE_SIZE, &wq_anon_cpumask);
+	mutex_unlock(&wq_unbound_mutex);
+
+	/* Append the newline, bounded by what is left of PAGE_SIZE. */
+	len += scnprintf(buf + len, PAGE_SIZE - len, "\n");
+	return len;
+}
+
+/* Set @cpumask on all anon wqs. Must be called with wq_unbound_mutex held */
+static int wq_anon_cpumask_set(cpumask_var_t cpumask)
+{
+	struct workqueue_attrs *attrs;
+	struct workqueue_struct *wq;
+	int ret = 0;	/* stays 0 when there is no anon workqueue to update */
+
+	list_for_each_entry(wq, &workqueues_unbound, unbound_list) {
+		if (wq->flags & WQ_SYSFS)	/* not an anon workqueue */
+			continue;
+		attrs = wq_sysfs_prep_attrs(wq);
+		if (!attrs)
+			return -ENOMEM;
+		/* Override just the cpumask of the attrs obtained above. */
+		cpumask_copy(attrs->cpumask, cpumask);
+		ret = apply_workqueue_attrs(wq, attrs);
+		free_workqueue_attrs(attrs);
+		if (ret)	/* stop at first failure; no rollback of earlier wqs */
+			break;
+	}
+
+	return ret;
+}
+
+static ssize_t wq_anon_cpumask_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t count)
+{
+	cpumask_var_t cpumask;
+	int ret;
+
+	if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL))
+		return -ENOMEM;
+	ret = cpumask_parse(buf, cpumask);
+	if (ret)
+		goto out;
+
+	ret = -EINVAL;	/* stands if no CPU in @cpumask is online */
+	get_online_cpus();	/* keep cpu_online_mask stable while applying */
+	if (cpumask_intersects(cpumask, cpu_online_mask)) {
+		mutex_lock(&wq_unbound_mutex);
+		ret = wq_anon_cpumask_set(cpumask);
+		if (!ret)	/* record the mask only once it was applied */
+			cpumask_copy(&wq_anon_cpumask, cpumask);
+		mutex_unlock(&wq_unbound_mutex);
+	}
+	put_online_cpus();
+out:
+	free_cpumask_var(cpumask);
+	return ret ? ret : count;	/* -errno, or all of @buf consumed */
+}
+
+static void device_release(struct device *dev)
+{
+	kfree(dev);	/* pairs with the kzalloc() in wq_sysfs_init() */
+}
+
+static struct device_attribute wq_sysfs_anon_attr =	/* anon_wqs/cpumask */
+	__ATTR(cpumask, 0644, wq_anon_cpumask_show, wq_anon_cpumask_store);
+
 static struct bus_type wq_subsys = {
 	.name				= "workqueue",
 };
 
 static int __init wq_sysfs_init(void)
 {
-	return subsys_virtual_register(&wq_subsys, NULL);
+	struct device *anon_dev;
+	int ret;
+
+	ret = subsys_virtual_register(&wq_subsys, NULL);
+	if (ret < 0)
+		return ret;
+
+	mutex_lock(&wq_unbound_mutex);	/* initial mask: every possible CPU */
+	cpumask_copy(&wq_anon_cpumask, cpu_possible_mask);
+	mutex_unlock(&wq_unbound_mutex);
+	/* Device carrying the single "cpumask" file for the anon workqueues. */
+	anon_dev = kzalloc(sizeof(*anon_dev), GFP_KERNEL);
+	if (!anon_dev)
+		return -ENOMEM;
+
+	anon_dev->bus = &wq_subsys;
+	anon_dev->init_name = "anon_wqs";
+	anon_dev->release = device_release;
+
+	ret = device_register(anon_dev);
+	if (ret) {
+		put_device(anon_dev);	/* never kfree() here; ->release frees */
+		return ret;
+	}
+
+	ret = device_create_file(anon_dev, &wq_sysfs_anon_attr);
+	if (ret) {
+		device_unregister(anon_dev);
+		return ret;
+	}
+	/* NOTE(review): device_register() may already have sent KOBJ_ADD. */
+	kobject_uevent(&anon_dev->kobj, KOBJ_ADD);
+
+	return 0;
 }
 core_initcall(wq_sysfs_init);
 
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ