Message-Id: <20251205125445.4154667-4-jackzxcui1989@163.com>
Date: Fri, 5 Dec 2025 20:54:45 +0800
From: Xin Zhao <jackzxcui1989@....com>
To: tj@...nel.org,
jiangshanlai@...il.com
Cc: hch@...radead.org,
jackzxcui1989@....com,
linux-kernel@...r.kernel.org
Subject: [RFC PATCH 3/3] workqueue: Support private workqueue by sysfs
Globally, unbound workqueues with the same attributes share one worker
pool. Directly changing the scheduling attributes of a specific kworker
thread with tools like 'chrt' or 'taskset' may therefore affect other
work items running on the same worker_pool. During a discussion with
Tejun about whether to rewrite the code using kthread_work or to keep
using kworker, Tejun pointed out that a workqueue could be made
"private" via sysfs. This way, the scheduling attributes of the kworker
threads serving such a private workqueue can be set individually.
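
As a minimal usage sketch (the workqueue name 'my_wq' is hypothetical;
only WQ_SYSFS unbound workqueues expose these attributes):

  # give the workqueue a dedicated worker_pool
  echo 1 > /sys/devices/virtual/workqueue/my_wq/private

  # the kworkers of that pool (named kworker/u<pool_id>:<id>) can then
  # be tuned with chrt/taskset without affecting other workqueues
  chrt -f -p 50 <pid of a kworker serving the private pool>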

However, simply adding a 'private' node does not by itself address all
situations, as kworker threads are created and destroyed dynamically.

Within this patch series, 'private' support is still meaningful. While
the 'nr_idle_extra' attribute added in the previous patch can be raised
to reserve more idle kworkers, there may still be a significant number
of workqueues with the same attributes globally. If a large number of
concurrent work items enter the same worker pool at a certain moment,
this can still cause the 'tail latency' described in the previous patch
of this series. Increasing nr_idle_extra may help alleviate the delays
caused by such a sudden influx, but indiscriminately setting
nr_idle_extra too high wastes thread resources.

Supporting the 'private' configuration aims to deterministically ensure
that work items of one workqueue are not affected by work items of
other workqueues that happen to have the same attributes. Users with
strict real-time requirements can increase nr_idle_extra from the
previous patch while also marking the workqueue 'private', so that it
uses its own kworker threads and scheduling-related delays of its work
items are avoided.
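
For example, the two knobs from this series might be combined as below
(the workqueue name and the values are made up):

  # reserve extra idle kworkers to absorb bursts (previous patch)
  echo 4 > /sys/devices/virtual/workqueue/my_wq/nr_idle_extra
  # and detach the workqueue into its own worker_pool (this patch)
  echo 1 > /sys/devices/virtual/workqueue/my_wq/private
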
Suggested-by: Tejun Heo <tj@...nel.org>
Signed-off-by: Xin Zhao <jackzxcui1989@....com>
---
include/linux/workqueue.h | 7 +++++++
kernel/workqueue.c | 37 +++++++++++++++++++++++++++++++++++++
2 files changed, 44 insertions(+)
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index c8f40fd6f..faa554384 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -171,6 +171,13 @@ struct workqueue_attrs {
*/
int nr_idle_extra;
+ /**
+ * @private: whether to use an individual worker_pool
+ *
+ * true means the worker_pool is not shared even if the attributes match
+ */
+ bool private;
+
/**
* @cpumask: allowed CPUs
*
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index d2bdde40b..bd0a1c1ff 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -4711,6 +4711,7 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to,
to->policy = from->policy;
to->prio = from->prio;
to->nr_idle_extra = from->nr_idle_extra;
+ to->private = from->private;
cpumask_copy(to->cpumask, from->cpumask);
cpumask_copy(to->__pod_cpumask, from->__pod_cpumask);
to->affn_strict = from->affn_strict;
@@ -4761,6 +4762,8 @@ static bool wqattrs_equal(const struct workqueue_attrs *a,
return false;
if (a->nr_idle_extra != b->nr_idle_extra)
return false;
+ if (a->private || b->private)
+ return false;
if (a->affn_strict != b->affn_strict)
return false;
if (!cpumask_equal(a->__pod_cpumask, b->__pod_cpumask))
@@ -7100,6 +7103,7 @@ module_param_cb(default_affinity_scope, &wq_affn_dfl_ops, NULL, 0644);
* nice RW int : nice value of the workers
* rtprio RW int : rtprio value of the workers
* nr_idle_extra RW int : number of extra idle thread reserved
+ * private RW bool : whether to use a dedicated worker_pool
* cpumask RW mask : bitmask of allowed CPUs for the workers
* affinity_scope RW str : worker CPU affinity scope (cache, numa, none)
* affinity_strict RW bool : worker CPU affinity is strict
@@ -7351,6 +7355,38 @@ static ssize_t wq_idle_extra_store(struct device *dev, struct device_attribute *
return ret ?: count;
}
+static ssize_t wq_private_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct workqueue_struct *wq = dev_to_wq(dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%d\n",
+ wq->unbound_attrs->private);
+}
+
+static ssize_t wq_private_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct workqueue_struct *wq = dev_to_wq(dev);
+ struct workqueue_attrs *attrs;
+ int ret = -ENOMEM;
+
+ apply_wqattrs_lock();
+
+ attrs = wq_sysfs_prep_attrs(wq);
+ if (!attrs)
+ goto out_unlock;
+
+ ret = -EINVAL;
+ if (!kstrtobool(buf, &attrs->private))
+ ret = apply_workqueue_attrs_locked(wq, attrs);
+
+out_unlock:
+ apply_wqattrs_unlock();
+ free_workqueue_attrs(attrs);
+ return ret ?: count;
+}
+
static ssize_t wq_cpumask_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -7465,6 +7501,7 @@ static struct device_attribute wq_sysfs_unbound_attrs[] = {
__ATTR(nice, 0644, wq_nice_show, wq_nice_store),
__ATTR(rtprio, 0644, wq_rtprio_show, wq_rtprio_store),
__ATTR(nr_idle_extra, 0644, wq_idle_extra_show, wq_idle_extra_store),
+ __ATTR(private, 0644, wq_private_show, wq_private_store),
__ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store),
__ATTR(affinity_scope, 0644, wq_affn_scope_show, wq_affn_scope_store),
__ATTR(affinity_strict, 0644, wq_affinity_strict_show, wq_affinity_strict_store),
--
2.34.1