Message-ID: <5502969A.9080006@cn.fujitsu.com>
Date: Fri, 13 Mar 2015 15:49:46 +0800
From: Lai Jiangshan <laijs@...fujitsu.com>
To: <linux-kernel@...r.kernel.org>
CC: Christoph Lameter <cl@...ux.com>,
Kevin Hilman <khilman@...aro.org>,
"Mike Galbraith" <bitbucket@...ine.de>,
"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>,
Tejun Heo <tj@...nel.org>,
Viresh Kumar <viresh.kumar@...aro.org>,
Frederic Weisbecker <fweisbec@...il.com>
Subject: Re: [PATCH 4/4] workqueue: Allow modifying low level unbound workqueue cpumask
On 03/12/2015 01:00 PM, Lai Jiangshan wrote:
> Allow modifying the low-level unbound workqueues' cpumask through
> sysfs. This is done by traversing the entire workqueue list and
> calling wq_unbound_install_ctx_prepare() on each unbound workqueue
> with the low level mask passed in. Only after all the preparations
> have succeeded do we commit them all together, so that a mid-way
> allocation failure leaves every workqueue untouched.
>
> Ordered workqueues are skipped when applying the low level unbound
> workqueue cpumask; they will be handled in the near future.
>
> The per-node pwqs are always constrained by the low level cpumask,
> while the default pwq ignores the low level cpumask when (and ONLY
> when) the cpumask set by the user doesn't overlap with it. In that
> case the intersection is empty and can't be applied to the default
> pwq, so we use the user-set cpumask directly.
>
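In other words, the default pwq's mask selection boils down to the
following sketch (names as in the patch below; unbound_cpumask is the
low level mask passed in):

	/* new_attrs: the user's mask, sanitized against possible CPUs */
	cpumask_and(new_attrs->cpumask, attrs->cpumask, cpu_possible_mask);
	/* pwq_attrs: the effective mask for the per-node pwqs */
	cpumask_and(pwq_attrs->cpumask, attrs->cpumask, unbound_cpumask);

	if (cpumask_empty(pwq_attrs->cpumask))
		/* no overlap: the default pwq keeps the user's mask */
		ctx->dfl_pwq = alloc_unbound_pwq(wq, new_attrs);
	else
		ctx->dfl_pwq = alloc_unbound_pwq(wq, pwq_attrs);
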
> Cc: Christoph Lameter <cl@...ux.com>
> Cc: Kevin Hilman <khilman@...aro.org>
> Cc: Lai Jiangshan <laijs@...fujitsu.com>
> Cc: Mike Galbraith <bitbucket@...ine.de>
> Cc: Paul E. McKenney <paulmck@...ux.vnet.ibm.com>
> Cc: Tejun Heo <tj@...nel.org>
> Cc: Viresh Kumar <viresh.kumar@...aro.org>
> Cc: Frederic Weisbecker <fweisbec@...il.com>
> Original-patch-by: Frederic Weisbecker <fweisbec@...il.com>
> Signed-off-by: Lai Jiangshan <laijs@...fujitsu.com>
This misses a part in wq_update_unbound_numa(); fixup:
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index facaaae..4027ec9 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3722,6 +3722,9 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
* wq's, the default pwq should be used.
*/
if (wq_calc_node_cpumask(wq->unbound_attrs, node, cpu_off, cpumask)) {
+ cpumask_and(cpumask, cpumask, wq_unbound_cpumask);
+ if (cpumask_empty(cpumask))
+ goto use_dfl_pwq;
if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask))
goto out_unlock;
} else {
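Without this hunk, a CPU hotplug event would recompute a node's
cpumask from the workqueue's attrs alone and could install a pwq
running outside the low level cpumask; the added lines clamp the
computed mask against wq_unbound_cpumask and fall back to the default
pwq when the intersection is empty.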
> ---
> kernel/workqueue.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++++-----
> 1 file changed, 88 insertions(+), 8 deletions(-)
>
> diff --git a/kernel/workqueue.c b/kernel/workqueue.c
> index 61b5bfa..facaaae 100644
> --- a/kernel/workqueue.c
> +++ b/kernel/workqueue.c
> @@ -299,7 +299,7 @@ static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */
> static LIST_HEAD(workqueues); /* PR: list of all workqueues */
> static bool workqueue_freezing; /* PL: have wqs started freezing? */
>
> -static cpumask_var_t wq_unbound_cpumask;
> +static cpumask_var_t wq_unbound_cpumask; /* PL: low level cpumask for all unbound wqs */
>
> /* the per-cpu worker pools */
> static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
> @@ -3491,6 +3491,7 @@ static struct pool_workqueue *numa_pwq_tbl_install(struct workqueue_struct *wq,
> struct wq_unbound_install_ctx {
> struct workqueue_struct *wq; /* target to be installed */
> struct workqueue_attrs *attrs; /* attrs for installing */
> + struct list_head list; /* queued for batching commit */
> struct pool_workqueue *dfl_pwq;
> struct pool_workqueue *pwq_tbl[];
> };
> @@ -3513,10 +3514,11 @@ static void wq_unbound_install_ctx_free(struct wq_unbound_install_ctx *ctx)
>
> static struct wq_unbound_install_ctx *
> wq_unbound_install_ctx_prepare(struct workqueue_struct *wq,
> - const struct workqueue_attrs *attrs)
> + const struct workqueue_attrs *attrs,
> + cpumask_var_t unbound_cpumask)
> {
> struct wq_unbound_install_ctx *ctx;
> - struct workqueue_attrs *new_attrs, *tmp_attrs;
> + struct workqueue_attrs *new_attrs, *pwq_attrs, *tmp_attrs;
> int node;
>
> lockdep_assert_held(&wq_pool_mutex);
> @@ -3525,13 +3527,16 @@ wq_unbound_install_ctx_prepare(struct workqueue_struct *wq,
> GFP_KERNEL);
>
> new_attrs = alloc_workqueue_attrs(GFP_KERNEL);
> + pwq_attrs = alloc_workqueue_attrs(GFP_KERNEL);
> tmp_attrs = alloc_workqueue_attrs(GFP_KERNEL);
> if (!ctx || !new_attrs || !tmp_attrs)
> goto out_free;
>
> /* make a copy of @attrs and sanitize it */
> copy_workqueue_attrs(new_attrs, attrs);
> - cpumask_and(new_attrs->cpumask, new_attrs->cpumask, wq_unbound_cpumask);
> + copy_workqueue_attrs(pwq_attrs, attrs);
> + cpumask_and(new_attrs->cpumask, new_attrs->cpumask, cpu_possible_mask);
> + cpumask_and(pwq_attrs->cpumask, pwq_attrs->cpumask, unbound_cpumask);
>
> /*
> * We may create multiple pwqs with differing cpumasks. Make a
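
(Note: new_attrs keeps the user's setting, sanitized only against
cpu_possible_mask, while pwq_attrs carries the effective mask, i.e.
the intersection with the low level cpumask, used to build the
per-node pwqs. Keeping the original setting is what allows it to be
re-applied when the low level mask changes later; see
unbounds_cpumask_apply() below, which passes wq->unbound_attrs back
in.)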
> @@ -3544,13 +3549,21 @@ wq_unbound_install_ctx_prepare(struct workqueue_struct *wq,
> * If something goes wrong during CPU up/down, we'll fall back to
> * the default pwq covering whole @attrs->cpumask. Always create
> * it even if we don't use it immediately.
> + *
> + * If the cpumask set by the user doesn't overlap with the global
> + * wq_unbound_cpumask, we ignore the wq_unbound_cpumask for this wq,
> + * which means all of its nodes' pwqs are its default pwq and the
> + * default pwq's workers' cpumask is exactly the user setting.
> */
> - ctx->dfl_pwq = alloc_unbound_pwq(wq, new_attrs);
> + if (cpumask_empty(pwq_attrs->cpumask))
> + ctx->dfl_pwq = alloc_unbound_pwq(wq, new_attrs);
> + else
> + ctx->dfl_pwq = alloc_unbound_pwq(wq, pwq_attrs);
> if (!ctx->dfl_pwq)
> goto out_free;
>
> for_each_node(node) {
> - if (wq_calc_node_cpumask(attrs, node, -1, tmp_attrs->cpumask)) {
> + if (wq_calc_node_cpumask(pwq_attrs, node, -1, tmp_attrs->cpumask)) {
> ctx->pwq_tbl[node] = alloc_unbound_pwq(wq, tmp_attrs);
> if (!ctx->pwq_tbl[node])
> goto out_free;
> @@ -3564,6 +3577,7 @@ wq_unbound_install_ctx_prepare(struct workqueue_struct *wq,
> ctx->attrs = new_attrs;
>
> out_free:
> + free_workqueue_attrs(pwq_attrs);
> free_workqueue_attrs(tmp_attrs);
>
> if (!ctx || !ctx->wq) {
> @@ -3634,7 +3648,7 @@ int apply_workqueue_attrs(struct workqueue_struct *wq,
> get_online_cpus();
>
> mutex_lock(&wq_pool_mutex);
> - ctx = wq_unbound_install_ctx_prepare(wq, attrs);
> + ctx = wq_unbound_install_ctx_prepare(wq, attrs, wq_unbound_cpumask);
> mutex_unlock(&wq_pool_mutex);
>
> put_online_cpus();
> @@ -3961,19 +3975,85 @@ static struct bus_type wq_subsys = {
> .dev_groups = wq_sysfs_groups,
> };
>
> +static int unbounds_cpumask_apply(cpumask_var_t cpumask)
> +{
> + LIST_HEAD(ctxs);
> + int ret = 0;
> + struct workqueue_struct *wq;
> + struct wq_unbound_install_ctx *ctx, *n;
> +
> + lockdep_assert_held(&wq_pool_mutex);
> +
> + list_for_each_entry(wq, &workqueues, list) {
> + if (!(wq->flags & WQ_UNBOUND))
> + continue;
> + /* creating multiple pwqs breaks ordering guarantee */
> + if (wq->flags & __WQ_ORDERED)
> + continue;
> +
> + ctx = wq_unbound_install_ctx_prepare(wq, wq->unbound_attrs,
> + cpumask);
> + if (!ctx) {
> + ret = -ENOMEM;
> + break;
> + }
> +
> + list_add_tail(&ctx->list, &ctxs);
> + }
> +
> + list_for_each_entry_safe(ctx, n, &ctxs, list) {
> + if (ret >= 0)
> + wq_unbound_install_ctx_commit(ctx);
> + wq_unbound_install_ctx_free(ctx);
> + }
> +
> + return ret;
> +}
> +
> +static ssize_t unbounds_cpumask_store(struct device *dev,
> + struct device_attribute *attr,
> + const char *buf, size_t count)
> +{
> + cpumask_var_t cpumask;
> + int ret = -EINVAL;
> +
> + if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL))
> + return -ENOMEM;
> +
> + ret = cpumask_parse(buf, cpumask);
> + if (ret)
> + goto out;
> +
> + get_online_cpus();
> + cpumask_and(cpumask, cpumask, cpu_possible_mask);
> + if (cpumask_intersects(cpumask, cpu_online_mask)) {
> + mutex_lock(&wq_pool_mutex);
> + ret = unbounds_cpumask_apply(cpumask);
> + if (ret >= 0)
> + cpumask_copy(wq_unbound_cpumask, cpumask);
> + mutex_unlock(&wq_pool_mutex);
> + }
> + put_online_cpus();
> +out:
> + free_cpumask_var(cpumask);
> + return ret ? ret : count;
> +}
> +
> static ssize_t unbounds_cpumask_show(struct device *dev,
> struct device_attribute *attr, char *buf)
> {
> int written;
>
> + mutex_lock(&wq_pool_mutex);
> written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
> cpumask_pr_args(wq_unbound_cpumask));
> + mutex_unlock(&wq_pool_mutex);
>
> return written;
> }
>
> static struct device_attribute wq_sysfs_cpumask_attr =
> - __ATTR(cpumask, 0444, unbounds_cpumask_show, NULL);
> + __ATTR(cpumask, 0644, unbounds_cpumask_show, unbounds_cpumask_store);
>
> static int __init wq_sysfs_init(void)
> {
>
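With this applied, the low level mask is writable from userspace,
e.g. (assuming the workqueue subsys is registered under
/sys/devices/virtual/):

	# cat /sys/devices/virtual/workqueue/cpumask
	# echo 0-2 > /sys/devices/virtual/workqueue/cpumask

A mask that doesn't intersect cpu_online_mask is quietly ignored, and
a failed prepare (-ENOMEM) commits nothing, so the old mask stays in
effect.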