Message-ID: <20160711165044.09aec407@gandalf.local.home>
Date:	Mon, 11 Jul 2016 16:50:44 -0400
From:	Steven Rostedt <rostedt@...dmis.org>
To:	Tejun Heo <tj@...nel.org>
Cc:	Lai Jiangshan <laijs@...fujitsu.com>, linux-kernel@...r.kernel.org,
	Thomas Gleixner <tglx@...utronix.de>,
	Sebastian Andrzej Siewior <bigeasy@...utronix.de>
Subject: Re: [PATCH 1/5] workqueue: wq_pool_mutex protects the
 attrs-installation


[ crickets ]

-- Steve

On Thu, 10 Mar 2016 16:44:11 -0500
Steven Rostedt <rostedt@...dmis.org> (by way of Steven Rostedt
<rostedt@...dmis.org>) wrote:

> This patch was recently backported to 4.1.19, and when I merged it with -rt,
> the following bug triggered:
> 
> ===============================
> [ INFO: suspicious RCU usage. ]
> 4.1.19-test-rt22+ #1 Not tainted
> -------------------------------
> /work/rt/stable-rt.git/kernel/workqueue.c:608 sched RCU, wq->mutex or wq_pool_mutex should be held!
> 
> other info that might help us debug this:
> 
> 
> rcu_scheduler_active = 1, debug_locks = 0
> 2 locks held by swapper/0/1:
>  #0:  ((pendingb_lock).lock){+.+...}, at: [<ffffffff8105e4b7>] __local_lock_irq+0x21/0x74
>  #1:  (rcu_read_lock){......}, at: [<ffffffff8105fbdc>] rcu_read_lock+0x0/0x6c
> 
> stack backtrace:
> CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.1.19-test-rt22+ #1
> Hardware name: MSI MS-7823/CSM-H87M-G43 (MS-7823), BIOS V1.6 02/22/2014
>  0000000000000000 ffff8802101dfd08 ffffffff816083ae ffff880210240000
>  0000000000000001 ffff8802101dfd38 ffffffff81087cf1 ffff88021588e800
>  0000000000000000 0000000000000100 ffff88021588e800 ffff8802101dfd58
> Call Trace:
>  [<ffffffff816083ae>] dump_stack+0x67/0x90
>  [<ffffffff81087cf1>] lockdep_rcu_suspicious+0x107/0x110
>  [<ffffffff8105f9fe>] unbound_pwq_by_node+0x6c/0x93
>  [<ffffffff81060e62>] __queue_work+0xc8/0x2e7
>  [<ffffffff8106f0cc>] ? migrate_disable+0x28/0xe6
>  [<ffffffff81061126>] queue_work_on+0x85/0xb8
>  [<ffffffff81f54188>] ? acpi_battery_init+0x16/0x16
>  [<ffffffff8106a382>] __async_schedule+0x18b/0x19d
>  [<ffffffff81f54172>] ? acpi_memory_hotplug_init+0x12/0x12
>  [<ffffffff8106a3b9>] async_schedule+0x15/0x17
>  [<ffffffff81f54184>] acpi_battery_init+0x12/0x16
>  [<ffffffff81000415>] do_one_initcall+0xf7/0x18a
>  [<ffffffff8106692f>] ? parse_args+0x258/0x35f
>  [<ffffffff81f140be>] kernel_init_freeable+0x205/0x29e
>  [<ffffffff81f137d0>] ? do_early_param+0x86/0x86
>  [<ffffffff8160d9bc>] ? _raw_spin_unlock_irq+0x5d/0x72
>  [<ffffffff815fc28f>] ? rest_init+0x143/0x143
>  [<ffffffff815fc29d>] kernel_init+0xe/0xdf
>  [<ffffffff8160e712>] ret_from_fork+0x42/0x70
>  [<ffffffff815fc28f>] ? rest_init+0x143/0x143
> 
> 
> 
> On Mon, May 11, 2015 at 05:35:48PM +0800, Lai Jiangshan wrote:
> > ---
> >  kernel/workqueue.c | 44 ++++++++++++++++++++++++++------------------
> >  1 file changed, 26 insertions(+), 18 deletions(-)
> > 
> > diff --git a/kernel/workqueue.c b/kernel/workqueue.c
> > index a3915ab..fa8b949 100644
> > --- a/kernel/workqueue.c
> > +++ b/kernel/workqueue.c
> > @@ -127,6 +127,12 @@ enum {
> >   *
> >   * PR: wq_pool_mutex protected for writes.  Sched-RCU protected for reads.
> >   *
> > + * PW: wq_pool_mutex and wq->mutex protected for writes.  Any one of them
> > + *     protected for reads.
> > + *
> > + * PWR: wq_pool_mutex and wq->mutex protected for writes. Any one of them
> > + *      or sched-RCU for reads.  
> 
> How exactly is sched-RCU protecting this here? The cause of the 4.1-rt issue
> is that we converted the local_irq_save() in queue_work_on() into a
> "local_lock_irqsave()", which, when PREEMPT_RT is enabled, becomes a mutex
> that disables migration (the task cannot migrate). This also prevents the
> current CPU from going offline.
> 
> Does this code really need to be protected from being preempted, or is
> disabling migration good enough?
> 
> Thanks!
> 
> -- Steve
> 
> 
> > + *
> >   * WQ: wq->mutex protected.
> >   *
> >   * WR: wq->mutex protected for writes.  Sched-RCU protected for reads.
> > @@ -247,8 +253,8 @@ struct workqueue_struct {
> >  	int			nr_drainers;	/* WQ: drain in progress */
> >  	int			saved_max_active; /* WQ: saved pwq max_active */
> >  
> > -	struct workqueue_attrs	*unbound_attrs;	/* WQ: only for unbound wqs */
> > -	struct pool_workqueue	*dfl_pwq;	/* WQ: only for unbound wqs */
> > +	struct workqueue_attrs	*unbound_attrs;	/* PW: only for unbound wqs */
> > +	struct pool_workqueue	*dfl_pwq;	/* PW: only for unbound wqs */
> >  
> >  #ifdef CONFIG_SYSFS
> >  	struct wq_device	*wq_dev;	/* I: for sysfs interface */
> > @@ -268,7 +274,7 @@ struct workqueue_struct {
> >  	/* hot fields used during command issue, aligned to cacheline */
> >  	unsigned int		flags ____cacheline_aligned; /* WQ: WQ_* flags */
> >  	struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwqs */
> > -	struct pool_workqueue __rcu *numa_pwq_tbl[]; /* FR: unbound pwqs indexed by node */
> > +	struct pool_workqueue __rcu *numa_pwq_tbl[]; /* PWR: unbound pwqs indexed by node */
> >  };
> >  
> >  static struct kmem_cache *pwq_cache;
> > @@ -349,6 +355,12 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
> >  			   lockdep_is_held(&wq->mutex),			\
> >  			   "sched RCU or wq->mutex should be held")
> >  
> > +#define assert_rcu_or_wq_mutex_or_pool_mutex(wq)			\
> > +	rcu_lockdep_assert(rcu_read_lock_sched_held() ||		\
> > +			   lockdep_is_held(&wq->mutex) ||		\
> > +			   lockdep_is_held(&wq_pool_mutex),		\
> > +			   "sched RCU, wq->mutex or wq_pool_mutex should be held")
> > +
> >  #define for_each_cpu_worker_pool(pool, cpu)				\
> >  	for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0];		\
> >  	     (pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \
> > @@ -553,7 +565,7 @@ static int worker_pool_assign_id(struct worker_pool *pool)
> >   * @wq: the target workqueue
> >   * @node: the node ID
> >   *
> > - * This must be called either with pwq_lock held or sched RCU read locked.
> > + * This must be called either with wq_pool_mutex held or sched RCU read locked.
> >   * If the pwq needs to be used beyond the locking in effect, the caller is
> >   * responsible for guaranteeing that the pwq stays online.
> >   *
> > @@ -562,7 +574,7 @@ static int worker_pool_assign_id(struct worker_pool *pool)
> >  static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
> >  						  int node)
> >  {
> > -	assert_rcu_or_wq_mutex(wq);
> > +	assert_rcu_or_wq_mutex_or_pool_mutex(wq);
> >  	return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
> >  }
> >  
> > @@ -3480,6 +3492,7 @@ static struct pool_workqueue *numa_pwq_tbl_install(struct workqueue_struct *wq,
> >  	struct pool_workqueue *old_pwq;
> >  
> >  	lockdep_assert_held(&wq->mutex);
> > +	lockdep_assert_held(&wq_pool_mutex);
> >  
> >  	/* link_pwq() can handle duplicate calls */
> >  	link_pwq(pwq);
> > @@ -3644,10 +3657,9 @@ int apply_workqueue_attrs(struct workqueue_struct *wq,
> >  	 * pwqs accordingly.
> >  	 */
> >  	get_online_cpus();
> > -
> >  	mutex_lock(&wq_pool_mutex);
> > +
> >  	ctx = apply_wqattrs_prepare(wq, attrs);
> > -	mutex_unlock(&wq_pool_mutex);
> >  
> >  	/* the ctx has been prepared successfully, let's commit it */
> >  	if (ctx) {
> > @@ -3655,10 +3667,11 @@ int apply_workqueue_attrs(struct workqueue_struct *wq,
> >  		ret = 0;
> >  	}
> >  
> > -	put_online_cpus();
> > -
> >  	apply_wqattrs_cleanup(ctx);
> >  
> > +	mutex_unlock(&wq_pool_mutex);
> > +	put_online_cpus();
> > +
> >  	return ret;
> >  }
> >  
> > @@ -3695,7 +3708,8 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
> >  
> >  	lockdep_assert_held(&wq_pool_mutex);
> >  
> > -	if (!wq_numa_enabled || !(wq->flags & WQ_UNBOUND))
> > +	if (!wq_numa_enabled || !(wq->flags & WQ_UNBOUND) ||
> > +	    wq->unbound_attrs->no_numa)
> >  		return;
> >  
> >  	/*
> > @@ -3706,10 +3720,6 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
> >  	target_attrs = wq_update_unbound_numa_attrs_buf;
> >  	cpumask = target_attrs->cpumask;
> >  
> > -	mutex_lock(&wq->mutex);
> > -	if (wq->unbound_attrs->no_numa)
> > -		goto out_unlock;
> > -
> >  	copy_workqueue_attrs(target_attrs, wq->unbound_attrs);
> >  	pwq = unbound_pwq_by_node(wq, node);
> >  
> > @@ -3721,19 +3731,16 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
> >  	 */
> >  	if (wq_calc_node_cpumask(wq->dfl_pwq->pool->attrs, node, cpu_off, cpumask)) {
> >  		if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask))
> > -			goto out_unlock;
> > +			return;
> >  	} else {
> >  		goto use_dfl_pwq;
> >  	}
> >  
> > -	mutex_unlock(&wq->mutex);
> > -
> >  	/* create a new pwq */
> >  	pwq = alloc_unbound_pwq(wq, target_attrs);
> >  	if (!pwq) {
> >  		pr_warn("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n",
> >  			wq->name);
> > -		mutex_lock(&wq->mutex);
> >  		goto use_dfl_pwq;
> >  	}
> >  
> > @@ -3748,6 +3755,7 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
> >  	goto out_unlock;
> >  
> >  use_dfl_pwq:
> > +	mutex_lock(&wq->mutex);
> >  	spin_lock_irq(&wq->dfl_pwq->pool->lock);
> >  	get_pwq(wq->dfl_pwq);
> >  	spin_unlock_irq(&wq->dfl_pwq->pool->lock);
> > -- 
> > 2.1.0
> > 
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> > the body of a message to majordomo@...r.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > Please read the FAQ at  http://www.tux.org/lkml/  
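
For reference, a rough sketch of the -rt difference described above
(illustrative only -- this is not the literal 4.1-rt code; the lock name
pendingb_lock is taken from the lockdep report, and the call sites are
simplified):

	/* Mainline: masking interrupts on the local CPU also disables
	 * preemption, so __queue_work() implicitly runs in a sched-RCU
	 * read-side critical section and rcu_read_lock_sched_held()
	 * is satisfied in unbound_pwq_by_node().
	 */
	local_irq_save(flags);
	__queue_work(cpu, wq, work);
	local_irq_restore(flags);

	/* PREEMPT_RT: the same site takes a per-CPU "local lock", a
	 * sleeping lock plus migrate_disable().  The task may still be
	 * preempted; only migration (and the current CPU going offline)
	 * is prevented, so no sched-RCU reader is implied -- hence the
	 * lockdep splat out of unbound_pwq_by_node().
	 */
	local_lock_irqsave(pendingb_lock, flags);
	__queue_work(cpu, wq, work);
	local_unlock_irqrestore(pendingb_lock, flags);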
