lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20140610175001.GF27015@kvack.org>
Date:	Tue, 10 Jun 2014 13:50:01 -0400
From:	Benjamin LaHaise <bcrl@...ck.org>
To:	Mike Galbraith <umgwanakikbuti@...il.com>,
	Kent Overstreet <kmo@...erainc.com>
Cc:	Lai Jiangshan <laijs@...fujitsu.com>,
	RT <linux-rt-users@...r.kernel.org>,
	LKML <linux-kernel@...r.kernel.org>,
	Sebastian Andrzej Siewior <bigeasy@...utronix.de>,
	Steven Rostedt <rostedt@...dmis.org>,
	Thomas Gleixner <tglx@...utronix.de>,
	"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>
Subject: Re: [RFC PATCH V2] rt/aio: fix rcu garbage collection might_sleep() splat

On Tue, Jun 10, 2014 at 05:47:28AM +0200, Mike Galbraith wrote:
> On Mon, 2014-06-09 at 10:08 +0800, Lai Jiangshan wrote: 
> > Hi, rt-people
> > 
> > I don't think it is the correct direction.
> > Softirq (including local_bh_disable()) in RT kernel should be preemptible.
> 
> How about the below then?
> 
> I was sorely tempted to post a tiny variant that dropped taking ctx_lock
> in free_ioctx_users() entirely, as someone diddling with no reference
> didn't make sense.  Cc Ben, he would know.

That should be okay...  Let's ask Kent to chime in on whether this looks 
safe to him on the percpu ref front as well, since he's the one who wrote 
this code.

		-ben


> [  172.743098] BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:768
> [  172.743116] in_atomic(): 1, irqs_disabled(): 0, pid: 26, name: rcuos/2
> [  172.743117] 2 locks held by rcuos/2/26:
> [  172.743128]  #0:  (rcu_callback){.+.+..}, at: [<ffffffff810b1a12>] rcu_nocb_kthread+0x1e2/0x380
> [  172.743135]  #1:  (rcu_read_lock_sched){.+.+..}, at: [<ffffffff812acd26>] percpu_ref_kill_rcu+0xa6/0x1c0
> [  172.743138] Preemption disabled at:[<ffffffff810b1a93>] rcu_nocb_kthread+0x263/0x380
> [  172.743138]
> [  172.743142] CPU: 0 PID: 26 Comm: rcuos/2 Not tainted 3.14.4-rt5 #31
> [  172.743143] Hardware name: MEDIONPC MS-7502/MS-7502, BIOS 6.00 PG 12/26/2007
> [  172.743148]  ffff8802231aa190 ffff8802231a5d08 ffffffff81582e9e 0000000000000000
> [  172.743151]  ffff8802231a5d28 ffffffff81077aeb ffff880209f68140 ffff880209f681c0
> [  172.743154]  ffff8802231a5d48 ffffffff81589304 ffff880209f68000 ffff880209f68000
> [  172.743155] Call Trace:
> [  172.743160]  [<ffffffff81582e9e>] dump_stack+0x4e/0x9c
> [  172.743163]  [<ffffffff81077aeb>] __might_sleep+0xfb/0x170
> [  172.743167]  [<ffffffff81589304>] rt_spin_lock+0x24/0x70
> [  172.743171]  [<ffffffff811c5790>] free_ioctx_users+0x30/0x130
> [  172.743174]  [<ffffffff812ace34>] percpu_ref_kill_rcu+0x1b4/0x1c0
> [  172.743177]  [<ffffffff812acd26>] ? percpu_ref_kill_rcu+0xa6/0x1c0
> [  172.743180]  [<ffffffff812acc80>] ? percpu_ref_kill_and_confirm+0x70/0x70
> [  172.743183]  [<ffffffff810b1a93>] rcu_nocb_kthread+0x263/0x380
> [  172.743185]  [<ffffffff810b1a12>] ? rcu_nocb_kthread+0x1e2/0x380
> [  172.743189]  [<ffffffff810b1830>] ? rcu_report_exp_rnp.isra.52+0xc0/0xc0
> [  172.743192]  [<ffffffff8106e046>] kthread+0xd6/0xf0
> [  172.743194]  [<ffffffff8158900c>] ? _raw_spin_unlock_irq+0x2c/0x70
> [  172.743197]  [<ffffffff8106df70>] ? __kthread_parkme+0x70/0x70
> [  172.743200]  [<ffffffff81591eec>] ret_from_fork+0x7c/0xb0
> [  172.743203]  [<ffffffff8106df70>] ? __kthread_parkme+0x70/0x70
> 
> crash> gdb list *percpu_ref_kill_rcu+0x1b4
> 0xffffffff812ace34 is in percpu_ref_kill_rcu (include/linux/percpu-refcount.h:169).
> 164             pcpu_count = ACCESS_ONCE(ref->pcpu_count);
> 165
> 166             if (likely(REF_STATUS(pcpu_count) == PCPU_REF_PTR))
> 167                     __this_cpu_dec(*pcpu_count);
> 168             else if (unlikely(atomic_dec_and_test(&ref->count)))
> 169                     ref->release(ref);
> 170
> 171             rcu_read_unlock_sched();
> 172     }
> 173
> 
> ref->release() path can't take sleeping locks, but in an rt kernel it does.
> Defer ->release() work via call_rcu() to move sleeping locks out from under
> rcu_read_lock_sched().
> 
> Signed-off-by: Mike Galbraith <umgwanakikbuti@...il.com>
> ---
>  fs/aio.c |   38 +++++++++++++++++++++++++-------------
>  1 file changed, 25 insertions(+), 13 deletions(-)
> 
> --- a/fs/aio.c
> +++ b/fs/aio.c
> @@ -110,8 +110,6 @@ struct kioctx {
>  	struct page		**ring_pages;
>  	long			nr_pages;
>  
> -	struct work_struct	free_work;
> -
>  	struct {
>  		/*
>  		 * This counts the number of available slots in the ringbuffer,
> @@ -143,6 +141,7 @@ struct kioctx {
>  	struct file		*aio_ring_file;
>  
>  	unsigned		id;
> +	struct rcu_head		rcu;
>  };
>  
>  /*------ sysctl variables----*/
> @@ -493,9 +492,9 @@ static int kiocb_cancel(struct kioctx *c
>  	return cancel(kiocb);
>  }
>  
> -static void free_ioctx(struct work_struct *work)
> +static void free_ioctx_rcu(struct rcu_head *rcu)
>  {
> -	struct kioctx *ctx = container_of(work, struct kioctx, free_work);
> +	struct kioctx *ctx = container_of(rcu, struct kioctx, rcu);
>  
>  	pr_debug("freeing %p\n", ctx);
>  
> @@ -508,18 +507,12 @@ static void free_ioctx_reqs(struct percp
>  {
>  	struct kioctx *ctx = container_of(ref, struct kioctx, reqs);
>  
> -	INIT_WORK(&ctx->free_work, free_ioctx);
> -	schedule_work(&ctx->free_work);
> +	call_rcu(&ctx->rcu, free_ioctx_rcu);
>  }
>  
> -/*
> - * When this function runs, the kioctx has been removed from the "hash table"
> - * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
> - * now it's safe to cancel any that need to be.
> - */
> -static void free_ioctx_users(struct percpu_ref *ref)
> +static void free_ioctx_users_rcu(struct rcu_head *rcu)
>  {
> -	struct kioctx *ctx = container_of(ref, struct kioctx, users);
> +	struct kioctx *ctx = container_of(rcu, struct kioctx, rcu);
>  	struct kiocb *req;
>  
>  	spin_lock_irq(&ctx->ctx_lock);
> @@ -538,6 +531,25 @@ static void free_ioctx_users(struct perc
>  	percpu_ref_put(&ctx->reqs);
>  }
>  
> +/*
> + * When this function runs, the kioctx has been removed from the "hash table"
> + * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
> + * now it's safe to cancel any that need to be.
> + *
> + * NOTE: -rt must defer this to avoid taking sleeping ctx_lock while under
> + * rcu_real_lock_sched() during ->release().
> + */
> +static void free_ioctx_users(struct percpu_ref *ref)
> +{
> +	struct kioctx *ctx = container_of(ref, struct kioctx, users);
> +
> +#ifdef CONFIG_PREEMPT_RT_BASE
> +	call_rcu(&ctx->rcu, free_ioctx_users_rcu);
> +#else
> +	free_ioctx_users_rcu(&ctx->rcu);
> +#endif
> +}
> +
>  static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
>  {
>  	unsigned i, new_nr;
> 
> 

-- 
"Thought is the essence of where you are now."
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ