Message-ID: <1a779207-4fa8-4b8e-95d7-e0568791e6ac@kernel.dk>
Date: Mon, 9 Dec 2024 07:59:29 -0700
From: Jens Axboe <axboe@...nel.dk>
To: chase xd <sl1589472800@...il.com>, Pavel Begunkov
<asml.silence@...il.com>, io-uring@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: Re: possible deadlock in __wake_up_common_lock
On 12/9/24 5:03 AM, chase xd wrote:
> ============================================
> WARNING: possible recursive locking detected
> 6.1.119-dirty #3 Not tainted
> --------------------------------------------
> syz-executor199/6820 is trying to acquire lock:
> ffff88807c386378 (&ctx->cq_wait){....}-{2:2}, at:
> __wake_up_common_lock+0xb8/0x140 kernel/sched/wait.c:137
>
> but task is already holding lock:
> ffff88807c386378 (&ctx->cq_wait){....}-{2:2}, at:
> __wake_up_common_lock+0xb8/0x140 kernel/sched/wait.c:137
>
> other info that might help us debug this:
> Possible unsafe locking scenario:
>
> CPU0
> ----
> lock(&ctx->cq_wait);
> lock(&ctx->cq_wait);
>
> *** DEADLOCK ***
>
> May be due to missing lock nesting notation
>
> 2 locks held by syz-executor199/6820:
> #0: ffff88807c3860a8 (&ctx->uring_lock){+.+.}-{3:3}, at:
> __do_sys_io_uring_enter+0x8fc/0x2130 io_uring/io_uring.c:3313
> #1: ffff88807c386378 (&ctx->cq_wait){....}-{2:2}, at:
> __wake_up_common_lock+0xb8/0x140 kernel/sched/wait.c:137
>
> stack backtrace:
> CPU: 7 PID: 6820 Comm: syz-executor199 Not tainted 6.1.119-dirty #3
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
> Call Trace:
> <TASK>
> __dump_stack lib/dump_stack.c:88 [inline]
> dump_stack_lvl+0x5b/0x85 lib/dump_stack.c:106
> print_deadlock_bug kernel/locking/lockdep.c:2983 [inline]
> check_deadlock kernel/locking/lockdep.c:3026 [inline]
> validate_chain kernel/locking/lockdep.c:3812 [inline]
> __lock_acquire.cold+0x219/0x3bd kernel/locking/lockdep.c:5049
> lock_acquire kernel/locking/lockdep.c:5662 [inline]
> lock_acquire+0x1e3/0x5e0 kernel/locking/lockdep.c:5627
> __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
> _raw_spin_lock_irqsave+0x3d/0x60 kernel/locking/spinlock.c:162
> __wake_up_common_lock+0xb8/0x140 kernel/sched/wait.c:137
> __io_cqring_wake io_uring/io_uring.h:224 [inline]
> __io_cqring_wake io_uring/io_uring.h:211 [inline]
> io_req_local_work_add io_uring/io_uring.c:1135 [inline]
> __io_req_task_work_add+0x4a4/0xd60 io_uring/io_uring.c:1146
> io_poll_wake+0x3cb/0x550 io_uring/poll.c:465
> __wake_up_common+0x14c/0x650 kernel/sched/wait.c:107
> __wake_up_common_lock+0xd4/0x140 kernel/sched/wait.c:138
> __io_cqring_wake io_uring/io_uring.h:224 [inline]
> __io_cqring_wake io_uring/io_uring.h:211 [inline]
> io_cqring_wake io_uring/io_uring.h:231 [inline]
> io_cqring_ev_posted io_uring/io_uring.c:578 [inline]
> __io_cq_unlock_post io_uring/io_uring.c:586 [inline]
> __io_submit_flush_completions+0x778/0xba0 io_uring/io_uring.c:1346
> io_submit_flush_completions io_uring/io_uring.c:159 [inline]
> io_submit_state_end io_uring/io_uring.c:2203 [inline]
> io_submit_sqes+0xa78/0x1ce0 io_uring/io_uring.c:2317
> __do_sys_io_uring_enter+0x907/0x2130 io_uring/io_uring.c:3314
> do_syscall_x64 arch/x86/entry/common.c:51 [inline]
> do_syscall_64+0x3a/0xb0 arch/x86/entry/common.c:81
> entry_SYSCALL_64_after_hwframe+0x6e/0xd8
> RIP: 0033:0x7fa54e70640d
> Code: 28 c3 e8 46 1e 00 00 66 0f 1f 44 00 00 f3 0f 1e fa 48 89 f8 48
> 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d
> 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48
> RSP: 002b:00007ffd0ad80be8 EFLAGS: 00000246 ORIG_RAX: 00000000000001aa
> RAX: ffffffffffffffda RBX: 00007ffd0ad80df8 RCX: 00007fa54e70640d
> RDX: 0000000000000000 RSI: 000000000000331b RDI: 0000000000000003
> RBP: 0000000000000001 R08: 0000000000000000 R09: 0000000000000000
> R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000001
> R13: 00007ffd0ad80de8 R14: 00007fa54e783530 R15: 0000000000000001
> </TASK>
I think this backport of:

3181e22fb799 ("io_uring: wake up optimisations")

should fix that: with IORING_SETUP_DEFER_TASKRUN set, the completion flush
path no longer calls __io_cqring_wake(), which is what recursed back into
ctx->cq_wait via io_poll_wake() in the trace above. Can you try?

diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 4f0ae938b146..0b1361663267 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -582,6 +582,16 @@ static inline void __io_cq_unlock_post(struct io_ring_ctx *ctx)
 	io_cqring_ev_posted(ctx);
 }
 
+static inline void __io_cq_unlock_post_flush(struct io_ring_ctx *ctx)
+	__releases(ctx->completion_lock)
+{
+	io_commit_cqring(ctx);
+	spin_unlock(&ctx->completion_lock);
+	io_commit_cqring_flush(ctx);
+	if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN))
+		__io_cqring_wake(ctx);
+}
+
 void io_cq_unlock_post(struct io_ring_ctx *ctx)
 {
 	__io_cq_unlock_post(ctx);
@@ -1339,7 +1349,7 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx)
 		if (!(req->flags & REQ_F_CQE_SKIP))
 			__io_fill_cqe_req(ctx, req);
 	}
-	__io_cq_unlock_post(ctx);
+	__io_cq_unlock_post_flush(ctx);
 
 	io_free_batch_list(ctx, state->compl_reqs.first);
 	INIT_WQ_LIST(&state->compl_reqs);
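
FWIW, a rough and untested liburing sketch of the kind of setup the trace
implies: a DEFER_TASKRUN ring with a poll request armed on its own ring fd,
so the CQ wakeup feeds straight back into the same ctx->cq_wait. Purely
illustrative, not the actual syz reproducer:

#include <liburing.h>
#include <poll.h>
#include <stdio.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	int ret;

	/* DEFER_TASKRUN requires SINGLE_ISSUER */
	ret = io_uring_queue_init(8, &ring, IORING_SETUP_SINGLE_ISSUER |
					    IORING_SETUP_DEFER_TASKRUN);
	if (ret < 0) {
		fprintf(stderr, "queue_init: %d\n", ret);
		return 1;
	}

	/* arm a poll request on the ring's own fd: CQ wakeups on this
	 * ring now fire io_poll_wake() for this request */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_poll_add(sqe, ring.ring_fd, POLLIN);
	io_uring_submit(&ring);

	/* a request that completes inline (nop) is flushed from
	 * io_submit_sqes(), posts a CQE and wakes ctx->cq_wait, which
	 * then re-enters the same waitqueue via the poll request above */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_nop(sqe);
	io_uring_submit(&ring);

	io_uring_queue_exit(&ring);
	return 0;
}
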
--
Jens Axboe