Message-ID: <1a779207-4fa8-4b8e-95d7-e0568791e6ac@kernel.dk>
Date: Mon, 9 Dec 2024 07:59:29 -0700
From: Jens Axboe <axboe@...nel.dk>
To: chase xd <sl1589472800@...il.com>, Pavel Begunkov
<asml.silence@...il.com>, io-uring@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: Re: possible deadlock in __wake_up_common_lock
On 12/9/24 5:03 AM, chase xd wrote:
> ============================================
> WARNING: possible recursive locking detected
> 6.1.119-dirty #3 Not tainted
> --------------------------------------------
> syz-executor199/6820 is trying to acquire lock:
> ffff88807c386378 (&ctx->cq_wait){....}-{2:2}, at:
> __wake_up_common_lock+0xb8/0x140 kernel/sched/wait.c:137
>
> but task is already holding lock:
> ffff88807c386378 (&ctx->cq_wait){....}-{2:2}, at:
> __wake_up_common_lock+0xb8/0x140 kernel/sched/wait.c:137
>
> other info that might help us debug this:
> Possible unsafe locking scenario:
>
> CPU0
> ----
> lock(&ctx->cq_wait);
> lock(&ctx->cq_wait);
>
> *** DEADLOCK ***
>
> May be due to missing lock nesting notation
>
> 2 locks held by syz-executor199/6820:
> #0: ffff88807c3860a8 (&ctx->uring_lock){+.+.}-{3:3}, at:
> __do_sys_io_uring_enter+0x8fc/0x2130 io_uring/io_uring.c:3313
> #1: ffff88807c386378 (&ctx->cq_wait){....}-{2:2}, at:
> __wake_up_common_lock+0xb8/0x140 kernel/sched/wait.c:137
>
> stack backtrace:
> CPU: 7 PID: 6820 Comm: syz-executor199 Not tainted 6.1.119-dirty #3
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
> Call Trace:
> <TASK>
> __dump_stack lib/dump_stack.c:88 [inline]
> dump_stack_lvl+0x5b/0x85 lib/dump_stack.c:106
> print_deadlock_bug kernel/locking/lockdep.c:2983 [inline]
> check_deadlock kernel/locking/lockdep.c:3026 [inline]
> validate_chain kernel/locking/lockdep.c:3812 [inline]
> __lock_acquire.cold+0x219/0x3bd kernel/locking/lockdep.c:5049
> lock_acquire kernel/locking/lockdep.c:5662 [inline]
> lock_acquire+0x1e3/0x5e0 kernel/locking/lockdep.c:5627
> __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
> _raw_spin_lock_irqsave+0x3d/0x60 kernel/locking/spinlock.c:162
> __wake_up_common_lock+0xb8/0x140 kernel/sched/wait.c:137
> __io_cqring_wake io_uring/io_uring.h:224 [inline]
> __io_cqring_wake io_uring/io_uring.h:211 [inline]
> io_req_local_work_add io_uring/io_uring.c:1135 [inline]
> __io_req_task_work_add+0x4a4/0xd60 io_uring/io_uring.c:1146
> io_poll_wake+0x3cb/0x550 io_uring/poll.c:465
> __wake_up_common+0x14c/0x650 kernel/sched/wait.c:107
> __wake_up_common_lock+0xd4/0x140 kernel/sched/wait.c:138
> __io_cqring_wake io_uring/io_uring.h:224 [inline]
> __io_cqring_wake io_uring/io_uring.h:211 [inline]
> io_cqring_wake io_uring/io_uring.h:231 [inline]
> io_cqring_ev_posted io_uring/io_uring.c:578 [inline]
> __io_cq_unlock_post io_uring/io_uring.c:586 [inline]
> __io_submit_flush_completions+0x778/0xba0 io_uring/io_uring.c:1346
> io_submit_flush_completions io_uring/io_uring.c:159 [inline]
> io_submit_state_end io_uring/io_uring.c:2203 [inline]
> io_submit_sqes+0xa78/0x1ce0 io_uring/io_uring.c:2317
> __do_sys_io_uring_enter+0x907/0x2130 io_uring/io_uring.c:3314
> do_syscall_x64 arch/x86/entry/common.c:51 [inline]
> do_syscall_64+0x3a/0xb0 arch/x86/entry/common.c:81
> entry_SYSCALL_64_after_hwframe+0x6e/0xd8
> RIP: 0033:0x7fa54e70640d
> Code: 28 c3 e8 46 1e 00 00 66 0f 1f 44 00 00 f3 0f 1e fa 48 89 f8 48
> 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d
> 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48
> RSP: 002b:00007ffd0ad80be8 EFLAGS: 00000246 ORIG_RAX: 00000000000001aa
> RAX: ffffffffffffffda RBX: 00007ffd0ad80df8 RCX: 00007fa54e70640d
> RDX: 0000000000000000 RSI: 000000000000331b RDI: 0000000000000003
> RBP: 0000000000000001 R08: 0000000000000000 R09: 0000000000000000
> R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000001
> R13: 00007ffd0ad80de8 R14: 00007fa54e783530 R15: 0000000000000001
> </TASK>
I think this backport of:

3181e22fb799 ("io_uring: wake up optimisations")

should fix that: with IORING_SETUP_DEFER_TASKRUN set, the completion flush
path no longer calls __io_cqring_wake(), which is what recursed back into
ctx->cq_wait via io_poll_wake() in the trace above. Can you try?

diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 4f0ae938b146..0b1361663267 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -582,6 +582,16 @@ static inline void __io_cq_unlock_post(struct io_ring_ctx *ctx)
 	io_cqring_ev_posted(ctx);
 }
 
+static inline void __io_cq_unlock_post_flush(struct io_ring_ctx *ctx)
+	__releases(ctx->completion_lock)
+{
+	io_commit_cqring(ctx);
+	spin_unlock(&ctx->completion_lock);
+	io_commit_cqring_flush(ctx);
+	if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN))
+		__io_cqring_wake(ctx);
+}
+
 void io_cq_unlock_post(struct io_ring_ctx *ctx)
 {
 	__io_cq_unlock_post(ctx);
@@ -1339,7 +1349,7 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx)
 		if (!(req->flags & REQ_F_CQE_SKIP))
 			__io_fill_cqe_req(ctx, req);
 	}
-	__io_cq_unlock_post(ctx);
+	__io_cq_unlock_post_flush(ctx);
 
 	io_free_batch_list(ctx, state->compl_reqs.first);
 	INIT_WQ_LIST(&state->compl_reqs);
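
FWIW, a rough and untested liburing sketch of the kind of setup the trace
implies: a DEFER_TASKRUN ring with a poll request armed on its own ring fd,
so the CQ wakeup feeds straight back into the same ctx->cq_wait. Purely
illustrative, not the actual syz reproducer:

#include <liburing.h>
#include <poll.h>
#include <stdio.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	int ret;

	/* DEFER_TASKRUN requires SINGLE_ISSUER */
	ret = io_uring_queue_init(8, &ring, IORING_SETUP_SINGLE_ISSUER |
					    IORING_SETUP_DEFER_TASKRUN);
	if (ret < 0) {
		fprintf(stderr, "queue_init: %d\n", ret);
		return 1;
	}

	/* arm a poll request on the ring's own fd: CQ wakeups on this
	 * ring now fire io_poll_wake() for this request */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_poll_add(sqe, ring.ring_fd, POLLIN);
	io_uring_submit(&ring);

	/* a request that completes inline (nop) is flushed from
	 * io_submit_sqes(), posts a CQE and wakes ctx->cq_wait, which
	 * then re-enters the same waitqueue via the poll request above */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_nop(sqe);
	io_uring_submit(&ring);

	io_uring_queue_exit(&ring);
	return 0;
}
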
--
Jens Axboe