Message-Id: <20241108063214.578120-1-kunwu.chan@linux.dev>
Date: Fri, 8 Nov 2024 14:32:14 +0800
From: Kunwu Chan <kunwu.chan@...ux.dev>
To: ast@...nel.org,
daniel@...earbox.net,
andrii@...nel.org,
martin.lau@...ux.dev,
eddyz87@...il.com,
song@...nel.org,
yonghong.song@...ux.dev,
john.fastabend@...il.com,
kpsingh@...nel.org,
sdf@...ichev.me,
haoluo@...gle.com,
jolsa@...nel.org,
bigeasy@...utronix.de,
clrkwllms@...nel.org,
rostedt@...dmis.org,
tglx@...utronix.de
Cc: bpf@...r.kernel.org,
linux-kernel@...r.kernel.org,
linux-rt-devel@...ts.linux.dev,
Kunwu Chan <chentao@...inos.cn>,
syzbot+b506de56cbbb63148c33@...kaller.appspotmail.com
Subject: [PATCH] bpf: Convert lpm_trie::lock to 'raw_spinlock_t'
From: Kunwu Chan <chentao@...inos.cn>
When PREEMPT_RT is enabled, 'spinlock_t' becomes a sleeping lock. Here a
bpf program is invoked from an interrupt handler while a raw_spinlock is
already held, so taking trie->lock in that context is an invalid lock
acquire context; a minimal sketch of the offending pattern follows the
lockdep report below.
[ BUG: Invalid wait context ]
6.12.0-rc5-next-20241031-syzkaller #0 Not tainted
-----------------------------
swapper/0/0 is trying to lock:
ffff8880261e7a00 (&trie->lock){....}-{3:3},
at: trie_delete_elem+0x96/0x6a0 kernel/bpf/lpm_trie.c:462
other info that might help us debug this:
context-{3:3}
5 locks held by swapper/0/0:
#0: ffff888020bb75c8 (&vp_dev->lock){-...}-{3:3},
at: vp_vring_interrupt drivers/virtio/virtio_pci_common.c:80 [inline]
#0: ffff888020bb75c8 (&vp_dev->lock){-...}-{3:3},
at: vp_interrupt+0x142/0x200 drivers/virtio/virtio_pci_common.c:113
#1: ffff88814174a120 (&vb->stop_update_lock){-...}-{3:3},
at: spin_lock include/linux/spinlock.h:351 [inline]
#1: ffff88814174a120 (&vb->stop_update_lock){-...}-{3:3},
at: stats_request+0x6f/0x230 drivers/virtio/virtio_balloon.c:438
#2: ffffffff8e939f20 (rcu_read_lock){....}-{1:3},
at: rcu_lock_acquire include/linux/rcupdate.h:337 [inline]
#2: ffffffff8e939f20 (rcu_read_lock){....}-{1:3},
at: rcu_read_lock include/linux/rcupdate.h:849 [inline]
#2: ffffffff8e939f20 (rcu_read_lock){....}-{1:3},
at: __queue_work+0x199/0xf50 kernel/workqueue.c:2259
#3: ffff8880b863dd18 (&pool->lock){-.-.}-{2:2},
at: __queue_work+0x759/0xf50
#4: ffffffff8e939f20 (rcu_read_lock){....}-{1:3},
at: rcu_lock_acquire include/linux/rcupdate.h:337 [inline]
#4: ffffffff8e939f20 (rcu_read_lock){....}-{1:3},
at: rcu_read_lock include/linux/rcupdate.h:849 [inline]
#4: ffffffff8e939f20 (rcu_read_lock){....}-{1:3},
at: __bpf_trace_run kernel/trace/bpf_trace.c:2339 [inline]
#4: ffffffff8e939f20 (rcu_read_lock){....}-{1:3},
at: bpf_trace_run1+0x1d6/0x520 kernel/trace/bpf_trace.c:2380
stack backtrace:
CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted
6.12.0-rc5-next-20241031-syzkaller #0
Hardware name: Google Compute Engine/Google Compute Engine,
BIOS Google 09/13/2024
Call Trace:
<IRQ>
__dump_stack lib/dump_stack.c:94 [inline]
dump_stack_lvl+0x241/0x360 lib/dump_stack.c:120
print_lock_invalid_wait_context kernel/locking/lockdep.c:4826 [inline]
check_wait_context kernel/locking/lockdep.c:4898 [inline]
__lock_acquire+0x15a8/0x2100 kernel/locking/lockdep.c:5176
lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5849
__raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
_raw_spin_lock_irqsave+0xd5/0x120 kernel/locking/spinlock.c:162
trie_delete_elem+0x96/0x6a0 kernel/bpf/lpm_trie.c:462
bpf_prog_2c29ac5cdc6b1842+0x43/0x47
bpf_dispatcher_nop_func include/linux/bpf.h:1290 [inline]
__bpf_prog_run include/linux/filter.h:701 [inline]
bpf_prog_run include/linux/filter.h:708 [inline]
__bpf_trace_run kernel/trace/bpf_trace.c:2340 [inline]
bpf_trace_run1+0x2ca/0x520 kernel/trace/bpf_trace.c:2380
trace_workqueue_activate_work+0x186/0x1f0 include/trace/events/workqueue.h:59
__queue_work+0xc7b/0xf50 kernel/workqueue.c:2338
queue_work_on+0x1c2/0x380 kernel/workqueue.c:2390
queue_work include/linux/workqueue.h:662 [inline]
stats_request+0x1a3/0x230 drivers/virtio/virtio_balloon.c:441
vring_interrupt+0x21d/0x380 drivers/virtio/virtio_ring.c:2595
vp_vring_interrupt drivers/virtio/virtio_pci_common.c:82 [inline]
vp_interrupt+0x192/0x200 drivers/virtio/virtio_pci_common.c:113
__handle_irq_event_percpu+0x29a/0xa80 kernel/irq/handle.c:158
handle_irq_event_percpu kernel/irq/handle.c:193 [inline]
handle_irq_event+0x89/0x1f0 kernel/irq/handle.c:210
handle_fasteoi_irq+0x48a/0xae0 kernel/irq/chip.c:720
generic_handle_irq_desc include/linux/irqdesc.h:173 [inline]
handle_irq arch/x86/kernel/irq.c:247 [inline]
call_irq_handler arch/x86/kernel/irq.c:259 [inline]
__common_interrupt+0x136/0x230 arch/x86/kernel/irq.c:285
common_interrupt+0xb4/0xd0 arch/x86/kernel/irq.c:278
</IRQ>
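For illustration, the nesting lockdep complains about boils down to the
following minimal sketch. This is hypothetical code, not from the kernel
tree; pool_lock and trie_lock merely stand in for the workqueue
pool->lock and the map's trie->lock:

/* Hypothetical sketch, not kernel code. On PREEMPT_RT, spinlock_t is
 * backed by an rtmutex and may sleep, so taking it inside a
 * raw-spinlock section in hardirq context is an invalid wait context.
 */
static DEFINE_RAW_SPINLOCK(pool_lock);	/* stand-in for pool->lock */
static DEFINE_SPINLOCK(trie_lock);	/* stand-in for trie->lock */

static void irq_path(void)
{
	unsigned long outer, inner;

	raw_spin_lock_irqsave(&pool_lock, outer);
	/* BUG on RT: sleeping lock acquired under a raw spinlock */
	spin_lock_irqsave(&trie_lock, inner);
	spin_unlock_irqrestore(&trie_lock, inner);
	raw_spin_unlock_irqrestore(&pool_lock, outer);
}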
Reported-by: syzbot+b506de56cbbb63148c33@...kaller.appspotmail.com
Closes: https://lore.kernel.org/bpf/6723db4a.050a0220.35b515.0168.GAE@google.com/
Fixes: 66150d0dde03 ("bpf, lpm: Make locking RT friendly")
Signed-off-by: Kunwu Chan <chentao@...inos.cn>
---
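A note on the fix: 'raw_spinlock_t' is never converted to a sleeping
lock on PREEMPT_RT, so after the conversion the same nesting becomes a
valid raw-in-raw acquisition. A minimal counterpart sketch, again with
hypothetical stand-in symbols:

/* Hypothetical sketch with the same stand-in symbols: raw_spinlock_t
 * stays a genuine spinning lock on PREEMPT_RT, so this raw-in-raw
 * acquisition passes lockdep's wait-context checks.
 */
static DEFINE_RAW_SPINLOCK(pool_lock);
static DEFINE_RAW_SPINLOCK(trie_lock);	/* trie->lock after this patch */

static void irq_path(void)
{
	unsigned long outer, inner;

	raw_spin_lock_irqsave(&pool_lock, outer);
	raw_spin_lock_irqsave(&trie_lock, inner);	/* OK on RT */
	raw_spin_unlock_irqrestore(&trie_lock, inner);
	raw_spin_unlock_irqrestore(&pool_lock, outer);
}

The tradeoff is that nothing inside a raw-spinlock section may sleep,
even on PREEMPT_RT, so the critical sections must stay short and
non-blocking.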
kernel/bpf/lpm_trie.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 9b60eda0f727..373cdcfa0505 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -35,7 +35,7 @@ struct lpm_trie {
 	size_t				n_entries;
 	size_t				max_prefixlen;
 	size_t				data_size;
-	spinlock_t			lock;
+	raw_spinlock_t			lock;
 };
 
 /* This trie implements a longest prefix match algorithm that can be used to
@@ -330,7 +330,7 @@ static long trie_update_elem(struct bpf_map *map,
 	if (key->prefixlen > trie->max_prefixlen)
 		return -EINVAL;
 
-	spin_lock_irqsave(&trie->lock, irq_flags);
+	raw_spin_lock_irqsave(&trie->lock, irq_flags);
 
 	/* Allocate and fill a new node */
 
@@ -437,7 +437,7 @@ static long trie_update_elem(struct bpf_map *map,
 		kfree(im_node);
 	}
 
-	spin_unlock_irqrestore(&trie->lock, irq_flags);
+	raw_spin_unlock_irqrestore(&trie->lock, irq_flags);
 	kfree_rcu(free_node, rcu);
 
 	return ret;
@@ -459,7 +459,7 @@ static long trie_delete_elem(struct bpf_map *map, void *_key)
 	if (key->prefixlen > trie->max_prefixlen)
 		return -EINVAL;
 
-	spin_lock_irqsave(&trie->lock, irq_flags);
+	raw_spin_lock_irqsave(&trie->lock, irq_flags);
 
 	/* Walk the tree looking for an exact key/length match and keeping
 	 * track of the path we traverse.  We will need to know the node
@@ -535,7 +535,7 @@ static long trie_delete_elem(struct bpf_map *map, void *_key)
 	free_node = node;
 
 out:
-	spin_unlock_irqrestore(&trie->lock, irq_flags);
+	raw_spin_unlock_irqrestore(&trie->lock, irq_flags);
 	kfree_rcu(free_parent, rcu);
 	kfree_rcu(free_node, rcu);
 
@@ -581,7 +581,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
 			  offsetof(struct bpf_lpm_trie_key_u8, data);
 	trie->max_prefixlen = trie->data_size * 8;
 
-	spin_lock_init(&trie->lock);
+	raw_spin_lock_init(&trie->lock);
 
 	return &trie->map;
 }
--
2.34.1