Message-ID: <7937d287a3ff24ce7c7e3eb2cd18788521975511.camel@web.de>
Date: Mon, 16 Jun 2025 00:12:49 +0200
From: Bert Karwatzki <spasswolf@....de>
To: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
Cc: linux-kernel@...r.kernel.org, linux-next@...r.kernel.org,
bpf@...r.kernel.org, linux-rt-users@...r.kernel.org,
linux-rt-devel@...ts.linux.dev, Thomas Gleixner <tglx@...utronix.de>,
Alexei Starovoitov <alexei.starovoitov@...il.com>, Steven Rostedt
<rostedt@...dmis.org>, spasswolf@....de
Subject: Re: BUG: scheduling while atomic with PREEMPT_RT=y and bpf selftests
These three patches fix all the dmesg warnings (with CONFIG_LOCKDEP) seen when running the
bpf test_progs and do not cause deadlocks without CONFIG_LOCKDEP.
The warning when running the cgroup examples
[ T2916] BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48
[ T2916] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 2916, name: test_progs
[ T2916] preempt_count: 1, expected: 0
[ T2916] RCU nest depth: 2, expected: 2
[...]
[ T2916] __might_resched.cold+0xfa/0x135
[ T2916] rt_spin_lock+0x5f/0x190
[ T2916] ? task_get_cgroup1+0xe8/0x340
[ T2916] task_get_cgroup1+0xe8/0x340
[ T2916] bpf_task_get_cgroup1+0xe/0x20
[ T2916] bpf_prog_8d22669ef1ee8049_on_enter+0x62/0x1d4
[ T2916] bpf_trace_run2+0xd3/0x260
is fixed by this:
diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h
index 183fa2aa2935..49257cb90209 100644
--- a/include/trace/bpf_probe.h
+++ b/include/trace/bpf_probe.h
@@ -58,9 +58,9 @@ static notrace void \
__bpf_trace_##call(void *__data, proto) \
{ \
might_fault(); \
- preempt_disable_notrace(); \
+ migrate_disable(); \
CONCATENATE(bpf_trace_run, COUNT_ARGS(args))(__data, CAST_TO_U64(args)); \
- preempt_enable_notrace(); \
+ migrate_enable(); \
}
#undef DECLARE_EVENT_SYSCALL_CLASS
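For reference: on PREEMPT_RT, spinlock_t is a sleeping lock, so taking one
(here in task_get_cgroup1()) inside preempt_disable_notrace() triggers the
splat above, while migrate_disable() merely pins the task to its CPU and
leaves it preemptible. With the change, the wrapper generated for a
two-argument tracepoint expands roughly to the following sketch (the
tracepoint name and argument names are made up):

static notrace void
__bpf_trace_sample_event(void *__data, unsigned long a0, unsigned long a1)
{
	might_fault();
	/* pin to this CPU but stay preemptible: spinlock_t may sleep on RT */
	migrate_disable();
	bpf_trace_run2(__data, (u64)a0, (u64)a1);
	migrate_enable();
}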
The warning when running the (wq, timer, free_timer, timer_mim, timer_lockup) examples
[ T4696] BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48
[ T4696] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 4696, name: test_progs
[ T4696] preempt_count: 1, expected: 0
[ T4696] RCU nest depth: 0, expected: 0
[...]
[ T4696] __kmalloc_node_noprof+0xee/0x490
[ T4696] bpf_map_kmalloc_node+0x72/0x220
[ T4696] __bpf_async_init+0x107/0x310
[ T4696] bpf_timer_init+0x33/0x40
[ T4696] bpf_prog_7e15f1bc7d1d26d0_start_cb+0x5d/0x91
[ T4696] bpf_prog_d85f43676fabf521_start_timer+0x65/0x8a
[ T4696] bpf_prog_test_run_syscall+0x103/0x250
is fixed by this (though this might leak memory due to the allocation now
happening before the lock is taken(?); a possible re-check is sketched after the diff)
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index e3a2662f4e33..94fcd8c8661c 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1263,19 +1263,16 @@ static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u
return -EINVAL;
}
- __bpf_spin_lock_irqsave(&async->lock);
t = async->timer;
- if (t) {
- ret = -EBUSY;
- goto out;
- }
+ if (t)
+ return -EBUSY;
/* allocate hrtimer via map_kmalloc to use memcg accounting */
cb = bpf_map_kmalloc_node(map, size, GFP_ATOMIC, map->numa_node);
- if (!cb) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!cb)
+ return -ENOMEM;
+
+ __bpf_spin_lock_irqsave(&async->lock);
switch (type) {
case BPF_ASYNC_TYPE_TIMER:
@@ -1315,7 +1312,6 @@ static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u
kfree(cb);
ret = -EPERM;
}
-out:
__bpf_spin_unlock_irqrestore(&async->lock);
return ret;
}
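On the possible leak: with the check of async->timer now done without the
lock, another task could initialize the timer between that check and
__bpf_spin_lock_irqsave(), so the freshly allocated cb could be leaked or
clobber an existing timer. An untested sketch of re-checking under the lock
to close that window:

	__bpf_spin_lock_irqsave(&async->lock);
	/* re-check: the timer may have been initialized while we allocated */
	if (async->timer) {
		__bpf_spin_unlock_irqrestore(&async->lock);
		kfree(cb);
		return -EBUSY;
	}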
In addition to these, there's a deadlock warning from lockdep when running the
timer_lockup example:
[ 127.373597] [ C1] ============================================
[ 127.373598] [ C1] WARNING: possible recursive locking detected
[ 127.373601] [ C1] 6.15.0-bpf-00006-g31cf22212ed9 #41 Tainted: G O
[ 127.373602] [ C1] --------------------------------------------
[ 127.373603] [ C1] ktimers/1/85 is trying to acquire lock:
[ 127.373605] [ C1] ffff98f62e61c1b8 (&base->softirq_expiry_lock){+...}-{3:3}, at: hrtimer_cancel_wait_running+0x4d/0x80
[ 127.373614] [ C1]
but task is already holding lock:
[ 127.373615] [ C1] ffff98f62e65c1b8 (&base->softirq_expiry_lock){+...}-{3:3}, at: hrtimer_run_softirq+0x37/0x100
[ 127.373621] [ C1]
other info that might help us debug this:
[ 127.373621] [ C1] Possible unsafe locking scenario:
[ 127.373622] [ C1] CPU0
[ 127.373623] [ C1] ----
[ 127.373624] [ C1] lock(&base->softirq_expiry_lock);
[ 127.373626] [ C1] lock(&base->softirq_expiry_lock);
[ 127.373627] [ C1]
*** DEADLOCK ***
[ 127.373628] [ C1] May be due to missing lock nesting notation
which can be fixed (?) by reintroducing spin_lock_bh_nested()
(which was introduced in v4.0 and removed in v4.11 ...):
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index d3561c4a080e..b6635466b35f 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -219,6 +219,8 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
#ifdef CONFIG_DEBUG_LOCK_ALLOC
# define raw_spin_lock_nested(lock, subclass) \
_raw_spin_lock_nested(lock, subclass)
+# define raw_spin_lock_bh_nested(lock, subclass) \
+ _raw_spin_lock_bh_nested(lock, subclass)
# define raw_spin_lock_nest_lock(lock, nest_lock) \
do { \
@@ -233,6 +235,8 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
*/
# define raw_spin_lock_nested(lock, subclass) \
_raw_spin_lock(((void)(subclass), (lock)))
+# define raw_spin_lock_bh_nested(lock, subclass) \
+ _raw_spin_lock_bh(((void)(subclass), (lock)))
# define raw_spin_lock_nest_lock(lock, nest_lock) _raw_spin_lock(lock)
#endif
@@ -366,6 +370,11 @@ do { \
raw_spin_lock_nested(spinlock_check(lock), subclass); \
} while (0)
+#define spin_lock_bh_nested(lock, subclass) \
+do { \
+ raw_spin_lock_bh_nested(spinlock_check(lock), subclass); \
+} while (0)
+
#define spin_lock_nest_lock(lock, nest_lock) \
do { \
raw_spin_lock_nest_lock(spinlock_check(lock), nest_lock); \
@@ -552,6 +561,10 @@ DEFINE_LOCK_GUARD_1(raw_spinlock_bh, raw_spinlock_t,
raw_spin_lock_bh(_T->lock),
raw_spin_unlock_bh(_T->lock))
+DEFINE_LOCK_GUARD_1(raw_spinlock_bh_nested, raw_spinlock_t,
+ raw_spin_lock_bh_nested(_T->lock, SINGLE_DEPTH_NESTING),
+ raw_spin_unlock_bh(_T->lock))
+
DEFINE_LOCK_GUARD_1_COND(raw_spinlock_bh, _try, raw_spin_trylock_bh(_T->lock))
DEFINE_LOCK_GUARD_1(raw_spinlock_irqsave, raw_spinlock_t,
diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h
index 9ecb0ab504e3..791867efad04 100644
--- a/include/linux/spinlock_api_smp.h
+++ b/include/linux/spinlock_api_smp.h
@@ -22,6 +22,8 @@ int in_lock_functions(unsigned long addr);
void __lockfunc _raw_spin_lock(raw_spinlock_t *lock) __acquires(lock);
void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass)
__acquires(lock);
+void __lockfunc _raw_spin_lock_bh_nested(raw_spinlock_t *lock, int subclass)
+ __acquires(lock);
void __lockfunc
_raw_spin_lock_nest_lock(raw_spinlock_t *lock, struct lockdep_map *map)
__acquires(lock);
diff --git a/include/linux/spinlock_api_up.h b/include/linux/spinlock_api_up.h
index 819aeba1c87e..a42887a0c846 100644
--- a/include/linux/spinlock_api_up.h
+++ b/include/linux/spinlock_api_up.h
@@ -57,6 +57,7 @@
#define _raw_spin_lock(lock) __LOCK(lock)
#define _raw_spin_lock_nested(lock, subclass) __LOCK(lock)
+#define _raw_spin_lock_bh_nested(lock, subclass) __LOCK(lock)
#define _raw_read_lock(lock) __LOCK(lock)
#define _raw_write_lock(lock) __LOCK(lock)
#define _raw_write_lock_nested(lock, subclass) __LOCK(lock)
diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h
index f6499c37157d..b89dc9e272cc 100644
--- a/include/linux/spinlock_rt.h
+++ b/include/linux/spinlock_rt.h
@@ -88,6 +88,13 @@ static __always_inline void spin_lock_bh(spinlock_t *lock)
rt_spin_lock(lock);
}
+static __always_inline void spin_lock_bh_nested(spinlock_t *lock, int subclass)
+{
+ /* Investigate: Drop bh when blocking ? */
+ local_bh_disable();
+ rt_spin_lock_nested(lock, subclass);
+}
+
static __always_inline void spin_lock_irq(spinlock_t *lock)
{
rt_spin_lock(lock);
diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c
index 7685defd7c52..c4aa80441a1d 100644
--- a/kernel/locking/spinlock.c
+++ b/kernel/locking/spinlock.c
@@ -380,6 +380,14 @@ void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass)
}
EXPORT_SYMBOL(_raw_spin_lock_nested);
+void __lockfunc _raw_spin_lock_bh_nested(raw_spinlock_t *lock, int subclass)
+{
+ __local_bh_disable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET);
+ spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
+ LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
+}
+EXPORT_SYMBOL(_raw_spin_lock_bh_nested);
+
unsigned long __lockfunc _raw_spin_lock_irqsave_nested(raw_spinlock_t *lock,
int subclass)
{
diff --git a/kernel/locking/spinlock_rt.c b/kernel/locking/spinlock_rt.c
index db1e11b45de6..5b76072c4838 100644
--- a/kernel/locking/spinlock_rt.c
+++ b/kernel/locking/spinlock_rt.c
@@ -58,7 +58,6 @@ void __sched rt_spin_lock(spinlock_t *lock) __acquires(RCU)
}
EXPORT_SYMBOL(rt_spin_lock);
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
void __sched rt_spin_lock_nested(spinlock_t *lock, int subclass)
{
spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
@@ -66,6 +65,7 @@ void __sched rt_spin_lock_nested(spinlock_t *lock, int subclass)
}
EXPORT_SYMBOL(rt_spin_lock_nested);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
void __sched rt_spin_lock_nest_lock(spinlock_t *lock,
struct lockdep_map *nest_lock)
{
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 30899a8cc52c..d62455f62fc4 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1457,7 +1457,7 @@ void hrtimer_cancel_wait_running(const struct hrtimer *timer)
* unlikely and just causes another wait loop.
*/
atomic_inc(&base->cpu_base->timer_waiters);
- spin_lock_bh(&base->cpu_base->softirq_expiry_lock);
+ spin_lock_bh_nested(&base->cpu_base->softirq_expiry_lock, SINGLE_DEPTH_NESTING);
atomic_dec(&base->cpu_base->timer_waiters);
spin_unlock_bh(&base->cpu_base->softirq_expiry_lock);
}
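For context: the two softirq_expiry_lock instances in the splat live in
different cpu_bases (note the different lock addresses), which lockdep
cannot tell apart without a nesting annotation. The _nested variant only
changes lockdep bookkeeping, nothing about the locking itself; the intended
pattern is roughly this sketch (other_cpu_base is a stand-in name):

	/* this CPU's softirq_expiry_lock is already held in hrtimer_run_softirq() */
	spin_lock_bh_nested(&other_cpu_base->softirq_expiry_lock, SINGLE_DEPTH_NESTING);
	...
	spin_unlock_bh(&other_cpu_base->softirq_expiry_lock);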
diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c
index e1fba28e4a86..7cfb9473a526 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_success.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_success.c
@@ -7,6 +7,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_misc.h"
+#include "bpf_kfuncs.h"
#include "errno.h"
char _license[] SEC("license") = "GPL";
Bert Karwatzki