[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250616181001.GA905960@joelnvbox>
Date: Mon, 16 Jun 2025 14:10:01 -0400
From: Joel Fernandes <joelagnelf@...dia.com>
To: Lyude Paul <lyude@...hat.com>
Cc: rust-for-linux@...r.kernel.org, Thomas Gleixner <tglx@...utronix.de>,
Boqun Feng <boqun.feng@...il.com>, linux-kernel@...r.kernel.org,
Daniel Almeida <daniel.almeida@...labora.com>,
Ingo Molnar <mingo@...hat.com>,
Peter Zijlstra <peterz@...radead.org>,
Juri Lelli <juri.lelli@...hat.com>,
Vincent Guittot <vincent.guittot@...aro.org>,
Dietmar Eggemann <dietmar.eggemann@....com>,
Steven Rostedt <rostedt@...dmis.org>,
Ben Segall <bsegall@...gle.com>, Mel Gorman <mgorman@...e.de>,
Valentin Schneider <vschneid@...hat.com>,
Will Deacon <will@...nel.org>, Waiman Long <longman@...hat.com>,
Miguel Ojeda <ojeda@...nel.org>,
Alex Gaynor <alex.gaynor@...il.com>, Gary Guo <gary@...yguo.net>,
Björn Roy Baron <bjorn3_gh@...tonmail.com>,
Benno Lossin <lossin@...nel.org>,
Andreas Hindborg <a.hindborg@...nel.org>,
Alice Ryhl <aliceryhl@...gle.com>, Trevor Gross <tmgross@...ch.edu>,
Danilo Krummrich <dakr@...nel.org>,
David Woodhouse <dwmw@...zon.co.uk>, Jens Axboe <axboe@...nel.dk>,
Sebastian Andrzej Siewior <bigeasy@...utronix.de>,
NeilBrown <neilb@...e.de>,
Caleb Sander Mateos <csander@...estorage.com>,
Ryo Takakura <ryotkkr98@...il.com>,
K Prateek Nayak <kprateek.nayak@....com>
Subject: Re: [RFC RESEND v10 03/14] irq & spin_lock: Add counted interrupt
disabling/enabling
On Tue, May 27, 2025 at 06:21:44PM -0400, Lyude Paul wrote:
> From: Boqun Feng <boqun.feng@...il.com>
>
> Currently the nested interrupt disabling and enabling is present by
> _irqsave() and _irqrestore() APIs, which are relatively unsafe, for
> example:
[...]
> diff --git a/include/linux/irqflags_types.h b/include/linux/irqflags_types.h
> index c13f0d915097a..277433f7f53eb 100644
> --- a/include/linux/irqflags_types.h
> +++ b/include/linux/irqflags_types.h
> @@ -19,4 +19,10 @@ struct irqtrace_events {
>
> #endif
>
> +/* Per-cpu interrupt disabling state for local_interrupt_{disable,enable}() */
> +struct interrupt_disable_state {
> + unsigned long flags;
> + long count;
Is `count` unused? I found it in an earlier version of this series but not in this one. Should the count now live in the preempt counter rather than in this new per-cpu variable?
Sorry if I missed it in some other patch in this series. Thanks,
- Joel
> +};
> +
> #endif /* _LINUX_IRQFLAGS_TYPES_H */
> diff --git a/include/linux/preempt.h b/include/linux/preempt.h
> index 809af7b57470a..c1c5795be5d0f 100644
> --- a/include/linux/preempt.h
> +++ b/include/linux/preempt.h
> @@ -148,6 +148,10 @@ static __always_inline unsigned char interrupt_context_level(void)
> #define in_softirq() (softirq_count())
> #define in_interrupt() (irq_count())
>
> +#define hardirq_disable_count() ((preempt_count() & HARDIRQ_DISABLE_MASK) >> HARDIRQ_DISABLE_SHIFT)
> +#define hardirq_disable_enter() __preempt_count_add_return(HARDIRQ_DISABLE_OFFSET)
> +#define hardirq_disable_exit() __preempt_count_sub_return(HARDIRQ_DISABLE_OFFSET)
> +
> /*
> * The preempt_count offset after preempt_disable();
> */
> diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
> index d3561c4a080e2..b21da4bd51a42 100644
> --- a/include/linux/spinlock.h
> +++ b/include/linux/spinlock.h
> @@ -272,9 +272,11 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
> #endif
>
> #define raw_spin_lock_irq(lock) _raw_spin_lock_irq(lock)
> +#define raw_spin_lock_irq_disable(lock) _raw_spin_lock_irq_disable(lock)
> #define raw_spin_lock_bh(lock) _raw_spin_lock_bh(lock)
> #define raw_spin_unlock(lock) _raw_spin_unlock(lock)
> #define raw_spin_unlock_irq(lock) _raw_spin_unlock_irq(lock)
> +#define raw_spin_unlock_irq_enable(lock) _raw_spin_unlock_irq_enable(lock)
>
> #define raw_spin_unlock_irqrestore(lock, flags) \
> do { \
> @@ -300,11 +302,56 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
> 1 : ({ local_irq_restore(flags); 0; }); \
> })
>
> +#define raw_spin_trylock_irq_disable(lock) \
> +({ \
> + local_interrupt_disable(); \
> + raw_spin_trylock(lock) ? \
> + 1 : ({ local_interrupt_enable(); 0; }); \
> +})
> +
> #ifndef CONFIG_PREEMPT_RT
> /* Include rwlock functions for !RT */
> #include <linux/rwlock.h>
> #endif
>
> +DECLARE_PER_CPU(struct interrupt_disable_state, local_interrupt_disable_state);
> +
> +static inline void local_interrupt_disable(void)
> +{
> + unsigned long flags;
> + int new_count;
> +
> + new_count = hardirq_disable_enter();
> +
> + if ((new_count & HARDIRQ_DISABLE_MASK) == HARDIRQ_DISABLE_OFFSET) {
> + local_irq_save(flags);
> + raw_cpu_write(local_interrupt_disable_state.flags, flags);
> + }
> +}
> +
> +static inline void local_interrupt_enable(void)
> +{
> + int new_count;
> +
> + new_count = hardirq_disable_exit();
> +
> + if ((new_count & HARDIRQ_DISABLE_MASK) == 0) {
> + unsigned long flags;
> +
> + flags = raw_cpu_read(local_interrupt_disable_state.flags);
> + local_irq_restore(flags);
> + /*
> + * TODO: re-read preempt count can be avoided, but it needs
> + * should_resched() taking another parameter as the current
> + * preempt count
> + */
> +#ifdef PREEMPTION
> + if (should_resched(0))
> + __preempt_schedule();
> +#endif
> + }
> +}
> +
> /*
> * Pull the _spin_*()/_read_*()/_write_*() functions/declarations:
> */
> @@ -376,6 +423,11 @@ static __always_inline void spin_lock_irq(spinlock_t *lock)
> raw_spin_lock_irq(&lock->rlock);
> }
>
> +static __always_inline void spin_lock_irq_disable(spinlock_t *lock)
> +{
> + raw_spin_lock_irq_disable(&lock->rlock);
> +}
> +
> #define spin_lock_irqsave(lock, flags) \
> do { \
> raw_spin_lock_irqsave(spinlock_check(lock), flags); \
> @@ -401,6 +453,11 @@ static __always_inline void spin_unlock_irq(spinlock_t *lock)
> raw_spin_unlock_irq(&lock->rlock);
> }
>
> +static __always_inline void spin_unlock_irq_enable(spinlock_t *lock)
> +{
> + raw_spin_unlock_irq_enable(&lock->rlock);
> +}
> +
> static __always_inline void spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
> {
> raw_spin_unlock_irqrestore(&lock->rlock, flags);
> @@ -421,6 +478,11 @@ static __always_inline int spin_trylock_irq(spinlock_t *lock)
> raw_spin_trylock_irqsave(spinlock_check(lock), flags); \
> })
>
> +static __always_inline int spin_trylock_irq_disable(spinlock_t *lock)
> +{
> + return raw_spin_trylock_irq_disable(&lock->rlock);
> +}
> +
> /**
> * spin_is_locked() - Check whether a spinlock is locked.
> * @lock: Pointer to the spinlock.
> diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h
> index 9ecb0ab504e32..92532103b9eaa 100644
> --- a/include/linux/spinlock_api_smp.h
> +++ b/include/linux/spinlock_api_smp.h
> @@ -28,6 +28,8 @@ _raw_spin_lock_nest_lock(raw_spinlock_t *lock, struct lockdep_map *map)
> void __lockfunc _raw_spin_lock_bh(raw_spinlock_t *lock) __acquires(lock);
> void __lockfunc _raw_spin_lock_irq(raw_spinlock_t *lock)
> __acquires(lock);
> +void __lockfunc _raw_spin_lock_irq_disable(raw_spinlock_t *lock)
> + __acquires(lock);
>
> unsigned long __lockfunc _raw_spin_lock_irqsave(raw_spinlock_t *lock)
> __acquires(lock);
> @@ -39,6 +41,7 @@ int __lockfunc _raw_spin_trylock_bh(raw_spinlock_t *lock);
> void __lockfunc _raw_spin_unlock(raw_spinlock_t *lock) __releases(lock);
> void __lockfunc _raw_spin_unlock_bh(raw_spinlock_t *lock) __releases(lock);
> void __lockfunc _raw_spin_unlock_irq(raw_spinlock_t *lock) __releases(lock);
> +void __lockfunc _raw_spin_unlock_irq_enable(raw_spinlock_t *lock) __releases(lock);
> void __lockfunc
> _raw_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long flags)
> __releases(lock);
> @@ -55,6 +58,11 @@ _raw_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long flags)
> #define _raw_spin_lock_irq(lock) __raw_spin_lock_irq(lock)
> #endif
>
> +/* Use the same config as spin_lock_irq() temporarily. */
> +#ifdef CONFIG_INLINE_SPIN_LOCK_IRQ
> +#define _raw_spin_lock_irq_disable(lock) __raw_spin_lock_irq_disable(lock)
> +#endif
> +
> #ifdef CONFIG_INLINE_SPIN_LOCK_IRQSAVE
> #define _raw_spin_lock_irqsave(lock) __raw_spin_lock_irqsave(lock)
> #endif
> @@ -79,6 +87,11 @@ _raw_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long flags)
> #define _raw_spin_unlock_irq(lock) __raw_spin_unlock_irq(lock)
> #endif
>
> +/* Use the same config as spin_unlock_irq() temporarily. */
> +#ifdef CONFIG_INLINE_SPIN_UNLOCK_IRQ
> +#define _raw_spin_unlock_irq_enable(lock) __raw_spin_unlock_irq_enable(lock)
> +#endif
> +
> #ifdef CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE
> #define _raw_spin_unlock_irqrestore(lock, flags) __raw_spin_unlock_irqrestore(lock, flags)
> #endif
> @@ -120,6 +133,13 @@ static inline void __raw_spin_lock_irq(raw_spinlock_t *lock)
> LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
> }
>
> +static inline void __raw_spin_lock_irq_disable(raw_spinlock_t *lock)
> +{
> + local_interrupt_disable();
> + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
> + LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
> +}
> +
> static inline void __raw_spin_lock_bh(raw_spinlock_t *lock)
> {
> __local_bh_disable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET);
> @@ -160,6 +180,13 @@ static inline void __raw_spin_unlock_irq(raw_spinlock_t *lock)
> preempt_enable();
> }
>
> +static inline void __raw_spin_unlock_irq_enable(raw_spinlock_t *lock)
> +{
> + spin_release(&lock->dep_map, _RET_IP_);
> + do_raw_spin_unlock(lock);
> + local_interrupt_enable();
> +}
> +
> static inline void __raw_spin_unlock_bh(raw_spinlock_t *lock)
> {
> spin_release(&lock->dep_map, _RET_IP_);
> diff --git a/include/linux/spinlock_api_up.h b/include/linux/spinlock_api_up.h
> index 819aeba1c87e6..d02a73671713b 100644
> --- a/include/linux/spinlock_api_up.h
> +++ b/include/linux/spinlock_api_up.h
> @@ -36,6 +36,9 @@
> #define __LOCK_IRQ(lock) \
> do { local_irq_disable(); __LOCK(lock); } while (0)
>
> +#define __LOCK_IRQ_DISABLE(lock) \
> + do { local_interrupt_disable(); __LOCK(lock); } while (0)
> +
> #define __LOCK_IRQSAVE(lock, flags) \
> do { local_irq_save(flags); __LOCK(lock); } while (0)
>
> @@ -52,6 +55,9 @@
> #define __UNLOCK_IRQ(lock) \
> do { local_irq_enable(); __UNLOCK(lock); } while (0)
>
> +#define __UNLOCK_IRQ_ENABLE(lock) \
> + do { __UNLOCK(lock); local_interrupt_enable(); } while (0)
> +
> #define __UNLOCK_IRQRESTORE(lock, flags) \
> do { local_irq_restore(flags); __UNLOCK(lock); } while (0)
>
> @@ -64,6 +70,7 @@
> #define _raw_read_lock_bh(lock) __LOCK_BH(lock)
> #define _raw_write_lock_bh(lock) __LOCK_BH(lock)
> #define _raw_spin_lock_irq(lock) __LOCK_IRQ(lock)
> +#define _raw_spin_lock_irq_disable(lock) __LOCK_IRQ_DISABLE(lock)
> #define _raw_read_lock_irq(lock) __LOCK_IRQ(lock)
> #define _raw_write_lock_irq(lock) __LOCK_IRQ(lock)
> #define _raw_spin_lock_irqsave(lock, flags) __LOCK_IRQSAVE(lock, flags)
> @@ -80,6 +87,7 @@
> #define _raw_write_unlock_bh(lock) __UNLOCK_BH(lock)
> #define _raw_read_unlock_bh(lock) __UNLOCK_BH(lock)
> #define _raw_spin_unlock_irq(lock) __UNLOCK_IRQ(lock)
> +#define _raw_spin_unlock_irq_enable(lock) __UNLOCK_IRQ_ENABLE(lock)
> #define _raw_read_unlock_irq(lock) __UNLOCK_IRQ(lock)
> #define _raw_write_unlock_irq(lock) __UNLOCK_IRQ(lock)
> #define _raw_spin_unlock_irqrestore(lock, flags) \
> diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h
> index f6499c37157df..6ea08fafa6d7b 100644
> --- a/include/linux/spinlock_rt.h
> +++ b/include/linux/spinlock_rt.h
> @@ -93,6 +93,11 @@ static __always_inline void spin_lock_irq(spinlock_t *lock)
> rt_spin_lock(lock);
> }
>
> +static __always_inline void spin_lock_irq_disable(spinlock_t *lock)
> +{
> + rt_spin_lock(lock);
> +}
> +
> #define spin_lock_irqsave(lock, flags) \
> do { \
> typecheck(unsigned long, flags); \
> @@ -116,6 +121,11 @@ static __always_inline void spin_unlock_irq(spinlock_t *lock)
> rt_spin_unlock(lock);
> }
>
> +static __always_inline void spin_unlock_irq_enable(spinlock_t *lock)
> +{
> + rt_spin_unlock(lock);
> +}
> +
> static __always_inline void spin_unlock_irqrestore(spinlock_t *lock,
> unsigned long flags)
> {
> diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c
> index 7685defd7c526..13f91117794fd 100644
> --- a/kernel/locking/spinlock.c
> +++ b/kernel/locking/spinlock.c
> @@ -125,6 +125,21 @@ static void __lockfunc __raw_##op##_lock_bh(locktype##_t *lock) \
> */
> BUILD_LOCK_OPS(spin, raw_spinlock);
>
> +/* No rwlock_t variants for now, so just build this function by hand */
> +static void __lockfunc __raw_spin_lock_irq_disable(raw_spinlock_t *lock)
> +{
> + for (;;) {
> + preempt_disable();
> + local_interrupt_disable();
> + if (likely(do_raw_spin_trylock(lock)))
> + break;
> + local_interrupt_enable();
> + preempt_enable();
> +
> + arch_spin_relax(&lock->raw_lock);
> + }
> +}
> +
> #ifndef CONFIG_PREEMPT_RT
> BUILD_LOCK_OPS(read, rwlock);
> BUILD_LOCK_OPS(write, rwlock);
> @@ -172,6 +187,14 @@ noinline void __lockfunc _raw_spin_lock_irq(raw_spinlock_t *lock)
> EXPORT_SYMBOL(_raw_spin_lock_irq);
> #endif
>
> +#ifndef CONFIG_INLINE_SPIN_LOCK_IRQ
> +noinline void __lockfunc _raw_spin_lock_irq_disable(raw_spinlock_t *lock)
> +{
> + __raw_spin_lock_irq_disable(lock);
> +}
> +EXPORT_SYMBOL_GPL(_raw_spin_lock_irq_disable);
> +#endif
> +
> #ifndef CONFIG_INLINE_SPIN_LOCK_BH
> noinline void __lockfunc _raw_spin_lock_bh(raw_spinlock_t *lock)
> {
> @@ -204,6 +227,14 @@ noinline void __lockfunc _raw_spin_unlock_irq(raw_spinlock_t *lock)
> EXPORT_SYMBOL(_raw_spin_unlock_irq);
> #endif
>
> +#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQ
> +noinline void __lockfunc _raw_spin_unlock_irq_enable(raw_spinlock_t *lock)
> +{
> + __raw_spin_unlock_irq_enable(lock);
> +}
> +EXPORT_SYMBOL_GPL(_raw_spin_unlock_irq_enable);
> +#endif
> +
> #ifndef CONFIG_INLINE_SPIN_UNLOCK_BH
> noinline void __lockfunc _raw_spin_unlock_bh(raw_spinlock_t *lock)
> {
> diff --git a/kernel/softirq.c b/kernel/softirq.c
> index 513b1945987cc..f7a2ff4d123be 100644
> --- a/kernel/softirq.c
> +++ b/kernel/softirq.c
> @@ -88,6 +88,9 @@ EXPORT_PER_CPU_SYMBOL_GPL(hardirqs_enabled);
> EXPORT_PER_CPU_SYMBOL_GPL(hardirq_context);
> #endif
>
> +DEFINE_PER_CPU(struct interrupt_disable_state, local_interrupt_disable_state);
> +EXPORT_PER_CPU_SYMBOL_GPL(local_interrupt_disable_state);
> +
> /*
> * SOFTIRQ_OFFSET usage:
> *
> --
> 2.49.0
>
Powered by blists - more mailing lists