Message-ID: <tx2ry3uwlgqenvz4fsy2hugdiq36jrtshwyo4a2jpxufeypesi@uceeo7ykvd6w>
Date: Thu, 21 Aug 2025 10:35:19 -0700
From: Breno Leitao <leitao@...ian.org>
To: Mike Galbraith <efault@....de>
Cc: Pavel Begunkov <asml.silence@...il.com>,
Jakub Kicinski <kuba@...nel.org>, Johannes Berg <johannes@...solutions.net>, paulmck@...nel.org,
LKML <linux-kernel@...r.kernel.org>, netdev@...r.kernel.org, boqun.feng@...il.com
Subject: Re: netconsole: HARDIRQ-safe -> HARDIRQ-unsafe lock order warning
On Thu, Aug 21, 2025 at 05:51:59AM +0200, Mike Galbraith wrote:
> On Thu, 2025-08-21 at 05:37 +0200, Mike Galbraith wrote:
> >
> > > What is this patch you have?
> >
> > Make boxen stop bitching at NETCONSOLE_NBCON version below.
>
> Grr, whitespace damaged. Doesn't really matter given it's not a
> submission, but it annoys me when I meet it on lkml, so take2.
>
> netpoll: Make it RT friendly
>
> PREEMPT_RT cannot alloc/free memory when not preemptible, making
> disabling of IRQs across transmission an issue for RT.
>
> Use local_bh_disable() to provide local exclusion for RT (via
> softirq_ctrl.lock) for normal and fallback transmission paths
> instead of disabling IRQs. Since zap_completion_queue() callers
> ensure pointer stability, replace get_cpu_var() therein with
> this_cpu_ptr() to keep it preemptible across kfree().
>
> Disable a couple warnings for RT, and we're done.
>
> v0.kinda-works -> v1:
> remove get_cpu_var() from zap_completion_queue().
> fix/test netpoll_tx_running() to work for RT/!RT.
> fix/test xmit fallback path for RT.
>
> Signed-off-by: Mike Galbraith <efault@....de>
> ---
> drivers/net/netconsole.c | 4 ++--
> include/linux/netpoll.h | 4 +++-
> net/core/netpoll.c | 47 ++++++++++++++++++++++++++++++++++++-----------
> 3 files changed, 41 insertions(+), 14 deletions(-)
>
> --- a/drivers/net/netconsole.c
> +++ b/drivers/net/netconsole.c
> @@ -1952,12 +1952,12 @@ static void netcon_write_thread(struct c
> static void netconsole_device_lock(struct console *con, unsigned long *flags)
> {
> /* protects all the targets at the same time */
> - spin_lock_irqsave(&target_list_lock, *flags);
> + spin_lock(&target_list_lock);
I personally think this target_list_lock can be converted to RCU.
If that is doable, then we could probably turn netconsole_device_lock()
into a simple `rcu_read_lock()`, which would solve this problem as well.
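For illustration, what I have in mind is roughly the sketch below (untested;
it assumes the target_list walkers become list_for_each_entry_rcu() and that
writers keep target_list_lock and switch to list_*_rcu() plus kfree_rcu()
when removing targets):

static void netconsole_device_lock(struct console *con, unsigned long *flags)
{
	/* Readers only enter an RCU read-side critical section. */
	rcu_read_lock();
}

static void netconsole_device_unlock(struct console *con, unsigned long flags)
{
	rcu_read_unlock();
}

The flags argument would then be unused, but kept to match the console
callback signature.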
> static void netconsole_device_unlock(struct console *con, unsigned long flags)
> {
> - spin_unlock_irqrestore(&target_list_lock, flags);
> + spin_unlock(&target_list_lock);
> }
> #endif
>
> --- a/include/linux/netpoll.h
> +++ b/include/linux/netpoll.h
> @@ -100,9 +100,11 @@ static inline void netpoll_poll_unlock(v
> smp_store_release(&napi->poll_owner, -1);
> }
>
> +DECLARE_PER_CPU(int, _netpoll_tx_running);
> +
> static inline bool netpoll_tx_running(struct net_device *dev)
> {
> - return irqs_disabled();
> + return this_cpu_read(_netpoll_tx_running);
> }
>
> #else
> --- a/net/core/netpoll.c
> +++ b/net/core/netpoll.c
> @@ -58,6 +58,29 @@ static void zap_completion_queue(void);
> static unsigned int carrier_timeout = 4;
> module_param(carrier_timeout, uint, 0644);
>
> +DEFINE_PER_CPU(int, _netpoll_tx_running);
> +EXPORT_PER_CPU_SYMBOL(_netpoll_tx_running);
> +
> +#define netpoll_tx_begin(flags) \
> + do { \
> + if (IS_ENABLED(CONFIG_PREEMPT_RT) || \
> + IS_ENABLED(CONFIG_NETCONSOLE_NBCON)) \
> + local_bh_disable(); \
> + else \
> + local_irq_save(flags); \
> + this_cpu_write(_netpoll_tx_running, 1); \
> + } while (0)
Why can't we just use local_bh_disable() in both cases?
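Just to make the question concrete, the unconditional variant would be
something like this (a sketch only, keeping the flags argument so the
call sites do not change):

#define netpoll_tx_begin(flags)					\
	do {							\
		(void)(flags);	/* unused in this variant */	\
		local_bh_disable();				\
		this_cpu_write(_netpoll_tx_running, 1);		\
	} while (0)

#define netpoll_tx_end(flags)					\
	do {							\
		this_cpu_write(_netpoll_tx_running, 0);		\
		local_bh_enable();				\
	} while (0)

That only works if these paths are never entered from hard IRQ context,
which I assume is the point of the NBCON write thread.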
> +
> +#define netpoll_tx_end(flags) \
> + do { \
> + this_cpu_write(_netpoll_tx_running, 0); \
> + if (IS_ENABLED(CONFIG_PREEMPT_RT) || \
> + IS_ENABLED(CONFIG_NETCONSOLE_NBCON)) \
> + local_bh_enable(); \
> + else \
> + local_irq_restore(flags); \
> + } while (0)
> +
> static netdev_tx_t netpoll_start_xmit(struct sk_buff *skb,
> struct net_device *dev,
> struct netdev_queue *txq)
> @@ -90,7 +113,7 @@ static void queue_process(struct work_st
> struct netpoll_info *npinfo =
> container_of(work, struct netpoll_info, tx_work.work);
> struct sk_buff *skb;
> - unsigned long flags;
> + unsigned long __maybe_unused flags;
>
> while ((skb = skb_dequeue(&npinfo->txq))) {
> struct net_device *dev = skb->dev;
> @@ -102,7 +125,7 @@ static void queue_process(struct work_st
> continue;
> }
>
> - local_irq_save(flags);
It is unclear to me why we have IRQs disabled here. Nothing below
seems to depend on IRQs being disabled?
> + netpoll_tx_begin(flags);
> /* check if skb->queue_mapping is still valid */
> q_index = skb_get_queue_mapping(skb);
> if (unlikely(q_index >= dev->real_num_tx_queues)) {
> @@ -115,13 +138,13 @@ static void queue_process(struct work_st
> !dev_xmit_complete(netpoll_start_xmit(skb, dev, txq))) {
> skb_queue_head(&npinfo->txq, skb);
> HARD_TX_UNLOCK(dev, txq);
It seems the queue is already protected here, so why do we need the
local_irq_save() above?
> - local_irq_restore(flags);
> + netpoll_tx_end(flags);
>
> schedule_delayed_work(&npinfo->tx_work, HZ/10);
> return;
> }
> HARD_TX_UNLOCK(dev, txq);
> - local_irq_restore(flags);
> + netpoll_tx_end(flags);
> }
> }
>
> @@ -246,7 +269,7 @@ static void refill_skbs(struct netpoll *
> static void zap_completion_queue(void)
> {
> unsigned long flags;
> - struct softnet_data *sd = &get_cpu_var(softnet_data);
> + struct softnet_data *sd = this_cpu_ptr(&softnet_data);
How do I check if this is safe to do?
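FWIW, my rough understanding of the difference (generic percpu semantics,
not specific to this patch): get_cpu_var() disables preemption until the
matching put_cpu_var(), while this_cpu_ptr() does not, so the caller has
to guarantee stability itself, e.g.:

	/* get_cpu_var() keeps us on this CPU until put_cpu_var() */
	struct softnet_data *sd = &get_cpu_var(softnet_data);
	/* ... use sd ... */
	put_cpu_var(softnet_data);

	/* this_cpu_ptr() does not disable preemption; the caller must
	 * already hold off migration (or not care about it)
	 */
	struct softnet_data *sd2 = this_cpu_ptr(&softnet_data);

So the question is whether zap_completion_queue()'s callers really
guarantee we cannot migrate (or that migrating does not matter) between
taking the pointer and draining the queue.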