lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <tx2ry3uwlgqenvz4fsy2hugdiq36jrtshwyo4a2jpxufeypesi@uceeo7ykvd6w>
Date: Thu, 21 Aug 2025 10:35:19 -0700
From: Breno Leitao <leitao@...ian.org>
To: Mike Galbraith <efault@....de>
Cc: Pavel Begunkov <asml.silence@...il.com>, 
	Jakub Kicinski <kuba@...nel.org>, Johannes Berg <johannes@...solutions.net>, paulmck@...nel.org, 
	LKML <linux-kernel@...r.kernel.org>, netdev@...r.kernel.org, boqun.feng@...il.com
Subject: Re: netconsole: HARDIRQ-safe -> HARDIRQ-unsafe lock order warning

On Thu, Aug 21, 2025 at 05:51:59AM +0200, Mike Galbraith wrote:
> On Thu, 2025-08-21 at 05:37 +0200, Mike Galbraith wrote:
> > 
> > > What is this patch you have?
> > 
> > Make boxen stop bitching at NETCONSOLE_NBCON version below.
> 
> Grr, whitespace damaged. Doesn't really matter given it's not a
> submission, but it annoys me when I meet it on lkml, so take2.
> 
> netpoll: Make it RT friendly
> 
> PREEMPT_RT cannot alloc/free memory when not preemptible, making
> disabling of IRQs across transmission an issue for RT.
> 
> Use local_bh_disable() to provide local exclusion for RT (via
> softirq_ctrl.lock) for normal and fallback transmission paths
> instead of disabling IRQs. Since zap_completion_queue() callers
> ensure pointer stability, replace get_cpu_var() therein with
> this_cpu_ptr() to keep it preemptible across kfree().
> 
> Disable a couple warnings for RT, and we're done.
> 
> v0.kinda-works -> v1:
>     remove get_cpu_var() from zap_completion_queue().
>     fix/test netpoll_tx_running() to work for RT/!RT.
>     fix/test xmit fallback path for RT.
> 
> Signed-off-by: Mike Galbraith <efault@....de>
> ---
>  drivers/net/netconsole.c |    4 ++--
>  include/linux/netpoll.h  |    4 +++-
>  net/core/netpoll.c       |   47 ++++++++++++++++++++++++++++++++++++-----------
>  3 files changed, 41 insertions(+), 14 deletions(-)
> 
> --- a/drivers/net/netconsole.c
> +++ b/drivers/net/netconsole.c
> @@ -1952,12 +1952,12 @@ static void netcon_write_thread(struct c
>  static void netconsole_device_lock(struct console *con, unsigned long *flags)
>  {
>  	/* protects all the targets at the same time */
> -	spin_lock_irqsave(&target_list_lock, *flags);
> +	spin_lock(&target_list_lock);

I personally think this target_list_lock could be converted to RCU.

If that is doable, then we could probably turn netconsole_device_lock()
into a simple `rcu_read_lock()`, which would solve this problem as well.

>  static void netconsole_device_unlock(struct console *con, unsigned long flags)
>  {
> -	spin_unlock_irqrestore(&target_list_lock, flags);
> +	spin_unlock(&target_list_lock);
>  }
>  #endif
>  
> --- a/include/linux/netpoll.h
> +++ b/include/linux/netpoll.h
> @@ -100,9 +100,11 @@ static inline void netpoll_poll_unlock(v
>  		smp_store_release(&napi->poll_owner, -1);
>  }
>  
> +DECLARE_PER_CPU(int, _netpoll_tx_running);
> +
>  static inline bool netpoll_tx_running(struct net_device *dev)
>  {
> -	return irqs_disabled();
> +	return this_cpu_read(_netpoll_tx_running);
>  }
>  
>  #else
> --- a/net/core/netpoll.c
> +++ b/net/core/netpoll.c
> @@ -58,6 +58,29 @@ static void zap_completion_queue(void);
>  static unsigned int carrier_timeout = 4;
>  module_param(carrier_timeout, uint, 0644);
>  
> +DEFINE_PER_CPU(int, _netpoll_tx_running);
> +EXPORT_PER_CPU_SYMBOL(_netpoll_tx_running);
> +
> +#define netpoll_tx_begin(flags)					\
> +	do {							\
> +		if (IS_ENABLED(CONFIG_PREEMPT_RT) ||		\
> +		    IS_ENABLED(CONFIG_NETCONSOLE_NBCON))	\
> +			local_bh_disable();			\
> +		else						\
> +			local_irq_save(flags);			\
> +		this_cpu_write(_netpoll_tx_running, 1);		\
> +	} while (0)

Why can't we just use local_bh_disable() in both cases?

> +
> +#define netpoll_tx_end(flags)					\
> +	do {							\
> +		this_cpu_write(_netpoll_tx_running, 0);		\
> +		if (IS_ENABLED(CONFIG_PREEMPT_RT) ||		\
> +		    IS_ENABLED(CONFIG_NETCONSOLE_NBCON))	\
> +			local_bh_enable();			\
> +		else						\
> +			local_irq_restore(flags);		\
> +	} while (0)
> +
>  static netdev_tx_t netpoll_start_xmit(struct sk_buff *skb,
>  				      struct net_device *dev,
>  				      struct netdev_queue *txq)
> @@ -90,7 +113,7 @@ static void queue_process(struct work_st
>  	struct netpoll_info *npinfo =
>  		container_of(work, struct netpoll_info, tx_work.work);
>  	struct sk_buff *skb;
> -	unsigned long flags;
> +	unsigned long __maybe_unused flags;
>  
>  	while ((skb = skb_dequeue(&npinfo->txq))) {
>  		struct net_device *dev = skb->dev;
> @@ -102,7 +125,7 @@ static void queue_process(struct work_st
>  			continue;
>  		}
>  
> -		local_irq_save(flags);

It is unclear to me why we have IRQs disabled here. Nothing below
seems to depend on IRQs being disabled?

> +		netpoll_tx_begin(flags);
>  		/* check if skb->queue_mapping is still valid */
>  		q_index = skb_get_queue_mapping(skb);
>  		if (unlikely(q_index >= dev->real_num_tx_queues)) {
> @@ -115,13 +138,13 @@ static void queue_process(struct work_st
>  		    !dev_xmit_complete(netpoll_start_xmit(skb, dev, txq))) {
>  			skb_queue_head(&npinfo->txq, skb);
>  			HARD_TX_UNLOCK(dev, txq);

It seems the queue is already protected here, so why do we need the
local_irq_save() above?

> -			local_irq_restore(flags);
> +			netpoll_tx_end(flags);
>  
>  			schedule_delayed_work(&npinfo->tx_work, HZ/10);
>  			return;
>  		}
>  		HARD_TX_UNLOCK(dev, txq);
> -		local_irq_restore(flags);
> +		netpoll_tx_end(flags);
>  	}
>  }
>  
> @@ -246,7 +269,7 @@ static void refill_skbs(struct netpoll *
>  static void zap_completion_queue(void)
>  {
>  	unsigned long flags;
> -	struct softnet_data *sd = &get_cpu_var(softnet_data);
> +	struct softnet_data *sd = this_cpu_ptr(&softnet_data);

How do I check whether this is safe to do?

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ