Message-ID: <d1679c5809ffdc82e4546c1d7366452d9e8433f0.camel@gmx.de>
Date: Thu, 21 Aug 2025 05:37:56 +0200
From: Mike Galbraith <efault@....de>
To: Breno Leitao <leitao@...ian.org>
Cc: Pavel Begunkov <asml.silence@...il.com>, Jakub Kicinski
<kuba@...nel.org>, Johannes Berg <johannes@...solutions.net>,
paulmck@...nel.org, LKML <linux-kernel@...r.kernel.org>,
netdev@...r.kernel.org, boqun.feng@...il.com
Subject: Re: netconsole: HARDIRQ-safe -> HARDIRQ-unsafe lock order warning
On Wed, 2025-08-20 at 10:36 -0700, Breno Leitao wrote:
> On Wed, Aug 20, 2025 at 02:31:02PM +0200, Mike Galbraith wrote:
> > On Tue, 2025-08-19 at 10:27 -0700, Breno Leitao wrote:
> > >
> > > I’ve continued investigating possible solutions, and it looks like
> > > moving netconsole over to the non‑blocking console (nbcon) framework
> > > might be the right approach. Unlike the classic console path, nbcon
> > > doesn’t rely on the global console lock, which was one of the main
> > > concerns regarding the possible deadlock.
> >
> > ATM at least, classic can remotely log a crash whereas nbcon can't per
> > test drive, so it would be nice for classic to stick around until nbcon
> > learns some atomic packet blasting.
>
> Oh, does it mean that during a crash nbcon invokes the `write_atomic`
> callback, and because this patch doesn't implement it, it will not send
> those pkts? Am I reading it correctly?
No, I'm just saying that the kernel's last gasp doesn't make it out of
the box with CONFIG_NETCONSOLE_NBCON=y as your patch sits.
It doesn't with my wireless or RT lockdep et al appeasement hackery
either, but I don't care deeply; as long as I can capture kernel spew
inspired by LTP and whatnot, I'm happy... kdump logs most death rattles.
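If I read the nbcon bits right, that's because the panic path flushes
nbcon consoles through their ->write_atomic() callback, and the patch
only wires up the threaded writer. Roughly what I mean, sketched from
memory against the current nbcon console fields (netcon_write_thread
and the device lock helpers are the patch's, the rest is illustration,
not the patch's actual registration):

	static struct console netconsole_nbcon = {
		.name		= "netcon",
		.flags		= CON_ENABLED | CON_NBCON,
		/* threaded path exists, so normal logging works... */
		.write_thread	= netcon_write_thread,
		.device_lock	= netconsole_device_lock,
		.device_unlock	= netconsole_device_unlock,
		/*
		 * ...but with no .write_atomic, there's nothing for the
		 * panicking CPU to call when it tries to push the final
		 * messages out directly, so the last gasp stays home.
		 */
	};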
> > > The new path is protected by NETCONSOLE_NBCON, which is disabled by
> > > default. This allows us to experiment and test both approaches.
> >
> > As patch sits, interrupts being disabled is still a problem, gripes
> > below.
>
> You mean that the IRQs are disabled at the acquire of target_list_lock?
> If so, an option is to turn that list into an RCU list?!
Yeah. I switched to local_bh_disable(), but was originally using
rcu_read_lock_bh() for RT.
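For completeness, the RCU flavour would look something like the below on
the reader side (just a sketch with a hypothetical helper name; writers
would keep target_list_lock for mutation, switch to list_add_rcu() /
list_del_rcu(), and free targets only after a grace period):

	/* Sketch: walk target_list under RCU instead of taking
	 * target_list_lock with IRQs off on the send path. */
	static void write_msg_rcu(const char *msg, unsigned int len)
	{
		struct netconsole_target *nt;

		rcu_read_lock_bh();
		list_for_each_entry_rcu(nt, &target_list, list) {
			if (nt->enabled && netif_running(nt->np.dev))
				netpoll_send_udp(&nt->np, msg, len);
		}
		rcu_read_unlock_bh();
	}

That keeps the send path preemptible on RT without the lock order gripe,
at the cost of auditing every target_list user.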
> > Not disabling IRQs makes nbcon gripe free, but creates the
> > issue of netpoll_tx_running() lying to the rest of NETPOLL consumers.
> >
> > RT and the wireless stack have in common that IRQs being disabled in
> > netpoll.c sucks rocks for them. I've been carrying a hack to allow RT
> > to use netconsole since 5.15
(hm, grepping my modest forest I see it's actually 4.10.. hohum)
> What is this patch you have?
Below is the version that makes boxen stop bitching at NETCONSOLE_NBCON.
netpoll: Make it RT friendly
PREEMPT_RT cannot alloc/free memory when not preemptible, making
disabling of IRQs across transmission an issue for RT.
Use local_bh_disable() to provide local exclusion for RT (via
softirq_ctrl.lock) for normal and fallback transmission paths
instead of disabling IRQs. Since zap_completion_queue() callers
ensure pointer stability, replace get_cpu_var() therein with
this_cpu_ptr() to keep it preemptible across kfree().
Disable a couple warnings for RT, and we're done.
v0.kinda-works -> v1:
remove get_cpu_var() from zap_completion_queue().
fix/test netpoll_tx_running() to work for RT/!RT.
fix/test xmit fallback path for RT.
(blessed by nobody, plz keep all shrapnel etc etc:)
Signed-off-by: Mike Galbraith <efault@....de>
---
drivers/net/netconsole.c | 4 ++--
include/linux/netpoll.h | 4 +++-
net/core/netpoll.c | 47 ++++++++++++++++++++++++++++++++++++-----------
3 files changed, 41 insertions(+), 14 deletions(-)
--- a/drivers/net/netconsole.c
+++ b/drivers/net/netconsole.c
@@ -1952,12 +1952,12 @@ static void netcon_write_thread(struct c
static void netconsole_device_lock(struct console *con, unsigned long *flags)
{
/* protects all the targets at the same time */
- spin_lock_irqsave(&target_list_lock, *flags);
+ spin_lock(&target_list_lock);
}
static void netconsole_device_unlock(struct console *con, unsigned long flags)
{
- spin_unlock_irqrestore(&target_list_lock, flags);
+ spin_unlock(&target_list_lock);
}
#endif
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -100,9 +100,11 @@ static inline void netpoll_poll_unlock(v
smp_store_release(&napi->poll_owner, -1);
}
+DECLARE_PER_CPU(int, _netpoll_tx_running);
+
static inline bool netpoll_tx_running(struct net_device *dev)
{
- return irqs_disabled();
+ return this_cpu_read(_netpoll_tx_running);
}
#else
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -58,6 +58,29 @@ static void zap_completion_queue(void);
static unsigned int carrier_timeout = 4;
module_param(carrier_timeout, uint, 0644);
+DEFINE_PER_CPU(int, _netpoll_tx_running);
+EXPORT_PER_CPU_SYMBOL(_netpoll_tx_running);
+
+#define netpoll_tx_begin(flags)					\
+	do {								\
+		if (IS_ENABLED(CONFIG_PREEMPT_RT) ||			\
+		    IS_ENABLED(CONFIG_NETCONSOLE_NBCON))		\
+			local_bh_disable();				\
+		else							\
+			local_irq_save(flags);				\
+		this_cpu_write(_netpoll_tx_running, 1);			\
+	} while (0)
+
+#define netpoll_tx_end(flags)						\
+	do {								\
+		this_cpu_write(_netpoll_tx_running, 0);			\
+		if (IS_ENABLED(CONFIG_PREEMPT_RT) ||			\
+		    IS_ENABLED(CONFIG_NETCONSOLE_NBCON))		\
+			local_bh_enable();				\
+		else							\
+			local_irq_restore(flags);			\
+	} while (0)
+
static netdev_tx_t netpoll_start_xmit(struct sk_buff *skb,
struct net_device *dev,
struct netdev_queue *txq)
@@ -90,7 +113,7 @@ static void queue_process(struct work_st
struct netpoll_info *npinfo =
container_of(work, struct netpoll_info, tx_work.work);
struct sk_buff *skb;
- unsigned long flags;
+ unsigned long __maybe_unused flags;
while ((skb = skb_dequeue(&npinfo->txq))) {
struct net_device *dev = skb->dev;
@@ -102,7 +125,7 @@ static void queue_process(struct work_st
continue;
}
- local_irq_save(flags);
+ netpoll_tx_begin(flags);
/* check if skb->queue_mapping is still valid */
q_index = skb_get_queue_mapping(skb);
if (unlikely(q_index >= dev->real_num_tx_queues)) {
@@ -115,13 +138,13 @@ static void queue_process(struct work_st
!dev_xmit_complete(netpoll_start_xmit(skb, dev, txq))) {
skb_queue_head(&npinfo->txq, skb);
HARD_TX_UNLOCK(dev, txq);
- local_irq_restore(flags);
+ netpoll_tx_end(flags);
schedule_delayed_work(&npinfo->tx_work, HZ/10);
return;
}
HARD_TX_UNLOCK(dev, txq);
- local_irq_restore(flags);
+ netpoll_tx_end(flags);
}
}
@@ -246,7 +269,7 @@ static void refill_skbs(struct netpoll *
static void zap_completion_queue(void)
{
unsigned long flags;
- struct softnet_data *sd = &get_cpu_var(softnet_data);
+ struct softnet_data *sd = this_cpu_ptr(&softnet_data);
if (sd->completion_queue) {
struct sk_buff *clist;
@@ -267,8 +290,6 @@ static void zap_completion_queue(void)
}
}
}
-
- put_cpu_var(softnet_data);
}
static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
@@ -319,7 +340,9 @@ static netdev_tx_t __netpoll_send_skb(st
/* It is up to the caller to keep npinfo alive. */
struct netpoll_info *npinfo;
+#if (!defined(CONFIG_PREEMPT_RT) && !defined(CONFIG_NETCONSOLE_NBCON))
lockdep_assert_irqs_disabled();
+#endif
dev = np->dev;
rcu_read_lock();
@@ -356,9 +379,11 @@ static netdev_tx_t __netpoll_send_skb(st
udelay(USEC_PER_POLL);
}
+#if (!defined(CONFIG_PREEMPT_RT) && !defined(CONFIG_NETCONSOLE_NBCON))
WARN_ONCE(!irqs_disabled(),
"netpoll_send_skb_on_dev(): %s enabled
interrupts in poll (%pS)\n",
dev->name, dev->netdev_ops->ndo_start_xmit);
+#endif
}
@@ -399,16 +424,16 @@ static void netpoll_udp_checksum(struct
netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
{
- unsigned long flags;
+ unsigned long __maybe_unused flags;
netdev_tx_t ret;
if (unlikely(!np)) {
dev_kfree_skb_irq(skb);
ret = NET_XMIT_DROP;
} else {
- local_irq_save(flags);
+ netpoll_tx_begin(flags);
ret = __netpoll_send_skb(np, skb);
- local_irq_restore(flags);
+ netpoll_tx_end(flags);
}
return ret;
}
@@ -501,7 +526,7 @@ int netpoll_send_udp(struct netpoll *np,
int total_len, ip_len, udp_len;
struct sk_buff *skb;
- if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) && !IS_ENABLED(CONFIG_NETCONSOLE_NBCON))
WARN_ON_ONCE(!irqs_disabled());
udp_len = len + sizeof(struct udphdr);