lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <e7e282eeb90627d0d2a379ebb24b345675693a48.camel@gmx.de>
Date:   Tue, 21 Dec 2021 10:44:08 +0100
From:   Mike Galbraith <efault@....de>
To:     Sebastian Andrzej Siewior <bigeasy@...utronix.de>
Cc:     Thomas Gleixner <tglx@...utronix.de>,
        LKML <linux-kernel@...r.kernel.org>,
        RT <linux-rt-users@...r.kernel.org>
Subject: Re: [rcf/patch] netpoll: Make it RT friendly

On Fri, 2021-12-17 at 17:41 +0100, Mike Galbraith wrote:
> On Fri, 2021-12-17 at 14:59 +0100, Sebastian Andrzej Siewior wrote:
> > On 2021-12-15 20:54:40 [+0100], Mike Galbraith wrote:
> > >                   In fact, I had already taken the silence as a "Surely
> > > you jest" or such.
> >
> > No, not really. netpoll is currently the least of my worries and I
> > wanted to finish my other network patches before I start wrapping my
> > mind around this in case something changes. But I have currently no idea
> > how to do netpoll run/ detect and so on.
>
> I was gonna toss a rock or two at those "and so on" bits since they're
> wired into stuff my box uses regularly.. but I've yet to find a means
> to make them be more that compiler fodder, so they're safe.

Nah, I was just insufficiently bored.

It's surprising that patchlet worked at all with netpoll_tx_running()
being busted, but whatever, bridge now does its forwarding thing for RT
and !RT, transmitting twice per kmsg, though apparently only 1 actually
hits copper to fly over to console logger. See attached trace. Note
that for that trace I sabotaged it to alternately xmit via fallback.

This will likely end up being done differently (way prettier), but what
the heck, it beats a lump of coal in your xmas stocking.

	Ho Ho Hum :)

	-Mike

netpoll: Make it RT friendly

PREEMPT_RT cannot alloc/free memory when not preemptible, making
disabling of IRQs across transmission an issue for RT.

Use rcu_read_lock_bh() to provide local exclusion for RT (via
softirq_ctrl.lock) for normal and fallback transmission paths
instead of disabling IRQs. Since zap_completion_queue() callers
ensure pointer stability, replace get_cpu_var() therein with
this_cpu_ptr() to keep it preemptible across kfree().

Disable a couple warnings for RT, and we're done.

v0.kinda-works -> v1 feedback fixes:
    remove get_cpu_var() from zap_completion_queue().
    fix/test netpoll_tx_running() to work for RT/!RT.
    fix/test xmit fallback path for RT.

Signed-off-by: Mike Galbraith <efault@....de>
---
 include/linux/netpoll.h |    4 +++-
 net/core/netpoll.c      |   44 ++++++++++++++++++++++++++++++++------------
 2 files changed, 35 insertions(+), 13 deletions(-)

--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -89,9 +89,11 @@ static inline void netpoll_poll_unlock(v
 		smp_store_release(&napi->poll_owner, -1);
 }

+DECLARE_PER_CPU(int, _netpoll_tx_running);
+
 static inline bool netpoll_tx_running(struct net_device *dev)
 {
-	return irqs_disabled();
+	return this_cpu_read(_netpoll_tx_running);
 }

 #else
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -70,6 +70,27 @@ module_param(carrier_timeout, uint, 0644
 #define np_notice(np, fmt, ...)				\
 	pr_notice("%s: " fmt, np->name, ##__VA_ARGS__)

+DEFINE_PER_CPU(int, _netpoll_tx_running);
+EXPORT_PER_CPU_SYMBOL(_netpoll_tx_running);
+
+#define netpoll_tx_begin(flags)				\
+	do {						\
+		if (!IS_ENABLED(CONFIG_PREEMPT_RT))	\
+			local_irq_save(flags);		\
+		else					\
+			rcu_read_lock_bh();		\
+		this_cpu_write(_netpoll_tx_running, 1);	\
+	} while (0)
+
+#define netpoll_tx_end(flags)				\
+	do {						\
+		this_cpu_write(_netpoll_tx_running, 0);	\
+		if (!IS_ENABLED(CONFIG_PREEMPT_RT))	\
+			local_irq_restore(flags);	\
+		else					\
+			rcu_read_unlock_bh();		\
+	} while (0)
+
 static netdev_tx_t netpoll_start_xmit(struct sk_buff *skb,
 				      struct net_device *dev,
 				      struct netdev_queue *txq)
@@ -102,7 +123,7 @@ static void queue_process(struct work_st
 	struct netpoll_info *npinfo =
 		container_of(work, struct netpoll_info, tx_work.work);
 	struct sk_buff *skb;
-	unsigned long flags;
+	unsigned long __maybe_unused flags;

 	while ((skb = skb_dequeue(&npinfo->txq))) {
 		struct net_device *dev = skb->dev;
@@ -114,7 +135,7 @@ static void queue_process(struct work_st
 			continue;
 		}

-		local_irq_save(flags);
+		netpoll_tx_begin(flags);
 		/* check if skb->queue_mapping is still valid */
 		q_index = skb_get_queue_mapping(skb);
 		if (unlikely(q_index >= dev->real_num_tx_queues)) {
@@ -127,13 +148,13 @@ static void queue_process(struct work_st
 		    !dev_xmit_complete(netpoll_start_xmit(skb, dev, txq))) {
 			skb_queue_head(&npinfo->txq, skb);
 			HARD_TX_UNLOCK(dev, txq);
-			local_irq_restore(flags);
+			netpoll_tx_end(flags);

 			schedule_delayed_work(&npinfo->tx_work, HZ/10);
 			return;
 		}
 		HARD_TX_UNLOCK(dev, txq);
-		local_irq_restore(flags);
+		netpoll_tx_end(flags);
 	}
 }

@@ -243,7 +264,7 @@ static void refill_skbs(void)
 static void zap_completion_queue(void)
 {
 	unsigned long flags;
-	struct softnet_data *sd = &get_cpu_var(softnet_data);
+	struct softnet_data *sd = this_cpu_ptr(&softnet_data);

 	if (sd->completion_queue) {
 		struct sk_buff *clist;
@@ -264,8 +285,6 @@ static void zap_completion_queue(void)
 			}
 		}
 	}
-
-	put_cpu_var(softnet_data);
 }

 static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
@@ -314,7 +333,8 @@ static netdev_tx_t __netpoll_send_skb(st
 	/* It is up to the caller to keep npinfo alive. */
 	struct netpoll_info *npinfo;

-	lockdep_assert_irqs_disabled();
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+		lockdep_assert_irqs_disabled();

 	dev = np->dev;
 	npinfo = rcu_dereference_bh(dev->npinfo);
@@ -350,7 +370,7 @@ static netdev_tx_t __netpoll_send_skb(st
 			udelay(USEC_PER_POLL);
 		}

-		WARN_ONCE(!irqs_disabled(),
+		WARN_ONCE(!IS_ENABLED(CONFIG_PREEMPT_RT) && !irqs_disabled(),
 			"netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pS)\n",
 			dev->name, dev->netdev_ops->ndo_start_xmit);

@@ -365,16 +385,16 @@ static netdev_tx_t __netpoll_send_skb(st

 netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 {
-	unsigned long flags;
+	unsigned long __maybe_unused flags;
 	netdev_tx_t ret;

 	if (unlikely(!np)) {
 		dev_kfree_skb_irq(skb);
 		ret = NET_XMIT_DROP;
 	} else {
-		local_irq_save(flags);
+		netpoll_tx_begin(flags);
 		ret = __netpoll_send_skb(np, skb);
-		local_irq_restore(flags);
+		netpoll_tx_end(flags);
 	}
 	return ret;
 }



View attachment "trace" of type "text/plain" (5157 bytes)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ