[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <e7e282eeb90627d0d2a379ebb24b345675693a48.camel@gmx.de>
Date: Tue, 21 Dec 2021 10:44:08 +0100
From: Mike Galbraith <efault@....de>
To: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
Cc: Thomas Gleixner <tglx@...utronix.de>,
LKML <linux-kernel@...r.kernel.org>,
RT <linux-rt-users@...r.kernel.org>
Subject: Re: [rcf/patch] netpoll: Make it RT friendly
On Fri, 2021-12-17 at 17:41 +0100, Mike Galbraith wrote:
> On Fri, 2021-12-17 at 14:59 +0100, Sebastian Andrzej Siewior wrote:
> > On 2021-12-15 20:54:40 [+0100], Mike Galbraith wrote:
> > > In fact, I had already taken the silence as a "Surely
> > > you jest" or such.
> >
> > No, not really. netpoll is currently the least of my worries and I
> > wanted to finish my other network patches before I start wrapping my
> > mind around this in case something changes. But I have currently no idea
how to do netpoll run / detect and so on.
>
> I was gonna toss a rock or two at those "and so on" bits since they're
> wired into stuff my box uses regularly.. but I've yet to find a means
to make them be more than compiler fodder, so they're safe.
Nah, I was just insufficiently bored.
It's surprising that patchlet worked at all with netpoll_tx_running()
being busted, but whatever, bridge now does its forwarding thing for RT
and !RT, transmitting twice per kmsg, though apparently only 1 actually
hits copper to fly over to console logger. See attached trace. Note
that for that trace I sabotaged it to alternately xmit via fallback.
This will likely end up being done differently (way prettier), but what
the heck, it beats a lump of coal in your xmas stocking.
Ho Ho Hum :)
-Mike
netpoll: Make it RT friendly
PREEMPT_RT cannot alloc/free memory when not preemptible, making
disabling of IRQs across transmission an issue for RT.
Use rcu_read_lock_bh() to provide local exclusion for RT (via
softirq_ctrl.lock) for normal and fallback transmission paths
instead of disabling IRQs. Since zap_completion_queue() callers
ensure pointer stability, replace get_cpu_var() therein with
this_cpu_ptr() to keep it preemptible across kfree().
Disable a couple warnings for RT, and we're done.
v0.kinda-works -> v1 feedback fixes:
remove get_cpu_var() from zap_completion_queue().
fix/test netpoll_tx_running() to work for RT/!RT.
fix/test xmit fallback path for RT.
Signed-off-by: Mike Galbraith <efault@....de>
---
include/linux/netpoll.h | 4 +++-
net/core/netpoll.c | 44 ++++++++++++++++++++++++++++++++------------
2 files changed, 35 insertions(+), 13 deletions(-)
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -89,9 +89,11 @@ static inline void netpoll_poll_unlock(v
smp_store_release(&napi->poll_owner, -1);
}
+DECLARE_PER_CPU(int, _netpoll_tx_running);
+
static inline bool netpoll_tx_running(struct net_device *dev)
{
- return irqs_disabled();
+ return this_cpu_read(_netpoll_tx_running);
}
#else
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -70,6 +70,27 @@ module_param(carrier_timeout, uint, 0644
#define np_notice(np, fmt, ...) \
pr_notice("%s: " fmt, np->name, ##__VA_ARGS__)
+DEFINE_PER_CPU(int, _netpoll_tx_running);
+EXPORT_PER_CPU_SYMBOL(_netpoll_tx_running);
+
+#define netpoll_tx_begin(flags) \
+ do { \
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) \
+ local_irq_save(flags); \
+ else \
+ rcu_read_lock_bh(); \
+ this_cpu_write(_netpoll_tx_running, 1); \
+ } while (0)
+
+#define netpoll_tx_end(flags) \
+ do { \
+ this_cpu_write(_netpoll_tx_running, 0); \
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) \
+ local_irq_restore(flags); \
+ else \
+ rcu_read_unlock_bh(); \
+ } while (0)
+
static netdev_tx_t netpoll_start_xmit(struct sk_buff *skb,
struct net_device *dev,
struct netdev_queue *txq)
@@ -102,7 +123,7 @@ static void queue_process(struct work_st
struct netpoll_info *npinfo =
container_of(work, struct netpoll_info, tx_work.work);
struct sk_buff *skb;
- unsigned long flags;
+ unsigned long __maybe_unused flags;
while ((skb = skb_dequeue(&npinfo->txq))) {
struct net_device *dev = skb->dev;
@@ -114,7 +135,7 @@ static void queue_process(struct work_st
continue;
}
- local_irq_save(flags);
+ netpoll_tx_begin(flags);
/* check if skb->queue_mapping is still valid */
q_index = skb_get_queue_mapping(skb);
if (unlikely(q_index >= dev->real_num_tx_queues)) {
@@ -127,13 +148,13 @@ static void queue_process(struct work_st
!dev_xmit_complete(netpoll_start_xmit(skb, dev, txq))) {
skb_queue_head(&npinfo->txq, skb);
HARD_TX_UNLOCK(dev, txq);
- local_irq_restore(flags);
+ netpoll_tx_end(flags);
schedule_delayed_work(&npinfo->tx_work, HZ/10);
return;
}
HARD_TX_UNLOCK(dev, txq);
- local_irq_restore(flags);
+ netpoll_tx_end(flags);
}
}
@@ -243,7 +264,7 @@ static void refill_skbs(void)
static void zap_completion_queue(void)
{
unsigned long flags;
- struct softnet_data *sd = &get_cpu_var(softnet_data);
+ struct softnet_data *sd = this_cpu_ptr(&softnet_data);
if (sd->completion_queue) {
struct sk_buff *clist;
@@ -264,8 +285,6 @@ static void zap_completion_queue(void)
}
}
}
-
- put_cpu_var(softnet_data);
}
static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
@@ -314,7 +333,8 @@ static netdev_tx_t __netpoll_send_skb(st
/* It is up to the caller to keep npinfo alive. */
struct netpoll_info *npinfo;
- lockdep_assert_irqs_disabled();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ lockdep_assert_irqs_disabled();
dev = np->dev;
npinfo = rcu_dereference_bh(dev->npinfo);
@@ -350,7 +370,7 @@ static netdev_tx_t __netpoll_send_skb(st
udelay(USEC_PER_POLL);
}
- WARN_ONCE(!irqs_disabled(),
+ WARN_ONCE(!IS_ENABLED(CONFIG_PREEMPT_RT) && !irqs_disabled(),
"netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pS)\n",
dev->name, dev->netdev_ops->ndo_start_xmit);
@@ -365,16 +385,16 @@ static netdev_tx_t __netpoll_send_skb(st
netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
{
- unsigned long flags;
+ unsigned long __maybe_unused flags;
netdev_tx_t ret;
if (unlikely(!np)) {
dev_kfree_skb_irq(skb);
ret = NET_XMIT_DROP;
} else {
- local_irq_save(flags);
+ netpoll_tx_begin(flags);
ret = __netpoll_send_skb(np, skb);
- local_irq_restore(flags);
+ netpoll_tx_end(flags);
}
return ret;
}
View attachment "trace" of type "text/plain" (5157 bytes)
Powered by blists - more mailing lists