Message-ID: <CANn89i+dL6JUpbZgJ9DEGeVWpmrfv9q=Y_daFvHAPM4ZsjinQg@mail.gmail.com>
Date: Mon, 10 Nov 2025 09:34:26 -0800
From: Eric Dumazet <edumazet@...gle.com>
To: Toke Høiland-Jørgensen <toke@...hat.com>
Cc: Jonas Köppeler <j.koeppeler@...berlin.de>,
"David S . Miller" <davem@...emloft.net>, Jakub Kicinski <kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>,
Simon Horman <horms@...nel.org>, Jamal Hadi Salim <jhs@...atatu.com>,
Cong Wang <xiyou.wangcong@...il.com>, Jiri Pirko <jiri@...nulli.us>,
Kuniyuki Iwashima <kuniyu@...gle.com>, Willem de Bruijn <willemb@...gle.com>, netdev@...r.kernel.org,
eric.dumazet@...il.com
Subject: Re: [PATCH v1 net-next 5/5] net: dev_queue_xmit() llist adoption
On Mon, Nov 10, 2025 at 6:49 AM Toke Høiland-Jørgensen <toke@...hat.com> wrote:
>
> Eric Dumazet <edumazet@...gle.com> writes:
>
> > I can work on a patch today.
>
> This sounds like an excellent idea in any case - thanks! :)
The following (on top of my last series) seems to work for me
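
In short (my summary of the diff below): a qdisc that sets
TCQ_F_DEQUEUE_DROPS no longer frees packets dropped from ->dequeue()
while the qdisc lock / running state is held. It chains them on
q->to_free via qdisc_dequeue_drop(), qdisc_run_end() detaches and
returns that list, and the caller frees it once the lock has been
released. Roughly, for the locked-qdisc path (illustrative snippet,
not the literal dev.c code):

	struct sk_buff *to_free2;

	to_free2 = qdisc_run(q);	/* may hand back skbs dropped in ->dequeue() */
	spin_unlock(root_lock);		/* drop the root qdisc lock first */
	if (unlikely(to_free2))
		kfree_skb_list_reason(to_free2,
				      tcf_get_drop_reason(to_free2));
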
include/net/pkt_sched.h | 5 +++--
include/net/sch_generic.h | 24 +++++++++++++++++++++++-
net/core/dev.c | 33 +++++++++++++++++++--------------
net/sched/sch_cake.c | 4 +++-
4 files changed, 48 insertions(+), 18 deletions(-)
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 4678db45832a1e3bf7b8a07756fb89ab868bd5d2..e703c507d0daa97ae7c3bf131e322b1eafcc5664 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -114,12 +114,13 @@ bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
void __qdisc_run(struct Qdisc *q);
-static inline void qdisc_run(struct Qdisc *q)
+static inline struct sk_buff *qdisc_run(struct Qdisc *q)
{
if (qdisc_run_begin(q)) {
__qdisc_run(q);
- qdisc_run_end(q);
+ return qdisc_run_end(q);
}
+ return NULL;
}
extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 79501499dafba56271b9ebd97a8f379ffdc83cac..19cd2bc13bdba48f941b1599f896c15c8c7860ae 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -88,6 +88,8 @@ struct Qdisc {
#define TCQ_F_INVISIBLE 0x80 /* invisible by default in dump */
#define TCQ_F_NOLOCK 0x100 /* qdisc does not require locking */
#define TCQ_F_OFFLOADED 0x200 /* qdisc is offloaded to HW */
+#define TCQ_F_DEQUEUE_DROPS 0x400 /* ->dequeue() can drop packets in q->to_free */
+
u32 limit;
const struct Qdisc_ops *ops;
struct qdisc_size_table __rcu *stab;
@@ -119,6 +121,8 @@ struct Qdisc {
/* Note : we only change qstats.backlog in fast path. */
struct gnet_stats_queue qstats;
+
+ struct sk_buff *to_free;
__cacheline_group_end(Qdisc_write);
@@ -218,8 +222,15 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
return true;
}
-static inline void qdisc_run_end(struct Qdisc *qdisc)
+static inline struct sk_buff *qdisc_run_end(struct Qdisc *qdisc)
{
+ struct sk_buff *to_free = NULL;
+
+ if (qdisc->flags & TCQ_F_DEQUEUE_DROPS) {
+ to_free = qdisc->to_free;
+ if (to_free)
+ qdisc->to_free = NULL;
+ }
if (qdisc->flags & TCQ_F_NOLOCK) {
spin_unlock(&qdisc->seqlock);
@@ -235,6 +246,7 @@ static inline void qdisc_run_end(struct Qdisc *qdisc)
} else {
WRITE_ONCE(qdisc->running, false);
}
+ return to_free;
}
static inline bool qdisc_may_bulk(const struct Qdisc *qdisc)
@@ -1105,6 +1117,16 @@ static inline void tcf_set_drop_reason(const struct sk_buff *skb,
tc_skb_cb(skb)->drop_reason = reason;
}
+static inline void qdisc_dequeue_drop(struct Qdisc *q, struct sk_buff *skb,
+ enum skb_drop_reason reason)
+{
+ DEBUG_NET_WARN_ON_ONCE(!(q->flags & TCQ_F_DEQUEUE_DROPS));
+
+ tcf_set_drop_reason(skb, reason);
+ skb->next = q->to_free;
+ q->to_free = skb;
+}
+
/* Instead of calling kfree_skb() while root qdisc lock is held,
* queue the skb for future freeing at end of __dev_xmit_skb()
*/
diff --git a/net/core/dev.c b/net/core/dev.c
index ac994974e2a81889fcc0a2e664edcdb7cfd0496d..18cfcd765b1b3e4af1c5339e36df517e7abc914f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4141,7 +4141,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
struct net_device *dev,
struct netdev_queue *txq)
{
- struct sk_buff *next, *to_free = NULL;
+ struct sk_buff *next, *to_free = NULL, *to_free2 = NULL;
spinlock_t *root_lock = qdisc_lock(q);
struct llist_node *ll_list, *first_n;
unsigned long defer_count = 0;
@@ -4160,9 +4160,9 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
if (unlikely(!nolock_qdisc_is_empty(q))) {
rc = dev_qdisc_enqueue(skb, q, &to_free, txq);
__qdisc_run(q);
- qdisc_run_end(q);
+ to_free2 = qdisc_run_end(q);
- goto no_lock_out;
+ goto free_out;
}
qdisc_bstats_cpu_update(q, skb);
@@ -4170,18 +4170,15 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
!nolock_qdisc_is_empty(q))
__qdisc_run(q);
- qdisc_run_end(q);
- return NET_XMIT_SUCCESS;
+ to_free2 = qdisc_run_end(q);
+ rc = NET_XMIT_SUCCESS;
+ goto free_out;
}
rc = dev_qdisc_enqueue(skb, q, &to_free, txq);
- qdisc_run(q);
+ to_free2 = qdisc_run(q);
-no_lock_out:
- if (unlikely(to_free))
- kfree_skb_list_reason(to_free,
- tcf_get_drop_reason(to_free));
- return rc;
+ goto free_out;
}
/* Open code llist_add(&skb->ll_node, &q->defer_list) + queue limit.
@@ -4239,7 +4236,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
qdisc_bstats_update(q, skb);
if (sch_direct_xmit(skb, q, dev, txq, root_lock, true))
__qdisc_run(q);
- qdisc_run_end(q);
+ to_free2 = qdisc_run_end(q);
rc = NET_XMIT_SUCCESS;
} else {
int count = 0;
@@ -4251,15 +4248,19 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
rc = dev_qdisc_enqueue(skb, q, &to_free, txq);
count++;
}
- qdisc_run(q);
+ to_free2 = qdisc_run(q);
if (count != 1)
rc = NET_XMIT_SUCCESS;
}
unlock:
spin_unlock(root_lock);
+free_out:
if (unlikely(to_free))
kfree_skb_list_reason(to_free,
tcf_get_drop_reason(to_free));
+ if (unlikely(to_free2))
+ kfree_skb_list_reason(to_free2,
+ tcf_get_drop_reason(to_free2));
return rc;
}
@@ -5741,6 +5742,7 @@ static __latent_entropy void net_tx_action(void)
}
if (sd->output_queue) {
+ struct sk_buff *to_free;
struct Qdisc *head;
local_irq_disable();
@@ -5780,9 +5782,12 @@ static __latent_entropy void net_tx_action(void)
}
clear_bit(__QDISC_STATE_SCHED, &q->state);
- qdisc_run(q);
+ to_free = qdisc_run(q);
if (root_lock)
spin_unlock(root_lock);
+ if (unlikely(to_free))
+ kfree_skb_list_reason(to_free,
+ tcf_get_drop_reason(to_free));
}
rcu_read_unlock();
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 312f5b000ffb67d74faf70f26d808e26315b4ab8..a717cc4e0606e80123ec9c76331d454dad699b69 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -2183,7 +2183,7 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
b->tin_dropped++;
qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
qdisc_qstats_drop(sch);
- kfree_skb_reason(skb, reason);
+ qdisc_dequeue_drop(sch, skb, reason);
if (q->rate_flags & CAKE_FLAG_INGRESS)
goto retry;
}
@@ -2569,6 +2569,8 @@ static void cake_reconfigure(struct Qdisc *sch)
sch->flags &= ~TCQ_F_CAN_BYPASS;
+ sch->flags |= TCQ_F_DEQUEUE_DROPS;
+
q->buffer_limit = min(q->buffer_limit,
max(sch->limit * psched_mtu(qdisc_dev(sch)),
q->buffer_config_limit));
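
For another qdisc that wants to opt in, the adoption pattern would
mirror the sch_cake hunk above: set TCQ_F_DEQUEUE_DROPS once, and
replace kfree_skb_reason() in the ->dequeue() path with
qdisc_dequeue_drop(). A rough sketch (hypothetical example_* qdisc,
not part of this patch; only qdisc_dequeue_drop() and
TCQ_F_DEQUEUE_DROPS come from the diff, the rest are existing helpers):

	static int example_init(struct Qdisc *sch, struct nlattr *opt,
				struct netlink_ext_ack *extack)
	{
		/* Opt in: qdisc_run_end() will collect and return sch->to_free. */
		sch->flags |= TCQ_F_DEQUEUE_DROPS;
		return 0;
	}

	static struct sk_buff *example_dequeue(struct Qdisc *sch)
	{
		struct sk_buff *skb;

		while ((skb = __qdisc_dequeue_head(&sch->q)) != NULL) {
			if (!example_must_drop(skb)) {	/* made-up drop policy */
				qdisc_bstats_update(sch, skb);
				return skb;
			}
			/* Instead of kfree_skb_reason() under the qdisc lock,
			 * chain the skb on sch->to_free; the caller of
			 * qdisc_run()/qdisc_run_end() frees the whole list
			 * with kfree_skb_list_reason() after unlocking.
			 */
			qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
			qdisc_qstats_drop(sch);
			qdisc_dequeue_drop(sch, skb, SKB_DROP_REASON_QDISC_DROP);
		}
		return NULL;
	}
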