Message-ID: <CANn89iLqUtGkgXj0BgrXJD8ckqrHkMriapKpwHNcMP06V_fAGQ@mail.gmail.com>
Date: Sun, 9 Nov 2025 04:54:26 -0800
From: Eric Dumazet <edumazet@...gle.com>
To: Toke Høiland-Jørgensen <toke@...hat.com>
Cc: "David S . Miller" <davem@...emloft.net>, Jakub Kicinski <kuba@...nel.org>, 
	Paolo Abeni <pabeni@...hat.com>, Simon Horman <horms@...nel.org>, 
	Jamal Hadi Salim <jhs@...atatu.com>, Cong Wang <xiyou.wangcong@...il.com>, 
	Jiri Pirko <jiri@...nulli.us>, Kuniyuki Iwashima <kuniyu@...gle.com>, 
	Willem de Bruijn <willemb@...gle.com>, netdev@...r.kernel.org, eric.dumazet@...il.com
Subject: Re: [PATCH v1 net-next 5/5] net: dev_queue_xmit() llist adoption

On Sun, Nov 9, 2025 at 2:09 AM Eric Dumazet <edumazet@...gle.com> wrote:
>
>
> This might be something related to XDP, because I ran the following
> test (IDPF, 32 TX queues)
>
> tc qd replace dev eth1 root cake
> ./super_netperf 16 -H tjbp27 -t UDP_STREAM -l 1000 -- -m 64 -Nn &
>
> Before my series : ~360 Kpps
> After my series : ~550 Kpps

Or ... being faster uncovered an old qdisc bug.

I mentioned the 'requeues' counter because I have seen it increase
lately, and was wondering whether this could be a driver bug.

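(For reference, the counter is visible per qdisc in the output of
"tc -s qdisc show dev eth1", next to the sent bytes/packets stats.)
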
It seems the bug is in generic qdisc code: try_bulk_dequeue_skb()
trusts BQL, but cannot see that the driver might block the queue
before BQL does.

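To make the failure mode concrete, here is a minimal userspace
sketch (my illustration only, not kernel code; the BQL budget and
packet size are assumed numbers): with 64-byte packets and a
byte-only limit, a single try_bulk_dequeue_skb() call can pull
thousands of skbs, and if the driver then stops the ring they are
all requeued at once.

#include <stdio.h>

#define DEV_TX_WEIGHT 64	/* default net_hotdata.dev_tx_weight */
#define PKT_LEN       64	/* tiny UDP packets, as in the test above */

/* Mimics the try_bulk_dequeue_skb() loop bound: byte budget only
 * (pre-patch) vs. byte budget plus a packet quota (post-patch).
 */
static int bulk_dequeue(int bytelimit, int quota)
{
	int cnt = 0;

	while (bytelimit > 0) {
		bytelimit -= PKT_LEN;	/* one successful q->dequeue() */
		if (++cnt >= quota)	/* the cap added by the patch */
			break;
	}
	return cnt;
}

int main(void)
{
	int bql_budget = 128 * 1024;	/* assumed BQL byte budget */

	printf("no quota:   %d packets bulked\n",
	       bulk_dequeue(bql_budget, 1 << 30));
	printf("with quota: %d packets bulked\n",
	       bulk_dequeue(bql_budget, DEV_TX_WEIGHT));
	return 0;
}

With the quota, a batch is also capped by the remaining
dev_tx_weight budget of this __qdisc_run() invocation, so one run
can no longer build an oversized list that the driver then rejects.
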
I am testing the following patch; it would be great if this
solution works for you.

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index d9a98d02a55fc361a223f3201e37b6a2b698bb5e..e18584604f0faab4e4d86a29565d7d982c9eb41d 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -180,9 +180,10 @@ static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 static void try_bulk_dequeue_skb(struct Qdisc *q,
                                 struct sk_buff *skb,
                                 const struct netdev_queue *txq,
-                                int *packets)
+                                int *packets, int quota)
 {
        int bytelimit = qdisc_avail_bulklimit(txq) - skb->len;
+       int cnt = 0;

        while (bytelimit > 0) {
                struct sk_buff *nskb = q->dequeue(q);
@@ -193,8 +194,10 @@ static void try_bulk_dequeue_skb(struct Qdisc *q,
                bytelimit -= nskb->len; /* covers GSO len */
                skb->next = nskb;
                skb = nskb;
-               (*packets)++; /* GSO counts as one pkt */
+               if (++cnt >= quota)
+                       break;
        }
+       (*packets) += cnt;
        skb_mark_not_on_list(skb);
 }

@@ -228,7 +231,7 @@ static void try_bulk_dequeue_skb_slow(struct Qdisc *q,
  * A requeued skb (via q->gso_skb) can also be a SKB list.
  */
 static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
-                                  int *packets)
+                                  int *packets, int quota)
 {
        const struct netdev_queue *txq = q->dev_queue;
        struct sk_buff *skb = NULL;
@@ -295,7 +298,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
        if (skb) {
 bulk:
                if (qdisc_may_bulk(q))
-                       try_bulk_dequeue_skb(q, skb, txq, packets);
+                       try_bulk_dequeue_skb(q, skb, txq, packets, quota);
                else
                        try_bulk_dequeue_skb_slow(q, skb, packets);
        }
@@ -387,7 +390,7 @@ bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
  *                             >0 - queue is not empty.
  *
  */
-static inline bool qdisc_restart(struct Qdisc *q, int *packets)
+static inline bool qdisc_restart(struct Qdisc *q, int *packets, int quota)
 {
        spinlock_t *root_lock = NULL;
        struct netdev_queue *txq;
@@ -396,7 +399,7 @@ static inline bool qdisc_restart(struct Qdisc *q, int *packets)
        bool validate;

        /* Dequeue packet */
-       skb = dequeue_skb(q, &validate, packets);
+       skb = dequeue_skb(q, &validate, packets, quota);
        if (unlikely(!skb))
                return false;

@@ -414,7 +417,7 @@ void __qdisc_run(struct Qdisc *q)
        int quota = READ_ONCE(net_hotdata.dev_tx_weight);
        int packets;

-       while (qdisc_restart(q, &packets)) {
+       while (qdisc_restart(q, &packets, quota)) {
                quota -= packets;
                if (quota <= 0) {
                        if (q->flags & TCQ_F_NOLOCK)
