[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20260107104159.3669285-1-edumazet@google.com>
Date: Wed, 7 Jan 2026 10:41:59 +0000
From: Eric Dumazet <edumazet@...gle.com>
To: "David S . Miller" <davem@...emloft.net>, Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>
Cc: Simon Horman <horms@...nel.org>, netdev@...r.kernel.org, eric.dumazet@...il.com,
Eric Dumazet <edumazet@...gle.com>, Neal Cardwell <ncardwell@...gle.com>
Subject: [PATCH net] net: add net.core.qdisc_max_burst
In blamed commit, I added a check against the temporary queue
built in __dev_xmit_skb(). Idea was to drop packets early,
before any spinlock was acquired.
if (unlikely(defer_count > READ_ONCE(q->limit))) {
kfree_skb_reason(skb, SKB_DROP_REASON_QDISC_DROP);
return NET_XMIT_DROP;
}
It turned out that HTB Qdisc has a zero q->limit.
HTB limits packets on a per-class basis.
Some of our tests became flaky.
Add a new sysctl : net.core.qdisc_max_burst to control
how many packets can be stored in the temporary lockless queue.
Also add a new QDISC_BURST_DROP drop reason to better diagnose
future issues.
Thanks Neal !
Fixes: 100dfa74cad9 ("net: dev_queue_xmit() llist adoption")
Reported-and-bisected-by: Neal Cardwell <ncardwell@...gle.com>
Signed-off-by: Eric Dumazet <edumazet@...gle.com>
---
Documentation/admin-guide/sysctl/net.rst | 8 ++++++++
include/net/dropreason-core.h | 6 ++++++
include/net/hotdata.h | 1 +
net/core/dev.c | 6 +++---
net/core/hotdata.c | 1 +
net/core/sysctl_net_core.c | 7 +++++++
6 files changed, 26 insertions(+), 3 deletions(-)
diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst
index 369a738a68193e897d880eeb2c5a22cd90833938..91fa4ccd326c2b6351fd028a1c5d1c69126bee5f 100644
--- a/Documentation/admin-guide/sysctl/net.rst
+++ b/Documentation/admin-guide/sysctl/net.rst
@@ -303,6 +303,14 @@ netdev_max_backlog
Maximum number of packets, queued on the INPUT side, when the interface
receives packets faster than kernel can process them.
+qdisc_max_burst
+------------------
+
+Maximum number of packets that can be temporarily stored before
+reaching qdisc.
+
+Default: 1000
+
netdev_rss_key
--------------
diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
index 58d91ccc56e0b54368c432fb9075ab174dc3a09f..a7b7abd66e215c4bcaece6f00ca03de3ac81396f 100644
--- a/include/net/dropreason-core.h
+++ b/include/net/dropreason-core.h
@@ -67,6 +67,7 @@
FN(TC_EGRESS) \
FN(SECURITY_HOOK) \
FN(QDISC_DROP) \
+ FN(QDISC_BURST_DROP) \
FN(QDISC_OVERLIMIT) \
FN(QDISC_CONGESTED) \
FN(CAKE_FLOOD) \
@@ -374,6 +375,11 @@ enum skb_drop_reason {
* failed to enqueue to current qdisc)
*/
SKB_DROP_REASON_QDISC_DROP,
+ /**
+ * @SKB_DROP_REASON_QDISC_BURST_DROP: dropped when net.core.qdisc_max_burst
+ * limit is hit.
+ */
+ SKB_DROP_REASON_QDISC_BURST_DROP,
/**
* @SKB_DROP_REASON_QDISC_OVERLIMIT: dropped by qdisc when a qdisc
* instance exceeds its total buffer size limit.
diff --git a/include/net/hotdata.h b/include/net/hotdata.h
index 4acec191c54ab367ca12fff590d1f8c8aad64651..6632b1aa7584821fd4ab42163b77dfff6732a45e 100644
--- a/include/net/hotdata.h
+++ b/include/net/hotdata.h
@@ -42,6 +42,7 @@ struct net_hotdata {
int netdev_budget_usecs;
int tstamp_prequeue;
int max_backlog;
+ int qdisc_max_burst;
int dev_tx_weight;
int dev_rx_weight;
int sysctl_max_skb_frags;
diff --git a/net/core/dev.c b/net/core/dev.c
index 36dc5199037edb1506e67f6ab5e977ff41efef59..a8238023774407adbdb2e5d09dc009802870bd4e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4190,8 +4190,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
do {
if (first_n && !defer_count) {
defer_count = atomic_long_inc_return(&q->defer_count);
- if (unlikely(defer_count > READ_ONCE(q->limit))) {
- kfree_skb_reason(skb, SKB_DROP_REASON_QDISC_DROP);
+ if (unlikely(defer_count > READ_ONCE(net_hotdata.qdisc_max_burst))) {
+ kfree_skb_reason(skb, SKB_DROP_REASON_QDISC_BURST_DROP);
return NET_XMIT_DROP;
}
}
@@ -4209,7 +4209,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
ll_list = llist_del_all(&q->defer_list);
/* There is a small race because we clear defer_count not atomically
* with the prior llist_del_all(). This means defer_list could grow
- * over q->limit.
+ * over qdisc_max_burst.
*/
atomic_long_set(&q->defer_count, 0);
diff --git a/net/core/hotdata.c b/net/core/hotdata.c
index dddd5c287cf08ba75aec1cc546fd1bc48c0f7b26..a6db365808178d243f53ae1a817938fb17c3f968 100644
--- a/net/core/hotdata.c
+++ b/net/core/hotdata.c
@@ -17,6 +17,7 @@ struct net_hotdata net_hotdata __cacheline_aligned = {
.tstamp_prequeue = 1,
.max_backlog = 1000,
+ .qdisc_max_burst = 1000,
.dev_tx_weight = 64,
.dev_rx_weight = 64,
.sysctl_max_skb_frags = MAX_SKB_FRAGS,
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 8d4decb2606fa18222a02e59dc889efa995d2eaa..05dd55cf8b58e6c6fce498a11c09f23fd56d8f34 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -429,6 +429,13 @@ static struct ctl_table net_core_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "qdisc_max_burst",
+ .data = &net_hotdata.qdisc_max_burst,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
{
.procname = "netdev_rss_key",
.data = &netdev_rss_key,
--
2.52.0.351.gbe84eed79e-goog
Powered by blists - more mailing lists