[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20251124-mq-cake-sub-qdisc-v1-4-a2ff1dab488f@redhat.com>
Date: Mon, 24 Nov 2025 15:59:35 +0100
From: Toke Høiland-Jørgensen <toke@...hat.com>
To: Toke Høiland-Jørgensen <toke@...e.dk>,
Jamal Hadi Salim <jhs@...atatu.com>, Cong Wang <xiyou.wangcong@...il.com>,
Jiri Pirko <jiri@...nulli.us>, "David S. Miller" <davem@...emloft.net>,
Eric Dumazet <edumazet@...gle.com>, Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>, Simon Horman <horms@...nel.org>
Cc: Jonas Köppeler <j.koeppeler@...berlin.de>,
cake@...ts.bufferbloat.net, netdev@...r.kernel.org,
Toke Høiland-Jørgensen <toke@...hat.com>
Subject: [PATCH net-next 4/4] net/sched: sch_cake: share shaper state
across sub-instances of cake_mq
From: Jonas Köppeler <j.koeppeler@...berlin.de>
This commit adds shared shaper state across the cake instances beneath a
cake_mq qdisc. It works by periodically counting the number of active
instances, and dividing the configured rate by the number of active
queues.
The scan is lockless and simply reads the qlen and the last_active state
variable of each of the instances configured beneath the parent cake_mq
instance. Locking is not required since the values are only updated by
the owning instance, and eventual consistency is sufficient for the
purpose of estimating the number of active queues.
The interval for scanning the number of active queues is set to 200 us.
We found this to be a good tradeoff between overhead and response time.
For a detailed analysis of this aspect see the Netdevconf talk:
https://netdevconf.info/0x19/docs/netdev-0x19-paper16-talk-paper.pdf
Signed-off-by: Jonas Köppeler <j.koeppeler@...berlin.de>
Reviewed-by: Jamal Hadi Salim <jhs@...atatu.com>
Signed-off-by: Toke Høiland-Jørgensen <toke@...hat.com>
---
Documentation/netlink/specs/tc.yaml | 3 +++
include/uapi/linux/pkt_sched.h | 1 +
net/sched/sch_cake.c | 51 +++++++++++++++++++++++++++++++++++++
3 files changed, 55 insertions(+)
diff --git a/Documentation/netlink/specs/tc.yaml b/Documentation/netlink/specs/tc.yaml
index b398f7a46dae..2e663333a279 100644
--- a/Documentation/netlink/specs/tc.yaml
+++ b/Documentation/netlink/specs/tc.yaml
@@ -2207,6 +2207,9 @@ attribute-sets:
-
name: blue-timer-us
type: s32
+ -
+ name: active-queues
+ type: u32
-
name: cake-tin-stats-attrs
name-prefix: tca-cake-tin-stats-
diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index c2da76e78bad..66e8072f44df 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -1036,6 +1036,7 @@ enum {
TCA_CAKE_STATS_DROP_NEXT_US,
TCA_CAKE_STATS_P_DROP,
TCA_CAKE_STATS_BLUE_TIMER_US,
+ TCA_CAKE_STATS_ACTIVE_QUEUES,
__TCA_CAKE_STATS_MAX
};
#define TCA_CAKE_STATS_MAX (__TCA_CAKE_STATS_MAX - 1)
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 7ceccbfaa9b6..a04aafb129c4 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -201,6 +201,7 @@ struct cake_sched_config {
u64 rate_bps;
u64 interval;
u64 target;
+ u64 sync_time;
u32 buffer_config_limit;
u32 fwmark_mask;
u16 fwmark_shft;
@@ -257,6 +258,11 @@ struct cake_sched_data {
u16 max_adjlen;
u16 min_netlen;
u16 min_adjlen;
+
+ /* mq sync state */
+ u64 last_checked_active;
+ u64 last_active;
+ u32 active_queues;
};
enum {
@@ -383,6 +389,8 @@ static const u32 inv_sqrt_cache[REC_INV_SQRT_CACHE] = {
1239850263, 1191209601, 1147878294, 1108955788
};
+static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu,
+ u64 target_ns, u64 rtt_est_ns);
/* http://en.wikipedia.org/wiki/Methods_of_computing_square_roots
* new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2)
*
@@ -2002,6 +2010,40 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
u64 delay;
u32 len;
+ if (q->config->is_shared &&
+ now - q->last_checked_active >= q->config->sync_time) { /* re-check every sync_time; the default is 200 us */
+ struct net_device *dev = qdisc_dev(sch);
+ struct cake_sched_data *other_priv;
+ u64 new_rate = q->config->rate_bps;
+ u64 other_qlen, other_last_active;
+ struct Qdisc *other_sch;
+ u32 num_active_qs = 1;
+ unsigned int ntx;
+
+ for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
+ other_sch = rtnl_dereference(netdev_get_tx_queue(dev, ntx)->qdisc_sleeping);
+ other_priv = qdisc_priv(other_sch);
+
+ if (other_priv == q)
+ continue;
+
+ other_qlen = READ_ONCE(other_sch->q.qlen);
+ other_last_active = READ_ONCE(other_priv->last_active);
+
+ if (other_qlen || other_last_active > q->last_checked_active)
+ num_active_qs++;
+ }
+
+ if (num_active_qs > 1)
+ new_rate = div64_u64(q->config->rate_bps, num_active_qs);
+
+ /* mtu = 0 is used to only update the rate and not mess with cobalt params */
+ cake_set_rate(b, new_rate, 0, 0, 0);
+ q->last_checked_active = now;
+ q->rate_ns = b->tin_rate_ns;
+ q->rate_shft = b->tin_rate_shft;
+ }
+
begin:
if (!sch->q.qlen)
return NULL;
@@ -2201,6 +2243,7 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
b->tin_ecn_mark += !!flow->cvars.ecn_marked;
qdisc_bstats_update(sch, skb);
+ q->last_active = now;
/* collect delay stats */
delay = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
@@ -2301,6 +2344,9 @@ static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu,
b->tin_rate_ns = rate_ns;
b->tin_rate_shft = rate_shft;
+ if (mtu == 0)
+ return;
+
byte_target_ns = (byte_target * rate_ns) >> rate_shft;
b->cparams.target = max((byte_target_ns * 3) / 2, target_ns);
@@ -2763,6 +2809,7 @@ static void cake_config_init(struct cake_sched_config *q, bool is_shared)
*/
q->rate_flags |= CAKE_FLAG_SPLIT_GSO;
q->is_shared = is_shared;
+ q->sync_time = 200 * NSEC_PER_USEC;
}
static int cake_init(struct Qdisc *sch, struct nlattr *opt,
@@ -2834,6 +2881,9 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt,
qd->avg_peak_bandwidth = q->rate_bps;
qd->min_netlen = ~0;
qd->min_adjlen = ~0;
+ qd->active_queues = 0;
+ qd->last_checked_active = 0;
+
return 0;
err:
kvfree(qd->config);
@@ -2967,6 +3017,7 @@ static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
PUT_STAT_U32(MAX_ADJLEN, q->max_adjlen);
PUT_STAT_U32(MIN_NETLEN, q->min_netlen);
PUT_STAT_U32(MIN_ADJLEN, q->min_adjlen);
+ PUT_STAT_U32(ACTIVE_QUEUES, q->active_queues);
#undef PUT_STAT_U32
#undef PUT_STAT_U64
--
2.51.2
Powered by blists - more mailing lists