Message-ID: <87ms4bn1u9.fsf@toke.dk>
Date: Mon, 24 Nov 2025 11:48:14 +0100
From: Toke Høiland-Jørgensen <toke@...e.dk>
To: Xiang Mei <xmei5@....edu>, security@...nel.org
Cc: netdev@...r.kernel.org, xiyou.wangcong@...il.com,
cake@...ts.bufferbloat.net, bestswngs@...il.com, Xiang Mei <xmei5@....edu>
Subject: Re: [PATCH net v5] net/sched: sch_cake: Fix incorrect qlen reduction in cake_drop

Xiang Mei <xmei5@....edu> writes:
> In cake_drop(), qdisc_tree_reduce_backlog() is used to update the qlen
> and backlog of the qdisc hierarchy. Its caller, cake_enqueue(), assumes
> that the parent qdisc will enqueue the current packet. However, this
> assumption breaks when cake_enqueue() returns NET_XMIT_CN: the parent
> qdisc does not enqueue the current packet, leaving the tree qlen/backlog
> accounting inconsistent. This mismatch can lead to a NULL dereference
> (e.g., when the parent Qdisc is qfq_qdisc).
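
(For context: on any return value other than NET_XMIT_SUCCESS, a
classful parent bails out before counting the packet. Roughly, going
from memory of qfq_enqueue(), so read this as a sketch rather than the
exact code:

	err = qdisc_enqueue(skb, cl->qdisc, to_free);
	if (unlikely(err != NET_XMIT_SUCCESS)) {
		if (net_xmit_drop_count(err)) {
			cl->qstats.drops++;
			qdisc_qstats_drop(sch);
		}
		return err;	/* sch->q.qlen never incremented for this skb */
	}

So when cake keeps the new packet but returns NET_XMIT_CN, the
qdisc_tree_reduce_backlog() calls issued from cake_drop() leave the
ancestors' qlen one packet short of what cake actually holds.)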
>
> This patch computes the qlen/backlog delta in a more robust way by
> observing the difference before and after the series of cake_drop()
> calls, and then compensates the qdisc tree accounting if cake_enqueue()
> returns NET_XMIT_CN.
>
> To ensure correct compensation when ACK thinning is enabled, a new
> variable is introduced to keep qlen unchanged.
>
> Fixes: 15de71d06a40 ("net/sched: Make cake_enqueue return NET_XMIT_CN when past buffer_limit")
> Signed-off-by: Xiang Mei <xmei5@....edu>
> ---
> v2: add missing cc
> v3: move qdisc_tree_reduce_backlog out of cake_drop
> v4: remove redundant variable and handle ack branch correctly
> v5: add the PoC as a test case

Please split the test case into its own patch and send both as a series.
Otherwise, the changes LGTM apart from a few nits below:

> ---
> net/sched/sch_cake.c | 52 +++++++++++--------
> .../tc-testing/tc-tests/qdiscs/cake.json | 28 ++++++++++
> 2 files changed, 58 insertions(+), 22 deletions(-)
>
> diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
> index 32bacfc314c2..cf4d6454ca9c 100644
> --- a/net/sched/sch_cake.c
> +++ b/net/sched/sch_cake.c
> @@ -1597,7 +1597,6 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free)
>
> qdisc_drop_reason(skb, sch, to_free, SKB_DROP_REASON_QDISC_OVERLIMIT);
> sch->q.qlen--;
> - qdisc_tree_reduce_backlog(sch, 1, len);
>
> cake_heapify(q, 0);
>
> @@ -1750,7 +1749,8 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
> ktime_t now = ktime_get();
> struct cake_tin_data *b;
> struct cake_flow *flow;
> - u32 idx, tin;
> + u32 idx, tin, prev_qlen, prev_backlog, drop_id;
> + bool same_flow = false;

Please make sure to maintain the reverse x-mas tree ordering of the
variable declarations.
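
That is, longest line first, shortest last; e.g., for the declarations
visible in this hunk, something like:

	u32 idx, tin, prev_qlen, prev_backlog, drop_id;
	ktime_t now = ktime_get();
	struct cake_tin_data *b;
	struct cake_flow *flow;
	bool same_flow = false;
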
>
> /* choose flow to insert into */
> idx = cake_classify(sch, &b, skb, q->flow_mode, &ret);
> @@ -1823,6 +1823,8 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
> consume_skb(skb);
> } else {
> /* not splitting */
> + int ack_pkt_len = 0;
> +
> cobalt_set_enqueue_time(skb, now);
> get_cobalt_cb(skb)->adjusted_len = cake_overhead(q, skb);
> flow_queue_add(flow, skb);
> @@ -1834,7 +1836,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
> b->ack_drops++;
> sch->qstats.drops++;
> b->bytes += qdisc_pkt_len(ack);
> - len -= qdisc_pkt_len(ack);
> + ack_pkt_len = qdisc_pkt_len(ack);

There's a qdisc_tree_reduce_backlog() call that uses qdisc_pkt_len(ack)
just below this; let's also change that one to use ack_pkt_len while
we're at it.
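
I.e., roughly this (sketch from memory of the code just below the
quoted hunk):

	qdisc_tree_reduce_backlog(sch, 1, ack_pkt_len);
	consume_skb(ack);
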
> q->buffer_used += skb->truesize - ack->truesize;
> if (q->rate_flags & CAKE_FLAG_INGRESS)
> cake_advance_shaper(q, b, ack, now, true);
> @@ -1848,11 +1850,11 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
>
> /* stats */
> b->packets++;
> - b->bytes += len;
> - b->backlogs[idx] += len;
> - b->tin_backlog += len;
> - sch->qstats.backlog += len;
> - q->avg_window_bytes += len;
> + b->bytes += len - ack_pkt_len;
> + b->backlogs[idx] += len - ack_pkt_len;
> + b->tin_backlog += len - ack_pkt_len;
> + sch->qstats.backlog += len - ack_pkt_len;
> + q->avg_window_bytes += len - ack_pkt_len;
> }
>
> if (q->overflow_timeout)
> @@ -1927,24 +1929,30 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
> if (q->buffer_used > q->buffer_max_used)
> q->buffer_max_used = q->buffer_used;
>
> - if (q->buffer_used > q->buffer_limit) {
> - bool same_flow = false;
> - u32 dropped = 0;
> - u32 drop_id;
> + if (q->buffer_used <= q->buffer_limit)
> + return NET_XMIT_SUCCESS;
>
> - while (q->buffer_used > q->buffer_limit) {
> - dropped++;
> - drop_id = cake_drop(sch, to_free);
> + prev_qlen = sch->q.qlen;
> + prev_backlog = sch->qstats.backlog;
>
> - if ((drop_id >> 16) == tin &&
> - (drop_id & 0xFFFF) == idx)
> - same_flow = true;
> - }
> - b->drop_overlimit += dropped;
> + while (q->buffer_used > q->buffer_limit) {
> + drop_id = cake_drop(sch, to_free);
> + if ((drop_id >> 16) == tin &&
> + (drop_id & 0xFFFF) == idx)
> + same_flow = true;
> + }
> +
> + /* Compute the dropped qlen and pkt length */
> + prev_qlen -= sch->q.qlen;
> + prev_backlog -= sch->qstats.backlog;
> + b->drop_overlimit += prev_backlog;

drop_overlimit was accounted in packets before, so this should be += prev_qlen.
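
I.e., keeping the surrounding lines from the hunk (sketch):

	/* Compute the dropped qlen and pkt length */
	prev_qlen -= sch->q.qlen;
	prev_backlog -= sch->qstats.backlog;
	b->drop_overlimit += prev_qlen;

(prev_backlog is the backlog delta in bytes, so adding it here would
count dropped bytes in a packet counter.)
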
-Toke