Message-ID: <20250506165413.GW3339421@horms.kernel.org>
Date: Tue, 6 May 2025 17:54:13 +0100
From: Simon Horman <horms@...nel.org>
To: chia-yu.chang@...ia-bell-labs.com
Cc: donald.hunter@...il.com, xandfury@...il.com, netdev@...r.kernel.org,
dave.taht@...il.com, pabeni@...hat.com, jhs@...atatu.com,
kuba@...nel.org, stephen@...workplumber.org,
xiyou.wangcong@...il.com, jiri@...nulli.us, davem@...emloft.net,
edumazet@...gle.com, andrew+netdev@...n.ch, ast@...erby.net,
liuhangbin@...il.com, shuah@...nel.org,
linux-kselftest@...r.kernel.org, ij@...nel.org,
ncardwell@...gle.com, koen.de_schepper@...ia-bell-labs.com,
g.white@...lelabs.com, ingemar.s.johansson@...csson.com,
mirja.kuehlewind@...csson.com, cheshire@...le.com, rs.ietf@....at,
Jason_Livingood@...cast.com, vidhi_goel@...le.com
Subject: Re: [PATCH v14 net-next 1/5] sched: Struct definition and parsing of
dualpi2 qdisc
On Mon, May 05, 2025 at 11:48:33AM +0200, chia-yu.chang@...ia-bell-labs.com wrote:
> From: Chia-Yu Chang <chia-yu.chang@...ia-bell-labs.com>
>
> DualPI2 is the reference implementation of the IETF RFC 9332 DualQ
> Coupled AQM (https://datatracker.ietf.org/doc/html/rfc9332). It
> provides two queues: a low-latency queue (L-queue) and a classic
> queue (C-queue). By default, it enqueues non-ECN and ECT(0) packets
> into the C-queue, and ECT(1) and CE packets into the L-queue, as
> specified by RFC 9332.
>
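For readers following along: the default classification above boils
down to testing the low bit of the ECN field, since ECT(1) (0b01) and
CE (0b11) both have it set. A rough illustrative sketch only; the
helper name is made up and this is not code from this patch:

	#include <net/inet_ecn.h>

	/* ecn holds the two ECN bits: tos (or traffic class) & INET_ECN_MASK */
	static bool dualpi2_ecn_is_l4s(u8 ecn)
	{
		/* ECT(1) == 0x1 and CE == 0x3 have the low bit set and are
		 * steered to the L-queue; Not-ECT == 0x0 and ECT(0) == 0x2
		 * go to the C-queue.
		 */
		return ecn & INET_ECN_ECT_1;
	}
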
> This patch defines the dualpi2 Qdisc structure and its netlink
> parsing; the following two patches add dumping and the
> enqueue/dequeue paths for DualPI2.
>
> Signed-off-by: Chia-Yu Chang <chia-yu.chang@...ia-bell-labs.com>
...
> diff --git a/net/sched/sch_dualpi2.c b/net/sched/sch_dualpi2.c
...
> +static int dualpi2_change(struct Qdisc *sch, struct nlattr *opt,
> + struct netlink_ext_ack *extack)
> +{
> + struct nlattr *tb[TCA_DUALPI2_MAX + 1];
> + struct dualpi2_sched_data *q;
> + int old_backlog;
> + int old_qlen;
> + int err;
> +
> + if (!opt)
> + return -EINVAL;
> + err = nla_parse_nested(tb, TCA_DUALPI2_MAX, opt, dualpi2_policy,
> + extack);
> + if (err < 0)
> + return err;
> +
> + q = qdisc_priv(sch);
> + sch_tree_lock(sch);
> +
> + if (tb[TCA_DUALPI2_LIMIT]) {
> + u32 limit = nla_get_u32(tb[TCA_DUALPI2_LIMIT]);
> +
> + WRITE_ONCE(sch->limit, limit);
> + WRITE_ONCE(q->memory_limit, get_memory_limit(sch, limit));
> + }
> +
> + if (tb[TCA_DUALPI2_MEMORY_LIMIT])
> + WRITE_ONCE(q->memory_limit,
> + nla_get_u32(tb[TCA_DUALPI2_MEMORY_LIMIT]));
> +
> + if (tb[TCA_DUALPI2_TARGET]) {
> + u64 target = nla_get_u32(tb[TCA_DUALPI2_TARGET]);
> +
> + WRITE_ONCE(q->pi2_target, target * NSEC_PER_USEC);
> + }
> +
> + if (tb[TCA_DUALPI2_TUPDATE]) {
> + u64 tupdate = nla_get_u32(tb[TCA_DUALPI2_TUPDATE]);
> +
> + WRITE_ONCE(q->pi2_tupdate, tupdate * NSEC_PER_USEC);
> + }
> +
> + if (tb[TCA_DUALPI2_ALPHA]) {
> + u32 alpha = nla_get_u32(tb[TCA_DUALPI2_ALPHA]);
> +
> + WRITE_ONCE(q->pi2_alpha, dualpi2_scale_alpha_beta(alpha));
> + }
> +
> + if (tb[TCA_DUALPI2_BETA]) {
> + u32 beta = nla_get_u32(tb[TCA_DUALPI2_BETA]);
> +
> + WRITE_ONCE(q->pi2_beta, dualpi2_scale_alpha_beta(beta));
> + }
> +
> + if (tb[TCA_DUALPI2_STEP_THRESH]) {
> + u32 step_th = nla_get_u32(tb[TCA_DUALPI2_STEP_THRESH]);
> + bool step_pkt = nla_get_flag(tb[TCA_DUALPI2_STEP_PACKETS]);
> +
> + WRITE_ONCE(q->step_in_packets, step_pkt);
> + WRITE_ONCE(q->step_thresh,
> + step_pkt ? step_th : (step_th * NSEC_PER_USEC));
> + }
> +
> + if (tb[TCA_DUALPI2_MIN_QLEN_STEP])
> + WRITE_ONCE(q->min_qlen_step,
> + nla_get_u32(tb[TCA_DUALPI2_MIN_QLEN_STEP]));
> +
> + if (tb[TCA_DUALPI2_COUPLING]) {
> + u8 coupling = nla_get_u8(tb[TCA_DUALPI2_COUPLING]);
> +
> + WRITE_ONCE(q->coupling_factor, coupling);
> + }
> +
> + if (tb[TCA_DUALPI2_DROP_OVERLOAD]) {
> + u8 drop_overload = nla_get_u8(tb[TCA_DUALPI2_DROP_OVERLOAD]);
> +
> + WRITE_ONCE(q->drop_overload, (bool)drop_overload);
> + }
> +
> + if (tb[TCA_DUALPI2_DROP_EARLY]) {
> + u8 drop_early = nla_get_u8(tb[TCA_DUALPI2_DROP_EARLY]);
> +
> + WRITE_ONCE(q->drop_early, (bool)drop_early);
> + }
> +
> + if (tb[TCA_DUALPI2_C_PROTECTION]) {
> + u8 wc = nla_get_u8(tb[TCA_DUALPI2_C_PROTECTION]);
> +
> + dualpi2_calculate_c_protection(sch, q, wc);
> + }
> +
> + if (tb[TCA_DUALPI2_ECN_MASK]) {
> + u8 ecn_mask = nla_get_u8(tb[TCA_DUALPI2_ECN_MASK]);
> +
> + if (ecn_mask != TCA_DUALPI2_ECN_MASK_L4S_ECT &&
> + ecn_mask != TCA_DUALPI2_ECN_MASK_ANY_ECT)
> + return -EINVAL;
sch_tree_lock() is taken above but not released before this early
return, so the lock is leaked on this error path.

Flagged by Sparse and Smatch.
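
One option, completely untested, would be to bail out through a label
so the lock is also released on this path:

	if (tb[TCA_DUALPI2_ECN_MASK]) {
		u8 ecn_mask = nla_get_u8(tb[TCA_DUALPI2_ECN_MASK]);

		if (ecn_mask != TCA_DUALPI2_ECN_MASK_L4S_ECT &&
		    ecn_mask != TCA_DUALPI2_ECN_MASK_ANY_ECT) {
			err = -EINVAL;
			goto out_unlock;
		}

		WRITE_ONCE(q->ecn_mask, ecn_mask);
	}

with the existing unlock at the end of the function gaining the label
and returning err, which is 0 on the success path after
nla_parse_nested():

out_unlock:
	sch_tree_unlock(sch);
	return err;

Alternatively, validating the attribute before sch_tree_lock() is
taken would avoid the problem entirely.
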
> + WRITE_ONCE(q->ecn_mask, ecn_mask);
> + }
> +
> + if (tb[TCA_DUALPI2_SPLIT_GSO]) {
> + u8 split_gso = nla_get_u8(tb[TCA_DUALPI2_SPLIT_GSO]);
> +
> + WRITE_ONCE(q->split_gso, (bool)split_gso);
> + }
> +
> + old_qlen = qdisc_qlen(sch);
> + old_backlog = sch->qstats.backlog;
> + while (qdisc_qlen(sch) > sch->limit ||
> + q->memory_used > q->memory_limit) {
> + struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
> +
> + q->memory_used -= skb->truesize;
> + qdisc_qstats_backlog_dec(sch, skb);
> + rtnl_qdisc_drop(skb, sch);
> + }
> + qdisc_tree_reduce_backlog(sch, old_qlen - qdisc_qlen(sch),
> + old_backlog - sch->qstats.backlog);
> +
> + sch_tree_unlock(sch);
> + return 0;
> +}
...