Message-ID: <468e63af-7049-4c1e-a64d-fdbfa2b45855@redhat.com>
Date: Tue, 20 May 2025 13:48:23 +0200
From: Paolo Abeni <pabeni@...hat.com>
To: chia-yu.chang@...ia-bell-labs.com, horms@...nel.org,
donald.hunter@...il.com, xandfury@...il.com, netdev@...r.kernel.org,
dave.taht@...il.com, jhs@...atatu.com, kuba@...nel.org,
stephen@...workplumber.org, xiyou.wangcong@...il.com, jiri@...nulli.us,
davem@...emloft.net, edumazet@...gle.com, andrew+netdev@...n.ch,
ast@...erby.net, liuhangbin@...il.com, shuah@...nel.org,
linux-kselftest@...r.kernel.org, ij@...nel.org, ncardwell@...gle.com,
koen.de_schepper@...ia-bell-labs.com, g.white@...lelabs.com,
ingemar.s.johansson@...csson.com, mirja.kuehlewind@...csson.com,
cheshire@...le.com, rs.ietf@....at, Jason_Livingood@...cast.com,
vidhi_goel@...le.com
Subject: Re: [PATCH v16 net-next 1/5] sched: Struct definition and parsing of
dualpi2 qdisc
On 5/16/25 2:01 AM, chia-yu.chang@...ia-bell-labs.com wrote:
> +static u32 calculate_probability(struct Qdisc *sch)
> +{
> + struct dualpi2_sched_data *q = qdisc_priv(sch);
> + u32 new_prob;
> + u64 qdelay_c;
> + u64 qdelay_l;
> + u64 qdelay;
> + s64 delta;
> +
> + get_queue_delays(q, &qdelay_c, &qdelay_l);
> + qdelay = max(qdelay_l, qdelay_c);
> + /* Alpha and beta take at most 32b, i.e, the delay difference would
> + * overflow for queuing delay differences > ~4.2sec.
> + */
> + delta = ((s64)qdelay - q->pi2_target) * q->pi2_alpha;
> + delta += ((s64)qdelay - q->last_qdelay) * q->pi2_beta;
The above code is confusing. What do you intend to obtain with the
explicit cast? The '+' left operand will be converted implicitly to
unsigned, as per the C integer implicit conversion rules.
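FTR, a minimal sketch of the conversions at play here, assuming both
qdelay and pi2_target are u64:

	u64 qdelay = 1;
	u64 target = 2;

	/* The (s64) cast is undone by the usual arithmetic conversions:
	 * the u64 right operand converts the left one back to u64, the
	 * subtraction wraps in unsigned arithmetic, and only the final
	 * assignment to the s64 lvalue reinterprets the result as
	 * negative.
	 */
	s64 delta = (s64)qdelay - target;	/* delta == -1 */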
> + if (delta > 0) {
> + new_prob = __scale_delta(delta) + q->pi2_prob;
> + if (new_prob < q->pi2_prob)
> + new_prob = MAX_PROB;
> + } else {
> + new_prob = q->pi2_prob - __scale_delta(~delta + 1);
> + if (new_prob > q->pi2_prob)
> + new_prob = 0;
> + }
> + q->last_qdelay = qdelay;
> + /* If we do not drop on overload, ensure we cap the L4S probability to
> + * 100% to keep window fairness when overflowing.
> + */
> + if (!q->drop_overload)
> + return min_t(u32, new_prob, MAX_PROB / q->coupling_factor);
> + return new_prob;
> +}
> +
> +static u32 get_memory_limit(struct Qdisc *sch, u32 limit)
> +{
> + /* Apply rule of thumb, i.e., doubling the packet length,
> + * to further include per packet overhead in memory_limit.
> + */
> + u64 memlim = mul_u32_u32(limit, 2 * psched_mtu(qdisc_dev(sch)));
> +
> + if (upper_32_bits(memlim))
> + return 0xffffffff;
Please use U32_MAX.
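Same value, but the intent is explicit, i.e.:

	if (upper_32_bits(memlim))
		return U32_MAX;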
[...]
> +static int dualpi2_change(struct Qdisc *sch, struct nlattr *opt,
> + struct netlink_ext_ack *extack)
> +{
> + struct nlattr *tb[TCA_DUALPI2_MAX + 1];
> + struct dualpi2_sched_data *q;
> + int old_backlog;
> + int old_qlen;
> + int err;
> +
> + if (!opt)
> + return -EINVAL;
> + err = nla_parse_nested(tb, TCA_DUALPI2_MAX, opt, dualpi2_policy,
> + extack);
> + if (err < 0)
> + return err;
> +
> + q = qdisc_priv(sch);
> + sch_tree_lock(sch);
> +
> + if (tb[TCA_DUALPI2_LIMIT]) {
> + u32 limit = nla_get_u32(tb[TCA_DUALPI2_LIMIT]);
> +
> + WRITE_ONCE(sch->limit, limit);
> + WRITE_ONCE(q->memory_limit, get_memory_limit(sch, limit));
> + }
> +
> + if (tb[TCA_DUALPI2_MEMORY_LIMIT])
> + WRITE_ONCE(q->memory_limit,
> + nla_get_u32(tb[TCA_DUALPI2_MEMORY_LIMIT]));
> +
> + if (tb[TCA_DUALPI2_TARGET]) {
> + u64 target = nla_get_u32(tb[TCA_DUALPI2_TARGET]);
> +
> + WRITE_ONCE(q->pi2_target, target * NSEC_PER_USEC);
> + }
> +
> + if (tb[TCA_DUALPI2_TUPDATE]) {
> + u64 tupdate = nla_get_u32(tb[TCA_DUALPI2_TUPDATE]);
> +
> + WRITE_ONCE(q->pi2_tupdate, convert_us_to_nsec(tupdate));
> + }
> +
> + if (tb[TCA_DUALPI2_ALPHA]) {
> + u32 alpha = nla_get_u32(tb[TCA_DUALPI2_ALPHA]);
> +
> + WRITE_ONCE(q->pi2_alpha, dualpi2_scale_alpha_beta(alpha));
> + }
> +
> + if (tb[TCA_DUALPI2_BETA]) {
> + u32 beta = nla_get_u32(tb[TCA_DUALPI2_BETA]);
> +
> + WRITE_ONCE(q->pi2_beta, dualpi2_scale_alpha_beta(beta));
> + }
> +
> + if (tb[TCA_DUALPI2_STEP_THRESH]) {
> + u32 step_th = nla_get_u32(tb[TCA_DUALPI2_STEP_THRESH]);
> + bool step_pkt = nla_get_flag(tb[TCA_DUALPI2_STEP_PACKETS]);
> +
> + WRITE_ONCE(q->step_in_packets, step_pkt);
> + WRITE_ONCE(q->step_thresh,
> + step_pkt ? step_th : convert_us_to_nsec(step_th));
> + }
> +
> + if (tb[TCA_DUALPI2_MIN_QLEN_STEP])
> + WRITE_ONCE(q->min_qlen_step,
> + nla_get_u32(tb[TCA_DUALPI2_MIN_QLEN_STEP]));
> +
> + if (tb[TCA_DUALPI2_COUPLING]) {
> + u8 coupling = nla_get_u8(tb[TCA_DUALPI2_COUPLING]);
> +
> + WRITE_ONCE(q->coupling_factor, coupling);
> + }
> +
> + if (tb[TCA_DUALPI2_DROP_OVERLOAD]) {
> + u8 drop_overload = nla_get_u8(tb[TCA_DUALPI2_DROP_OVERLOAD]);
> +
> + WRITE_ONCE(q->drop_overload, (bool)drop_overload);
> + }
> +
> + if (tb[TCA_DUALPI2_DROP_EARLY]) {
> + u8 drop_early = nla_get_u8(tb[TCA_DUALPI2_DROP_EARLY]);
> +
> + WRITE_ONCE(q->drop_early, (bool)drop_early);
> + }
> +
> + if (tb[TCA_DUALPI2_C_PROTECTION]) {
> + u8 wc = nla_get_u8(tb[TCA_DUALPI2_C_PROTECTION]);
> +
> + dualpi2_calculate_c_protection(sch, q, wc);
> + }
> +
> + if (tb[TCA_DUALPI2_ECN_MASK]) {
> + u8 ecn_mask = nla_get_u8(tb[TCA_DUALPI2_ECN_MASK]);
> +
> + WRITE_ONCE(q->ecn_mask, ecn_mask);
> + }
> +
> + if (tb[TCA_DUALPI2_SPLIT_GSO]) {
> + u8 split_gso = nla_get_u8(tb[TCA_DUALPI2_SPLIT_GSO]);
> +
> + WRITE_ONCE(q->split_gso, (bool)split_gso);
> + }
> +
> + old_qlen = qdisc_qlen(sch);
> + old_backlog = sch->qstats.backlog;
> + while (qdisc_qlen(sch) > sch->limit ||
> + q->memory_used > q->memory_limit) {
> + struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
As per commit 2d3cbfd6d54a2c39ce3244f33f85c595844bd7b8, the above should be:
struct sk_buff *skb = qdisc_dequeue_internal(sch, true);
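i.e., with the rest of the reclaim loop unchanged:

	while (qdisc_qlen(sch) > sch->limit ||
	       q->memory_used > q->memory_limit) {
		struct sk_buff *skb = qdisc_dequeue_internal(sch, true);
		...
	}

so that skbs stashed in the gso_skb list are dequeued and accounted
for, too.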
/P