lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <ad4c6ff2-afd3-48ce-b55b-c9ea51168c79@kernel.org>
Date: Tue, 29 Jul 2025 17:45:27 -0600
From: David Ahern <dsahern@...nel.org>
To: chia-yu.chang@...ia-bell-labs.com, alok.a.tiwari@...cle.com,
 donald.hunter@...il.com, xandfury@...il.com, netdev@...r.kernel.org,
 dave.taht@...il.com, pabeni@...hat.com, jhs@...atatu.com, kuba@...nel.org,
 stephen@...workplumber.org, xiyou.wangcong@...il.com, jiri@...nulli.us,
 davem@...emloft.net, edumazet@...gle.com, horms@...nel.org,
 andrew+netdev@...n.ch, ast@...erby.net, liuhangbin@...il.com,
 shuah@...nel.org, linux-kselftest@...r.kernel.org, ij@...nel.org,
 ncardwell@...gle.com, koen.de_schepper@...ia-bell-labs.com,
 g.white@...lelabs.com, ingemar.s.johansson@...csson.com,
 mirja.kuehlewind@...csson.com, cheshire@...le.com, rs.ietf@....at,
 Jason_Livingood@...cast.com, vidhi_goel@...le.com
Cc: Olga Albisser <olga@...isser.org>,
 Oliver Tilmans <olivier.tilmans@...ia.com>,
 Bob Briscoe <research@...briscoe.net>, Henrik Steen <henrist@...rist.net>
Subject: Re: [PATCH v11 iproute2-next 1/1] tc: add dualpi2 scheduler module

On 7/17/25 5:23 PM, chia-yu.chang@...ia-bell-labs.com wrote:
> diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
> index 958d9407..15d1a37a 100644
> --- a/include/uapi/linux/pkt_sched.h
> +++ b/include/uapi/linux/pkt_sched.h

you can drop the uapi changes.

> diff --git a/include/utils.h b/include/utils.h
> index 9a81494d..91e6e31f 100644
> --- a/include/utils.h
> +++ b/include/utils.h
> @@ -146,6 +146,8 @@ int read_prop(const char *dev, char *prop, long *value);
>  int get_long(long *val, const char *arg, int base);
>  int get_integer(int *val, const char *arg, int base);
>  int get_unsigned(unsigned *val, const char *arg, int base);
> +int get_float(float *val, const char *arg);
> +int get_float_min_max(float *val, const char *arg, float min, float max);
>  int get_time_rtt(unsigned *val, const char *arg, int *raw);
>  #define get_byte get_u8
>  #define get_ushort get_u16
> diff --git a/ip/iplink_can.c b/ip/iplink_can.c
> index fcffa852..9f6084e6 100644
> --- a/ip/iplink_can.c
> +++ b/ip/iplink_can.c
> @@ -67,20 +67,6 @@ static void usage(void)
>  	print_usage(stderr);
>  }
>  
> -static int get_float(float *val, const char *arg)
> -{
> -	float res;
> -	char *ptr;
> -
> -	if (!arg || !*arg)
> -		return -1;
> -	res = strtof(arg, &ptr);
> -	if (!ptr || ptr == arg || *ptr)
> -		return -1;
> -	*val = res;
> -	return 0;
> -}
> -
>  static void set_ctrlmode(char *name, char *arg,
>  			 struct can_ctrlmode *cm, __u32 flags)
>  {
> diff --git a/lib/utils.c b/lib/utils.c
> index 706e93c3..dd242d4d 100644
> --- a/lib/utils.c
> +++ b/lib/utils.c
> @@ -220,6 +220,36 @@ int get_unsigned(unsigned int *val, const char *arg, int base)
>  	return 0;
>  }
>  
> +int get_float(float *val, const char *arg)
> +{
> +	float res;
> +	char *ptr;
> +
> +	if (!arg || !*arg)
> +		return -1;
> +	res = strtof(arg, &ptr);
> +	if (!ptr || ptr == arg || *ptr)
> +		return -1;
> +	*val = res;
> +	return 0;
> +}

Put the move of get_float in a standlone patch indicating it is a code
move.


> +
> +int get_float_min_max(float *val, const char *arg, float min, float max)
> +{
> +	float res;
> +	char *ptr;
> +
> +	if (!arg || !*arg)
> +		return -1;
> +	res = strtof(arg, &ptr);
> +	if (!ptr || ptr == arg || *ptr)
> +		return -1;
> +	if (res < min || res > max)
> +		return -1;
> +	*val = res;
> +	return 0;
> +}
> +
>  /*
>   * get_time_rtt is "translated" from a similar routine "get_time" in
>   * tc_util.c.  We don't use the exact same routine because tc passes

Add get_float_min_max in a standalone patch.



> diff --git a/tc/q_dualpi2.c b/tc/q_dualpi2.c
> new file mode 100644
> index 00000000..50d52aad
> --- /dev/null
> +++ b/tc/q_dualpi2.c
> @@ -0,0 +1,528 @@
> +// SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
> +/* Copyright (C) 2024 Nokia
> + *
> + * Author: Koen De Schepper <koen.de_schepper@...ia-bell-labs.com>
> + * Author: Olga Albisser <olga@...isser.org>
> + * Author: Henrik Steen <henrist@...rist.net>
> + * Author: Olivier Tilmans <olivier.tilmans@...ia.com>
> + * Author: Chia-Yu Chang <chia-yu.chang@...ia-bell-labs.com>
> + *
> + * DualPI Improved with a Square (dualpi2):
> + * - Supports congestion controls that comply with the Prague requirements
> + *   in RFC9331 (e.g. TCP-Prague)
> + * - Supports coupled dual-queue with PI2 as defined in RFC9332
> + * - Supports ECN L4S-identifier (IP.ECN==0b*1)
> + *
> + * note: Although DCTCP and BBRv3 can use shallow-threshold ECN marks,
> + *   they do not meet the 'Prague L4S Requirements' listed in RFC 9331
> + *   Section 4, so they can only be used with DualPI2 in a datacenter
> + *   context.
> + *
> + * References:
> + * - RFC9332: https://datatracker.ietf.org/doc/html/rfc9332
> + * - De Schepper, Koen, et al. "PI 2: A linearized AQM for both classic and
> + *   scalable TCP."  in proc. ACM CoNEXT'16, 2016.
> + */
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <syslog.h>
> +#include <fcntl.h>
> +#include <sys/socket.h>
> +#include <netinet/in.h>
> +#include <arpa/inet.h>
> +#include <string.h>
> +#include <math.h>
> +#include <errno.h>
> +
> +#include "utils.h"
> +#include "tc_util.h"
> +
> +#define MAX_PROB ((uint32_t)(~0U))
> +#define DEFAULT_ALPHA_BETA ((uint32_t)(~0U))
> +#define ALPHA_BETA_MAX ((2 << 23) - 1) /* see net/sched/sch_dualpi2.c */
> +#define ALPHA_BETA_SCALE (1 << 8)
> +#define RTT_TYP_TO_MAX 6
> +
> +static const char *get_credit_queue(int credit)
> +{
> +	return credit > 0 ? "C-queue" : "L-queue";
> +}
> +
> +static const char *get_ecn_type(uint8_t ect)
> +{
> +	switch (ect & TC_DUALPI2_ECN_MASK_ANY_ECT) {
> +	case TC_DUALPI2_ECN_MASK_L4S_ECT: return "l4s_ect";
> +	case TC_DUALPI2_ECN_MASK_CLA_ECT:
> +	case TC_DUALPI2_ECN_MASK_ANY_ECT: return "any_ect";
> +	default:
> +		fprintf(stderr,
> +			"Warning: Unexpected ecn type %u!\n", ect);
> +		return "";
> +	}
> +}
> +
> +static const char *get_ecn_type_json(uint8_t ect)
> +{
> +	switch (ect & TC_DUALPI2_ECN_MASK_ANY_ECT) {
> +	case TC_DUALPI2_ECN_MASK_L4S_ECT: return "l4s-ect";
> +	case TC_DUALPI2_ECN_MASK_CLA_ECT:
> +	case TC_DUALPI2_ECN_MASK_ANY_ECT: return "any-ect";
> +	default:
> +		fprintf(stderr,
> +			"Warning: Unexpected ecn type %u!\n", ect);
> +		return "";
> +	}
> +}
> +
> +static void explain(void)
> +{
> +	fprintf(stderr, "Usage: ... dualpi2\n");
> +	fprintf(stderr, "               [limit PACKETS]\n");
> +	fprintf(stderr, "               [memlimit BYTES]\n");
> +	fprintf(stderr, "               [coupling_factor NUMBER]\n");
> +	fprintf(stderr, "               [step_thresh TIME|PACKETS]\n");
> +	fprintf(stderr, "               [min_qlen_step PACKETS]\n");
> +	fprintf(stderr, "               [drop_on_overload|overflow]\n");
> +	fprintf(stderr, "               [drop_enqueue|drop_dequeue]\n");
> +	fprintf(stderr, "               [classic_protection PERCENTAGE]\n");
> +	fprintf(stderr, "               [max_rtt TIME [typical_rtt TIME]]\n");
> +	fprintf(stderr, "               [target TIME] [tupdate TIME]\n");
> +	fprintf(stderr, "               [alpha ALPHA] [beta BETA]\n");
> +	fprintf(stderr, "               [split_gso|no_split_gso]\n");
> +}
> +
> +static int get_packets(uint32_t *val, const char *arg)
> +{
> +	unsigned long res;
> +	char *ptr;
> +
> +	if (!arg || !*arg)
> +		return -1;
> +	res = strtoul(arg, &ptr, 10);
> +	if (!ptr || ptr == arg ||
> +	    !(matches(ptr, "pkts") == 0 || matches(ptr, "packets") == 0))

we are not allowing any more uses of "matches".


> +		return -1;
> +	if (res == ULONG_MAX && errno == ERANGE)
> +		return -1;
> +	if (res > 0xFFFFFFFFUL)
> +		return -1;
> +	*val = res;
> +
> +	return 0;
> +}
> +
> +static int parse_alpha_beta(const char *name, char *argv, uint32_t *field)
> +{
> +
> +	float field_f;
> +
> +	if (get_float_min_max(&field_f, argv, 0.0, ALPHA_BETA_MAX)) {
> +		fprintf(stderr, "Illegal \"%s\"\n", name);
> +		return -1;
> +	} else if (field_f < 1.0f / ALPHA_BETA_SCALE)
> +		fprintf(stderr,
> +			"Warning: \"%s\" is too small and will be rounded to zero.\n",
> +			name);
> +	*field = (uint32_t)(field_f * ALPHA_BETA_SCALE);
> +
> +	return 0;
> +}
> +
> +static int try_get_percent(int *val, const char *arg)
> +{
> +	double per;
> +
> +	if (parse_percent(&per, arg))
> +		return -1;
> +
> +	*val = rint(per * 100);
> +
> +	return 0;
> +}
> +
> +static int dualpi2_parse_opt(const struct qdisc_util *qu, int argc,
> +			     char **argv, struct nlmsghdr *n, const char *dev)
> +{
> +	uint32_t limit = 0;
> +	uint32_t memory_limit = 0;
> +	uint32_t target = 0;
> +	uint32_t tupdate = 0;
> +	uint32_t alpha = DEFAULT_ALPHA_BETA;
> +	uint32_t beta = DEFAULT_ALPHA_BETA;
> +	int32_t coupling_factor = -1;
> +	uint8_t ecn_mask = 0;
> +	int step_unit = __TCA_DUALPI2_MAX;
> +	uint32_t step_thresh = 0;
> +	uint32_t min_qlen_step =  0;
> +	bool set_min_qlen_step = false;
> +	int c_protection = -1;
> +	uint8_t drop_early = __TCA_DUALPI2_DROP_EARLY_MAX;
> +	uint8_t drop_overload = __TCA_DUALPI2_DROP_OVERLOAD_MAX;
> +	uint8_t split_gso = __TCA_DUALPI2_SPLIT_GSO_MAX;
> +	uint32_t rtt_max = 0;
> +	uint32_t rtt_typ = 0;
> +	struct rtattr *tail;

iproute2 follows kernel coding standards and netdev's preference for
reverse xmas tree listing of variables.


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ