linux-kernel - Re: [PATCH net-next 04/13] net/sched: taprio: allow user input of per-tc max SDU

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <87k065iqe1.fsf@intel.com>
Date:   Wed, 14 Sep 2022 14:43:02 -0700
From:   Vinicius Costa Gomes <vinicius.gomes@...el.com>
To:     Vladimir Oltean <vladimir.oltean@....com>, netdev@...r.kernel.org
Cc:     "David S. Miller" <davem@...emloft.net>,
        Eric Dumazet <edumazet@...gle.com>,
        Jakub Kicinski <kuba@...nel.org>,
        Paolo Abeni <pabeni@...hat.com>,
        Xiaoliang Yang <xiaoliang.yang_1@....com>,
        Rui Sousa <rui.sousa@....com>,
        Claudiu Manoil <claudiu.manoil@....com>,
        Alexandre Belloni <alexandre.belloni@...tlin.com>,
        UNGLinuxDriver@...rochip.com, Andrew Lunn <andrew@...n.ch>,
        Vivien Didelot <vivien.didelot@...il.com>,
        Florian Fainelli <f.fainelli@...il.com>,
        Michael Walle <michael@...le.cc>,
        Maxim Kochetkov <fido_max@...ox.ru>,
        Colin Foster <colin.foster@...advantage.com>,
        Richie Pearn <richard.pearn@....com>,
        Kurt Kanzenbach <kurt@...utronix.de>,
        Vladimir Oltean <olteanv@...il.com>,
        Jesse Brandeburg <jesse.brandeburg@...el.com>,
        Tony Nguyen <anthony.l.nguyen@...el.com>,
        Giuseppe Cavallaro <peppe.cavallaro@...com>,
        Alexandre Torgue <alexandre.torgue@...s.st.com>,
        Jose Abreu <joabreu@...opsys.com>,
        Jamal Hadi Salim <jhs@...atatu.com>,
        Cong Wang <xiyou.wangcong@...il.com>,
        Jiri Pirko <jiri@...nulli.us>,
        Gerhard Engleder <gerhard@...leder-embedded.com>,
        Grygorii Strashko <grygorii.strashko@...com>,
        linux-kernel@...r.kernel.org
Subject: Re: [PATCH net-next 04/13] net/sched: taprio: allow user input of
 per-tc max SDU

Vladimir Oltean <vladimir.oltean@....com> writes:

> IEEE 802.1Q clause 12.29.1.1 "The queueMaxSDUTable structure and data
> types" and 8.6.8.4 "Enhancements for scheduled traffic" talk about the
> existence of a per traffic class limitation of maximum frame sizes, with
> a fallback on the port-based MTU.
>
> As far as I am able to understand, the 802.1Q Service Data Unit (SDU)
> represents the MAC Service Data Unit (MSDU, i.e. L2 payload), excluding
> any number of prepended VLAN headers which may be otherwise present in
> the MSDU. Therefore, the queueMaxSDU is directly comparable to the
> device MTU (1500 means L2 payload sizes are accepted, or frame sizes of
> 1518 octets, or 1522 plus one VLAN header). Drivers which offload this
> are directly responsible of translating into other units of measurement.
>
> Signed-off-by: Vladimir Oltean <vladimir.oltean@....com>
> ---
>  include/net/pkt_sched.h        |   1 +
>  include/uapi/linux/pkt_sched.h |  11 +++
>  net/sched/sch_taprio.c         | 122 ++++++++++++++++++++++++++++++++-
>  3 files changed, 133 insertions(+), 1 deletion(-)
>
> diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
> index 29f65632ebc5..88080998557b 100644
> --- a/include/net/pkt_sched.h
> +++ b/include/net/pkt_sched.h
> @@ -168,6 +168,7 @@ struct tc_taprio_qopt_offload {
>  	ktime_t base_time;
>  	u64 cycle_time;
>  	u64 cycle_time_extension;
> +	u32 max_sdu[TC_MAX_QUEUE];
>  
>  	size_t num_entries;
>  	struct tc_taprio_sched_entry entries[];
> diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
> index f292b467b27f..000eec106856 100644
> --- a/include/uapi/linux/pkt_sched.h
> +++ b/include/uapi/linux/pkt_sched.h
> @@ -1232,6 +1232,16 @@ enum {
>  #define TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST	_BITUL(0)
>  #define TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD	_BITUL(1)
>  
> +enum {
> +	TCA_TAPRIO_TC_ENTRY_UNSPEC,
> +	TCA_TAPRIO_TC_ENTRY_INDEX,		/* u32 */
> +	TCA_TAPRIO_TC_ENTRY_MAX_SDU,		/* u32 */
> +
> +	/* add new constants above here */
> +	__TCA_TAPRIO_TC_ENTRY_CNT,
> +	TCA_TAPRIO_TC_ENTRY_MAX = (__TCA_TAPRIO_TC_ENTRY_CNT - 1)
> +};
> +
>  enum {
>  	TCA_TAPRIO_ATTR_UNSPEC,
>  	TCA_TAPRIO_ATTR_PRIOMAP, /* struct tc_mqprio_qopt */
> @@ -1245,6 +1255,7 @@ enum {
>  	TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION, /* s64 */
>  	TCA_TAPRIO_ATTR_FLAGS, /* u32 */
>  	TCA_TAPRIO_ATTR_TXTIME_DELAY, /* u32 */
> +	TCA_TAPRIO_ATTR_TC_ENTRY, /* nest */
>  	__TCA_TAPRIO_ATTR_MAX,
>  };
>  
> diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
> index 2a4b8f59f444..834cbed88e4f 100644
> --- a/net/sched/sch_taprio.c
> +++ b/net/sched/sch_taprio.c
> @@ -79,6 +79,7 @@ struct taprio_sched {
>  	struct sched_gate_list __rcu *admin_sched;
>  	struct hrtimer advance_timer;
>  	struct list_head taprio_list;
> +	u32 max_sdu[TC_MAX_QUEUE];
>  	u32 txtime_delay;
>  };
>  
> @@ -416,6 +417,9 @@ static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch,
>  			      struct Qdisc *child, struct sk_buff **to_free)
>  {
>  	struct taprio_sched *q = qdisc_priv(sch);
> +	struct net_device *dev = qdisc_dev(sch);
> +	int prio = skb->priority;
> +	u8 tc;
>  
>  	/* sk_flags are only safe to use on full sockets. */
>  	if (skb->sk && sk_fullsock(skb->sk) && sock_flag(skb->sk, SOCK_TXTIME)) {
> @@ -427,6 +431,12 @@ static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch,
>  			return qdisc_drop(skb, sch, to_free);
>  	}
>  
> +	/* Devices with full offload are expected to honor this in hardware */
> +	tc = netdev_get_prio_tc_map(dev, prio);
> +	if (q->max_sdu[tc] &&
> +	    q->max_sdu[tc] < max_t(int, 0, skb->len - skb_mac_header_len(skb)))
> +		return qdisc_drop(skb, sch, to_free);
> +

One minor idea, perhaps if you initialize q->max_sdu[] with a value that
you could use to compare here (2^32 - 1), this comparison could be
simplified. The issue is that that value would become invalid for a
maximum SDU, not a problem for ethernet.

>  	qdisc_qstats_backlog_inc(sch, skb);
>  	sch->q.qlen++;
>  
> @@ -761,6 +771,11 @@ static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = {
>  	[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]  = { .type = NLA_U32 },
>  };
>  
> +static const struct nla_policy taprio_tc_policy[TCA_TAPRIO_TC_ENTRY_MAX + 1] = {
> +	[TCA_TAPRIO_TC_ENTRY_INDEX]	   = { .type = NLA_U32 },
> +	[TCA_TAPRIO_TC_ENTRY_MAX_SDU]	   = { .type = NLA_U32 },
> +};
> +
>  static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
>  	[TCA_TAPRIO_ATTR_PRIOMAP]	       = {
>  		.len = sizeof(struct tc_mqprio_qopt)
> @@ -773,6 +788,7 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
>  	[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 },
>  	[TCA_TAPRIO_ATTR_FLAGS]                      = { .type = NLA_U32 },
>  	[TCA_TAPRIO_ATTR_TXTIME_DELAY]		     = { .type = NLA_U32 },
> +	[TCA_TAPRIO_ATTR_TC_ENTRY]		     = { .type = NLA_NESTED },
>  };
>  
>  static int fill_sched_entry(struct taprio_sched *q, struct nlattr **tb,
> @@ -1236,7 +1252,7 @@ static int taprio_enable_offload(struct net_device *dev,
>  {
>  	const struct net_device_ops *ops = dev->netdev_ops;
>  	struct tc_taprio_qopt_offload *offload;
> -	int err = 0;
> +	int tc, err = 0;
>  
>  	if (!ops->ndo_setup_tc) {
>  		NL_SET_ERR_MSG(extack,
> @@ -1253,6 +1269,9 @@ static int taprio_enable_offload(struct net_device *dev,
>  	offload->enable = 1;
>  	taprio_sched_to_offload(dev, sched, offload);
>  
> +	for (tc = 0; tc < TC_MAX_QUEUE; tc++)
> +		offload->max_sdu[tc] = q->max_sdu[tc];
> +
>  	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload);
>  	if (err < 0) {
>  		NL_SET_ERR_MSG(extack,
> @@ -1387,6 +1406,73 @@ static int taprio_parse_clockid(struct Qdisc *sch, struct nlattr **tb,
>  	return err;
>  }
>  
> +static int taprio_parse_tc_entry(struct Qdisc *sch,
> +				 struct nlattr *opt,
> +				 unsigned long *seen_tcs,
> +				 struct netlink_ext_ack *extack)
> +{
> +	struct nlattr *tb[TCA_TAPRIO_TC_ENTRY_MAX + 1] = { };
> +	struct taprio_sched *q = qdisc_priv(sch);
> +	struct net_device *dev = qdisc_dev(sch);
> +	u32 max_sdu = 0;
> +	int err, tc;
> +
> +	err = nla_parse_nested(tb, TCA_TAPRIO_TC_ENTRY_MAX, opt,
> +			       taprio_tc_policy, extack);
> +	if (err < 0)
> +		return err;
> +
> +	if (!tb[TCA_TAPRIO_TC_ENTRY_INDEX]) {
> +		NL_SET_ERR_MSG_MOD(extack, "TC entry index missing");
> +		return -EINVAL;
> +	}
> +
> +	tc = nla_get_u32(tb[TCA_TAPRIO_TC_ENTRY_INDEX]);
> +	if (tc >= TC_QOPT_MAX_QUEUE) {
> +		NL_SET_ERR_MSG_MOD(extack, "TC entry index out of range");
> +		return -ERANGE;
> +	}
> +
> +	if (*seen_tcs & BIT(tc)) {
> +		NL_SET_ERR_MSG_MOD(extack, "Duplicate TC entry");
> +		return -EINVAL;
> +	}
> +
> +	*seen_tcs |= BIT(tc);
> +
> +	if (tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU])
> +		max_sdu = nla_get_u32(tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU]);
> +
> +	if (max_sdu > dev->max_mtu) {
> +		NL_SET_ERR_MSG_MOD(extack, "TC max SDU exceeds device max MTU");
> +		return -ERANGE;
> +	}
> +
> +	q->max_sdu[tc] = max_sdu;
> +
> +	return 0;
> +}
> +
> +static int taprio_parse_tc_entries(struct Qdisc *sch,
> +				   struct nlattr *opt,
> +				   struct netlink_ext_ack *extack)
> +{
> +	unsigned long seen_tcs = 0;
> +	struct nlattr *n;
> +	int err = 0, rem;
> +
> +	nla_for_each_nested(n, opt, rem) {
> +		if (nla_type(n) != TCA_TAPRIO_ATTR_TC_ENTRY)
> +			continue;
> +
> +		err = taprio_parse_tc_entry(sch, n, &seen_tcs, extack);
> +		if (err)
> +			break;
> +	}
> +
> +	return err;
> +}
> +
>  static int taprio_mqprio_cmp(const struct net_device *dev,
>  			     const struct tc_mqprio_qopt *mqprio)
>  {
> @@ -1465,6 +1551,10 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
>  	if (err < 0)
>  		return err;
>  
> +	err = taprio_parse_tc_entries(sch, opt, extack);
> +	if (err)
> +		return err;
> +
>  	new_admin = kzalloc(sizeof(*new_admin), GFP_KERNEL);
>  	if (!new_admin) {
>  		NL_SET_ERR_MSG(extack, "Not enough memory for a new schedule");
> @@ -1855,6 +1945,33 @@ static int dump_schedule(struct sk_buff *msg,
>  	return -1;
>  }
>  
> +static int taprio_dump_tc_entries(struct taprio_sched *q, struct sk_buff *skb)
> +{
> +	struct nlattr *n;
> +	int tc;
> +
> +	for (tc = 0; tc < TC_MAX_QUEUE; tc++) {
> +		n = nla_nest_start(skb, TCA_TAPRIO_ATTR_TC_ENTRY);
> +		if (!n)
> +			return -EMSGSIZE;
> +
> +		if (nla_put_u32(skb, TCA_TAPRIO_TC_ENTRY_INDEX, tc))
> +			goto nla_put_failure;
> +
> +		if (nla_put_u32(skb, TCA_TAPRIO_TC_ENTRY_MAX_SDU,
> +				q->max_sdu[tc]))
> +			goto nla_put_failure;
> +
> +		nla_nest_end(skb, n);
> +	}
> +
> +	return 0;
> +
> +nla_put_failure:
> +	nla_nest_cancel(skb, n);
> +	return -EMSGSIZE;
> +}
> +
>  static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
>  {
>  	struct taprio_sched *q = qdisc_priv(sch);
> @@ -1894,6 +2011,9 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
>  	    nla_put_u32(skb, TCA_TAPRIO_ATTR_TXTIME_DELAY, q->txtime_delay))
>  		goto options_error;
>  
> +	if (taprio_dump_tc_entries(q, skb))
> +		goto options_error;
> +
>  	if (oper && dump_schedule(skb, oper))
>  		goto options_error;
>  
> -- 
> 2.34.1
>

-- 
Vinicius