netdev - [PATCH v2 net-next 12/15] net/sched: keep the max_frm_len information inside struct sched_gate

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20230207135440.1482856-13-vladimir.oltean@nxp.com>
Date:   Tue,  7 Feb 2023 15:54:37 +0200
From:   Vladimir Oltean <vladimir.oltean@....com>
To:     netdev@...r.kernel.org
Cc:     "David S. Miller" <davem@...emloft.net>,
        Eric Dumazet <edumazet@...gle.com>,
        Jakub Kicinski <kuba@...nel.org>,
        Paolo Abeni <pabeni@...hat.com>,
        Jamal Hadi Salim <jhs@...atatu.com>,
        Cong Wang <xiyou.wangcong@...il.com>,
        Jiri Pirko <jiri@...nulli.us>,
        Vinicius Costa Gomes <vinicius.gomes@...el.com>,
        Kurt Kanzenbach <kurt@...utronix.de>,
        Jacob Keller <jacob.e.keller@...el.com>,
        Gerhard Engleder <gerhard@...leder-embedded.com>,
        Jesse Brandeburg <jesse.brandeburg@...el.com>,
        Tony Nguyen <anthony.l.nguyen@...el.com>,
        intel-wired-lan@...ts.osuosl.org, linux-kernel@...r.kernel.org
Subject: [PATCH v2 net-next 12/15] net/sched: keep the max_frm_len information inside struct sched_gate_list

I have one practical reason for doing this and one concerning correctness.

The practical reason has to do with a follow-up patch, which aims to mix
2 sources of max_sdu (one coming from the user and the other automatically
calculated based on TC gate durations @current link speed). Among those
2 sources of input, we must always select the smaller max_sdu value, but
this can change at various link speeds. So the max_sdu coming from the
user must be kept separated from the value that is operationally used
(the minimum of the 2), because otherwise we overwrite it and forget
what the user asked us to do.

To solve that, this patch proposes that struct sched_gate_list contains
the operationally active max_frm_len, and q->max_sdu contains just what
was requested by the user.

The reason having to do with correctness is based on the following
observation: the admin sched_gate_list becomes operational at a given
base_time in the future. Until then, it is inactive and applies no
shaping, all gates are open, etc. So the queueMaxSDU dropping shouldn't
apply either (this is a mechanism to ensure that packets smaller than
the largest gate duration for that TC don't hang the port; clearly it
makes little sense if the gates are always open).

Signed-off-by: Vladimir Oltean <vladimir.oltean@....com>
Reviewed-by: Kurt Kanzenbach <kurt@...utronix.de>
---
v1->v2: create a taprio_skb_exceeds_queue_max_sdu() helper function

 net/sched/sch_taprio.c | 53 +++++++++++++++++++++++++++++++-----------
 1 file changed, 40 insertions(+), 13 deletions(-)

diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 7553bc82cf6f..d3e3be543fae 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -63,6 +63,7 @@ struct sched_gate_list {
 	 * or 0 if a traffic class gate never opens during the schedule.
 	 */
 	u64 max_open_gate_duration[TC_MAX_QUEUE];
+	u32 max_frm_len[TC_MAX_QUEUE]; /* for the fast path */
 	struct rcu_head rcu;
 	struct list_head entries;
 	size_t num_entries;
@@ -93,7 +94,6 @@ struct taprio_sched {
 	struct hrtimer advance_timer;
 	struct list_head taprio_list;
 	int cur_txq[TC_MAX_QUEUE];
-	u32 max_frm_len[TC_MAX_QUEUE]; /* for the fast path */
 	u32 max_sdu[TC_MAX_QUEUE]; /* for dump and offloading */
 	u32 txtime_delay;
 };
@@ -246,6 +246,21 @@ static int length_to_duration(struct taprio_sched *q, int len)
 	return div_u64(len * atomic64_read(&q->picos_per_byte), PSEC_PER_NSEC);
 }
 
+static void taprio_update_queue_max_sdu(struct taprio_sched *q,
+					struct sched_gate_list *sched)
+{
+	struct net_device *dev = qdisc_dev(q->root);
+	int num_tc = netdev_get_num_tc(dev);
+	int tc;
+
+	for (tc = 0; tc < num_tc; tc++) {
+		if (q->max_sdu[tc])
+			sched->max_frm_len[tc] = q->max_sdu[tc] + dev->hard_header_len;
+		else
+			sched->max_frm_len[tc] = U32_MAX; /* never oversized */
+	}
+}
+
 /* Returns the entry corresponding to next available interval. If
  * validate_interval is set, it only validates whether the timestamp occurs
  * when the gate corresponding to the skb's traffic class is open.
@@ -479,14 +494,33 @@ static long get_packet_txtime(struct sk_buff *skb, struct Qdisc *sch)
 	return txtime;
 }
 
-static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch,
-			      struct Qdisc *child, struct sk_buff **to_free)
+/* Devices with full offload are expected to honor this in hardware */
+static bool taprio_skb_exceeds_queue_max_sdu(struct Qdisc *sch,
+					     struct sk_buff *skb)
 {
 	struct taprio_sched *q = qdisc_priv(sch);
 	struct net_device *dev = qdisc_dev(sch);
+	struct sched_gate_list *sched;
 	int prio = skb->priority;
+	bool exceeds = false;
 	u8 tc;
 
+	tc = netdev_get_prio_tc_map(dev, prio);
+
+	rcu_read_lock();
+	sched = rcu_dereference(q->oper_sched);
+	if (sched && skb->len > sched->max_frm_len[tc])
+		exceeds = true;
+	rcu_read_unlock();
+
+	return exceeds;
+}
+
+static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch,
+			      struct Qdisc *child, struct sk_buff **to_free)
+{
+	struct taprio_sched *q = qdisc_priv(sch);
+
 	/* sk_flags are only safe to use on full sockets. */
 	if (skb->sk && sk_fullsock(skb->sk) && sock_flag(skb->sk, SOCK_TXTIME)) {
 		if (!is_valid_interval(skb, sch))
@@ -497,9 +531,7 @@ static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch,
 			return qdisc_drop(skb, sch, to_free);
 	}
 
-	/* Devices with full offload are expected to honor this in hardware */
-	tc = netdev_get_prio_tc_map(dev, prio);
-	if (skb->len > q->max_frm_len[tc])
+	if (taprio_skb_exceeds_queue_max_sdu(sch, skb))
 		return qdisc_drop(skb, sch, to_free);
 
 	qdisc_qstats_backlog_inc(sch, skb);
@@ -1609,7 +1641,6 @@ static int taprio_parse_tc_entries(struct Qdisc *sch,
 				   struct netlink_ext_ack *extack)
 {
 	struct taprio_sched *q = qdisc_priv(sch);
-	struct net_device *dev = qdisc_dev(sch);
 	u32 max_sdu[TC_QOPT_MAX_QUEUE];
 	unsigned long seen_tcs = 0;
 	struct nlattr *n;
@@ -1628,13 +1659,8 @@ static int taprio_parse_tc_entries(struct Qdisc *sch,
 			goto out;
 	}
 
-	for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) {
+	for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++)
 		q->max_sdu[tc] = max_sdu[tc];
-		if (max_sdu[tc])
-			q->max_frm_len[tc] = max_sdu[tc] + dev->hard_header_len;
-		else
-			q->max_frm_len[tc] = U32_MAX; /* never oversized */
-	}
 
 out:
 	return err;
@@ -1758,6 +1784,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
 		goto free_sched;
 
 	taprio_set_picos_per_byte(dev, q);
+	taprio_update_queue_max_sdu(q, new_admin);
 
 	if (mqprio) {
 		err = netdev_set_num_tc(dev, mqprio->num_tc);
-- 
2.34.1