Date:   Sat, 28 Jan 2023 03:07:19 +0200
From:   Vladimir Oltean <vladimir.oltean@....com>
To:     netdev@...r.kernel.org
Cc:     Vinicius Costa Gomes <vinicius.gomes@...el.com>,
        Kurt Kanzenbach <kurt@...utronix.de>
Subject: [RFC PATCH net-next 15/15] net/sched: taprio: don't segment unnecessarily

Improve commit 497cc00224cf ("taprio: Handle short intervals and large
packets") to only perform software segmentation when skb->len actually
exceeds the maximum frame length (sched->max_frm_len[tc]) that
taprio_dequeue() expects for the packet's traffic class.

In practice, this will make the biggest difference when a traffic class
gate is always open in the schedule. In that case max_frm_len is
U32_MAX, so even the large skb->len values that Kurt reported will be
sent just fine unsegmented.
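
As a side note, here is a minimal userspace sketch (not the kernel code
itself) of the enqueue decision described above; taprio_decide, skb_len,
max_frm_len and is_gso are made-up stand-ins for taprio_enqueue(),
skb->len, sched->max_frm_len[tc] and skb_is_gso(), with locking, stats
and the prio-to-tc lookup omitted:

#include <stdint.h>
#include <stdio.h>

enum enqueue_action { ENQUEUE_ONE, ENQUEUE_SEGMENTED, DROP };

static enum enqueue_action taprio_decide(uint32_t skb_len,
					 uint32_t max_frm_len, int is_gso)
{
	if (skb_len <= max_frm_len)
		return ENQUEUE_ONE;		/* fits: no segmentation needed */
	if (is_gso)
		return ENQUEUE_SEGMENTED;	/* too long, but can be split */
	return DROP;				/* too long and not GSO */
}

int main(void)
{
	/* Always-open gate: max_frm_len is U32_MAX, so even a ~64 KB GSO
	 * skb is enqueued unsegmented.
	 */
	printf("%d\n", taprio_decide(64000, UINT32_MAX, 1)); /* ENQUEUE_ONE */
	/* Finite intervals: the same skb must be segmented. */
	printf("%d\n", taprio_decide(64000, 1514, 1)); /* ENQUEUE_SEGMENTED */
	/* Oversized non-GSO skb: dropped, as before. */
	printf("%d\n", taprio_decide(3000, 1514, 0)); /* DROP */
	return 0;
}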

What I haven't figured out yet is how to make sure that the resulting
segments are themselves smaller than the maximum frame size given by
the current queueMaxSDU[tc]. For now we still need to drop the segments
which remain too large, since they could never be dequeued and the
Qdisc would hang.
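
For clarity, here is a similar userspace sketch of what the
skb_list_walk_safe() loop in taprio_enqueue_segmented() ends up doing
per segment; enqueue_segments, seg_len and max_frm_len are illustrative
names, and qdisc_drop()/taprio_enqueue_one() are reduced to a printf
and a counter:

#include <stdio.h>

/* seg_len[] stands for the segs->len of each software GSO segment,
 * max_frm_len for sched->max_frm_len[tc].
 */
static unsigned int enqueue_segments(const unsigned int *seg_len,
				     unsigned int nsegs,
				     unsigned int max_frm_len)
{
	unsigned int enqueued = 0;
	unsigned int i;

	for (i = 0; i < nsegs; i++) {
		if (seg_len[i] > max_frm_len) {
			/* qdisc_drop() in the real code */
			printf("drop segment %u: %u > %u\n",
			       i, seg_len[i], max_frm_len);
			continue;
		}
		/* taprio_enqueue_one() in the real code */
		enqueued++;
	}

	return enqueued;
}

int main(void)
{
	/* MSS-sized segments checked against a smaller queueMaxSDU-derived
	 * limit: every segment is dropped, which is the open problem
	 * described above.
	 */
	const unsigned int segs[] = { 1514, 1514, 600 };

	printf("enqueued %u of 3\n", enqueue_segments(segs, 3, 512));
	return 0;
}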

Signed-off-by: Vladimir Oltean <vladimir.oltean@....com>
---
 net/sched/sch_taprio.c | 67 ++++++++++++++++++++++++++----------------
 1 file changed, 42 insertions(+), 25 deletions(-)

diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index cc11787dc62a..e0bd613a6415 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -527,10 +527,6 @@ static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch,
 			      struct Qdisc *child, struct sk_buff **to_free)
 {
 	struct taprio_sched *q = qdisc_priv(sch);
-	struct net_device *dev = qdisc_dev(sch);
-	struct sched_gate_list *sched;
-	int prio = skb->priority;
-	u8 tc;
 
 	/* sk_flags are only safe to use on full sockets. */
 	if (skb->sk && sk_fullsock(skb->sk) && sock_flag(skb->sk, SOCK_TXTIME)) {
@@ -542,17 +538,6 @@ static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch,
 			return qdisc_drop(skb, sch, to_free);
 	}
 
-	/* Devices with full offload are expected to honor this in hardware */
-	tc = netdev_get_prio_tc_map(dev, prio);
-
-	rcu_read_lock();
-	sched = rcu_dereference(q->oper_sched);
-	if (sched && skb->len > sched->max_frm_len[tc]) {
-		rcu_read_unlock();
-		return qdisc_drop(skb, sch, to_free);
-	}
-	rcu_read_unlock();
-
 	qdisc_qstats_backlog_inc(sch, skb);
 	sch->q.qlen++;
 
@@ -565,19 +550,34 @@ static int taprio_enqueue_segmented(struct sk_buff *skb, struct Qdisc *sch,
 {
 	unsigned int slen = 0, numsegs = 0, len = qdisc_pkt_len(skb);
 	netdev_features_t features = netif_skb_features(skb);
+	struct taprio_sched *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+	struct sched_gate_list *sched;
 	struct sk_buff *segs, *nskb;
-	int ret;
+	int tc, ret;
+
+	tc = netdev_get_prio_tc_map(dev, skb->priority);
 
 	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
 	if (IS_ERR_OR_NULL(segs))
 		return qdisc_drop(skb, sch, to_free);
 
+	rcu_read_lock();
+	sched = rcu_dereference(q->oper_sched);
+
 	skb_list_walk_safe(segs, segs, nskb) {
 		skb_mark_not_on_list(segs);
 		qdisc_skb_cb(segs)->pkt_len = segs->len;
 		slen += segs->len;
 
-		ret = taprio_enqueue_one(segs, sch, child, to_free);
+		/* FIXME: we should be segmenting to a smaller size
+		 * rather than dropping these
+		 */
+		if (sched && segs->len > sched->max_frm_len[tc])
+			ret = qdisc_drop(segs, sch, to_free);
+		else
+			ret = taprio_enqueue_one(segs, sch, child, to_free);
+
 		if (ret != NET_XMIT_SUCCESS) {
 			if (net_xmit_drop_count(ret))
 				qdisc_qstats_drop(sch);
@@ -586,6 +586,8 @@ static int taprio_enqueue_segmented(struct sk_buff *skb, struct Qdisc *sch,
 		}
 	}
 
+	rcu_read_unlock();
+
 	if (numsegs > 1)
 		qdisc_tree_reduce_backlog(sch, 1 - numsegs, len - slen);
 	consume_skb(skb);
@@ -600,8 +602,11 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 			  struct sk_buff **to_free)
 {
 	struct taprio_sched *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+	struct sched_gate_list *sched;
+	int prio = skb->priority;
 	struct Qdisc *child;
-	int queue;
+	int tc, queue;
 
 	queue = skb_get_queue_mapping(skb);
 
@@ -609,13 +614,25 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	if (unlikely(!child))
 		return qdisc_drop(skb, sch, to_free);
 
-	/* Large packets might not be transmitted when the transmission duration
-	 * exceeds any configured interval. Therefore, segment the skb into
-	 * smaller chunks. Drivers with full offload are expected to handle
-	 * this in hardware.
-	 */
-	if (skb_is_gso(skb))
-		return taprio_enqueue_segmented(skb, sch, child, to_free);
+	/* Devices with full offload are expected to honor this in hardware */
+	tc = netdev_get_prio_tc_map(dev, prio);
+
+	rcu_read_lock();
+	sched = rcu_dereference(q->oper_sched);
+	if (sched && skb->len > sched->max_frm_len[tc]) {
+		rcu_read_unlock();
+		/* Large packets might not be transmitted when the transmission duration
+		 * exceeds any configured interval. Therefore, segment the skb into
+		 * smaller chunks. Drivers with full offload are expected to handle
+		 * this in hardware.
+		 */
+		if (skb_is_gso(skb))
+			return taprio_enqueue_segmented(skb, sch, child,
+							to_free);
+
+		return qdisc_drop(skb, sch, to_free);
+	}
+	rcu_read_unlock();
 
 	return taprio_enqueue_one(skb, sch, child, to_free);
 }
-- 
2.34.1
