lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20230128010719.2182346-6-vladimir.oltean@nxp.com>
Date:   Sat, 28 Jan 2023 03:07:09 +0200
From:   Vladimir Oltean <vladimir.oltean@....com>
To:     netdev@...r.kernel.org
Cc:     Vinicius Costa Gomes <vinicius.gomes@...el.com>,
        Kurt Kanzenbach <kurt@...utronix.de>
Subject: [RFC PATCH net-next 05/15] net/sched: taprio: give higher priority to higher TCs in software dequeue mode

Currently taprio iterates over child qdiscs in increasing order of TXQ
index, therefore giving higher xmit priority to TXQ 0 and lower to TXQ N.

However, to the best of my understanding, we should prioritize based on
the traffic class, so we should really dequeue starting with the highest
traffic class and going down from there. We get to the TXQ using the
tc_to_txq[] netdev property.

TXQs within the same TC have the same (strict) priority, so we should
pick from them as fairly as we can. Implement round-robin selection among
the TXQs of each TC, very similar to q->curband from multiq_dequeue().

Something tells me Vinicius won't like the way in which this patch
interacts with TXTIME_ASSIST_IS_ENABLED(q->flags) and NICs where TXQ 0
really has higher priority than TXQ 1....

Signed-off-by: Vladimir Oltean <vladimir.oltean@....com>
---
 net/sched/sch_taprio.c | 49 +++++++++++++++++++++++++++++-------------
 1 file changed, 34 insertions(+), 15 deletions(-)

diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 30741b950b46..7dbb09b87bc5 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -78,6 +78,7 @@ struct taprio_sched {
 	struct sched_gate_list __rcu *admin_sched;
 	struct hrtimer advance_timer;
 	struct list_head taprio_list;
+	int cur_txq[TC_MAX_QUEUE];
 	u32 max_frm_len[TC_MAX_QUEUE]; /* for the fast path */
 	u32 max_sdu[TC_MAX_QUEUE]; /* for dump and offloading */
 	u32 txtime_delay;
@@ -515,13 +516,10 @@ static struct sk_buff *taprio_dequeue_from_txq(struct Qdisc *sch, int txq,
 					       u32 gate_mask)
 {
 	struct taprio_sched *q = qdisc_priv(sch);
-	struct net_device *dev = qdisc_dev(sch);
 	struct Qdisc *child = q->qdiscs[txq];
 	struct sk_buff *skb;
 	ktime_t guard;
-	int prio;
 	int len;
-	u8 tc;
 
 	if (unlikely(!child))
 		return NULL;
@@ -533,12 +531,6 @@ static struct sk_buff *taprio_dequeue_from_txq(struct Qdisc *sch, int txq,
 	if (!skb)
 		return NULL;
 
-	prio = skb->priority;
-	tc = netdev_get_prio_tc_map(dev, prio);
-
-	if (!(gate_mask & BIT(tc)))
-		return NULL;
-
 	len = qdisc_pkt_len(skb);
 	guard = ktime_add_ns(taprio_get_time(q), length_to_duration(q, len));
 
@@ -566,6 +558,16 @@ static struct sk_buff *taprio_dequeue_from_txq(struct Qdisc *sch, int txq,
 	return skb;
 }
 
+static void taprio_next_tc_txq(struct net_device *dev, int tc, int *txq)
+{
+	int offset = dev->tc_to_txq[tc].offset;
+	int count = dev->tc_to_txq[tc].count;
+
+	(*txq)++;
+	if (*txq == offset + count)
+		*txq = offset;
+}
+
 /* Will not be called in the full offload case, since the TX queues are
  * attached to the Qdisc created using qdisc_create_dflt()
  */
@@ -573,10 +575,11 @@ static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
 {
 	struct taprio_sched *q = qdisc_priv(sch);
 	struct net_device *dev = qdisc_dev(sch);
+	int num_tc = netdev_get_num_tc(dev);
 	struct sk_buff *skb = NULL;
 	struct sched_entry *entry;
 	u32 gate_mask;
-	int i;
+	int tc;
 
 	rcu_read_lock();
 	entry = rcu_dereference(q->current_entry);
@@ -590,10 +593,24 @@ static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
 	if (!gate_mask)
 		goto done;
 
-	for (i = 0; i < dev->num_tx_queues; i++) {
-		skb = taprio_dequeue_from_txq(sch, i, entry, gate_mask);
-		if (skb)
-			goto done;
+	for (tc = num_tc - 1; tc >= 0; tc--) {
+		int first_txq = q->cur_txq[tc];
+
+		if (!(gate_mask & BIT(tc)))
+			continue;
+
+		/* Select among TXQs belonging to the same TC
+		 * using round robin
+		 */
+		do {
+			skb = taprio_dequeue_from_txq(sch, q->cur_txq[tc],
+						      entry, gate_mask);
+
+			taprio_next_tc_txq(dev, tc, &q->cur_txq[tc]);
+
+			if (skb)
+				goto done;
+		} while (q->cur_txq[tc] != first_txq);
 	}
 
 done:
@@ -1588,10 +1605,12 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
 		err = netdev_set_num_tc(dev, mqprio->num_tc);
 		if (err)
 			goto free_sched;
-		for (i = 0; i < mqprio->num_tc; i++)
+		for (i = 0; i < mqprio->num_tc; i++) {
 			netdev_set_tc_queue(dev, i,
 					    mqprio->count[i],
 					    mqprio->offset[i]);
+			q->cur_txq[i] = mqprio->offset[i];
+		}
 
 		/* Always use supplied priority mappings */
 		for (i = 0; i <= TC_BITMASK; i++)
-- 
2.34.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ