Message-Id: <20080622.161658.115278698.davem@davemloft.net>
Date:	Sun, 22 Jun 2008 16:16:58 -0700 (PDT)
From:	David Miller <davem@...emloft.net>
To:	netdev@...r.kernel.org
CC:	vinay@...ux.vnet.ibm.com, krkumar2@...ibm.com,
	matheos.worku@....com
Subject: [PATCH 2/2]: niu: Manage TX backlog in-driver.


niu: Manage TX backlog in-driver.

We no longer stop and wake the generic device queue.
Instead we manage the backlog inside the driver, so the
mid-layer always sees a device that can accept packets
for transmission.

Signed-off-by: David S. Miller <davem@...emloft.net>
---
 drivers/net/niu.c |  116 +++++++++++++++++++++++++++++++++++++++-------------
 drivers/net/niu.h |   38 ++++++++++++++++-
 2 files changed, 123 insertions(+), 31 deletions(-)
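
A note for reviewers, not for the changelog: below is a small, self-contained
userspace model of the transmit-side policy this patch introduces.  The fast
path is refused whenever the backlog is non-empty (to preserve packet
ordering) or the ring lacks space; in either case the packet goes onto the
driver-private backlog and is flushed later from the completion path.  The
niu_* behaviour it mimics comes from the patch; the model_ring type, the
sizes, and the simulation in main() are invented for illustration and are
not driver code.

/* build: cc -std=c11 -o txmodel txmodel.c */
#include <stdio.h>
#include <stdbool.h>

#define RING_SIZE	16			/* stands in for rp->pending */
#define BACKLOG_MAX	32			/* stands in for dev->tx_queue_len */
#define WAKEUP_THRESH	(RING_SIZE / 4)		/* NIU_TX_WAKEUP_THRESH() */

struct model_ring {
	int prod;				/* producer index */
	int cons;				/* consumer index */
	int backlog_len;			/* skb_queue_len(&tx_backlog) */
};

static int tx_avail(const struct model_ring *r)
{
	return RING_SIZE - ((r->prod - r->cons) & (RING_SIZE - 1));
}

/* Like niu_tx_has_space(): refuse the fast path while a backlog exists,
 * so earlier-queued packets can never be overtaken.
 */
static bool tx_has_space(const struct model_ring *r, int chunks)
{
	if (r->backlog_len > 0)
		return false;
	return tx_avail(r) > chunks;
}

/* Like the new niu_start_xmit(): always accept the packet, either onto
 * the ring or onto the backlog; never report BUSY to the mid-layer.
 */
static void start_xmit(struct model_ring *r, int chunks)
{
	if (!tx_has_space(r, chunks)) {
		if (r->backlog_len < BACKLOG_MAX)
			r->backlog_len++;	/* __skb_queue_tail() */
		else
			printf("drop\n");	/* dev_kfree_skb() */
	} else {
		r->prod = (r->prod + chunks) & (RING_SIZE - 1);
	}
}

/* Like niu_tx_work() + __niu_xmit_backlog(): reclaim completed slots,
 * then drain the backlog while ample space remains (a simplification
 * of the threshold test plus the per-skb space test in the patch).
 */
static void tx_work(struct model_ring *r, int completed)
{
	r->cons = (r->cons + completed) & (RING_SIZE - 1);
	while (r->backlog_len > 0 && tx_avail(r) > WAKEUP_THRESH) {
		r->backlog_len--;
		r->prod = (r->prod + 1) & (RING_SIZE - 1);
	}
}

int main(void)
{
	struct model_ring r = { 0, 0, 0 };
	int i;

	for (i = 0; i < 24; i++)		/* overload the ring */
		start_xmit(&r, 1);
	printf("after xmit burst:  avail=%d backlog=%d\n",
	       tx_avail(&r), r.backlog_len);

	tx_work(&r, 12);			/* completion interrupt fires */
	printf("after completion:  avail=%d backlog=%d\n",
	       tx_avail(&r), r.backlog_len);
	return 0;
}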

diff --git a/drivers/net/niu.c b/drivers/net/niu.c
index 8486da4..d5c79c4 100644
--- a/drivers/net/niu.c
+++ b/drivers/net/niu.c
@@ -3232,6 +3232,45 @@ static int release_tx_packet(struct niu *np, struct tx_ring_info *rp, int idx)
 	return idx;
 }
 
+static void __niu_tx_hit(struct niu *np, struct tx_ring_info *rp, int n)
+{
+	if (unlikely(n == rp->prod))
+		return;
+
+	if (n < rp->prod)
+		rp->wrap_bit ^= TX_RING_KICK_WRAP;
+	rp->prod = n;
+
+	nw64(TX_RING_KICK(rp->tx_channel), rp->wrap_bit | (n << 3));
+
+	np->dev->trans_start = jiffies;
+}
+
+static int __niu_xmit_one(struct niu *np, struct tx_ring_info *rp,
+			  struct net_device *dev, struct sk_buff *skb,
+			  int prod_entry);
+
+/* Queue as many backlogged TX packets as possible.  Invoked
+ * under tx_lock.
+ */
+static void __niu_xmit_backlog(struct niu *np, struct tx_ring_info *rp)
+{
+	struct sk_buff *skb;
+	int prod;
+
+	if (unlikely(skb_queue_empty(&rp->tx_backlog)))
+		return;
+
+	prod = rp->prod;
+	while ((skb = skb_peek(&rp->tx_backlog)) != NULL) {
+		if (!__niu_tx_has_space(rp, prod, skb))
+			break;
+		__skb_unlink(skb, &rp->tx_backlog);
+		prod = __niu_xmit_one(np, rp, np->dev, skb, prod);
+	}
+	__niu_tx_hit(np, rp, prod);
+}
+
 #define NIU_TX_WAKEUP_THRESH(rp)		((rp)->pending / 4)
 
 static void niu_tx_work(struct niu *np, struct tx_ring_info *rp)
@@ -3262,12 +3301,14 @@ static void niu_tx_work(struct niu *np, struct tx_ring_info *rp)
 	smp_mb();
 
 out:
-	if (unlikely(netif_queue_stopped(np->dev) &&
-		     (niu_tx_avail(rp) > NIU_TX_WAKEUP_THRESH(rp)))) {
+	/* Since this code path is running lockless, some care is needed
+	 * in order to prevent deadlocking the backlog queue.  See the
+	 * commentary in __niu_tx_queue_backlog() for details.
+	 */
+	if (!skb_queue_empty(&rp->tx_backlog) &&
+	    (niu_tx_avail(rp) > NIU_TX_WAKEUP_THRESH(rp))) {
 		netif_tx_lock(np->dev);
-		if (netif_queue_stopped(np->dev) &&
-		    (niu_tx_avail(rp) > NIU_TX_WAKEUP_THRESH(rp)))
-			netif_wake_queue(np->dev);
+		__niu_xmit_backlog(np, rp);
 		netif_tx_unlock(np->dev);
 	}
 }
@@ -3870,6 +3911,7 @@ static void niu_free_tx_ring_info(struct niu *np, struct tx_ring_info *rp)
 		rp->mbox = NULL;
 	}
 	if (rp->descr) {
+		struct sk_buff *skb;
 		int i;
 
 		for (i = 0; i < MAX_TX_RING_SIZE; i++) {
@@ -3885,6 +3927,9 @@ static void niu_free_tx_ring_info(struct niu *np, struct tx_ring_info *rp)
 		rp->prod = 0;
 		rp->cons = 0;
 		rp->wrap_bit = 0;
+
+		while ((skb = __skb_dequeue(&rp->tx_backlog)) != NULL)
+			dev_kfree_skb(skb);
 	}
 }
 
@@ -4010,6 +4055,8 @@ static int niu_alloc_tx_ring_info(struct niu *np,
 	rp->cons = 0;
 	rp->wrap_bit = 0;
 
+	skb_queue_head_init(&rp->tx_backlog);
+
 	/* XXX make these configurable... XXX */
 	rp->mark_freq = rp->pending / 4;
 
@@ -6210,37 +6257,48 @@ out_err:
 
 }
 
+static void __niu_tx_queue_backlog(struct niu *np, struct tx_ring_info *rp,
+				   struct sk_buff *skb)
+{
+	if (skb_queue_len(&rp->tx_backlog) < np->dev->tx_queue_len)
+		__skb_queue_tail(&rp->tx_backlog, skb);
+	else
+		dev_kfree_skb(skb);
+
+	smp_mb();
+
+	/* This is a deadlock breaker.  niu_tx_work() updates the consumer
+	 * index, then checks the tx_backlog for emptiness.  It also
+	 * mitigates work by flushing the backlog only when at least a
+	 * certain percentage of ring space is available.  Those tests
+	 * in niu_tx_work() run lockless.
+	 *
+	 * Here, we perform the two primary memory operations in the
+	 * reverse order.  This guarantees that at least one of the
+	 * two code paths will process the backlog, whatever the
+	 * order of their relative execution might be.
+	 *
+	 * In short:
+	 *
+	 * niu_tx_work() --> rp->cons = foo; test skb_queue_empty()
+	 * niu_start_xmit() --> __skb_queue_tail(); test rp->cons
+	 */
+	if (niu_tx_avail(rp) > NIU_TX_WAKEUP_THRESH(rp))
+		__niu_xmit_backlog(np, rp);
+}
+
 static int niu_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct niu *np = netdev_priv(dev);
 	struct tx_ring_info *rp;
-	int prod;
 
 	rp = tx_ring_select(np, skb);
 
-	if (niu_tx_avail(rp) <= (skb_shinfo(skb)->nr_frags + 1)) {
-		netif_stop_queue(dev);
-		dev_err(np->device, PFX "%s: BUG! Tx ring full when "
-			"queue awake!\n", dev->name);
-		rp->tx_errors++;
-		return NETDEV_TX_BUSY;
-	}
-
-	prod = __niu_xmit_one(np, rp, dev, skb, rp->prod);
-	if (prod != rp->prod) {
-		if (prod < rp->prod)
-			rp->wrap_bit ^= TX_RING_KICK_WRAP;
-		rp->prod = prod;
-
-		nw64(TX_RING_KICK(rp->tx_channel),
-		     rp->wrap_bit | (prod << 3));
-
-		if (unlikely(niu_tx_avail(rp) <= (MAX_SKB_FRAGS + 1))) {
-			netif_stop_queue(dev);
-			if (niu_tx_avail(rp) > NIU_TX_WAKEUP_THRESH(rp))
-				netif_wake_queue(dev);
-		}
-		dev->trans_start = jiffies;
+	if (unlikely(!niu_tx_has_space(rp, skb))) {
+		__niu_tx_queue_backlog(np, rp, skb);
+	} else {
+		__niu_tx_hit(np, rp,
+			     __niu_xmit_one(np, rp, dev, skb, rp->prod));
 	}
 
 	return NETDEV_TX_OK;
diff --git a/drivers/net/niu.h b/drivers/net/niu.h
index c6fa883..f7d6ffc 100644
--- a/drivers/net/niu.h
+++ b/drivers/net/niu.h
@@ -2841,6 +2841,7 @@ struct tx_ring_info {
 	int			prod;
 	int			cons;
 	int			wrap_bit;
+	struct sk_buff_head	tx_backlog;
 	u16			last_pkt_cnt;
 	u16			tx_channel;
 	u16			mark_counter;
@@ -2862,10 +2863,43 @@ struct tx_ring_info {
 #define NEXT_TX(tp, index) \
 	(((index) + 1) < (tp)->pending ? ((index) + 1) : 0)
 
-static inline u32 niu_tx_avail(struct tx_ring_info *tp)
+static inline u32 __niu_tx_avail(struct tx_ring_info *tp, int prod)
 {
 	return (tp->pending -
-		((tp->prod - tp->cons) & (MAX_TX_RING_SIZE - 1)));
+		((prod - tp->cons) & (MAX_TX_RING_SIZE - 1)));
+}
+
+static inline u32 niu_tx_avail(struct tx_ring_info *tp)
+{
+	return __niu_tx_avail(tp, tp->prod);
+}
+
+/* Return true if the TX ring has enough space to queue SKB.  */
+static bool __niu_tx_has_space(struct tx_ring_info *tp, int prod,
+			       struct sk_buff *skb)
+{
+	int chunks = skb_shinfo(skb)->nr_frags + 1;
+	if (__niu_tx_avail(tp, prod) <= chunks)
+		return false;
+	return true;
+}
+
+static bool niu_tx_has_space(struct tx_ring_info *tp, struct sk_buff *skb)
+{
+	/* If the backlog has any packets, indicate no space.  We want
+	 * to queue in this case because a TX completion interrupt is
+	 * pending and will run the backlog, and therefore pushing the
+	 * packet straight out now would introduce packet
+	 * reordering.
+	 *
+	 * It is important that this check is made here, and not in
+	 * __niu_tx_has_space(), otherwise the TX backlog processing
+	 * would never make any progress.
+	 */
+	if (!skb_queue_empty(&tp->tx_backlog))
+		return false;
+
+	return __niu_tx_has_space(tp, tp->prod, skb);
 }
 
 struct rxdma_mailbox {
-- 
1.5.6
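
One more reviewer note on the memory-ordering comment in
__niu_tx_queue_backlog(): the argument can be exercised with the tiny
userspace model below.  Each path stores its side of the state, issues a
full barrier, then loads the other side's state, so at least one of the
two is guaranteed to observe the other's update and flush the backlog.
C11 atomics and a seq_cst fence stand in for the kernel's smp_mb(); the
names are illustrative, and this is a sketch rather than driver code.

/* build: cc -std=c11 -pthread -o sbmodel sbmodel.c */
#include <stdio.h>
#include <stdatomic.h>
#include <pthread.h>

static atomic_int backlog_nonempty;	/* written by the xmit path */
static atomic_int space_available;	/* written by the completion path */
static atomic_int flush_runs;		/* paths that decided to flush */

/* Models __niu_tx_queue_backlog(): queue first, barrier, then test space. */
static void *xmit_path(void *arg)
{
	(void)arg;
	atomic_store_explicit(&backlog_nonempty, 1,
			      memory_order_relaxed);	/* __skb_queue_tail() */
	atomic_thread_fence(memory_order_seq_cst);	/* smp_mb() */
	if (atomic_load_explicit(&space_available,
				 memory_order_relaxed))	/* niu_tx_avail() test */
		atomic_fetch_add(&flush_runs, 1);	/* __niu_xmit_backlog() */
	return NULL;
}

/* Models niu_tx_work(): free ring space first, barrier, then test backlog. */
static void *completion_path(void *arg)
{
	(void)arg;
	atomic_store_explicit(&space_available, 1,
			      memory_order_relaxed);	/* rp->cons update */
	atomic_thread_fence(memory_order_seq_cst);	/* smp_mb() */
	if (atomic_load_explicit(&backlog_nonempty,
				 memory_order_relaxed))	/* skb_queue_empty() test */
		atomic_fetch_add(&flush_runs, 1);	/* __niu_xmit_backlog() */
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, xmit_path, NULL);
	pthread_create(&b, NULL, completion_path, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);

	/* With both barriers in place this is always >= 1.  Without them
	 * the classic store-buffering outcome (each path missing the
	 * other's update) could strand a packet on the backlog.
	 */
	printf("paths that flushed the backlog: %d\n",
	       atomic_load(&flush_runs));
	return 0;
}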

