lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Wed, 31 Jan 2018 14:53:55 +0100
From:   Björn Töpel <bjorn.topel@...il.com>
To:     bjorn.topel@...il.com, magnus.karlsson@...el.com,
        alexander.h.duyck@...el.com, alexander.duyck@...il.com,
        john.fastabend@...il.com, ast@...com, brouer@...hat.com,
        willemdebruijn.kernel@...il.com, daniel@...earbox.net,
        netdev@...r.kernel.org
Cc:     michael.lundkvist@...csson.com, jesse.brandeburg@...el.com,
        anjali.singhai@...el.com, jeffrey.b.shaw@...el.com,
        ferruh.yigit@...el.com, qi.z.zhang@...el.com
Subject: [RFC PATCH 23/24] i40e: introduced Tx completion callbacks

From: Magnus Karlsson <magnus.karlsson@...el.com>

Introduced a TX completion callback in the clean_tx_irq function.
In order to make it non-intrusive to the SKB path, the XDP path
now has its own function for cleaning up after TX completion.
The XDP cases have been removed from the SKB path, which should
make this faster.

This is in preparation for the AF_XDP zero copy mode.

Signed-off-by: Magnus Karlsson <magnus.karlsson@...el.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c |   2 +-
 drivers/net/ethernet/intel/i40e/i40e_txrx.c | 182 +++++++++++++++++++++++-----
 drivers/net/ethernet/intel/i40e/i40e_txrx.h |   6 +-
 3 files changed, 159 insertions(+), 31 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 363077c7157a..95b8942a31ae 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -10071,7 +10071,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi)
 		ring->count = vsi->num_desc;
 		ring->size = 0;
 		ring->dcb_tc = 0;
-		ring->clean_tx = i40e_clean_tx_irq;
+		ring->clean_tx = i40e_xdp_clean_tx_irq;
 		if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE)
 			ring->flags = I40E_TXR_FLAGS_WB_ON_ITR;
 		set_ring_xdp(ring);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 932b318b8147..7ab49146d15c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -783,6 +783,153 @@ void i40e_detect_recover_hung(struct i40e_vsi *vsi)
 
 #define WB_STRIDE 4
 
+static void i40e_update_stats_and_arm_wb(struct i40e_ring *tx_ring,
+					 struct i40e_vsi *vsi,
+					 unsigned int total_packets,
+					 unsigned int total_bytes,
+					 int budget)
+{
+	u64_stats_update_begin(&tx_ring->syncp);
+	tx_ring->stats.bytes += total_bytes;
+	tx_ring->stats.packets += total_packets;
+	u64_stats_update_end(&tx_ring->syncp);
+	tx_ring->q_vector->tx.total_bytes += total_bytes;
+	tx_ring->q_vector->tx.total_packets += total_packets;
+
+	if (tx_ring->flags & I40E_TXR_FLAGS_WB_ON_ITR) {
+		/* check to see if there are < 4 descriptors
+		 * waiting to be written back, then kick the hardware to force
+		 * them to be written back in case we stay in NAPI.
+		 * In this mode on X722 we do not enable Interrupt.
+		 */
+		unsigned int j = i40e_get_tx_pending(tx_ring);
+
+		if (budget &&
+		    ((j / WB_STRIDE) == 0) && j > 0 &&
+		    !test_bit(__I40E_VSI_DOWN, vsi->state) &&
+		    (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
+			tx_ring->arm_wb = true;
+	}
+}
+
+static void i40e_xdp_tx_completion(u32 start, u32 npackets,
+				   unsigned long ctx1, unsigned long ctx2)
+{
+	struct i40e_ring *tx_ring = (struct i40e_ring *)ctx1;
+	struct i40e_tx_buffer *tx_buf;
+	u32 i = 0;
+
+	(void)ctx2;
+	tx_buf = &tx_ring->tx_bi[start];
+	while (i < npackets) {
+		/* free the XDP data */
+		page_frag_free(tx_buf->raw_buf);
+
+		/* unmap skb header data */
+		dma_unmap_single(tx_ring->dev,
+				 dma_unmap_addr(tx_buf, dma),
+				 dma_unmap_len(tx_buf, len),
+				 DMA_TO_DEVICE);
+
+		/* clear tx_buffer data */
+		tx_buf->skb = NULL;
+		dma_unmap_len_set(tx_buf, len, 0);
+
+		/* Next packet */
+		tx_buf++;
+		i++;
+		if (unlikely(i + start == tx_ring->count))
+			tx_buf = tx_ring->tx_bi;
+	}
+}
+
+/**
+ * i40e_xdp_clean_tx_irq - Reclaim resources after transmit completes
+ * @vsi: the VSI we care about
+ * @tx_ring: Tx ring to clean
+ * @napi_budget: Used to determine if we are in netpoll
+ *
+ * Used for XDP packets.
+ * Returns true if there's any budget left (e.g. the clean is finished)
+ **/
+bool i40e_xdp_clean_tx_irq(struct i40e_vsi *vsi,
+			   struct i40e_ring *tx_ring, int napi_budget)
+{
+	u16 i = tx_ring->next_to_clean;
+	struct i40e_tx_buffer *tx_buf;
+	struct i40e_tx_desc *tx_head;
+	struct i40e_tx_desc *tx_desc;
+	unsigned int total_bytes = 0, total_packets = 0;
+	unsigned int budget = vsi->work_limit;
+	struct i40e_tx_buffer *prev_buf;
+	unsigned int packets_completed = 0;
+	u16 start = tx_ring->next_to_clean;
+
+	tx_buf = &tx_ring->tx_bi[i];
+	prev_buf = tx_buf;
+	tx_desc = I40E_TX_DESC(tx_ring, i);
+	i -= tx_ring->count;
+
+	tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));
+
+	do {
+		struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
+
+		/* if next_to_watch is not set then there is no work pending */
+		if (!eop_desc)
+			break;
+
+		/* prevent any other reads prior to eop_desc */
+		smp_rmb();
+
+		i40e_trace(clean_tx_irq, tx_ring, tx_desc, tx_buf);
+		/* we have caught up to head, no work left to do */
+		if (tx_head == tx_desc)
+			break;
+
+		/* clear next_to_watch to prevent false hangs */
+		tx_buf->next_to_watch = NULL;
+
+		/* update the statistics for this packet */
+		total_bytes += tx_buf->bytecount;
+		total_packets += tx_buf->gso_segs;
+
+		if (prev_buf->completion != tx_buf->completion) {
+			prev_buf->completion(start, packets_completed,
+					     prev_buf->ctx1, prev_buf->ctx2);
+			packets_completed = 0;
+			start = i + tx_ring->count - 1;
+		}
+		packets_completed++;
+
+		/* move us one more past the eop_desc for start of next pkt */
+		prev_buf = tx_buf++;
+		tx_desc++;
+		i++;
+		if (unlikely(!i)) {
+			i -= tx_ring->count;
+			tx_buf = tx_ring->tx_bi;
+			tx_desc = I40E_TX_DESC(tx_ring, 0);
+		}
+
+		prefetch(tx_desc);
+
+		/* update budget accounting */
+		budget--;
+	} while (likely(budget));
+
+	if (packets_completed > 0)
+		prev_buf->completion(start, packets_completed,
+				     prev_buf->ctx1, prev_buf->ctx2);
+
+	i += tx_ring->count;
+	tx_ring->next_to_clean = i;
+	i40e_update_stats_and_arm_wb(tx_ring, vsi, total_packets,
+				     total_bytes, budget);
+
+	return !!budget;
+}
+
 /**
  * i40e_clean_tx_irq - Reclaim resources after transmit completes
  * @vsi: the VSI we care about
@@ -829,11 +976,8 @@ bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
 		total_bytes += tx_buf->bytecount;
 		total_packets += tx_buf->gso_segs;
 
-		/* free the skb/XDP data */
-		if (ring_is_xdp(tx_ring))
-			page_frag_free(tx_buf->raw_buf);
-		else
-			napi_consume_skb(tx_buf->skb, napi_budget);
+		/* free the skb data */
+		napi_consume_skb(tx_buf->skb, napi_budget);
 
 		/* unmap skb header data */
 		dma_unmap_single(tx_ring->dev,
@@ -887,30 +1031,8 @@ bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
 
 	i += tx_ring->count;
 	tx_ring->next_to_clean = i;
-	u64_stats_update_begin(&tx_ring->syncp);
-	tx_ring->stats.bytes += total_bytes;
-	tx_ring->stats.packets += total_packets;
-	u64_stats_update_end(&tx_ring->syncp);
-	tx_ring->q_vector->tx.total_bytes += total_bytes;
-	tx_ring->q_vector->tx.total_packets += total_packets;
-
-	if (tx_ring->flags & I40E_TXR_FLAGS_WB_ON_ITR) {
-		/* check to see if there are < 4 descriptors
-		 * waiting to be written back, then kick the hardware to force
-		 * them to be written back in case we stay in NAPI.
-		 * In this mode on X722 we do not enable Interrupt.
-		 */
-		unsigned int j = i40e_get_tx_pending(tx_ring);
-
-		if (budget &&
-		    ((j / WB_STRIDE) == 0) && (j > 0) &&
-		    !test_bit(__I40E_VSI_DOWN, vsi->state) &&
-		    (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
-			tx_ring->arm_wb = true;
-	}
-
-	if (ring_is_xdp(tx_ring))
-		return !!budget;
+	i40e_update_stats_and_arm_wb(tx_ring, vsi, total_packets,
+				     total_bytes, budget);
 
 	/* notify netdev of completed buffers */
 	netdev_tx_completed_queue(txring_txq(tx_ring),
@@ -3182,6 +3304,8 @@ static int i40e_xmit_xdp_ring(struct xdp_buff *xdp,
 	tx_bi->bytecount = size;
 	tx_bi->gso_segs = 1;
 	tx_bi->raw_buf = xdp->data;
+	tx_bi->completion = i40e_xdp_tx_completion;
+	tx_bi->ctx1 = (unsigned long)xdp_ring;
 
 	/* record length, and DMA address */
 	dma_unmap_len_set(tx_bi, len, size);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index cbb1bd261e6a..6f8ce176e509 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -261,6 +261,9 @@ struct i40e_tx_buffer {
 	};
 	unsigned int bytecount;
 	unsigned short gso_segs;
+	tx_completion_func completion;
+	unsigned long ctx1;
+	unsigned long ctx2;
 
 	DEFINE_DMA_UNMAP_ADDR(dma);
 	DEFINE_DMA_UNMAP_LEN(len);
@@ -484,7 +487,8 @@ int i40e_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp);
 void i40e_xdp_flush(struct net_device *dev);
 bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
 		       struct i40e_ring *tx_ring, int napi_budget);
-
+bool i40e_xdp_clean_tx_irq(struct i40e_vsi *vsi,
+			   struct i40e_ring *tx_ring, int napi_budget);
 
 /**
  * i40e_get_head - Retrieve head from head writeback
-- 
2.14.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ