lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1397170682-19138-4-git-send-email-ezequiel.garcia@free-electrons.com>
Date:	Thu, 10 Apr 2014 19:58:02 -0300
From:	Ezequiel Garcia <ezequiel.garcia@...e-electrons.com>
To:	<netdev@...r.kernel.org>
Cc:	"David S. Miller" <davem@...emloft.net>,
	Eric Dumazet <eric.dumazet@...il.com>,
	Thomas Petazzoni <thomas.petazzoni@...e-electrons.com>,
	Gregory Clement <gregory.clement@...e-electrons.com>,
	Simon Guinot <simon.guinot@...uanux.org>,
	Willy Tarreau <w@....eu>, Tawfik Bayouk <tawfik@...vell.com>,
	Lior Amsalem <alior@...vell.com>,
	Ezequiel Garcia <ezequiel.garcia@...e-electrons.com>,
	Simon Guinot <sguinot@...ie.com>
Subject: [PATCH 3/3] net: mvneta: Introduce a software TSO implementation

This commit implements a software TSO which reduces the CPU
usage significantly while retaining the outbound throughput
at line rate.

Tested on a Plat'home Openblocks AX/3 board acting as iperf client (tx).
The tests show a substantial CPU usage drop, between 15%-25%.

Other tests performed by Willy Tarreau show performance improvements:
Willy reported that "[..] turning the TSO flag on immediately increases the
HTTP request rate from 1680 to 1820 per second (30 kB objects)".

Tested-by: Willy Tarreau <w@....eu>
Signed-off-by: Simon Guinot <sguinot@...ie.com>
Signed-off-by: Ezequiel Garcia <ezequiel.garcia@...e-electrons.com>
---
 drivers/net/ethernet/marvell/mvneta.c | 207 +++++++++++++++++++++++++++++++++-
 1 file changed, 206 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index e5bd3ca..cd6b998 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -244,6 +244,9 @@
 
 #define MVNETA_TX_MTU_MAX		0x3ffff
 
+/* TSO header size */
+#define TSO_HEADER_SIZE 128
+
 /* Max number of Rx descriptors */
 #define MVNETA_MAX_RXD 128
 
@@ -413,6 +416,12 @@ struct mvneta_tx_queue {
 
 	/* Index of the next TX DMA descriptor to process */
 	int next_desc_to_proc;
+
+	/* DMA buffers for TSO headers */
+	char *tso_hdrs;
+
+	/* DMA address of TSO headers */
+	dma_addr_t tso_hdrs_phys;
 };
 
 struct mvneta_rx_queue {
@@ -1519,6 +1528,181 @@ static int mvneta_rx(struct mvneta_port *pp, int rx_todo,
 	return rx_done;
 }
 
+static inline void
+mvneta_tso_build_hdr(struct net_device *dev, struct mvneta_tx_queue *txq,
+		     struct sk_buff *skb, int hdr_len, int size,
+		     u32 tcp_seq, u16 ip_id, bool is_last)
+{
+	struct mvneta_port *pp = netdev_priv(dev);
+	struct mvneta_tx_desc *tx_desc;
+	struct iphdr *iph;
+	struct tcphdr *tcph;
+	char *mac;
+	int mac_hdr_len = skb_network_offset(skb);
+
+	mac = txq->tso_hdrs + txq->txq_put_index * TSO_HEADER_SIZE;
+	memcpy(mac, skb->data, hdr_len);
+
+	iph = (struct iphdr *)(mac + mac_hdr_len);
+	iph->id = htons(ip_id);
+	iph->tot_len = htons(size + hdr_len - mac_hdr_len);
+
+	tcph = (struct tcphdr *)(mac + skb_transport_offset(skb));
+	tcph->seq = htonl(tcp_seq);
+
+	if (!is_last) {
+		/* Clear all special flags for not last packet */
+		tcph->psh = 0;
+		tcph->fin = 0;
+		tcph->rst = 0;
+	}
+
+	txq->tx_skb[txq->txq_put_index] = NULL;
+	tx_desc = mvneta_txq_next_desc_get(txq);
+	tx_desc->data_size = hdr_len;
+	tx_desc->command = mvneta_skb_tx_csum(pp, skb);
+	tx_desc->command |= MVNETA_TXD_F_DESC;
+	tx_desc->buf_phys_addr = txq->tso_hdrs_phys +
+				 txq->txq_put_index * TSO_HEADER_SIZE;
+	mvneta_txq_inc_put(txq);
+}
+
+static inline int
+mvneta_tso_build_data(struct net_device *dev, struct mvneta_tx_queue *txq,
+		      struct sk_buff *skb, char *frag_ptr, int frag_size,
+		      int data_left, bool is_last)
+{
+	int size;
+	struct mvneta_tx_desc *tx_desc;
+
+	size = (frag_size < data_left) ? frag_size : data_left;
+
+	tx_desc = mvneta_txq_next_desc_get(txq);
+	tx_desc->data_size = size;
+	tx_desc->buf_phys_addr = dma_map_single(dev->dev.parent, frag_ptr,
+						size, DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(dev->dev.parent,
+		     tx_desc->buf_phys_addr))) {
+		mvneta_txq_desc_put(txq);
+		return 0;
+	}
+
+	tx_desc->command = 0;
+	txq->tx_skb[txq->txq_put_index] = NULL;
+
+	if (size == data_left) {
+		/* last descriptor in the TCP packet */
+		tx_desc->command = MVNETA_TXD_L_DESC;
+
+		/* last descriptor in SKB */
+		if (is_last)
+			txq->tx_skb[txq->txq_put_index] = skb;
+	}
+	mvneta_txq_inc_put(txq);
+	return size;
+}
+
+static int mvneta_tx_tso(struct sk_buff *skb, struct net_device *dev,
+			 struct mvneta_tx_queue *txq)
+{
+	int total_len, hdr_len, size, frag_size, data_left;
+	int desc_count;
+	u16 ip_id;
+	u32 tcp_seq;
+	skb_frag_t *frag;
+	int frag_idx = 0;
+	char *frag_ptr;
+	const struct tcphdr *th = tcp_hdr(skb);
+	struct mvneta_port *pp = netdev_priv(dev);
+	int i;
+
+	/* Calculate expected number of TX descriptors */
+	desc_count = skb_shinfo(skb)->gso_segs * 2 + skb_shinfo(skb)->nr_frags;
+	if ((txq->count + desc_count) >= txq->size)
+		return 0;
+
+	total_len = skb->len;
+	hdr_len = (skb_transport_offset(skb) + tcp_hdrlen(skb));
+
+	total_len -= hdr_len;
+	ip_id = ntohs(ip_hdr(skb)->id);
+	tcp_seq = ntohl(th->seq);
+
+	frag_size = skb_headlen(skb);
+	frag_ptr = skb->data;
+
+	if (frag_size < hdr_len)
+		return 0;
+
+	frag_size -= hdr_len;
+	frag_ptr += hdr_len;
+	if (frag_size == 0) {
+		frag = &skb_shinfo(skb)->frags[frag_idx];
+
+		/* Move to next segment */
+		frag_size = frag->size;
+		frag_ptr = page_address(frag->page.p) + frag->page_offset;
+		frag_idx++;
+	}
+	desc_count = 0;
+
+	while (total_len > 0) {
+		data_left = (skb_shinfo(skb)->gso_size < total_len) ?
+				skb_shinfo(skb)->gso_size : total_len;
+		desc_count++;
+		total_len -= data_left;
+
+		/* prepare packet headers: MAC + IP + TCP */
+		mvneta_tso_build_hdr(dev, txq, skb, hdr_len, data_left,
+				     tcp_seq, ip_id, total_len == 0);
+		ip_id++;
+
+		while (data_left > 0) {
+			desc_count++;
+
+			size = mvneta_tso_build_data(dev, txq, skb,
+						     frag_ptr, frag_size,
+						     data_left, total_len == 0);
+			if (size == 0)
+				goto err_release;
+
+			data_left -= size;
+			tcp_seq += size;
+
+			frag_size -= size;
+			frag_ptr += size;
+
+			if ((frag_size == 0) &&
+			    (frag_idx < skb_shinfo(skb)->nr_frags)) {
+				frag = &skb_shinfo(skb)->frags[frag_idx];
+
+				/* Move to next segment */
+				frag_size = frag->size;
+				frag_ptr = page_address(frag->page.p) +
+					   frag->page_offset;
+				frag_idx++;
+			}
+		}
+	}
+
+	return desc_count;
+
+err_release:
+	/* Release all used data descriptors; header descriptors must not
+	 * be DMA-unmapped.
+	 */
+	for (i = desc_count - 1; i >= 0; i--) {
+		struct mvneta_tx_desc *tx_desc = txq->descs + i;
+		if (!(tx_desc->command & MVNETA_TXD_F_DESC))
+			dma_unmap_single(pp->dev->dev.parent,
+					 tx_desc->buf_phys_addr,
+					 tx_desc->data_size,
+					 DMA_TO_DEVICE);
+		mvneta_txq_desc_put(txq);
+	}
+	return 0;
+}
+
 /* Handle tx fragmentation processing */
 static int mvneta_tx_frag_process(struct mvneta_port *pp, struct sk_buff *skb,
 				  struct mvneta_tx_queue *txq)
@@ -1590,6 +1774,11 @@ static int mvneta_tx(struct sk_buff *skb, struct net_device *dev)
 	if (!netif_running(dev))
 		goto out;
 
+	if (skb_is_gso(skb)) {
+		frags = mvneta_tx_tso(skb, dev, txq);
+		goto out;
+	}
+
 	frags = skb_shinfo(skb)->nr_frags + 1;
 
 	/* Get a descriptor for the first part of the packet */
@@ -2108,6 +2297,18 @@ static int mvneta_txq_init(struct mvneta_port *pp,
 				  txq->descs, txq->descs_phys);
 		return -ENOMEM;
 	}
+
+	/* Allocate DMA buffers for TSO MAC/IP/TCP headers */
+	txq->tso_hdrs = dma_alloc_coherent(pp->dev->dev.parent,
+					   txq->size * TSO_HEADER_SIZE,
+					   &txq->tso_hdrs_phys, GFP_KERNEL);
+	if (txq->tso_hdrs == NULL) {
+		kfree(txq->tx_skb);
+		dma_free_coherent(pp->dev->dev.parent,
+				  txq->size * MVNETA_DESC_ALIGNED_SIZE,
+				  txq->descs, txq->descs_phys);
+		return -ENOMEM;
+	}
 	mvneta_tx_done_pkts_coal_set(pp, txq, txq->done_pkts_coal);
 
 	return 0;
@@ -2119,6 +2320,10 @@ static void mvneta_txq_deinit(struct mvneta_port *pp,
 {
 	kfree(txq->tx_skb);
 
+	if (txq->tso_hdrs)
+		dma_free_coherent(pp->dev->dev.parent,
+				  txq->size * TSO_HEADER_SIZE,
+				  txq->tso_hdrs, txq->tso_hdrs_phys);
 	if (txq->descs)
 		dma_free_coherent(pp->dev->dev.parent,
 				  txq->size * MVNETA_DESC_ALIGNED_SIZE,
@@ -2861,7 +3066,7 @@ static int mvneta_probe(struct platform_device *pdev)
 
 	netif_napi_add(dev, &pp->napi, mvneta_poll, pp->weight);
 
-	dev->features = NETIF_F_SG | NETIF_F_IP_CSUM;
+	dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
 	dev->hw_features |= dev->features;
 	dev->vlan_features |= dev->features;
 	dev->priv_flags |= IFF_UNICAST_FLT;
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ