[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1402532183-14910-7-git-send-email-b38611@freescale.com>
Date: Thu, 12 Jun 2014 08:16:23 +0800
From: Fugang Duan <b38611@...escale.com>
To: <davem@...emloft.net>
CC: <netdev@...r.kernel.org>, <b38611@...escale.com>,
<ezequiel.garcia@...e-electrons.com>, <b20596@...escale.com>,
<David.Laight@...LAB.COM>, <eric.dumazet@...il.com>
Subject: [PATCH v5 6/6] net: fec: Add software TSO support
Add software TSO support for FEC.
This feature allows to improve outbound throughput performance.
Tested on imx6dl sabresd board, running iperf tcp tests shows:
- 16.2% improvement comparing with FEC SG patch
- 82% improvement comparing with NO SG & TSO patch
$ ethtool -K eth0 tso on
$ iperf -c 10.192.242.167 -t 3 &
[ 3] local 10.192.242.108 port 35388 connected with 10.192.242.167 port 5001
[ ID] Interval Transfer Bandwidth
[ 3] 0.0- 3.0 sec 181 MBytes 506 Mbits/sec
During the testing, CPU loading is 30%.
Since imx6dl FEC Bandwidth is limited to SOC system bus bandwidth, the
performance with SW TSO is a milestone.
CC: Ezequiel Garcia <ezequiel.garcia@...e-electrons.com>
CC: Eric Dumazet <eric.dumazet@...il.com>
CC: David Laight <David.Laight@...LAB.COM>
CC: Li Frank <B20596@...escale.com>
Signed-off-by: Fugang Duan <B38611@...escale.com>
---
drivers/net/ethernet/freescale/fec.h | 6 +
drivers/net/ethernet/freescale/fec_main.c | 255 ++++++++++++++++++++++++++---
2 files changed, 238 insertions(+), 23 deletions(-)
diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index e7ce14d..671d080 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -299,6 +299,12 @@ struct fec_enet_private {
unsigned short bufdesc_size;
unsigned short tx_ring_size;
unsigned short rx_ring_size;
+ unsigned short tx_stop_threshold;
+ unsigned short tx_wake_threshold;
+
+ /* Software TSO */
+ char *tso_hdrs;
+ dma_addr_t tso_hdrs_dma;
struct platform_device *pdev;
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 6bf8d47..d76099d 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -36,6 +36,7 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <net/ip.h>
+#include <net/tso.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/icmp.h>
@@ -228,6 +229,15 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address");
#define FEC_PAUSE_FLAG_AUTONEG 0x1
#define FEC_PAUSE_FLAG_ENABLE 0x2
+#define TSO_HEADER_SIZE 128
+/* Max number of allowed TCP segments for software TSO */
+#define FEC_MAX_TSO_SEGS 100
+#define FEC_MAX_SKB_DESCS (FEC_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS)
+
+#define IS_TSO_HEADER(txq, addr) \
+ ((addr >= txq->tso_hdrs_dma) && \
+ (addr < txq->tso_hdrs_dma + txq->tx_ring_size * TSO_HEADER_SIZE))
+
static int mii_cnt;
static inline
@@ -439,8 +449,17 @@ static int fec_enet_txq_submit_skb(struct sk_buff *skb, struct net_device *ndev)
unsigned short buflen;
unsigned int estatus = 0;
unsigned int index;
+ int entries_free;
int ret;
+ entries_free = fec_enet_get_free_txdesc_num(fep);
+ if (entries_free < MAX_SKB_FRAGS + 1) {
+ dev_kfree_skb_any(skb);
+ if (net_ratelimit())
+ netdev_err(ndev, "NOT enough BD for SG!\n");
+ return NETDEV_TX_OK;
+ }
+
/* Protocol checksum off-load for TCP and UDP. */
if (fec_enet_clear_csum(skb, ndev)) {
dev_kfree_skb_any(skb);
@@ -535,35 +554,210 @@ static int fec_enet_txq_submit_skb(struct sk_buff *skb, struct net_device *ndev)
return 0;
}
-static netdev_tx_t
-fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+static int
+fec_enet_txq_put_data_tso(struct sk_buff *skb, struct net_device *ndev,
+ struct bufdesc *bdp, int index, char *data,
+ int size, bool last_tcp, bool is_last)
{
struct fec_enet_private *fep = netdev_priv(ndev);
- struct bufdesc *bdp;
- unsigned short status;
- int entries_free;
- int ret;
-
- /* Fill in a Tx ring entry */
- bdp = fep->cur_tx;
+ const struct platform_device_id *id_entry =
+ platform_get_device_id(fep->pdev);
+ struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
+ unsigned short status;
+ unsigned int estatus = 0;
status = bdp->cbd_sc;
+ status &= ~BD_ENET_TX_STATS;
- if (status & BD_ENET_TX_READY) {
- /* Ooops. All transmit buffers are full. Bail out.
- * This should not happen, since ndev->tbusy should be set.
- */
+ status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
+ bdp->cbd_datlen = size;
+
+ if (((unsigned long) data) & FEC_ALIGNMENT ||
+ id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) {
+ memcpy(fep->tx_bounce[index], data, size);
+ data = fep->tx_bounce[index];
+
+ if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
+ swap_buffer(data, size);
+ }
+
+ bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, data,
+ size, DMA_TO_DEVICE);
+ if (dma_mapping_error(&fep->pdev->dev, bdp->cbd_bufaddr)) {
+ dev_kfree_skb_any(skb);
if (net_ratelimit())
- netdev_err(ndev, "tx queue full!\n");
+ netdev_err(ndev, "Tx DMA memory map failed\n");
return NETDEV_TX_BUSY;
}
- ret = fec_enet_txq_submit_skb(skb, ndev);
+ if (fep->bufdesc_ex) {
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
+ estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
+ ebdp->cbd_bdu = 0;
+ ebdp->cbd_esc = estatus;
+ }
+
+ /* Handle the last BD specially */
+ if (last_tcp)
+ status |= (BD_ENET_TX_LAST | BD_ENET_TX_TC);
+ if (is_last) {
+ status |= BD_ENET_TX_INTR;
+ if (fep->bufdesc_ex)
+ ebdp->cbd_esc |= BD_ENET_TX_INT;
+ }
+
+ bdp->cbd_sc = status;
+
+ return 0;
+}
+
+static int
+fec_enet_txq_put_hdr_tso(struct sk_buff *skb, struct net_device *ndev,
+ struct bufdesc *bdp, int index)
+{
+ struct fec_enet_private *fep = netdev_priv(ndev);
+ const struct platform_device_id *id_entry =
+ platform_get_device_id(fep->pdev);
+ int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
+ void *bufaddr;
+ unsigned long dmabuf;
+ unsigned short status;
+ unsigned int estatus = 0;
+
+ status = bdp->cbd_sc;
+ status &= ~BD_ENET_TX_STATS;
+ status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
+
+ bufaddr = fep->tso_hdrs + index * TSO_HEADER_SIZE;
+ dmabuf = fep->tso_hdrs_dma + index * TSO_HEADER_SIZE;
+ if (((unsigned long) bufaddr) & FEC_ALIGNMENT ||
+ id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) {
+ memcpy(fep->tx_bounce[index], skb->data, hdr_len);
+ bufaddr = fep->tx_bounce[index];
+
+ if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
+ swap_buffer(bufaddr, hdr_len);
+
+ dmabuf = dma_map_single(&fep->pdev->dev, bufaddr,
+ hdr_len, DMA_TO_DEVICE);
+ if (dma_mapping_error(&fep->pdev->dev, dmabuf)) {
+ dev_kfree_skb_any(skb);
+ if (net_ratelimit())
+ netdev_err(ndev, "Tx DMA memory map failed\n");
+ return NETDEV_TX_BUSY;
+ }
+ }
+
+ bdp->cbd_bufaddr = dmabuf;
+ bdp->cbd_datlen = hdr_len;
+
+ if (fep->bufdesc_ex) {
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
+ estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
+ ebdp->cbd_bdu = 0;
+ ebdp->cbd_esc = estatus;
+ }
+
+ bdp->cbd_sc = status;
+
+ return 0;
+}
+
+static int fec_enet_txq_submit_tso(struct sk_buff *skb, struct net_device *ndev)
+{
+ struct fec_enet_private *fep = netdev_priv(ndev);
+ int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ int total_len, data_left;
+ struct bufdesc *bdp = fep->cur_tx;
+ struct tso_t tso;
+ unsigned int index = 0;
+ int ret;
+
+ if (tso_count_descs(skb) >= fec_enet_get_free_txdesc_num(fep)) {
+ dev_kfree_skb_any(skb);
+ if (net_ratelimit())
+ netdev_err(ndev, "NOT enough BD for TSO!\n");
+ return NETDEV_TX_OK;
+ }
+
+ /* Protocol checksum off-load for TCP and UDP. */
+ if (fec_enet_clear_csum(skb, ndev)) {
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+ }
+
+ /* Initialize the TSO handler, and prepare the first payload */
+ tso_start(skb, &tso);
+
+ total_len = skb->len - hdr_len;
+ while (total_len > 0) {
+ char *hdr;
+
+ index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
+ data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len);
+ total_len -= data_left;
+
+ /* prepare packet headers: MAC + IP + TCP */
+ hdr = fep->tso_hdrs + index * TSO_HEADER_SIZE;
+ tso_build_hdr(skb, hdr, &tso, data_left, total_len == 0);
+ ret = fec_enet_txq_put_hdr_tso(skb, ndev, bdp, index);
+ if (ret)
+ goto err_release;
+
+ while (data_left > 0) {
+ int size;
+
+ size = min_t(int, tso.size, data_left);
+ bdp = fec_enet_get_nextdesc(bdp, fep);
+ index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
+ ret = fec_enet_txq_put_data_tso(skb, ndev, bdp, index, tso.data,
+ size, size == data_left,
+ total_len == 0);
+ if (ret)
+ goto err_release;
+
+ data_left -= size;
+ tso_build_data(skb, &tso, size);
+ }
+
+ bdp = fec_enet_get_nextdesc(bdp, fep);
+ }
+
+ /* Save skb pointer */
+ fep->tx_skbuff[index] = skb;
+
+ fec_enet_submit_work(bdp, fep);
+
+ skb_tx_timestamp(skb);
+ fep->cur_tx = bdp;
+
+ /* Trigger transmission start */
+ writel(0, fep->hwp + FEC_X_DES_ACTIVE);
+
+ return 0;
+
+err_release:
+ /* TODO: Release all used data descriptors for TSO */
+ return ret;
+}
+
+static netdev_tx_t
+fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+ struct fec_enet_private *fep = netdev_priv(ndev);
+ int entries_free;
+ int ret;
+
+ if (skb_is_gso(skb))
+ ret = fec_enet_txq_submit_tso(skb, ndev);
+ else
+ ret = fec_enet_txq_submit_skb(skb, ndev);
if (ret)
return ret;
entries_free = fec_enet_get_free_txdesc_num(fep);
- if (entries_free < MAX_SKB_FRAGS + 1)
+ if (entries_free <= fep->tx_stop_threshold)
netif_stop_queue(ndev);
return NETDEV_TX_OK;
@@ -884,7 +1078,7 @@ fec_enet_tx(struct net_device *ndev)
unsigned short status;
struct sk_buff *skb;
int index = 0;
- int entries;
+ int entries_free;
fep = netdev_priv(ndev);
bdp = fep->dirty_tx;
@@ -901,8 +1095,9 @@ fec_enet_tx(struct net_device *ndev)
index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
skb = fep->tx_skbuff[index];
- dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr, bdp->cbd_datlen,
- DMA_TO_DEVICE);
+ if (!IS_TSO_HEADER(fep, bdp->cbd_bufaddr))
+ dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,
+ bdp->cbd_datlen, DMA_TO_DEVICE);
bdp->cbd_bufaddr = 0;
if (!skb) {
bdp = fec_enet_get_nextdesc(bdp, fep);
@@ -963,9 +1158,11 @@ fec_enet_tx(struct net_device *ndev)
/* Since we have freed up a buffer, the ring is no longer full
*/
- entries = fec_enet_get_free_txdesc_num(fep);
- if (entries >= MAX_SKB_FRAGS + 1 && netif_queue_stopped(ndev))
- netif_wake_queue(ndev);
+ if (netif_queue_stopped(ndev)) {
+ entries_free = fec_enet_get_free_txdesc_num(fep);
+ if (entries_free >= fep->tx_wake_threshold)
+ netif_wake_queue(ndev);
+ }
}
return;
}
@@ -2167,6 +2364,9 @@ static int fec_enet_init(struct net_device *ndev)
fep->tx_ring_size = TX_RING_SIZE;
fep->rx_ring_size = RX_RING_SIZE;
+ fep->tx_stop_threshold = FEC_MAX_SKB_DESCS;
+ fep->tx_wake_threshold = (fep->tx_ring_size - fep->tx_stop_threshold) / 2;
+
if (fep->bufdesc_ex)
fep->bufdesc_size = sizeof(struct bufdesc_ex);
else
@@ -2180,6 +2380,13 @@ static int fec_enet_init(struct net_device *ndev)
if (!cbd_base)
return -ENOMEM;
+ fep->tso_hdrs = dma_alloc_coherent(NULL, fep->tx_ring_size * TSO_HEADER_SIZE,
+ &fep->tso_hdrs_dma, GFP_KERNEL);
+ if (!fep->tso_hdrs) {
+ dma_free_coherent(NULL, bd_size, cbd_base, fep->bd_dma);
+ return -ENOMEM;
+ }
+
memset(cbd_base, 0, PAGE_SIZE);
fep->netdev = ndev;
@@ -2210,9 +2417,11 @@ static int fec_enet_init(struct net_device *ndev)
ndev->features |= NETIF_F_HW_VLAN_CTAG_RX;
if (id_entry->driver_data & FEC_QUIRK_HAS_CSUM) {
+ ndev->gso_max_segs = FEC_MAX_TSO_SEGS;
+
/* enable hw accelerator */
ndev->features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM
- | NETIF_F_RXCSUM | NETIF_F_SG);
+ | NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_TSO);
fep->csum_flags |= FLAG_RX_CSUM_ENABLED;
}
--
1.7.8
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists