lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <b2e5da15e6fe4b109259bbc62daccb80@BLUPR03MB486.namprd03.prod.outlook.com>
Date:	Fri, 30 May 2014 14:26:05 +0000
From:	"Frank.Li@...escale.com" <Frank.Li@...escale.com>
To:	"fugang.duan@...escale.com" <fugang.duan@...escale.com>,
	"davem@...emloft.net" <davem@...emloft.net>
CC:	"ezequiel.garcia@...e-electrons.com" 
	<ezequiel.garcia@...e-electrons.com>,
	"netdev@...r.kernel.org" <netdev@...r.kernel.org>,
	"shawn.guo@...aro.org" <shawn.guo@...aro.org>,
	"bhutchings@...arflare.com" <bhutchings@...arflare.com>,
	"fugang.duan@...escale.com" <fugang.duan@...escale.com>,
	"stephen@...workplumber.org" <stephen@...workplumber.org>
Subject: RE: [PATCH v1 5/6] net: fec: Add Scatter/gather support



> -----Original Message-----
> From: Fugang Duan [mailto:b38611@...escale.com]
> Sent: Thursday, May 29, 2014 9:06 PM
> To: Li Frank-B20596; davem@...emloft.net
> Cc: ezequiel.garcia@...e-electrons.com; netdev@...r.kernel.org;
> shawn.guo@...aro.org; bhutchings@...arflare.com; Duan Fugang-B38611;
> stephen@...workplumber.org
> Subject: [PATCH v1 5/6] net: fec: Add Scatter/gather support
> 
> Add Scatter/gather support for FEC.
> This feature allows to improve outbound throughput performance.
> Running iperf tests shows a 55.4% improvement, tested on imx6dl sabresd
> board.
> 
> $ ethtool -K eth0 sg off
> [  3] local 10.192.242.108 port 52618 connected with 10.192.242.167 port
> 5001
> [ ID] Interval       Transfer     Bandwidth
> [  3]  0.0- 3.0 sec  99.5 MBytes   278 Mbits/sec
> 
> $ ethtool -K eth0 sg on
> $ iperf -c 10.192.242.167 -t 3 &
> [  3] local 10.192.242.108 port 52617 connected with 10.192.242.167 port
> 5001
> [ ID] Interval       Transfer     Bandwidth
> [  3]  0.0- 3.0 sec   154 MBytes   432 Mbits/sec
> 
> Signed-off-by: Fugang Duan <B38611@...escale.com>
> ---
>  drivers/net/ethernet/freescale/fec.h      |    2 +-
>  drivers/net/ethernet/freescale/fec_main.c |  230 ++++++++++++++++++++++--
> -----
>  2 files changed, 178 insertions(+), 54 deletions(-)
> 
> diff --git a/drivers/net/ethernet/freescale/fec.h
> b/drivers/net/ethernet/freescale/fec.h
> index 74fbd49..57f3ecf 100644
> --- a/drivers/net/ethernet/freescale/fec.h
> +++ b/drivers/net/ethernet/freescale/fec.h
> @@ -221,7 +221,7 @@ struct bufdesc_ex {
>  #define BD_ENET_TX_RCMASK       ((ushort)0x003c)
>  #define BD_ENET_TX_UN           ((ushort)0x0002)
>  #define BD_ENET_TX_CSL          ((ushort)0x0001)
> -#define BD_ENET_TX_STATS        ((ushort)0x03ff)        /* All status
> bits */
> +#define BD_ENET_TX_STATS        ((ushort)0x0fff)        /* All status
> bits */
> 
>  /*enhanced buffer descriptor control/status used by Ethernet transmit*/
>  #define BD_ENET_TX_INT          0x40000000
> diff --git a/drivers/net/ethernet/freescale/fec_main.c
> b/drivers/net/ethernet/freescale/fec_main.c
> index 714100f..a96ed3a 100644
> --- a/drivers/net/ethernet/freescale/fec_main.c
> +++ b/drivers/net/ethernet/freescale/fec_main.c
> @@ -301,6 +301,23 @@ int fec_enet_get_bd_index(struct bufdesc *bdp, struct
> fec_enet_private *fep)
>  	return index;
>  }
> 
> +static inline
> +int fec_enet_txdesc_entry_free(struct fec_enet_private *fep) {

Suggest change to fec_enet_get_free_txdesc_num
_free, look like free some memory or resource. 

> +	int entries;
> +
> +	if (fep->bufdesc_ex)
> +		entries = (struct bufdesc_ex *)fep->dirty_tx -
> +				(struct bufdesc_ex *)fep->cur_tx;
> +	else
> +		entries = fep->dirty_tx - fep->cur_tx;
> +
> +	if (fep->cur_tx >= fep->dirty_tx)
> +		entries += fep->tx_ring_size;
> +
> +	return entries;
> +}
> +
>  static void *swap_buffer(void *bufaddr, int len)  {
>  	int i;
> @@ -328,20 +345,116 @@ fec_enet_clear_csum(struct sk_buff *skb, struct
> net_device *ndev)
>  	return 0;
>  }
> 
> -static int txq_submit_skb(struct sk_buff *skb, struct net_device *ndev)
> +static void
> +fec_enet_submit_work(struct bufdesc *bdp, struct fec_enet_private *fep)
> +{
> +	const struct platform_device_id *id_entry =
> +				platform_get_device_id(fep->pdev);
> +	struct bufdesc *bdp_pre;
> +
> +	bdp_pre = fec_enet_get_prevdesc(bdp, fep);
> +	if ((id_entry->driver_data & FEC_QUIRK_ERR006358) &&
> +	    !(bdp_pre->cbd_sc & BD_ENET_TX_READY)) {
> +		fep->delay_work.trig_tx = true;
> +		schedule_delayed_work(&(fep->delay_work.delay_work),
> +					msecs_to_jiffies(1));
> +	}
> +}
> +
> +static int
> +fec_enet_txq_submit_frag_skb(struct sk_buff *skb, struct net_device
> +*ndev)
>  {
>  	struct fec_enet_private *fep = netdev_priv(ndev);
>  	const struct platform_device_id *id_entry =
>  				platform_get_device_id(fep->pdev);
> -	struct bufdesc *bdp, *bdp_pre;
> -	void *bufaddr;
> -	unsigned short	status;
> +	struct bufdesc *bdp = fep->cur_tx;
> +	struct bufdesc_ex *ebdp;
> +	int nr_frags = skb_shinfo(skb)->nr_frags;
> +	int frag, frag_len;
> +	unsigned short status;
> +	unsigned int estatus = 0;
> +	skb_frag_t *this_frag;
>  	unsigned int index;
> +	void *bufaddr;
> +	int i;
> 
> -	/* Fill in a Tx ring entry */
> +	for (frag = 0; frag < nr_frags; frag++) {
> +		this_frag = &skb_shinfo(skb)->frags[frag];
> +		bdp = fec_enet_get_nextdesc(bdp, fep);
> +		ebdp = (struct bufdesc_ex *)bdp;
> +
> +		status = bdp->cbd_sc;
> +		status &= ~BD_ENET_TX_STATS;
> +		status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
> +		frag_len = skb_shinfo(skb)->frags[frag].size;
> +
> +		/* Handle the last BD specially */
> +		if (frag == nr_frags - 1) {
> +			status |= (BD_ENET_TX_INTR | BD_ENET_TX_LAST);
> +			if (fep->bufdesc_ex) {
> +				estatus |= BD_ENET_TX_INT;
> +				if (unlikely(skb_shinfo(skb)->tx_flags &
> +					SKBTX_HW_TSTAMP && fep->hwts_tx_en))
> +					estatus |= BD_ENET_TX_TS;
> +			}
> +		}
> +
> +		if (fep->bufdesc_ex) {
> +			if (skb->ip_summed == CHECKSUM_PARTIAL)
> +				estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
> +			ebdp->cbd_bdu = 0;
> +			ebdp->cbd_esc = estatus;
> +		}
> +
> +		bufaddr = page_address(this_frag->page.p) + this_frag-
> >page_offset;
> +
> +		index = fec_enet_get_bd_index(bdp, fep);
> +		if (((unsigned long) bufaddr) & FEC_ALIGNMENT) {
> +			memcpy(fep->tx_bounce[index], bufaddr, frag_len);
> +			bufaddr = fep->tx_bounce[index];
> +		}
> +		if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
> +			swap_buffer(bufaddr, frag_len);
> +		bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, bufaddr,
> +						frag_len, DMA_TO_DEVICE);
> +		if (dma_mapping_error(&fep->pdev->dev, bdp->cbd_bufaddr)) {
> +			dev_kfree_skb_any(skb);
> +			if (net_ratelimit())
> +				netdev_err(ndev, "Tx DMA memory map failed\n");
> +			goto dma_mapping_error;
> +		}
> +
> +		bdp->cbd_datlen = frag_len;
> +		bdp->cbd_sc = status;
> +	}
> +
> +	fep->cur_tx = bdp;
> +
> +	return 0;
> +
> +dma_mapping_error:
>  	bdp = fep->cur_tx;
> +	for (i = 0; i < frag; i++) {
> +		bdp = fec_enet_get_nextdesc(bdp, fep);
> +		dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,
> +				bdp->cbd_datlen, DMA_TO_DEVICE);
> +	}
> +	return NETDEV_TX_OK;
> +}
> 
> -	status = bdp->cbd_sc;
> +static int fec_enet_txq_submit_skb(struct sk_buff *skb, struct
> +net_device *ndev) {
> +	struct fec_enet_private *fep = netdev_priv(ndev);
> +	const struct platform_device_id *id_entry =
> +				platform_get_device_id(fep->pdev);
> +	int nr_frags = skb_shinfo(skb)->nr_frags;
> +	struct bufdesc *bdp, *last_bdp;
> +	void *bufaddr;
> +	unsigned short status;
> +	unsigned short buflen;
> +	unsigned int estatus = 0;
> +	unsigned int index;
> +	int ret;
> 
>  	/* Protocol checksum off-load for TCP and UDP. */
>  	if (fec_enet_clear_csum(skb, ndev)) {
> @@ -349,17 +462,18 @@ static int txq_submit_skb(struct sk_buff *skb,
> struct net_device *ndev)
>  		return NETDEV_TX_OK;
>  	}
> 
> -	/* Clear all of the status flags */
> +	/* Fill in a Tx ring entry */
> +	bdp = fep->cur_tx;
> +	status = bdp->cbd_sc;
>  	status &= ~BD_ENET_TX_STATS;
> 
>  	/* Set buffer length and buffer pointer */
>  	bufaddr = skb->data;
> -	bdp->cbd_datlen = skb->len;
> +	buflen = skb_headlen(skb);
> 
>  	index = fec_enet_get_bd_index(bdp, fep);
> -
>  	if (((unsigned long) bufaddr) & FEC_ALIGNMENT) {
> -		memcpy(fep->tx_bounce[index], skb->data, skb->len);
> +		memcpy(fep->tx_bounce[index], skb->data, buflen);
>  		bufaddr = fep->tx_bounce[index];
>  	}
> 
> @@ -369,62 +483,66 @@ static int txq_submit_skb(struct sk_buff *skb,
> struct net_device *ndev)
>  	 * swap every frame going to and coming from the controller.
>  	 */
>  	if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
> -		swap_buffer(bufaddr, skb->len);
> -
> -	/* Save skb pointer */
> -	fep->tx_skbuff[index] = skb;
> +		swap_buffer(bufaddr, buflen);
> 
>  	/* Push the data cache so the CPM does not get stale memory
>  	 * data.
>  	 */
>  	bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, bufaddr,
> -			skb->len, DMA_TO_DEVICE);
> +					buflen, DMA_TO_DEVICE);
>  	if (dma_mapping_error(&fep->pdev->dev, bdp->cbd_bufaddr)) {
> -		bdp->cbd_bufaddr = 0;
> -		fep->tx_skbuff[index] = NULL;
>  		dev_kfree_skb_any(skb);
>  		if (net_ratelimit())
>  			netdev_err(ndev, "Tx DMA memory map failed\n");
>  		return NETDEV_TX_OK;
>  	}
> 
> +	if (nr_frags) {
> +		ret = fec_enet_txq_submit_frag_skb(skb, ndev);

The process of sending skb->data is similar with frag. 
Can you combine to one function?

Best regards
Frank Li

> +		if (ret)
> +			return ret;
> +	} else {
> +		status |= (BD_ENET_TX_INTR | BD_ENET_TX_LAST);
> +		if (fep->bufdesc_ex) {
> +			estatus = BD_ENET_TX_INT;
> +			if (unlikely(skb_shinfo(skb)->tx_flags &
> +				SKBTX_HW_TSTAMP && fep->hwts_tx_en))
> +				estatus |= BD_ENET_TX_TS;
> +		}
> +	}
> +
>  	if (fep->bufdesc_ex) {
> 
>  		struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
> -		ebdp->cbd_bdu = 0;
> +
>  		if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
> -			fep->hwts_tx_en)) {
> -			ebdp->cbd_esc = (BD_ENET_TX_TS | BD_ENET_TX_INT);
> +			fep->hwts_tx_en))
>  			skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
> -		} else {
> -			ebdp->cbd_esc = BD_ENET_TX_INT;
> 
> -			/* Enable protocol checksum flags
> -			 * We do not bother with the IP Checksum bits as they
> -			 * are done by the kernel
> -			 */
> -			if (skb->ip_summed == CHECKSUM_PARTIAL)
> -				ebdp->cbd_esc |= BD_ENET_TX_PINS |
> BD_ENET_TX_IINS;
> -		}
> +		if (skb->ip_summed == CHECKSUM_PARTIAL)
> +			estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
> +
> +		ebdp->cbd_bdu = 0;
> +		ebdp->cbd_esc = estatus;
>  	}
> 
> +	last_bdp = fep->cur_tx;
> +	index = fec_enet_get_bd_index(last_bdp, fep);
> +	/* Save skb pointer */
> +	fep->tx_skbuff[index] = skb;
> +
> +	bdp->cbd_datlen = buflen;
> +
>  	/* Send it on its way.  Tell FEC it's ready, interrupt when done,
>  	 * it's the last BD of the frame, and to put the CRC on the end.
>  	 */
> -	status |= (BD_ENET_TX_READY | BD_ENET_TX_INTR
> -			| BD_ENET_TX_LAST | BD_ENET_TX_TC);
> +	status |= (BD_ENET_TX_READY | BD_ENET_TX_TC);
>  	bdp->cbd_sc = status;
> 
> -	bdp_pre = fec_enet_get_prevdesc(bdp, fep);
> -	if ((id_entry->driver_data & FEC_QUIRK_ERR006358) &&
> -	    !(bdp_pre->cbd_sc & BD_ENET_TX_READY)) {
> -		fep->delay_work.trig_tx = true;
> -		schedule_delayed_work(&(fep->delay_work.delay_work),
> -					msecs_to_jiffies(1));
> -	}
> +	fec_enet_submit_work(bdp, fep);
> 
>  	/* If this was the last BD in the ring, start at the beginning again.
> */
> -	bdp = fec_enet_get_nextdesc(bdp, fep);
> +	bdp = fec_enet_get_nextdesc(last_bdp, fep);
> 
>  	skb_tx_timestamp(skb);
> 
> @@ -433,7 +551,7 @@ static int txq_submit_skb(struct sk_buff *skb, struct
> net_device *ndev)
>  	/* Trigger transmission start */
>  	writel(0, fep->hwp + FEC_X_DES_ACTIVE);
> 
> -	return NETDEV_TX_OK;
> +	return 0;
>  }
> 
>  static netdev_tx_t
> @@ -442,6 +560,7 @@ fec_enet_start_xmit(struct sk_buff *skb, struct
> net_device *ndev)
>  	struct fec_enet_private *fep = netdev_priv(ndev);
>  	struct bufdesc *bdp;
>  	unsigned short	status;
> +	int entries_free;
>  	int ret;
> 
>  	/* Fill in a Tx ring entry */
> @@ -453,15 +572,17 @@ fec_enet_start_xmit(struct sk_buff *skb, struct
> net_device *ndev)
>  		/* Ooops.  All transmit buffers are full.  Bail out.
>  		 * This should not happen, since ndev->tbusy should be set.
>  		 */
> -		netdev_err(ndev, "tx queue full!\n");
> +		if (net_ratelimit())
> +			netdev_err(ndev, "tx queue full!\n");
>  		return NETDEV_TX_BUSY;
>  	}
> 
> -	ret = txq_submit_skb(skb, ndev);
> -	if (ret == -EBUSY)
> -		return NETDEV_TX_BUSY;
> +	ret = fec_enet_txq_submit_skb(skb, ndev);
> +	if (ret)
> +		return ret;
> 
> -	if (fep->cur_tx == fep->dirty_tx)
> +	entries_free = fec_enet_txdesc_entry_free(fep);
> +	if (entries_free < MAX_SKB_FRAGS + 1)
>  		netif_stop_queue(ndev);
> 
>  	return NETDEV_TX_OK;
> @@ -782,6 +903,7 @@ fec_enet_tx(struct net_device *ndev)
>  	unsigned short status;
>  	struct	sk_buff	*skb;
>  	int	index = 0;
> +	int	entries;
> 
>  	fep = netdev_priv(ndev);
>  	bdp = fep->dirty_tx;
> @@ -798,9 +920,13 @@ fec_enet_tx(struct net_device *ndev)
>  		index = fec_enet_get_bd_index(bdp, fep);
> 
>  		skb = fep->tx_skbuff[index];
> -		dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr, skb->len,
> +		dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr, bdp-
> >cbd_datlen,
>  				DMA_TO_DEVICE);
>  		bdp->cbd_bufaddr = 0;
> +		if (!skb) {
> +			bdp = fec_enet_get_nextdesc(bdp, fep);
> +			continue;
> +		}
> 
>  		/* Check for errors. */
>  		if (status & (BD_ENET_TX_HB | BD_ENET_TX_LC | @@ -819,7 +945,7
> @@ fec_enet_tx(struct net_device *ndev)
>  				ndev->stats.tx_carrier_errors++;
>  		} else {
>  			ndev->stats.tx_packets++;
> -			ndev->stats.tx_bytes += bdp->cbd_datlen;
> +			ndev->stats.tx_bytes += skb->len;
>  		}
> 
>  		if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS) &&
> @@ -856,15 +982,13 @@ fec_enet_tx(struct net_device *ndev)
> 
>  		/* Since we have freed up a buffer, the ring is no longer full
>  		 */
> -		if (fep->dirty_tx != fep->cur_tx) {
> -			if (netif_queue_stopped(ndev))
> -				netif_wake_queue(ndev);
> -		}
> +		entries = fec_enet_txdesc_entry_free(fep);
> +		if (entries >= MAX_SKB_FRAGS + 1 && netif_queue_stopped(ndev))
> +			netif_wake_queue(ndev);
>  	}
>  	return;
>  }
> 
> -
>  /* During a receive, the cur_rx points to the current incoming buffer.
>   * When we update through the ring, if the next incoming buffer has
>   * not been given to the system, we just set the empty indicator, @@ -
> 2106,7 +2230,7 @@ static int fec_enet_init(struct net_device *ndev)
>  	if (id_entry->driver_data & FEC_QUIRK_HAS_CSUM) {
>  		/* enable hw accelerator */
>  		ndev->features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM
> -				| NETIF_F_RXCSUM);
> +				| NETIF_F_RXCSUM | NETIF_F_SG);
>  		fep->csum_flags |= FLAG_RX_CSUM_ENABLED;
>  	}
> 
> --
> 1.7.8

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ