lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Tue, 16 Apr 2013 16:39:23 +0100
From:	Ben Hutchings <bhutchings@...arflare.com>
To:	Jim Baxter <jim_baxter@...tor.com>
CC:	"David S. Miller" <davem@...emloft.net>,
	Frank Li <Frank.Li@...escale.com>,
	Fugang Duan <B38611@...escale.com>, <netdev@...r.kernel.org>
Subject: Re: [PATCH net-next v2 1/1] net: fec: Enable imx6 enet checksum
 acceleration.

On Tue, 2013-04-16 at 11:36 +0100, Jim Baxter wrote:
> Enables hardware generation of IP header and
> protocol specific checksums for transmitted
> packets.
> 
> Enables hardware discarding of received packets with
> invalid IP header or protocol specific checksums.
> 
> The feature is enabled by default but can be
> enabled/disabled by ethtool.
> 
> Signed-off-by: Fugang Duan <B38611@...escale.com>
> Signed-off-by: Jim Baxter <jim_baxter@...tor.com>
> ---
>  drivers/net/ethernet/freescale/fec.h      |    9 +-
>  drivers/net/ethernet/freescale/fec_main.c |  134 +++++++++++++++++++++++++++++
>  2 files changed, 142 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
> index eb43729..f558a1a 100644
> --- a/drivers/net/ethernet/freescale/fec.h
> +++ b/drivers/net/ethernet/freescale/fec.h
> @@ -52,6 +52,7 @@
>  #define FEC_R_FIFO_RSEM		0x194 /* Receive FIFO section empty threshold */
>  #define FEC_R_FIFO_RAEM		0x198 /* Receive FIFO almost empty threshold */
>  #define FEC_R_FIFO_RAFL		0x19c /* Receive FIFO almost full threshold */
> +#define FEC_RACC		0x1C4 /* Receive Accelerator function */
>  #define FEC_MIIGSK_CFGR		0x300 /* MIIGSK Configuration reg */
>  #define FEC_MIIGSK_ENR		0x308 /* MIIGSK Enable reg */
>  
> @@ -164,9 +165,11 @@ struct bufdesc_ex {
>  #define BD_ENET_TX_CSL          ((ushort)0x0001)
>  #define BD_ENET_TX_STATS        ((ushort)0x03ff)        /* All status bits */
>  
> -/*enhanced buffer desciptor control/status used by Ethernet transmit*/
> +/*enhanced buffer descriptor control/status used by Ethernet transmit*/
>  #define BD_ENET_TX_INT          0x40000000
>  #define BD_ENET_TX_TS           0x20000000
> +#define BD_ENET_TX_PINS         0x10000000
> +#define BD_ENET_TX_IINS         0x08000000
>  
> 
>  /* This device has up to three irqs on some platforms */
> @@ -190,6 +193,9 @@ struct bufdesc_ex {
>  
>  #define BD_ENET_RX_INT          0x00800000
>  #define BD_ENET_RX_PTP          ((ushort)0x0400)
> +#define BD_ENET_RX_ICE	    0x00000020
> +#define BD_ENET_RX_PCR		0x00000010
> +#define FLAG_RX_CSUM_ENABLED	(BD_ENET_RX_ICE | BD_ENET_RX_PCR)
>
>  /* The FEC buffer descriptors track the ring buffers.  The rx_bd_base and
>   * tx_bd_base always point to the base of the buffer descriptors.  The
> @@ -247,6 +253,7 @@ struct fec_enet_private {
>  	int	pause_flag;
>  
>  	struct	napi_struct napi;
> +	int	csum_flags;
>  
>  	struct ptp_clock *ptp_clock;
>  	struct ptp_clock_info ptp_caps;
> diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
> index d7657a4..738a57d 100644
> --- a/drivers/net/ethernet/freescale/fec_main.c
> +++ b/drivers/net/ethernet/freescale/fec_main.c
> @@ -34,6 +34,12 @@
>  #include <linux/netdevice.h>
>  #include <linux/etherdevice.h>
>  #include <linux/skbuff.h>
> +#include <linux/in.h>
> +#include <linux/ip.h>
> +#include <net/ip.h>
> +#include <linux/tcp.h>
> +#include <linux/udp.h>
> +#include <linux/icmp.h>
>  #include <linux/spinlock.h>
>  #include <linux/workqueue.h>
>  #include <linux/bitops.h>
> @@ -181,6 +187,10 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address");
>  #define PKT_MINBUF_SIZE		64
>  #define PKT_MAXBLR_SIZE		1520
>  
> +/* FEC receive accleration */
> +#define FEC_RACC_IPDIS		(1 << 1)
> +#define FEC_RACC_PRODIS		(1 << 2)
> +
>  /*
>   * The 5270/5271/5280/5282/532x RX control register also contains maximum frame
>   * size bits. Other FEC hardware does not, so we need to take that into
> @@ -241,6 +251,52 @@ static void *swap_buffer(void *bufaddr, int len)
>  	return bufaddr;
>  }
>  
> +static void
> +fec_enet_clear_csum(struct sk_buff *skb, struct net_device *ndev)
> +{
> +	if (!(ndev->features & NETIF_F_HW_CSUM))
> +		return;

Don't check features.  You must fill in the checksum for every packet
with ip_summed == CHECKSUM_PARTIAL, regardless of whether there has been
a change to features since it was queued.

> +	/* Only run for packets requiring a checksum. */
> +	if (skb->ip_summed != CHECKSUM_PARTIAL)
> +		return;
> +	if (skb->len < (ETH_HLEN + sizeof(struct iphdr)))
> +		return;
> +
> +	if (skb->protocol == htons(ETH_P_IP)) {
> +		ip_hdr(skb)->check = 0;

You must use skb_cow_head() before editing the header.

> +		switch (ip_hdr(skb)->protocol) {
> +		case IPPROTO_UDP:
> +			if (skb->len < (ETH_HLEN +
> +				(ip_hdr(skb)->ihl << 2) +
> +				sizeof(struct udphdr)))
> +				return;
> +			skb_set_transport_header(skb,
> +					ETH_HLEN + ip_hdrlen(skb));
> +			udp_hdr(skb)->check = 0;
> +			break;
> +		case IPPROTO_TCP:
> +			if (skb->len < (ETH_HLEN +
> +				(ip_hdr(skb)->ihl << 2) +
> +				sizeof(struct tcphdr)))
> +				return;
> +			if (tcp_hdr(skb))
> +				tcp_hdr(skb)->check = 0;
> +			break;
> +		case IPPROTO_ICMP:
> +			if (skb->len < (ETH_HLEN +
> +				(ip_hdr(skb)->ihl << 2) +
> +				sizeof(struct icmphdr)))
> +				return;
> +			if (icmp_hdr(skb))
> +				icmp_hdr(skb)->checksum = 0;
> +			break;
> +		default:
> +			break;
> +		}
> +	}
> +}
> +
>  static netdev_tx_t
>  fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>  {
> @@ -277,6 +333,9 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>  	bufaddr = skb->data;
>  	bdp->cbd_datlen = skb->len;
>  
> +	/* HW accleration for ICMP TCP UDP checksum */
> +	fec_enet_clear_csum(skb, ndev);
> +
>  	/*
>  	 * On some FEC implementations data must be aligned on
>  	 * 4-byte boundaries. Use bounce buffers to copy data
> @@ -328,6 +387,10 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>  		} else {
>  
>  			ebdp->cbd_esc = BD_ENET_TX_INT;
> +			if ((ndev->features & NETIF_F_HW_CSUM) &&

Don't check features.

> +					(skb->ip_summed == CHECKSUM_PARTIAL))
> +				ebdp->cbd_esc |= BD_ENET_TX_PINS
> +						| BD_ENET_TX_IINS;
>  		}
>  	}
>  	/* If this was the last BD in the ring, start at the beginning again. */
> @@ -407,6 +470,7 @@ fec_restart(struct net_device *ndev, int duplex)
>  	const struct platform_device_id *id_entry =
>  				platform_get_device_id(fep->pdev);
>  	int i;
> +	u32 val;
>  	u32 temp_mac[2];
>  	u32 rcntl = OPT_FRAME_SIZE | 0x04;
>  	u32 ecntl = 0x2; /* ETHEREN */
> @@ -473,6 +537,14 @@ fec_restart(struct net_device *ndev, int duplex)
>  	/* Set MII speed */
>  	writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED);
>  
> +	/* set RX checksum */
> +	val = readl(fep->hwp + FEC_RACC);
> +	if (fep->csum_flags & FLAG_RX_CSUM_ENABLED)
> +		val |= (FEC_RACC_IPDIS | FEC_RACC_PRODIS);
> +	else
> +		val &= ~(FEC_RACC_IPDIS | FEC_RACC_PRODIS);
> +	writel(val, fep->hwp + FEC_RACC);
> +
>  	/*
>  	 * The phy interface and speed need to get configured
>  	 * differently on enet-mac.
> @@ -818,6 +890,19 @@ fec_enet_rx(struct net_device *ndev, int budget)
>  				spin_unlock_irqrestore(&fep->tmreg_lock, flags);
>  			}
>  
> +			if (fep->bufdesc_ex &&
> +				(fep->csum_flags & FLAG_RX_CSUM_ENABLED)) {
> +				struct bufdesc_ex *ebdp =
> +					(struct bufdesc_ex *)bdp;
> +				if (!(ebdp->cbd_esc & FLAG_RX_CSUM_ENABLED)) {
> +					/* don't check it */
> +					skb->ip_summed = CHECKSUM_UNNECESSARY;

This looks very strange.  Presumably the RX_ICE and RX_PCR flags
indicate checksum errors, and therefore !(ebdp->cbd_esc &
FLAG_RX_CSUM_ENABLED) means the checksum(s) are good?  This would be
clearer if you defined FLAG_RX_CSUM_ERROR as well (with the same numeric
value).

> +				} else {
> +					ndev->stats.rx_errors++;

Layer 3 and 4 errors should not be counted in the net device stats.

> +					skb_checksum_none_assert(skb);
> +				}
> +			}
> +
>  			if (!skb_defer_rx_timestamp(skb))
>  				napi_gro_receive(&fep->napi, skb);
>  		}
> @@ -1439,6 +1524,9 @@ static int fec_enet_alloc_buffers(struct net_device *ndev)
>  		if (fep->bufdesc_ex) {
>  			struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
>  			ebdp->cbd_esc = BD_ENET_TX_INT;
> +			if (ndev->features & NETIF_F_HW_CSUM)
> +				ebdp->cbd_esc |= BD_ENET_TX_PINS
> +						| BD_ENET_TX_IINS;
>  		}
>  
>  		bdp = fec_enet_get_nextdesc(bdp, fep->bufdesc_ex);
> @@ -1457,6 +1545,7 @@ fec_enet_open(struct net_device *ndev)
>  	struct fec_enet_private *fep = netdev_priv(ndev);
>  	int ret;
>  
> +	ndev->features |= NETIF_F_GRO;

Definitely don't do this.  Let the networking core take care of feature
configuration.

>  	napi_enable(&fep->napi);
>  
>  	/* I should reset the ring buffers here, but I don't yet know
> @@ -1618,6 +1707,44 @@ static void fec_poll_controller(struct net_device *dev)
>  }
>  #endif
>  
> +static netdev_features_t fec_fix_features(struct net_device *netdev,
> +	netdev_features_t features)
> +{
> +	return features;
> +}

Why?

> +static int fec_set_features(struct net_device *netdev,
> +	netdev_features_t features)
> +{
> +	struct fec_enet_private *fep = netdev_priv(netdev);
> +	netdev_features_t changed = features ^ netdev->features;
> +	bool restart_required = false;
> +
> +	netdev->features = features;
> +
> +	/* Receive checksum has been changed */
> +	if (changed & NETIF_F_GRO) {
> +		restart_required = true;
> +		if (features & NETIF_F_GRO)
> +			fep->csum_flags |= FLAG_RX_CSUM_ENABLED;
> +		else
> +			fep->csum_flags &= ~FLAG_RX_CSUM_ENABLED;
> +	}

RX checksum offload is independent of GRO.

> +	/* Restart the network interface */
> +	if (true == restart_required) {
> +		if (netif_running(netdev)) {
> +			fec_stop(netdev);
> +			fec_restart(netdev, fep->phy_dev->duplex);
> +			netif_wake_queue(netdev);
> +		} else {
> +			fec_restart(netdev, fep->phy_dev->duplex);
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>  static const struct net_device_ops fec_netdev_ops = {
>  	.ndo_open		= fec_enet_open,
>  	.ndo_stop		= fec_enet_close,
> @@ -1631,6 +1758,8 @@ static const struct net_device_ops fec_netdev_ops = {
>  #ifdef CONFIG_NET_POLL_CONTROLLER
>  	.ndo_poll_controller	= fec_poll_controller,
>  #endif
> +	.ndo_fix_features	= fec_fix_features,
> +	.ndo_set_features	= fec_set_features,
>  };
>  
>   /*
> @@ -1672,6 +1801,11 @@ static int fec_enet_init(struct net_device *ndev)
>  	writel(FEC_RX_DISABLED_IMASK, fep->hwp + FEC_IMASK);
>  	netif_napi_add(ndev, &fep->napi, fec_enet_rx_napi, FEC_NAPI_WEIGHT);
>  
> +	/* enable hw accelerator */
> +	ndev->features |= (NETIF_F_HW_CSUM | NETIF_F_GRO);
> +	ndev->hw_features |= (NETIF_F_HW_CSUM | NETIF_F_GRO);

Now try passing IPv6 traffic.  You mean NETIF_F_IP_CSUM, not
NETIF_F_HW_CSUM.

Also you mean NETIF_F_RX_CSUM, not NETIF_F_GRO.

Ben.

> +	fep->csum_flags |= FLAG_RX_CSUM_ENABLED;
> +
>  	fec_restart(ndev, 0);
>  
>  	return 0;

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists