[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <516DCD7B.1030609@mentor.com>
Date: Tue, 16 Apr 2013 23:15:23 +0100
From: Jim Baxter <jim_baxter@...tor.com>
To: Ben Hutchings <bhutchings@...arflare.com>
CC: "David S. Miller" <davem@...emloft.net>,
Frank Li <Frank.Li@...escale.com>,
Fugang Duan <B38611@...escale.com>, <netdev@...r.kernel.org>
Subject: Re: [PATCH net-next v2 1/1] net: fec: Enable imx6 enet checksum acceleration.
On 16/04/13 16:39, Ben Hutchings wrote:
> On Tue, 2013-04-16 at 11:36 +0100, Jim Baxter wrote:
>> Enables hardware generatation of IP header and
>> protocol specific checksums for transmitted
>> packets.
>>
>> Enabled hardware discarding of received packets with
>> invalid IP header or protocol specific checksums.
>>
>> The feature is enabled by default but can be
>> enabled/disabled by ethtool.
>>
>> Signed-off-by: Fugang Duan <B38611@...escale.com>
>> Signed-off-by: Jim Baxter <jim_baxter@...tor.com>
>> ---
>> drivers/net/ethernet/freescale/fec.h | 9 +-
>> drivers/net/ethernet/freescale/fec_main.c | 134 +++++++++++++++++++++++++++++
>> 2 files changed, 142 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
>> index eb43729..f558a1a 100644
>> --- a/drivers/net/ethernet/freescale/fec.h
>> +++ b/drivers/net/ethernet/freescale/fec.h
>> @@ -52,6 +52,7 @@
>> #define FEC_R_FIFO_RSEM 0x194 /* Receive FIFO section empty threshold */
>> #define FEC_R_FIFO_RAEM 0x198 /* Receive FIFO almost empty threshold */
>> #define FEC_R_FIFO_RAFL 0x19c /* Receive FIFO almost full threshold */
>> +#define FEC_RACC 0x1C4 /* Receive Accelerator function */
>> #define FEC_MIIGSK_CFGR 0x300 /* MIIGSK Configuration reg */
>> #define FEC_MIIGSK_ENR 0x308 /* MIIGSK Enable reg */
>>
>> @@ -164,9 +165,11 @@ struct bufdesc_ex {
>> #define BD_ENET_TX_CSL ((ushort)0x0001)
>> #define BD_ENET_TX_STATS ((ushort)0x03ff) /* All status bits */
>>
>> -/*enhanced buffer desciptor control/status used by Ethernet transmit*/
>> +/*enhanced buffer descriptor control/status used by Ethernet transmit*/
>> #define BD_ENET_TX_INT 0x40000000
>> #define BD_ENET_TX_TS 0x20000000
>> +#define BD_ENET_TX_PINS 0x10000000
>> +#define BD_ENET_TX_IINS 0x08000000
>>
>>
>> /* This device has up to three irqs on some platforms */
>> @@ -190,6 +193,9 @@ struct bufdesc_ex {
>>
>> #define BD_ENET_RX_INT 0x00800000
>> #define BD_ENET_RX_PTP ((ushort)0x0400)
>> +#define BD_ENET_RX_ICE 0x00000020
>> +#define BD_ENET_RX_PCR 0x00000010
>> +#define FLAG_RX_CSUM_ENABLED (BD_ENET_RX_ICE | BD_ENET_RX_PCR)
>>
>> /* The FEC buffer descriptors track the ring buffers. The rx_bd_base and
>> * tx_bd_base always point to the base of the buffer descriptors. The
>> @@ -247,6 +253,7 @@ struct fec_enet_private {
>> int pause_flag;
>>
>> struct napi_struct napi;
>> + int csum_flags;
>>
>> struct ptp_clock *ptp_clock;
>> struct ptp_clock_info ptp_caps;
>> diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
>> index d7657a4..738a57d 100644
>> --- a/drivers/net/ethernet/freescale/fec_main.c
>> +++ b/drivers/net/ethernet/freescale/fec_main.c
>> @@ -34,6 +34,12 @@
>> #include <linux/netdevice.h>
>> #include <linux/etherdevice.h>
>> #include <linux/skbuff.h>
>> +#include <linux/in.h>
>> +#include <linux/ip.h>
>> +#include <net/ip.h>
>> +#include <linux/tcp.h>
>> +#include <linux/udp.h>
>> +#include <linux/icmp.h>
>> #include <linux/spinlock.h>
>> #include <linux/workqueue.h>
>> #include <linux/bitops.h>
>> @@ -181,6 +187,10 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address");
>> #define PKT_MINBUF_SIZE 64
>> #define PKT_MAXBLR_SIZE 1520
>>
>> +/* FEC receive accleration */
>> +#define FEC_RACC_IPDIS (1 << 1)
>> +#define FEC_RACC_PRODIS (1 << 2)
>> +
>> /*
>> * The 5270/5271/5280/5282/532x RX control register also contains maximum frame
>> * size bits. Other FEC hardware does not, so we need to take that into
>> @@ -241,6 +251,52 @@ static void *swap_buffer(void *bufaddr, int len)
>> return bufaddr;
>> }
>>
>> +static void
>> +fec_enet_clear_csum(struct sk_buff *skb, struct net_device *ndev)
>> +{
>> + if (!(ndev->features & NETIF_F_HW_CSUM))
>> + return;
>
> Don't check features. You must fill in the checksum for every packet
> with ip_summed == CHECKSUM_PARTIAL, regardless of whether there has been
> a change to features since it was queued.
>
OK.
>> + /* Only run for packets requiring a checksum. */
>> + if (skb->ip_summed != CHECKSUM_PARTIAL)
>> + return;
>> + if (skb->len < (ETH_HLEN + sizeof(struct iphdr)))
>> + return;
>> +
>> + if (skb->protocol == htons(ETH_P_IP)) {
>> + ip_hdr(skb)->check = 0;
>
> You must use skb_cow_head() before editing the header.
>
Agreed, I will correct that.
>> + switch (ip_hdr(skb)->protocol) {
>> + case IPPROTO_UDP:
>> + if (skb->len < (ETH_HLEN +
>> + (ip_hdr(skb)->ihl << 2) +
>> + sizeof(struct udphdr)))
>> + return;
>> + skb_set_transport_header(skb,
>> + ETH_HLEN + ip_hdrlen(skb));
>> + udp_hdr(skb)->check = 0;
>> + break;
>> + case IPPROTO_TCP:
>> + if (skb->len < (ETH_HLEN +
>> + (ip_hdr(skb)->ihl << 2) +
>> + sizeof(struct tcphdr)))
>> + return;
>> + if (tcp_hdr(skb))
>> + tcp_hdr(skb)->check = 0;
>> + break;
>> + case IPPROTO_ICMP:
>> + if (skb->len < (ETH_HLEN +
>> + (ip_hdr(skb)->ihl << 2) +
>> + sizeof(struct icmphdr)))
>> + return;
>> + if (icmp_hdr(skb))
>> + icmp_hdr(skb)->checksum = 0;
>> + break;
>> + default:
>> + break;
>> + }
>> + }
>> +}
>> +
>> static netdev_tx_t
>> fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>> {
>> @@ -277,6 +333,9 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>> bufaddr = skb->data;
>> bdp->cbd_datlen = skb->len;
>>
>> + /* HW accleration for ICMP TCP UDP checksum */
>> + fec_enet_clear_csum(skb, ndev);
>> +
>> /*
>> * On some FEC implementations data must be aligned on
>> * 4-byte boundaries. Use bounce buffers to copy data
>> @@ -328,6 +387,10 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>> } else {
>>
>> ebdp->cbd_esc = BD_ENET_TX_INT;
>> + if ((ndev->features & NETIF_F_HW_CSUM) &&
>
> Don't check features.
>
>> + (skb->ip_summed == CHECKSUM_PARTIAL))
>> + ebdp->cbd_esc |= BD_ENET_TX_PINS
>> + | BD_ENET_TX_IINS;
>> }
>> }
>> /* If this was the last BD in the ring, start at the beginning again. */
>> @@ -407,6 +470,7 @@ fec_restart(struct net_device *ndev, int duplex)
>> const struct platform_device_id *id_entry =
>> platform_get_device_id(fep->pdev);
>> int i;
>> + u32 val;
>> u32 temp_mac[2];
>> u32 rcntl = OPT_FRAME_SIZE | 0x04;
>> u32 ecntl = 0x2; /* ETHEREN */
>> @@ -473,6 +537,14 @@ fec_restart(struct net_device *ndev, int duplex)
>> /* Set MII speed */
>> writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED);
>>
>> + /* set RX checksum */
>> + val = readl(fep->hwp + FEC_RACC);
>> + if (fep->csum_flags & FLAG_RX_CSUM_ENABLED)
>> + val |= (FEC_RACC_IPDIS | FEC_RACC_PRODIS);
>> + else
>> + val &= ~(FEC_RACC_IPDIS | FEC_RACC_PRODIS);
>> + writel(val, fep->hwp + FEC_RACC);
>> +
>> /*
>> * The phy interface and speed need to get configured
>> * differently on enet-mac.
>> @@ -818,6 +890,19 @@ fec_enet_rx(struct net_device *ndev, int budget)
>> spin_unlock_irqrestore(&fep->tmreg_lock, flags);
>> }
>>
>> + if (fep->bufdesc_ex &&
>> + (fep->csum_flags & FLAG_RX_CSUM_ENABLED)) {
>> + struct bufdesc_ex *ebdp =
>> + (struct bufdesc_ex *)bdp;
>> + if (!(ebdp->cbd_esc & FLAG_RX_CSUM_ENABLED)) {
>> + /* don't check it */
>> + skb->ip_summed = CHECKSUM_UNNECESSARY;
>
> This looks very strange. Presumably the RX_ICE and RX_PCR flags
> indicate checksum errors, and therefore !(ebdp->cbd_esc &
> FLAG_RX_CSUM_ENABLED) means the checksum(s) are good? This would be
> clearer if you defined FLAG_RX_CSUM_ERROR as well (with the same numeric
> value).
>
Agreed, that would be clearer.
>> + } else {
>> + ndev->stats.rx_errors++;
>
> Layer 3 and 4 errors should not be counted in the net device stats.
>
Ok, are they just for Levls 1 and 2?
>> + skb_checksum_none_assert(skb);
>> + }
>> + }
>> +
>> if (!skb_defer_rx_timestamp(skb))
>> napi_gro_receive(&fep->napi, skb);
>> }
>> @@ -1439,6 +1524,9 @@ static int fec_enet_alloc_buffers(struct net_device *ndev)
>> if (fep->bufdesc_ex) {
>> struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
>> ebdp->cbd_esc = BD_ENET_TX_INT;
>> + if (ndev->features & NETIF_F_HW_CSUM)
>> + ebdp->cbd_esc |= BD_ENET_TX_PINS
>> + | BD_ENET_TX_IINS;
>> }
>>
>> bdp = fec_enet_get_nextdesc(bdp, fep->bufdesc_ex);
>> @@ -1457,6 +1545,7 @@ fec_enet_open(struct net_device *ndev)
>> struct fec_enet_private *fep = netdev_priv(ndev);
>> int ret;
>>
>> + ndev->features |= NETIF_F_GRO;
>
> Definitely don't do this. Let the networking core take care of feature
> configuration.
>
This is not required at this point.
>> napi_enable(&fep->napi);
>>
>> /* I should reset the ring buffers here, but I don't yet know
>> @@ -1618,6 +1707,44 @@ static void fec_poll_controller(struct net_device *dev)
>> }
>> #endif
>>
>> +static netdev_features_t fec_fix_features(struct net_device *netdev,
>> + netdev_features_t features)
>> +{
>> + return features;
>> +}
>
> Why?
No, this is not required.
>
>> +static int fec_set_features(struct net_device *netdev,
>> + netdev_features_t features)
>> +{
>> + struct fec_enet_private *fep = netdev_priv(netdev);
>> + netdev_features_t changed = features ^ netdev->features;
>> + bool restart_required = false;
>> +
>> + netdev->features = features;
>> +
>> + /* Receive checksum has been changed */
>> + if (changed & NETIF_F_GRO) {
>> + restart_required = true;
>> + if (features & NETIF_F_GRO)
>> + fep->csum_flags |= FLAG_RX_CSUM_ENABLED;
>> + else
>> + fep->csum_flags &= ~FLAG_RX_CSUM_ENABLED;
>> + }
>
> RX checksum offload is independent of GRO.
>
That should be NETIF_F_RXCSUM.
>> + /* Restart the network interface */
>> + if (true == restart_required) {
>> + if (netif_running(netdev)) {
>> + fec_stop(netdev);
>> + fec_restart(netdev, fep->phy_dev->duplex);
>> + netif_wake_queue(netdev);
>> + } else {
>> + fec_restart(netdev, fep->phy_dev->duplex);
>> + }
>> + }
>> +
>> + return 0;
>> +}
>> +
>> static const struct net_device_ops fec_netdev_ops = {
>> .ndo_open = fec_enet_open,
>> .ndo_stop = fec_enet_close,
>> @@ -1631,6 +1758,8 @@ static const struct net_device_ops fec_netdev_ops = {
>> #ifdef CONFIG_NET_POLL_CONTROLLER
>> .ndo_poll_controller = fec_poll_controller,
>> #endif
>> + .ndo_fix_features = fec_fix_features,
>> + .ndo_set_features = fec_set_features,
>> };
>>
>> /*
>> @@ -1672,6 +1801,11 @@ static int fec_enet_init(struct net_device *ndev)
>> writel(FEC_RX_DISABLED_IMASK, fep->hwp + FEC_IMASK);
>> netif_napi_add(ndev, &fep->napi, fec_enet_rx_napi, FEC_NAPI_WEIGHT);
>>
>> + /* enable hw accelerator */
>> + ndev->features |= (NETIF_F_HW_CSUM | NETIF_F_GRO);
>> + ndev->hw_features |= (NETIF_F_HW_CSUM | NETIF_F_GRO);
>
> Now try passing IPv6 traffic. You mean NETIF_F_IP_CSUM, not
> NETIF_F_HW_CSUM.
>
> Also you mean NETIF_F_RX_CSUM, not NETIF_F_GRO.
>
I agree.
> Ben.
>
>> + fep->csum_flags |= FLAG_RX_CSUM_ENABLED;
>> +
>> fec_restart(ndev, 0);
>>
>> return 0;
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists