lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <1505221489-12870-1-git-send-email-horms+renesas@verge.net.au>
Date:   Tue, 12 Sep 2017 15:04:49 +0200
From:   Simon Horman <horms+renesas@...ge.net.au>
To:     David Miller <davem@...emloft.net>,
        Sergei Shtylyov <sergei.shtylyov@...entembedded.com>
Cc:     Magnus Damm <magnus.damm@...il.com>, netdev@...r.kernel.org,
        linux-renesas-soc@...r.kernel.org,
        Simon Horman <horms+renesas@...ge.net.au>
Subject: [PATCH/RFC net-next] ravb: RX checksum offload

Add support for RX checksum offload. This is enabled by default and
may be disabled and re-enabled using ethtool:

 # ethtool -K eth0 rx off
 # ethtool -K eth0 rx on

The RAVB provides a simple checksumming scheme which appears to be
completely compatible with CHECKSUM_COMPLETE: a 1's complement sum of
all packet data after the L2 header is appended to packet data; this may
be trivially read by the driver and used to update the skb accordingly.

In terms of performance throughput is close to gigabit line-rate both with
and without RX checksum offload enabled. Perf output, however, appears to
indicate that significantly less time is spent in do_csum(). This is as
expected.

Test results with RX checksum offload enabled:
 # /usr/bin/perf_3.16 record -o /run/perf.data -a netperf -t TCP_MAERTS -H 10.4.3.162
 MIGRATED TCP MAERTS TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 10.4.3.162 () port 0 AF_INET : demo
 enable_enobufs failed: getprotobyname
 Recv   Send    Send
 Socket Socket  Message  Elapsed
 Size   Size    Size     Time     Throughput
 bytes  bytes   bytes    secs.    10^6bits/sec

  87380  16384  16384    10.00     938.78
 [ perf record: Woken up 14 times to write data ]
 [ perf record: Captured and wrote 3.524 MB /run/perf.data (~153957 samples) ]

 Summary of output of perf report:
    19.49%      ksoftirqd/0  [kernel.kallsyms]  [k] _raw_spin_unlock_irqrestore
     9.88%      ksoftirqd/0  [kernel.kallsyms]  [k] __pi_memcpy
     7.33%      ksoftirqd/0  [kernel.kallsyms]  [k] skb_put
     7.00%      ksoftirqd/0  [kernel.kallsyms]  [k] ravb_poll
     3.89%      ksoftirqd/0  [kernel.kallsyms]  [k] dev_gro_receive
     3.65%          netperf  [kernel.kallsyms]  [k] __arch_copy_to_user
     3.43%          swapper  [kernel.kallsyms]  [k] arch_cpu_idle
     2.77%          swapper  [kernel.kallsyms]  [k] tick_nohz_idle_enter
     1.85%      ksoftirqd/0  [kernel.kallsyms]  [k] __netdev_alloc_skb
     1.80%          swapper  [kernel.kallsyms]  [k] _raw_spin_unlock_irq
     1.64%      ksoftirqd/0  [kernel.kallsyms]  [k] __slab_alloc.isra.79
     1.62%      ksoftirqd/0  [kernel.kallsyms]  [k] __pi___inval_cache_range

Test results without RX checksum offload enabled:
 # /usr/bin/perf_3.16 record -o /run/perf.data -a netperf -t TCP_MAERTS -H 10.4.3.162
 MIGRATED TCP MAERTS TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 10.4.3.162 () port 0 AF_INET : demo
 enable_enobufs failed: getprotobyname
 Recv   Send    Send
 Socket Socket  Message  Elapsed
 Size   Size    Size     Time     Throughput
 bytes  bytes   bytes    secs.    10^6bits/sec

  87380  16384  16384    10.00     941.09
 [ perf record: Woken up 14 times to write data ]
 [ perf record: Captured and wrote 3.411 MB /run/perf.data (~149040 samples) ]

 Summary of output of perf report:
   17.50%    ksoftirqd/0  [kernel.kallsyms]  [k] _raw_spin_unlock_irqrestore
    10.60%    ksoftirqd/0  [kernel.kallsyms]  [k] __pi_memcpy
     7.91%    ksoftirqd/0  [kernel.kallsyms]  [k] skb_put
     6.95%    ksoftirqd/0  [kernel.kallsyms]  [k] do_csum
     6.22%    ksoftirqd/0  [kernel.kallsyms]  [k] ravb_poll
     3.84%    ksoftirqd/0  [kernel.kallsyms]  [k] dev_gro_receive
     2.53%        netperf  [kernel.kallsyms]  [k] __arch_copy_to_user
     2.53%        swapper  [kernel.kallsyms]  [k] arch_cpu_idle
     2.27%        swapper  [kernel.kallsyms]  [k] tick_nohz_idle_enter
     1.90%    ksoftirqd/0  [kernel.kallsyms]  [k] __pi___inval_cache_range
     1.90%    ksoftirqd/0  [kernel.kallsyms]  [k] __netdev_alloc_skb
     1.52%    ksoftirqd/0  [kernel.kallsyms]  [k] __slab_alloc.isra.79

Above results collected on an R-Car Gen 3 Salvator-X/r8a7796 ES1.0.
Also tested on a R-Car Gen 3 Salvator-X/r8a7795 ES1.0.

By inspection this also appears to be compatible with the ravb found
on R-Car Gen 2 SoCs, however, this patch is currently untested on such
hardware.

Signed-off-by: Simon Horman <horms+renesas@...ge.net.au>
---
 drivers/net/ethernet/renesas/ravb_main.c | 58 +++++++++++++++++++++++++++++++-
 1 file changed, 57 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index fdf30bfa403b..7c6438cd7de7 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -403,8 +403,9 @@ static void ravb_emac_init(struct net_device *ndev)
 	/* Receive frame limit set register */
 	ravb_write(ndev, ndev->mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN, RFLR);
 
-	/* PAUSE prohibition */
+	/* EMAC Mode: PAUSE prohibition; Duplex; RX Checksum; TX; RX */
 	ravb_write(ndev, ECMR_ZPF | (priv->duplex ? ECMR_DM : 0) |
+		   (ndev->features & NETIF_F_RXCSUM ? ECMR_RCSC : 0) |
 		   ECMR_TE | ECMR_RE, ECMR);
 
 	ravb_set_rate(ndev);
@@ -520,6 +521,19 @@ static void ravb_get_tx_tstamp(struct net_device *ndev)
 	}
 }
 
+static void ravb_rx_csum(struct sk_buff *skb)
+{
+	u8 *hw_csum;
+
+	/* The hardware checksum is 2 bytes appended to packet data */
+	if (unlikely(skb->len < 2))
+		return;
+	hw_csum = skb_tail_pointer(skb) - 2;
+	skb->csum = csum_unfold((__force __sum16)get_unaligned_le16(hw_csum));
+	skb->ip_summed = CHECKSUM_COMPLETE;
+	skb_trim(skb, skb->len - 2);
+}
+
 /* Packet receive function for Ethernet AVB */
 static bool ravb_rx(struct net_device *ndev, int *quota, int q)
 {
@@ -587,8 +601,11 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q)
 				ts.tv_nsec = le32_to_cpu(desc->ts_n);
 				shhwtstamps->hwtstamp = timespec64_to_ktime(ts);
 			}
+
 			skb_put(skb, pkt_len);
 			skb->protocol = eth_type_trans(skb, ndev);
+			if (ndev->features & NETIF_F_RXCSUM)
+				ravb_rx_csum(skb);
 			napi_gro_receive(&priv->napi[q], skb);
 			stats->rx_packets++;
 			stats->rx_bytes += pkt_len;
@@ -1842,6 +1859,41 @@ static int ravb_do_ioctl(struct net_device *ndev, struct ifreq *req, int cmd)
 	return phy_mii_ioctl(phydev, req, cmd);
 }
 
+static void ravb_set_rx_csum(struct net_device *ndev, bool enable)
+{
+	struct ravb_private *priv = netdev_priv(ndev);
+	unsigned long flags;
+
+	spin_lock_irqsave(&priv->lock, flags);
+
+	/* Disable TX and RX */
+	ravb_rcv_snd_disable(ndev);
+
+	/* Modify RX Checksum setting */
+	if (enable)
+		ravb_modify(ndev, ECMR, 0, ECMR_RCSC);
+	else
+		ravb_modify(ndev, ECMR, ECMR_RCSC, 0);
+
+	/* Enable TX and RX */
+	ravb_rcv_snd_enable(ndev);
+
+	spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static int ravb_set_features(struct net_device *ndev,
+			     netdev_features_t features)
+{
+	netdev_features_t changed = ndev->features ^ features;
+
+	if (changed & NETIF_F_RXCSUM)
+		ravb_set_rx_csum(ndev, features & NETIF_F_RXCSUM);
+
+	ndev->features = features;
+
+	return 0;
+}
+
 static const struct net_device_ops ravb_netdev_ops = {
 	.ndo_open		= ravb_open,
 	.ndo_stop		= ravb_close,
@@ -1853,6 +1905,7 @@ static const struct net_device_ops ravb_netdev_ops = {
 	.ndo_do_ioctl		= ravb_do_ioctl,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,
+	.ndo_set_features	= ravb_set_features,
 };
 
 /* MDIO bus init function */
@@ -2004,6 +2057,9 @@ static int ravb_probe(struct platform_device *pdev)
 	if (!ndev)
 		return -ENOMEM;
 
+	ndev->features |= NETIF_F_RXCSUM;
+	ndev->hw_features |= ndev->features;
+
 	pm_runtime_enable(&pdev->dev);
 	pm_runtime_get_sync(&pdev->dev);
 
-- 
2.1.4

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ