[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20190819165522.451f2ea2@cakuba.netronome.com>
Date: Mon, 19 Aug 2019 16:55:22 -0700
From: Jakub Kicinski <jakub.kicinski@...ronome.com>
To: Thomas Bogendoerfer <tbogendoerfer@...e.de>
Cc: Jonathan Corbet <corbet@....net>,
Ralf Baechle <ralf@...ux-mips.org>,
Paul Burton <paul.burton@...s.com>,
James Hogan <jhogan@...nel.org>,
Dmitry Torokhov <dmitry.torokhov@...il.com>,
Lee Jones <lee.jones@...aro.org>,
"David S. Miller" <davem@...emloft.net>,
Srinivas Kandagatla <srinivas.kandagatla@...aro.org>,
Alessandro Zummo <a.zummo@...ertech.it>,
Alexandre Belloni <alexandre.belloni@...tlin.com>,
Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
Jiri Slaby <jslaby@...e.com>,
Evgeniy Polyakov <zbr@...emap.net>, linux-doc@...r.kernel.org,
linux-kernel@...r.kernel.org, linux-mips@...r.kernel.org,
linux-input@...r.kernel.org, netdev@...r.kernel.org,
linux-rtc@...r.kernel.org, linux-serial@...r.kernel.org
Subject: Re: [PATCH v5 10/17] net: sgi: ioc3-eth: rework skb rx handling
On Mon, 19 Aug 2019 18:31:33 +0200, Thomas Bogendoerfer wrote:
> Buffers allocated by alloc_skb() are already cache aligned so there
> is no need for an extra align done by ioc3_alloc_skb. And instead
> of skb_put/skb_trim simply use one skb_put after frame size is known
> during receive.
>
> Signed-off-by: Thomas Bogendoerfer <tbogendoerfer@...e.de>
> ---
> drivers/net/ethernet/sgi/ioc3-eth.c | 50 ++++++++-----------------------------
> 1 file changed, 11 insertions(+), 39 deletions(-)
>
> diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c
> index c875640926d6..d862f28887f9 100644
> --- a/drivers/net/ethernet/sgi/ioc3-eth.c
> +++ b/drivers/net/ethernet/sgi/ioc3-eth.c
> @@ -11,7 +11,6 @@
> *
> * To do:
> *
> - * o Handle allocation failures in ioc3_alloc_skb() more gracefully.
> * o Handle allocation failures in ioc3_init_rings().
> * o Use prefetching for large packets. What is a good lower limit for
> * prefetching?
> @@ -72,6 +71,12 @@
> #define TX_RING_ENTRIES 128
> #define TX_RING_MASK (TX_RING_ENTRIES - 1)
>
> +/* BEWARE: The IOC3 documentation documents the size of rx buffers as
> + * 1644 while it's actually 1664. This one was nasty to track down...
> + */
> +#define RX_OFFSET 10
> +#define RX_BUF_SIZE 1664
> +
> #define ETCSR_FD ((17 << ETCSR_IPGR2_SHIFT) | (11 << ETCSR_IPGR1_SHIFT) | 21)
> #define ETCSR_HD ((21 << ETCSR_IPGR2_SHIFT) | (21 << ETCSR_IPGR1_SHIFT) | 21)
>
> @@ -111,31 +116,6 @@ static void ioc3_init(struct net_device *dev);
> static const char ioc3_str[] = "IOC3 Ethernet";
> static const struct ethtool_ops ioc3_ethtool_ops;
>
> -/* We use this to acquire receive skb's that we can DMA directly into. */
> -
> -#define IOC3_CACHELINE 128UL
Is the cache line size actually 128B on the platform this driver runs on?
This looks like a DMA engine alignment requirement, more than an
optimization.
The comment in __alloc_skb() says:
/* We do our best to align skb_shared_info on a separate cache
* line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives
* aligned memory blocks, unless SLUB/SLAB debug is enabled.
* Both skb->head and skb_shared_info are cache line aligned.
*/
note the "unless".
> -static inline unsigned long aligned_rx_skb_addr(unsigned long addr)
> -{
> - return (~addr + 1) & (IOC3_CACHELINE - 1UL);
> -}
> -
> -static inline struct sk_buff *ioc3_alloc_skb(unsigned long length,
> - unsigned int gfp_mask)
> -{
> - struct sk_buff *skb;
> -
> - skb = alloc_skb(length + IOC3_CACHELINE - 1, gfp_mask);
> - if (likely(skb)) {
> - int offset = aligned_rx_skb_addr((unsigned long)skb->data);
> -
> - if (offset)
> - skb_reserve(skb, offset);
> - }
> -
> - return skb;
> -}
> -
> static inline unsigned long ioc3_map(void *ptr, unsigned long vdev)
> {
> #ifdef CONFIG_SGI_IP27
> @@ -148,12 +128,6 @@ static inline unsigned long ioc3_map(void *ptr, unsigned long vdev)
> #endif
> }
>
> -/* BEWARE: The IOC3 documentation documents the size of rx buffers as
> - * 1644 while it's actually 1664. This one was nasty to track down ...
> - */
> -#define RX_OFFSET 10
> -#define RX_BUF_ALLOC_SIZE (1664 + RX_OFFSET + IOC3_CACHELINE)
> -
> #define IOC3_SIZE 0x100000
>
> static inline u32 mcr_pack(u32 pulse, u32 sample)
> @@ -534,10 +508,10 @@ static inline void ioc3_rx(struct net_device *dev)
> err = be32_to_cpu(rxb->err); /* It's valid ... */
> if (err & ERXBUF_GOODPKT) {
> len = ((w0 >> ERXBUF_BYTECNT_SHIFT) & 0x7ff) - 4;
> - skb_trim(skb, len);
> + skb_put(skb, len);
> skb->protocol = eth_type_trans(skb, dev);
>
> - new_skb = ioc3_alloc_skb(RX_BUF_ALLOC_SIZE, GFP_ATOMIC);
> + new_skb = alloc_skb(RX_BUF_SIZE, GFP_ATOMIC);
> if (!new_skb) {
> /* Ouch, drop packet and just recycle packet
> * to keep the ring filled.
> @@ -546,6 +520,7 @@ static inline void ioc3_rx(struct net_device *dev)
> new_skb = skb;
> goto next;
> }
> + new_skb->dev = dev;
Assigning dev pointer seems unrelated to the rest of the patch?
> if (likely(dev->features & NETIF_F_RXCSUM))
> ioc3_tcpudp_checksum(skb,
> @@ -556,8 +531,6 @@ static inline void ioc3_rx(struct net_device *dev)
>
> ip->rx_skbs[rx_entry] = NULL; /* Poison */
>
> - /* Because we reserve afterwards. */
> - skb_put(new_skb, (1664 + RX_OFFSET));
> rxb = (struct ioc3_erxbuf *)new_skb->data;
> skb_reserve(new_skb, RX_OFFSET);
>
> @@ -846,16 +819,15 @@ static void ioc3_alloc_rings(struct net_device *dev)
> for (i = 0; i < RX_BUFFS; i++) {
> struct sk_buff *skb;
>
> - skb = ioc3_alloc_skb(RX_BUF_ALLOC_SIZE, GFP_ATOMIC);
> + skb = alloc_skb(RX_BUF_SIZE, GFP_ATOMIC);
> if (!skb) {
> show_free_areas(0, NULL);
> continue;
> }
> + skb->dev = dev;
>
> ip->rx_skbs[i] = skb;
>
> - /* Because we reserve afterwards. */
> - skb_put(skb, (1664 + RX_OFFSET));
> rxb = (struct ioc3_erxbuf *)skb->data;
> rxr[i] = cpu_to_be64(ioc3_map(rxb, 1));
> skb_reserve(skb, RX_OFFSET);
Powered by blists - more mailing lists