lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <9929d2390901140349v4483bd23wac254673de591bec@mail.gmail.com>
Date:	Wed, 14 Jan 2009 03:49:35 -0800
From:	"Jeff Kirsher" <jeffrey.t.kirsher@...el.com>
To:	"Herbert Xu" <herbert@...dor.apana.org.au>
Cc:	"David S. Miller" <davem@...emloft.net>, netdev@...r.kernel.org
Subject: Re: [2/2] igb: Replace LRO with GRO

On Tue, Jan 13, 2009 at 1:28 AM, Herbert Xu <herbert@...dor.apana.org.au> wrote:
> Hi:
>
> And here is the first real conversion.
>
> I can't test this one because I don't have the hardware.  However,
> GRO is a software feature and I have tested the various code paths
> using e1000e.
>
> igb: Replace LRO with GRO
>
> This patch makes igb invoke the GRO hooks instead of LRO.  As
> GRO has a compatible external interface to LRO this is a very
> straightforward replacement.
>
> Three things of note:
>
> 1) I've kept the LRO Kconfig option until we decide to enable
> GRO across the board at which point it can also be killed.
>
> 2) The poll_controller stuff is broken in igb as it tries to do
> the same work as the normal poll routine.  Since poll_controller
> can be called in the middle of a poll, this can't be good.
>
> I noticed this because poll_controller can invoke the GRO hooks
> without flushing held GRO packets.
>
> However, this should be harmless (assuming the poll_controller
> bug above doesn't kill you first :) since the next ->poll will
> clear the backlog.  The only time when we'll have a problem is
> if we're already executing the GRO code on the same ring, but
> that's no worse than what happens now.
>
> 3) I kept the ip_summed check before calling GRO so that we're
> on par with previous behaviour.
>
> Signed-off-by: Herbert Xu <herbert@...dor.apana.org.au>
>
> diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
> index 65afda4..cf4fea0 100644
> --- a/drivers/net/Kconfig
> +++ b/drivers/net/Kconfig
> @@ -2022,7 +2022,6 @@ config IGB
>  config IGB_LRO
>        bool "Use software LRO"
>        depends on IGB && INET
> -       select INET_LRO
>        ---help---
>          Say Y here if you want to use large receive offload.
>
> diff --git a/drivers/net/igb/igb.h b/drivers/net/igb/igb.h
> index 5a27825..7d8c887 100644
> --- a/drivers/net/igb/igb.h
> +++ b/drivers/net/igb/igb.h
> @@ -36,12 +36,6 @@
>
>  struct igb_adapter;
>
> -#ifdef CONFIG_IGB_LRO
> -#include <linux/inet_lro.h>
> -#define MAX_LRO_AGGR                      32
> -#define MAX_LRO_DESCRIPTORS                8
> -#endif
> -
>  /* Interrupt defines */
>  #define IGB_MIN_DYN_ITR 3000
>  #define IGB_MAX_DYN_ITR 96000
> @@ -176,10 +170,6 @@ struct igb_ring {
>                        struct napi_struct napi;
>                        int set_itr;
>                        struct igb_ring *buddy;
> -#ifdef CONFIG_IGB_LRO
> -                       struct net_lro_mgr lro_mgr;
> -                       bool lro_used;
> -#endif
>                };
>        };
>
> @@ -288,12 +278,6 @@ struct igb_adapter {
>        int need_ioport;
>
>        struct igb_ring *multi_tx_table[IGB_MAX_TX_QUEUES];
> -#ifdef CONFIG_IGB_LRO
> -       unsigned int lro_max_aggr;
> -       unsigned int lro_aggregated;
> -       unsigned int lro_flushed;
> -       unsigned int lro_no_desc;
> -#endif
>        unsigned int tx_ring_count;
>        unsigned int rx_ring_count;
>  };
> diff --git a/drivers/net/igb/igb_ethtool.c b/drivers/net/igb/igb_ethtool.c
> index 3c831f1..4606e63 100644
> --- a/drivers/net/igb/igb_ethtool.c
> +++ b/drivers/net/igb/igb_ethtool.c
> @@ -93,11 +93,6 @@ static const struct igb_stats igb_gstrings_stats[] = {
>        { "tx_smbus", IGB_STAT(stats.mgptc) },
>        { "rx_smbus", IGB_STAT(stats.mgprc) },
>        { "dropped_smbus", IGB_STAT(stats.mgpdc) },
> -#ifdef CONFIG_IGB_LRO
> -       { "lro_aggregated", IGB_STAT(lro_aggregated) },
> -       { "lro_flushed", IGB_STAT(lro_flushed) },
> -       { "lro_no_desc", IGB_STAT(lro_no_desc) },
> -#endif
>  };
>
>  #define IGB_QUEUE_STATS_LEN \
> @@ -1921,18 +1916,6 @@ static void igb_get_ethtool_stats(struct net_device *netdev,
>        int stat_count = sizeof(struct igb_queue_stats) / sizeof(u64);
>        int j;
>        int i;
> -#ifdef CONFIG_IGB_LRO
> -       int aggregated = 0, flushed = 0, no_desc = 0;
> -
> -       for (i = 0; i < adapter->num_rx_queues; i++) {
> -               aggregated += adapter->rx_ring[i].lro_mgr.stats.aggregated;
> -               flushed += adapter->rx_ring[i].lro_mgr.stats.flushed;
> -               no_desc += adapter->rx_ring[i].lro_mgr.stats.no_desc;
> -       }
> -       adapter->lro_aggregated = aggregated;
> -       adapter->lro_flushed = flushed;
> -       adapter->lro_no_desc = no_desc;
> -#endif
>
>        igb_update_stats(adapter);
>        for (i = 0; i < IGB_GLOBAL_STATS_LEN; i++) {
> diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
> index b82b0fb..ed3d881 100644
> --- a/drivers/net/igb/igb_main.c
> +++ b/drivers/net/igb/igb_main.c
> @@ -115,9 +115,6 @@ static bool igb_clean_tx_irq(struct igb_ring *);
>  static int igb_poll(struct napi_struct *, int);
>  static bool igb_clean_rx_irq_adv(struct igb_ring *, int *, int);
>  static void igb_alloc_rx_buffers_adv(struct igb_ring *, int);
> -#ifdef CONFIG_IGB_LRO
> -static int igb_get_skb_hdr(struct sk_buff *skb, void **, void **, u64 *, void *);
> -#endif
>  static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
>  static void igb_tx_timeout(struct net_device *);
>  static void igb_reset_task(struct work_struct *);
> @@ -1189,7 +1186,7 @@ static int __devinit igb_probe(struct pci_dev *pdev,
>        netdev->features |= NETIF_F_TSO6;
>
>  #ifdef CONFIG_IGB_LRO
> -       netdev->features |= NETIF_F_LRO;
> +       netdev->features |= NETIF_F_GRO;
>  #endif
>
>        netdev->vlan_features |= NETIF_F_TSO;
> @@ -1739,14 +1736,6 @@ int igb_setup_rx_resources(struct igb_adapter *adapter,
>        struct pci_dev *pdev = adapter->pdev;
>        int size, desc_len;
>
> -#ifdef CONFIG_IGB_LRO
> -       size = sizeof(struct net_lro_desc) * MAX_LRO_DESCRIPTORS;
> -       rx_ring->lro_mgr.lro_arr = vmalloc(size);
> -       if (!rx_ring->lro_mgr.lro_arr)
> -               goto err;
> -       memset(rx_ring->lro_mgr.lro_arr, 0, size);
> -#endif
> -
>        size = sizeof(struct igb_buffer) * rx_ring->count;
>        rx_ring->buffer_info = vmalloc(size);
>        if (!rx_ring->buffer_info)
> @@ -1773,10 +1762,6 @@ int igb_setup_rx_resources(struct igb_adapter *adapter,
>        return 0;
>
>  err:
> -#ifdef CONFIG_IGB_LRO
> -       vfree(rx_ring->lro_mgr.lro_arr);
> -       rx_ring->lro_mgr.lro_arr = NULL;
> -#endif
>        vfree(rx_ring->buffer_info);
>        dev_err(&adapter->pdev->dev, "Unable to allocate memory for "
>                "the receive descriptor ring\n");
> @@ -1930,16 +1915,6 @@ static void igb_configure_rx(struct igb_adapter *adapter)
>                rxdctl |= IGB_RX_HTHRESH << 8;
>                rxdctl |= IGB_RX_WTHRESH << 16;
>                wr32(E1000_RXDCTL(j), rxdctl);
> -#ifdef CONFIG_IGB_LRO
> -               /* Intitial LRO Settings */
> -               ring->lro_mgr.max_aggr = MAX_LRO_AGGR;
> -               ring->lro_mgr.max_desc = MAX_LRO_DESCRIPTORS;
> -               ring->lro_mgr.get_skb_header = igb_get_skb_hdr;
> -               ring->lro_mgr.features = LRO_F_NAPI | LRO_F_EXTRACT_VLAN_ID;
> -               ring->lro_mgr.dev = adapter->netdev;
> -               ring->lro_mgr.ip_summed = CHECKSUM_UNNECESSARY;
> -               ring->lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY;
> -#endif
>        }
>
>        if (adapter->num_rx_queues > 1) {
> @@ -2128,11 +2103,6 @@ void igb_free_rx_resources(struct igb_ring *rx_ring)
>        vfree(rx_ring->buffer_info);
>        rx_ring->buffer_info = NULL;
>
> -#ifdef CONFIG_IGB_LRO
> -       vfree(rx_ring->lro_mgr.lro_arr);
> -       rx_ring->lro_mgr.lro_arr = NULL;
> -#endif
> -
>        pci_free_consistent(pdev, rx_ring->size, rx_ring->desc, rx_ring->dma);
>
>        rx_ring->desc = NULL;
> @@ -3768,39 +3738,6 @@ static bool igb_clean_tx_irq(struct igb_ring *tx_ring)
>        return (count < tx_ring->count);
>  }
>
> -#ifdef CONFIG_IGB_LRO
> - /**
> - * igb_get_skb_hdr - helper function for LRO header processing
> - * @skb: pointer to sk_buff to be added to LRO packet
> - * @iphdr: pointer to ip header structure
> - * @tcph: pointer to tcp header structure
> - * @hdr_flags: pointer to header flags
> - * @priv: pointer to the receive descriptor for the current sk_buff
> - **/
> -static int igb_get_skb_hdr(struct sk_buff *skb, void **iphdr, void **tcph,
> -                           u64 *hdr_flags, void *priv)
> -{
> -       union e1000_adv_rx_desc *rx_desc = priv;
> -       u16 pkt_type = rx_desc->wb.lower.lo_dword.pkt_info &
> -                      (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP);
> -
> -       /* Verify that this is a valid IPv4 TCP packet */
> -       if (pkt_type != (E1000_RXDADV_PKTTYPE_IPV4 |
> -                         E1000_RXDADV_PKTTYPE_TCP))
> -               return -1;
> -
> -       /* Set network headers */
> -       skb_reset_network_header(skb);
> -       skb_set_transport_header(skb, ip_hdrlen(skb));
> -       *iphdr = ip_hdr(skb);
> -       *tcph = tcp_hdr(skb);
> -       *hdr_flags = LRO_IPV4 | LRO_TCP;
> -
> -       return 0;
> -
> -}
> -#endif /* CONFIG_IGB_LRO */
> -
>  /**
>  * igb_receive_skb - helper function to handle rx indications
>  * @ring: pointer to receive ring receving this packet
> @@ -3815,28 +3752,20 @@ static void igb_receive_skb(struct igb_ring *ring, u8 status,
>        struct igb_adapter * adapter = ring->adapter;
>        bool vlan_extracted = (adapter->vlgrp && (status & E1000_RXD_STAT_VP));
>
> -#ifdef CONFIG_IGB_LRO
> -       if (adapter->netdev->features & NETIF_F_LRO &&
> -           skb->ip_summed == CHECKSUM_UNNECESSARY) {
> +       if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
>                if (vlan_extracted)
> -                       lro_vlan_hwaccel_receive_skb(&ring->lro_mgr, skb,
> -                                          adapter->vlgrp,
> -                                          le16_to_cpu(rx_desc->wb.upper.vlan),
> -                                          rx_desc);
> +                       vlan_gro_receive(&ring->napi, adapter->vlgrp,
> +                                        le16_to_cpu(rx_desc->wb.upper.vlan),
> +                                        skb);
>                else
> -                       lro_receive_skb(&ring->lro_mgr,skb, rx_desc);
> -               ring->lro_used = 1;
> +                       napi_gro_receive(&ring->napi, skb);
>        } else {
> -#endif
>                if (vlan_extracted)
>                        vlan_hwaccel_receive_skb(skb, adapter->vlgrp,
>                                          le16_to_cpu(rx_desc->wb.upper.vlan));
>                else
> -
>                        netif_receive_skb(skb);
> -#ifdef CONFIG_IGB_LRO
>        }
> -#endif
>  }
>
>
> @@ -3991,13 +3920,6 @@ next_desc:
>        rx_ring->next_to_clean = i;
>        cleaned_count = IGB_DESC_UNUSED(rx_ring);
>
> -#ifdef CONFIG_IGB_LRO
> -       if (rx_ring->lro_used) {
> -               lro_flush_all(&rx_ring->lro_mgr);
> -               rx_ring->lro_used = 0;
> -       }
> -#endif
> -
>        if (cleaned_count)
>                igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
>
> Cheers,
> --
> Visit Openswan at http://www.openswan.org/
> Email: Herbert Xu ~{PmV>HI~} <herbert@...dor.apana.org.au>
> Home Page: http://gondor.apana.org.au/~herbert/
> PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

Ok, a couple of things...
First is that we should have the patch in testing (most likely today,
do not worry dave, it won't take a week)
Second is I am not sure you need to keep this code wraped in CONFIG_IGB_LRO...

  #ifdef CONFIG_IGB_LRO
 -       netdev->features |= NETIF_F_LRO;
 +       netdev->features |= NETIF_F_GRO;
  #endif

Also, you left part of the lro code in igb_receive_skb and then put in
the GRO code.  Our preference is that you mirrored what you did with
e1000e and just replaced the main vlan_hwacel and netif_receive_skb
calls.

-- 
Cheers,
Jeff
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ