Message-ID: <fa686aa40912041324t20c3d32dy3260ed313a205a93@mail.gmail.com>
Date:	Fri, 4 Dec 2009 14:24:03 -0700
From:	Grant Likely <grant.likely@...retlab.ca>
To:	netdev@...r.kernel.org, linuxppc-dev@...abs.org,
	"David S. Miller" <davem@...emloft.net>
Cc:	Grant Likely <grant.likely@...retlab.ca>,
	Asier Llano <a.llano@....es>
Subject: Re: [PATCH] Signed-off-by: Grant Likely <grant.likely@...retlab.ca>

Oops, sorry about the messed-up subject.  That was sloppy.  The real
subject should be:

net/mpc5200: Fix locking on fec_mpc52xx driver

On Fri, Dec 4, 2009 at 2:20 PM, Grant Likely <grant.likely@...retlab.ca> wrote:
> From: Asier Llano Palacios <asierllano@...il.com>
>
> net/mpc5200: Fix locking on fec_mpc52xx driver
>
> Fix the locking scheme on the fec_mpc52xx driver.  This device can
> receive IRQs from three sources: the FEC itself, the tx DMA, and the
> rx DMA.  Mutual exclusion was handled by taking a spin_lock() in the
> critical regions, but because the handlers are run with IRQs enabled,
> spin_lock() is insufficient and the driver can end up interrupting
> a critical region anyway from another IRQ.
>
> Asier Llano discovered that this occurs when an error IRQ is raised
> in the middle of handling rx IRQs, which results in an sk_buff memory
> leak.
>
> In addition, locking is spotty at best in the driver and inspection
> revealed quite a few places with insufficient locking.
>
> This patch is based on Asier's initial work, but reworks a number of
> things so that locks are held for as short a time as possible, so
> that spin_lock_irqsave() is used everywhere, and so the locks are
> dropped when calling into the network stack (because the lock only
> protects the hardware interface, not the network stack).
>
> Boot tested on a lite5200 with an NFS root.  Has not been performance
> tested.
>
> Signed-off-by: Asier Llano <a.llano@....es>
> Signed-off-by: Grant Likely <grant.likely@...retlab.ca>
> ---
>
> Asier, can you please test this?  It took me a while to respond to
> your initial post because I was concerned about the latency impact,
> in particular about disabling IRQs for long periods in the RX
> handler.  I think it should be good now, but it needs testing.
>
> Cheers,
> g.
>
>  drivers/net/fec_mpc52xx.c |  121 +++++++++++++++++++++++----------------------
>  1 files changed, 62 insertions(+), 59 deletions(-)
>
> diff --git a/drivers/net/fec_mpc52xx.c b/drivers/net/fec_mpc52xx.c
> index 66dace6..4889b4d 100644
> --- a/drivers/net/fec_mpc52xx.c
> +++ b/drivers/net/fec_mpc52xx.c
> @@ -85,11 +85,15 @@ MODULE_PARM_DESC(debug, "debugging messages level");
>
>  static void mpc52xx_fec_tx_timeout(struct net_device *dev)
>  {
> +       struct mpc52xx_fec_priv *priv = netdev_priv(dev);
> +       unsigned long flags;
> +
>        dev_warn(&dev->dev, "transmit timed out\n");
>
> +       spin_lock_irqsave(&priv->lock, flags);
>        mpc52xx_fec_reset(dev);
> -
>        dev->stats.tx_errors++;
> +       spin_unlock_irqrestore(&priv->lock, flags);
>
>        netif_wake_queue(dev);
>  }
> @@ -135,28 +139,32 @@ static void mpc52xx_fec_free_rx_buffers(struct net_device *dev, struct bcom_task
>        }
>  }
>
> +static void
> +mpc52xx_fec_rx_submit(struct net_device *dev, struct sk_buff *rskb)
> +{
> +       struct mpc52xx_fec_priv *priv = netdev_priv(dev);
> +       struct bcom_fec_bd *bd;
> +
> +       bd = (struct bcom_fec_bd *) bcom_prepare_next_buffer(priv->rx_dmatsk);
> +       bd->status = FEC_RX_BUFFER_SIZE;
> +       bd->skb_pa = dma_map_single(dev->dev.parent, rskb->data,
> +                                   FEC_RX_BUFFER_SIZE, DMA_FROM_DEVICE);
> +       bcom_submit_next_buffer(priv->rx_dmatsk, rskb);
> +}
> +
>  static int mpc52xx_fec_alloc_rx_buffers(struct net_device *dev, struct bcom_task *rxtsk)
>  {
> -       while (!bcom_queue_full(rxtsk)) {
> -               struct sk_buff *skb;
> -               struct bcom_fec_bd *bd;
> +       struct sk_buff *skb;
>
> +       while (!bcom_queue_full(rxtsk)) {
>                skb = dev_alloc_skb(FEC_RX_BUFFER_SIZE);
> -               if (skb == NULL)
> +               if (!skb)
>                        return -EAGAIN;
>
>                /* zero out the initial receive buffers to aid debugging */
>                memset(skb->data, 0, FEC_RX_BUFFER_SIZE);
> -
> -               bd = (struct bcom_fec_bd *)bcom_prepare_next_buffer(rxtsk);
> -
> -               bd->status = FEC_RX_BUFFER_SIZE;
> -               bd->skb_pa = dma_map_single(dev->dev.parent, skb->data,
> -                               FEC_RX_BUFFER_SIZE, DMA_FROM_DEVICE);
> -
> -               bcom_submit_next_buffer(rxtsk, skb);
> +               mpc52xx_fec_rx_submit(dev, skb);
>        }
> -
>        return 0;
>  }
>
> @@ -328,13 +336,12 @@ static int mpc52xx_fec_start_xmit(struct sk_buff *skb, struct net_device *dev)
>                                    DMA_TO_DEVICE);
>
>        bcom_submit_next_buffer(priv->tx_dmatsk, skb);
> +       spin_unlock_irqrestore(&priv->lock, flags);
>
>        if (bcom_queue_full(priv->tx_dmatsk)) {
>                netif_stop_queue(dev);
>        }
>
> -       spin_unlock_irqrestore(&priv->lock, flags);
> -
>        return NETDEV_TX_OK;
>  }
>
> @@ -359,9 +366,9 @@ static irqreturn_t mpc52xx_fec_tx_interrupt(int irq, void *dev_id)
>  {
>        struct net_device *dev = dev_id;
>        struct mpc52xx_fec_priv *priv = netdev_priv(dev);
> +       unsigned long flags;
>
> -       spin_lock(&priv->lock);
> -
> +       spin_lock_irqsave(&priv->lock, flags);
>        while (bcom_buffer_done(priv->tx_dmatsk)) {
>                struct sk_buff *skb;
>                struct bcom_fec_bd *bd;
> @@ -372,11 +379,10 @@ static irqreturn_t mpc52xx_fec_tx_interrupt(int irq, void *dev_id)
>
>                dev_kfree_skb_irq(skb);
>        }
> +       spin_unlock_irqrestore(&priv->lock, flags);
>
>        netif_wake_queue(dev);
>
> -       spin_unlock(&priv->lock);
> -
>        return IRQ_HANDLED;
>  }
>
> @@ -384,67 +390,60 @@ static irqreturn_t mpc52xx_fec_rx_interrupt(int irq, void *dev_id)
>  {
>        struct net_device *dev = dev_id;
>        struct mpc52xx_fec_priv *priv = netdev_priv(dev);
> +       struct sk_buff *rskb; /* received sk_buff */
> +       struct sk_buff *skb;  /* new sk_buff to enqueue in its place */
> +       struct bcom_fec_bd *bd;
> +       u32 status, physaddr;
> +       int length;
> +       unsigned long flags;
> +
> +       spin_lock_irqsave(&priv->lock, flags);
>
>        while (bcom_buffer_done(priv->rx_dmatsk)) {
> -               struct sk_buff *skb;
> -               struct sk_buff *rskb;
> -               struct bcom_fec_bd *bd;
> -               u32 status;
>
>                rskb = bcom_retrieve_buffer(priv->rx_dmatsk, &status,
> -                               (struct bcom_bd **)&bd);
> -               dma_unmap_single(dev->dev.parent, bd->skb_pa, rskb->len,
> -                                DMA_FROM_DEVICE);
> +                                           (struct bcom_bd **)&bd);
> +               physaddr = bd->skb_pa;
>
>                /* Test for errors in received frame */
>                if (status & BCOM_FEC_RX_BD_ERRORS) {
>                        /* Drop packet and reuse the buffer */
> -                       bd = (struct bcom_fec_bd *)
> -                               bcom_prepare_next_buffer(priv->rx_dmatsk);
> -
> -                       bd->status = FEC_RX_BUFFER_SIZE;
> -                       bd->skb_pa = dma_map_single(dev->dev.parent,
> -                                       rskb->data,
> -                                       FEC_RX_BUFFER_SIZE, DMA_FROM_DEVICE);
> -
> -                       bcom_submit_next_buffer(priv->rx_dmatsk, rskb);
> -
> +                       mpc52xx_fec_rx_submit(dev, rskb);
>                        dev->stats.rx_dropped++;
> -
>                        continue;
>                }
>
>                /* skbs are allocated on open, so now we allocate a new one,
>                 * and remove the old (with the packet) */
>                skb = dev_alloc_skb(FEC_RX_BUFFER_SIZE);
> -               if (skb) {
> -                       /* Process the received skb */
> -                       int length = status & BCOM_FEC_RX_BD_LEN_MASK;
> -
> -                       skb_put(rskb, length - 4);      /* length without CRC32 */
> -
> -                       rskb->dev = dev;
> -                       rskb->protocol = eth_type_trans(rskb, dev);
> -
> -                       netif_rx(rskb);
> -               } else {
> +               if (!skb) {
>                        /* Can't get a new one : reuse the same & drop pkt */
> -                       dev_notice(&dev->dev, "Memory squeeze, dropping packet.\n");
> +                       dev_notice(&dev->dev, "Low memory - dropped packet.\n");
> +                       mpc52xx_fec_rx_submit(dev, rskb);
>                        dev->stats.rx_dropped++;
> -
> -                       skb = rskb;
> +                       continue;
>                }
>
> -               bd = (struct bcom_fec_bd *)
> -                       bcom_prepare_next_buffer(priv->rx_dmatsk);
> +               /* Enqueue the new sk_buff back on the hardware */
> +               mpc52xx_fec_rx_submit(dev, skb);
>
> -               bd->status = FEC_RX_BUFFER_SIZE;
> -               bd->skb_pa = dma_map_single(dev->dev.parent, skb->data,
> -                               FEC_RX_BUFFER_SIZE, DMA_FROM_DEVICE);
> +               /* Process the received skb - Drop the spin lock while
> +                * calling into the network stack */
> +               spin_unlock_irqrestore(&priv->lock, flags);
>
> -               bcom_submit_next_buffer(priv->rx_dmatsk, skb);
> +               dma_unmap_single(dev->dev.parent, physaddr, rskb->len,
> +                                DMA_FROM_DEVICE);
> +               length = status & BCOM_FEC_RX_BD_LEN_MASK;
> +               skb_put(rskb, length - 4);      /* length without CRC32 */
> +               rskb->dev = dev;
> +               rskb->protocol = eth_type_trans(rskb, dev);
> +               netif_rx(rskb);
> +
> +               spin_lock_irqsave(&priv->lock, flags);
>        }
>
> +       spin_unlock_irqrestore(&priv->lock, flags);
> +
>        return IRQ_HANDLED;
>  }
>
> @@ -454,6 +453,7 @@ static irqreturn_t mpc52xx_fec_interrupt(int irq, void *dev_id)
>        struct mpc52xx_fec_priv *priv = netdev_priv(dev);
>        struct mpc52xx_fec __iomem *fec = priv->fec;
>        u32 ievent;
> +       unsigned long flags;
>
>        ievent = in_be32(&fec->ievent);
>
> @@ -471,9 +471,10 @@ static irqreturn_t mpc52xx_fec_interrupt(int irq, void *dev_id)
>                if (net_ratelimit() && (ievent & FEC_IEVENT_XFIFO_ERROR))
>                        dev_warn(&dev->dev, "FEC_IEVENT_XFIFO_ERROR\n");
>
> +               spin_lock_irqsave(&priv->lock, flags);
>                mpc52xx_fec_reset(dev);
> +               spin_unlock_irqrestore(&priv->lock, flags);
>
> -               netif_wake_queue(dev);
>                return IRQ_HANDLED;
>        }
>
> @@ -768,6 +769,8 @@ static void mpc52xx_fec_reset(struct net_device *dev)
>        bcom_enable(priv->tx_dmatsk);
>
>        mpc52xx_fec_start(dev);
> +
> +       netif_wake_queue(dev);
>  }
>
>
>
>
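
For anyone skimming the diff, the locking pattern the changelog describes
boils down to roughly the sketch below.  This is illustrative only and not
part of the patch: the example_* helpers and the priv layout are made up,
and it assumes the usual <linux/netdevice.h>, <linux/interrupt.h> and
<linux/spinlock.h> headers.  The point is simply to take the lock with
spin_lock_irqsave() so another IRQ source cannot re-enter the critical
region, and to drop the lock before handing an skb to the network stack,
since the lock only guards the hardware interface.

static irqreturn_t example_rx_interrupt(int irq, void *dev_id)
{
        struct net_device *dev = dev_id;
        struct example_priv *priv = netdev_priv(dev); /* hypothetical priv */
        unsigned long flags;

        spin_lock_irqsave(&priv->lock, flags);
        while (example_rx_buffer_done(priv)) {        /* hypothetical helper */
                /* hypothetical helper: pull the completed skb off the ring */
                struct sk_buff *skb = example_rx_retrieve(priv);

                /* Requeue a fresh buffer while still holding the lock, then
                 * drop the lock before calling into the network stack; the
                 * lock protects the hardware interface, not the stack. */
                example_rx_submit(priv);              /* hypothetical helper */
                spin_unlock_irqrestore(&priv->lock, flags);

                skb->protocol = eth_type_trans(skb, dev);
                netif_rx(skb);

                spin_lock_irqsave(&priv->lock, flags);
        }
        spin_unlock_irqrestore(&priv->lock, flags);

        return IRQ_HANDLED;
}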



-- 
Grant Likely, B.Sc., P.Eng.
Secret Lab Technologies Ltd.
