lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <AANLkTik4aWREeblhnbI3DU7a_CE11Rx0GvXi56O2yHzy@mail.gmail.com>
Date:	Fri, 4 Jun 2010 11:29:44 +0800
From:	Sonic Zhang <sonic.adi@...il.com>
To:	David Miller <davem@...emloft.net>
Cc:	netdev <netdev@...r.kernel.org>,
	uclinux-dist-devel <uclinux-dist-devel@...ckfin.uclinux.org>
Subject: Re: [PATCH v2] netdev:bfin_mac: reclaim and free tx skb as soon as 
	possible after transfer

David,

Any comments?

Thanks

Sonic

On Thu, Jun 3, 2010 at 11:48 AM, sonic zhang <sonic.adi@...il.com> wrote:
> >From 40560ae9e8db42e2d2259b791ace160534c9a0f2 Mon Sep 17 00:00:00 2001
> From: Sonic Zhang <sonic.zhang@...log.com>
> Date: Thu, 3 Jun 2010 11:44:33 +0800
> Subject: [PATCH v2] netdev:bfin_mac: reclaim and free tx skb as soon as possible after transfer
>
> SKBs hold onto resources that can't be held indefinitely, such as TCP
> socket references and netfilter conntrack state.  So if a packet is left
> in TX ring for a long time, there might be a TCP socket that cannot be
> closed and freed up.
>
> Current blackfin EMAC driver always reclaim and free used tx skbs in future
> transfers. The problem is that future transfer may not come as soon as
> possible. This patch start a timer after transfer to reclaim and free skb.
> There is nearly no performance drop with this patch.
>
> TX interrupt is not enabled for 2 reasons:
>
> 1) If Blackfin EMAC TX transfer control is turned on, endless TX
> interrupts are triggered no matter if TX DMA is enabled. Since DMA walks
> down the ring automatically, TX transfer control can't be turned off in the
> middle. The only way is to disable TX interrupt completely.
>
> 2) skb can not be freed from interrupt context. A work queue or tasklet
> has to be created, which introduce more overhead than timer only solution.
>
> Signed-off-by: Sonic Zhang <sonic.zhang@...log.com>
> ---
>  drivers/net/bfin_mac.c |  114 ++++++++++++++++++++++++++++++------------------
>  drivers/net/bfin_mac.h |    5 ++
>  2 files changed, 77 insertions(+), 42 deletions(-)
>
> diff --git a/drivers/net/bfin_mac.c b/drivers/net/bfin_mac.c
> index 368f333..6f0755f 100644
> --- a/drivers/net/bfin_mac.c
> +++ b/drivers/net/bfin_mac.c
> @@ -922,61 +922,73 @@ static void bfin_mac_hwtstamp_init(struct net_device *netdev)
>  # define bfin_tx_hwtstamp(dev, skb)
>  #endif
>
> -static void adjust_tx_list(void)
> +static inline void _tx_reclaim_skb(void)
> +{
> +       do {
> +               tx_list_head->desc_a.config &= ~DMAEN;
> +               tx_list_head->status.status_word = 0;
> +               if (tx_list_head->skb) {
> +                       dev_kfree_skb(tx_list_head->skb);
> +                       tx_list_head->skb = NULL;
> +               }
> +               tx_list_head = tx_list_head->next;
> +
> +       } while (tx_list_head->status.status_word != 0);
> +}
> +
> +static void tx_reclaim_skb(struct bfin_mac_local *lp)
>  {
>        int timeout_cnt = MAX_TIMEOUT_CNT;
>
> -       if (tx_list_head->status.status_word != 0 &&
> -           current_tx_ptr != tx_list_head) {
> -               goto adjust_head;       /* released something, just return; */
> -       }
> +       if (tx_list_head->status.status_word != 0)
> +               _tx_reclaim_skb();
>
> -       /*
> -        * if nothing released, check wait condition
> -        * current's next can not be the head,
> -        * otherwise the dma will not stop as we want
> -        */
> -       if (current_tx_ptr->next->next == tx_list_head) {
> +       if (current_tx_ptr->next == tx_list_head) {
>                while (tx_list_head->status.status_word == 0) {
> +                       /* slow down polling to avoid too many queue stop. */
>                        udelay(10);
> -                       if (tx_list_head->status.status_word != 0 ||
> -                           !(bfin_read_DMA2_IRQ_STATUS() & DMA_RUN)) {
> -                               goto adjust_head;
> -                       }
> -                       if (timeout_cnt-- < 0) {
> -                               printk(KERN_ERR DRV_NAME
> -                               ": wait for adjust tx list head timeout\n");
> +                       /* reclaim skb if DMA is not running. */
> +                       if (!(bfin_read_DMA2_IRQ_STATUS() & DMA_RUN))
> +                               break;
> +                       if (timeout_cnt-- < 0)
>                                break;
> -                       }
> -               }
> -               if (tx_list_head->status.status_word != 0) {
> -                       goto adjust_head;
>                }
> +
> +               if (timeout_cnt >= 0)
> +                       _tx_reclaim_skb();
> +               else
> +                       netif_stop_queue(lp->ndev);
>        }
>
> -       return;
> +       if (current_tx_ptr->next != tx_list_head &&
> +               netif_queue_stopped(lp->ndev))
> +               netif_wake_queue(lp->ndev);
> +
> +       if (tx_list_head != current_tx_ptr) {
> +               /* shorten the timer interval if tx queue is stopped */
> +               if (netif_queue_stopped(lp->ndev))
> +                       lp->tx_reclaim_timer.expires =
> +                               jiffies + (TX_RECLAIM_JIFFIES >> 4);
> +               else
> +                       lp->tx_reclaim_timer.expires =
> +                               jiffies + TX_RECLAIM_JIFFIES;
> +
> +               mod_timer(&lp->tx_reclaim_timer,
> +                       lp->tx_reclaim_timer.expires);
> +       }
>
> -adjust_head:
> -       do {
> -               tx_list_head->desc_a.config &= ~DMAEN;
> -               tx_list_head->status.status_word = 0;
> -               if (tx_list_head->skb) {
> -                       dev_kfree_skb(tx_list_head->skb);
> -                       tx_list_head->skb = NULL;
> -               } else {
> -                       printk(KERN_ERR DRV_NAME
> -                              ": no sk_buff in a transmitted frame!\n");
> -               }
> -               tx_list_head = tx_list_head->next;
> -       } while (tx_list_head->status.status_word != 0 &&
> -                current_tx_ptr != tx_list_head);
>        return;
> +}
>
> +static void tx_reclaim_skb_timeout(unsigned long lp)
> +{
> +       tx_reclaim_skb((struct bfin_mac_local *)lp);
>  }
>
>  static int bfin_mac_hard_start_xmit(struct sk_buff *skb,
>                                struct net_device *dev)
>  {
> +       struct bfin_mac_local *lp = netdev_priv(dev);
>        u16 *data;
>        u32 data_align = (unsigned long)(skb->data) & 0x3;
>        union skb_shared_tx *shtx = skb_tx(skb);
> @@ -1009,8 +1021,6 @@ static int bfin_mac_hard_start_xmit(struct sk_buff *skb,
>                        skb->len);
>                current_tx_ptr->desc_a.start_addr =
>                        (u32)current_tx_ptr->packet;
> -               if (current_tx_ptr->status.status_word != 0)
> -                       current_tx_ptr->status.status_word = 0;
>                blackfin_dcache_flush_range(
>                        (u32)current_tx_ptr->packet,
>                        (u32)(current_tx_ptr->packet + skb->len + 2));
> @@ -1022,6 +1032,9 @@ static int bfin_mac_hard_start_xmit(struct sk_buff *skb,
>         */
>        SSYNC();
>
> +       /* always clear status buffer before start tx dma */
> +       current_tx_ptr->status.status_word = 0;
> +
>        /* enable this packet's dma */
>        current_tx_ptr->desc_a.config |= DMAEN;
>
> @@ -1037,13 +1050,14 @@ static int bfin_mac_hard_start_xmit(struct sk_buff *skb,
>        bfin_write_EMAC_OPMODE(bfin_read_EMAC_OPMODE() | TE);
>
>  out:
> -       adjust_tx_list();
> -
>        bfin_tx_hwtstamp(dev, skb);
>
>        current_tx_ptr = current_tx_ptr->next;
>        dev->stats.tx_packets++;
>        dev->stats.tx_bytes += (skb->len);
> +
> +       tx_reclaim_skb(lp);
> +
>        return NETDEV_TX_OK;
>  }
>
> @@ -1167,8 +1181,11 @@ real_rx:
>  #ifdef CONFIG_NET_POLL_CONTROLLER
>  static void bfin_mac_poll(struct net_device *dev)
>  {
> +       struct bfin_mac_local *lp = netdev_priv(dev);
> +
>        disable_irq(IRQ_MAC_RX);
>        bfin_mac_interrupt(IRQ_MAC_RX, dev);
> +       tx_reclaim_skb(lp);
>        enable_irq(IRQ_MAC_RX);
>  }
>  #endif                         /* CONFIG_NET_POLL_CONTROLLER */
> @@ -1232,12 +1249,20 @@ static int bfin_mac_enable(void)
>  /* Our watchdog timed out. Called by the networking layer */
>  static void bfin_mac_timeout(struct net_device *dev)
>  {
> +       struct bfin_mac_local *lp = netdev_priv(dev);
> +
>        pr_debug("%s: %s\n", dev->name, __func__);
>
>        bfin_mac_disable();
>
> +       if (timer_pending(&lp->tx_reclaim_timer))
> +               del_timer(&(lp->tx_reclaim_timer));
> +
>        /* reset tx queue */
> -       tx_list_tail = tx_list_head->next;
> +       current_tx_ptr = tx_list_head;
> +
> +       if (netif_queue_stopped(lp->ndev))
> +               netif_wake_queue(lp->ndev);
>
>        bfin_mac_enable();
>
> @@ -1430,6 +1455,7 @@ static int __devinit bfin_mac_probe(struct platform_device *pdev)
>        SET_NETDEV_DEV(ndev, &pdev->dev);
>        platform_set_drvdata(pdev, ndev);
>        lp = netdev_priv(ndev);
> +       lp->ndev = ndev;
>
>        /* Grab the MAC address in the MAC */
>        *(__le32 *) (&(ndev->dev_addr[0])) = cpu_to_le32(bfin_read_EMAC_ADDRLO());
> @@ -1485,6 +1511,10 @@ static int __devinit bfin_mac_probe(struct platform_device *pdev)
>        ndev->netdev_ops = &bfin_mac_netdev_ops;
>        ndev->ethtool_ops = &bfin_mac_ethtool_ops;
>
> +       init_timer(&lp->tx_reclaim_timer);
> +       lp->tx_reclaim_timer.data = (unsigned long)lp;
> +       lp->tx_reclaim_timer.function = tx_reclaim_skb_timeout;
> +
>        spin_lock_init(&lp->lock);
>
>        /* now, enable interrupts */
> diff --git a/drivers/net/bfin_mac.h b/drivers/net/bfin_mac.h
> index 1ae7b82..04e4050 100644
> --- a/drivers/net/bfin_mac.h
> +++ b/drivers/net/bfin_mac.h
> @@ -13,9 +13,12 @@
>  #include <linux/net_tstamp.h>
>  #include <linux/clocksource.h>
>  #include <linux/timecompare.h>
> +#include <linux/timer.h>
>
>  #define BFIN_MAC_CSUM_OFFLOAD
>
> +#define TX_RECLAIM_JIFFIES (HZ / 5)
> +
>  struct dma_descriptor {
>        struct dma_descriptor *next_dma_desc;
>        unsigned long start_addr;
> @@ -68,6 +71,8 @@ struct bfin_mac_local {
>
>        int wol;                /* Wake On Lan */
>        int irq_wake_requested;
> +       struct timer_list tx_reclaim_timer;
> +       struct net_device *ndev;
>
>        /* MII and PHY stuffs */
>        int old_link;          /* used by bf537_adjust_link */
> --
> 1.6.0
>
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@...r.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ