lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <48B524D4.2010700@pobox.com>
Date:	Wed, 27 Aug 2008 05:56:36 -0400
From:	Jeff Garzik <jgarzik@...ox.com>
To:	Sebastian Siewior <bigeasy@...utronix.de>
CC:	netdev@...r.kernel.org, Andy Fleming <afleming@...escale.com>
Subject: Re: [PATCH] net: don't grab a mutex within a timer context in gianfar

Sebastian Siewior wrote:
> I got the following backtrace while the network was unavailable:
> 
> |NETDEV WATCHDOG: eth0: transmit timed out
> |BUG: sleeping function called from invalid context at /home/bigeasy/git/linux-2.6-powerpc/kernel/mutex.c:87
> |in_atomic():1, irqs_disabled():0
> |Call Trace:
> |[c0383d90] [c0006dd8] show_stack+0x48/0x184 (unreliable)
> |[c0383db0] [c001e938] __might_sleep+0xe0/0xf4
> |[c0383dc0] [c025a43c] mutex_lock+0x24/0x3c
> |[c0383de0] [c019005c] phy_stop+0x20/0x70
> |[c0383df0] [c018d4ec] stop_gfar+0x28/0xf4
> |[c0383e10] [c018e8c4] gfar_timeout+0x30/0x60
> |[c0383e20] [c01fe7c0] dev_watchdog+0xa8/0x144
> |[c0383e30] [c002f93c] run_timer_softirq+0x148/0x1c8
> |[c0383e60] [c002b084] __do_softirq+0x5c/0xc4
> |[c0383e80] [c00046fc] do_softirq+0x3c/0x54
> |[c0383e90] [c002ac60] irq_exit+0x3c/0x5c
> |[c0383ea0] [c000b378] timer_interrupt+0xe0/0xf8
> |[c0383ec0] [c000e5ac] ret_from_except+0x0/0x18
> |[c0383f80] [c000804c] cpu_idle+0xcc/0xdc
> |[c0383fa0] [c025c07c] etext+0x7c/0x90
> |[c0383fc0] [c0338960] start_kernel+0x294/0x2a8
> |[c0383ff0] [c00003dc] skpinv+0x304/0x340
> |------------[ cut here ]------------
> 
> The phylock was once a spinlock but got changed into a mutex via
> commit 35b5f6b1a aka [PHYLIB: Locking fixes for PHY I/O potentially sleeping]
> 
> Signed-off-by: Sebastian Siewior <bigeasy@...utronix.de>
> ---
> Jeff, this is a rebased post of [1]. The initial patch was acked by Andy
> Fleming. I just noticed that the initial patch does not apply due to David's
> NAPI changes. The initial patch is still useful for the stable tree since
> the bug was introduced in the .25 merge window.
> 
> [1] http://patchwork.ozlabs.org/linuxppc/patch?id=19808
> 
>  drivers/net/gianfar.c |   22 ++++++++++++++++++----
>  drivers/net/gianfar.h |    1 +
>  2 files changed, 19 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
> index 999d691..4320a98 100644
> --- a/drivers/net/gianfar.c
> +++ b/drivers/net/gianfar.c
> @@ -105,6 +105,7 @@ const char gfar_driver_version[] = "1.3";
>  
>  static int gfar_enet_open(struct net_device *dev);
>  static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev);
> +static void gfar_reset_task(struct work_struct *work);
>  static void gfar_timeout(struct net_device *dev);
>  static int gfar_close(struct net_device *dev);
>  struct sk_buff *gfar_new_skb(struct net_device *dev);
> @@ -209,6 +210,7 @@ static int gfar_probe(struct platform_device *pdev)
>  	spin_lock_init(&priv->txlock);
>  	spin_lock_init(&priv->rxlock);
>  	spin_lock_init(&priv->bflock);
> +	INIT_WORK(&priv->reset_task, gfar_reset_task);
>  
>  	platform_set_drvdata(pdev, dev);
>  
> @@ -1212,6 +1214,7 @@ static int gfar_close(struct net_device *dev)
>  
>  	napi_disable(&priv->napi);
>  
> +	cancel_work_sync(&priv->reset_task);
>  	stop_gfar(dev);
>  
>  	/* Disconnect from the PHY */
> @@ -1326,13 +1329,16 @@ static int gfar_change_mtu(struct net_device *dev, int new_mtu)
>  	return 0;
>  }
>  
> -/* gfar_timeout gets called when a packet has not been
> +/* gfar_reset_task gets scheduled when a packet has not been
>   * transmitted after a set amount of time.
>   * For now, assume that clearing out all the structures, and
> - * starting over will fix the problem. */
> -static void gfar_timeout(struct net_device *dev)
> + * starting over will fix the problem.
> + */
> +static void gfar_reset_task(struct work_struct *work)
>  {
> -	dev->stats.tx_errors++;
> +	struct gfar_private *priv = container_of(work, struct gfar_private,
> +			reset_task);
> +	struct net_device *dev = priv->dev;
>  
>  	if (dev->flags & IFF_UP) {
>  		stop_gfar(dev);
> @@ -1342,6 +1348,14 @@ static void gfar_timeout(struct net_device *dev)
>  	netif_tx_schedule_all(dev);
>  }
>  
> +static void gfar_timeout(struct net_device *dev)
> +{
> +	struct gfar_private *priv = netdev_priv(dev);
> +
> +	dev->stats.tx_errors++;
> +	schedule_work(&priv->reset_task);
> +}
> +
>  /* Interrupt Handler for Transmit complete */
>  static int gfar_clean_tx_ring(struct net_device *dev)
>  {
> diff --git a/drivers/net/gianfar.h b/drivers/net/gianfar.h
> index d59df98..f46e9b6 100644
> --- a/drivers/net/gianfar.h
> +++ b/drivers/net/gianfar.h
> @@ -756,6 +756,7 @@ struct gfar_private {
>  
>  	uint32_t msg_enable;
>  
> +	struct work_struct reset_task;
>  	/* Network Statistics */
>  	struct gfar_extra_stats extra_stats;
>  };

applied

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ