lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Date:	Fri, 6 Apr 2007 12:51:03 +0200
From:	Ingo Molnar <mingo@...e.hu>
To:	Ayaz Abdulla <AAbdulla@...dia.com>
Cc:	akpm@...ux-foundation.org, jeff@...zik.org, netdev@...r.kernel.org,
	linux-kernel@...r.kernel.org, Adrian Bunk <bunk@...sta.de>
Subject: Re: [bug] forcedeth: hung interface under load


* Ingo Molnar <mingo@...e.hu> wrote:

> > there's a different type of regression now: under high load i don't 
> > get a crash, i get a hung interface instead. No error packets or 
> > other weird interface state - just a hung interface. [...]
> 
> the interface stats do not change from that point on:
> 
> eth1      Link encap:Ethernet  HWaddr 00:13:D4:DC:41:12
>           inet addr:10.0.1.12  Bcast:10.0.1.255  Mask:255.255.255.0
>           UP BROADCAST RUNNING MULTICAST  MTU:1500  Metric:1
>           RX packets:14976 errors:0 dropped:0 overruns:0 frame:0
>           TX packets:3928743 errors:0 dropped:0 overruns:0 carrier:0
>           collisions:0 txqueuelen:1000
>           RX bytes:1028544 (1004.4 KiB)  TX bytes:4126766510 (3.8 GiB)
>           Interrupt:16 Base address:0xa000
> 
> and the irq count does not change either:
> 
>  16:        816    3463148   IO-APIC-fasteoi   eth1
> 
> no matter what i do to the interface. So it's completely stuck. No 
> kernel messages either - apparently nv_tx_timeout() never triggered.

i've attached an ethtool dump, ifconfig output, interrupts output and 
lspci output of such a hang. Does the ethtool dump make any sense to 
you? The driver is -rc6 plus the changes below. (but the hang looks 
exactly the same as the one i got with an unmodified driver. the 
optimization_mode tweak is a new change too - btw, it drastically 
improves the performance and scalability of the driver, by not letting 
it do 100-200K irqs/sec (!).)

	Ingo

---------------->
Index: linux/drivers/net/forcedeth.c
===================================================================
--- linux.orig/drivers/net/forcedeth.c
+++ linux/drivers/net/forcedeth.c
@@ -800,7 +800,7 @@ struct fe_priv {
  * Maximum number of loops until we assume that a bit in the irq mask
  * is stuck. Overridable with module param.
  */
-static int max_interrupt_work = 5;
+static int max_interrupt_work = 50;
 
 /*
  * Optimization can be either throuput mode or cpu mode
@@ -812,7 +812,7 @@ enum {
 	NV_OPTIMIZATION_MODE_THROUGHPUT,
 	NV_OPTIMIZATION_MODE_CPU
 };
-static int optimization_mode = NV_OPTIMIZATION_MODE_THROUGHPUT;
+static int optimization_mode = NV_OPTIMIZATION_MODE_CPU;
 
 /*
  * Poll interval for timer irq
@@ -1902,6 +1902,11 @@ static void nv_tx_done(struct net_device
 						np->stats.tx_carrier_errors++;
 					np->stats.tx_errors++;
 				} else {
+					if (!np->get_tx_ctx->skb) {
+						printk("get_tx: %ld, put_tx: %ld\n", (long)(np->get_tx_ctx - np->first_tx_ctx), (long)(np->put_tx_ctx - np->first_tx_ctx));
+						WARN_ON(1);
+						break;
+					}
 					np->stats.tx_packets++;
 					np->stats.tx_bytes += np->get_tx_ctx->skb->len;
 				}
@@ -1917,6 +1922,11 @@ static void nv_tx_done(struct net_device
 						np->stats.tx_carrier_errors++;
 					np->stats.tx_errors++;
 				} else {
+					if (!np->get_tx_ctx->skb) {
+						printk("get_tx: %ld, put_tx: %ld\n", (long)(np->get_tx_ctx - np->first_tx_ctx), (long)(np->put_tx_ctx - np->first_tx_ctx));
+						WARN_ON(1);
+						break;
+					}
 					np->stats.tx_packets++;
 					np->stats.tx_bytes += np->get_tx_ctx->skb->len;
 				}
@@ -3108,9 +3118,17 @@ static int nv_napi_poll(struct net_devic
 	int retcode;
 
 	if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
+		spin_lock_irqsave(&np->lock, flags);
+		nv_tx_done(dev);
+		spin_unlock_irqrestore(&np->lock, flags);
+
 		pkts = nv_rx_process(dev, limit);
 		retcode = nv_alloc_rx(dev);
 	} else {
+		spin_lock_irqsave(&np->lock, flags);
+		nv_tx_done_optimized(dev, np->tx_ring_size);
+		spin_unlock_irqrestore(&np->lock, flags);
+
 		pkts = nv_rx_process_optimized(dev, limit);
 		retcode = nv_alloc_rx_optimized(dev);
 	}

View attachment "eth0.dump" of type "text/plain" (14205 bytes)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ