lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date:	Mon, 4 Jul 2011 15:40:01 +0800
From:	MaoXiaoyun <tinnycloud@...mail.com>
To:	<netdev@...r.kernel.org>
Subject: bnx2: FTQ dump on heavy workload(bnx2-2.0.23b + kernel 2.6.32.36)


 Hi:
 
 I have been hitting bnx2 FTQ dumps over and over again while testing Xen live migration, which
 generates a heavy network workload.
 
 I have two physical machines, both with Xen 4.0.1, kernel 2.6.32.36 and bnx2 2.0.23b installed.
 I start 15 virtual machines in total and migrate them between the hosts over and over again.
 After about 16 hours the network stops working; sometimes the NIC resets successfully, and
 sometimes it causes a kernel crash.
 
 I've tried to debug this by adding code to the driver. Below is the code that runs when the FTQ dump happens.
 It looks like the NIC stops transmitting packets, which causes the timeout.

 BTW, the CPU max_cstate is set to 1 in my GRUB configuration.
 
 Thanks.
 
 --------------
 static void
 bnx2_tx_timeout(struct net_device *dev)
 {
 struct bnx2 *bp = netdev_priv(dev);
 struct bnx2_napi *bnapi = &bp->bnx2_napi[0];
 struct bnx2_tx_ring_info *txr = &bnapi->tx_ring;
 struct bnx2_rx_ring_info *rxr = &bnapi->rx_ring;
 int i ;
 bnx2_dump_ftq(bp);
 bnx2_dump_state(bp);
 if (stop_on_tx_timeout) {
 printk(KERN_WARNING PFX
 "%s: prevent chip reset during tx timeout\n",
 bp->dev->name);
 smp_rmb();
 printk("last status idx %d \n", bnapi->last_status_idx);
 printk("hw_tx_cons %d, txr->hw_tx_conds %d txr->tx_prod %d txr->tx_cons %d\n",
 bnx2_get_hw_tx_cons(bnapi), txr->hw_tx_cons, txr->tx_prod, txr->tx_cons);
 printk("hw_rx_cons %d, txr->hw_rx_conds %d\n", bnx2_get_hw_rx_cons(bnapi), rxr->rx_cons);
 printk("sblk->status_attn_bits %d\n",bnapi->status_blk.msi->status_attn_bits);
 printk("sblk->status_attn_bits_ack %d\n",bnapi->status_blk.msi->status_attn_bits_ack);
 printk("bnx2_tx_avail %d \n",(bnx2_tx_avail(bp, txr)));
 printk("sblk->status_tx_quick_consumer_index0 %d\n",bnapi->status_blk.msi->status_tx_quick_consumer_index0);
 printk("sblk->status_tx_quick_consumer_index1 %d\n",bnapi->status_blk.msi->status_tx_quick_consumer_index1);
 printk("sblk->status_tx_quick_consumer_index2 %d\n",bnapi->status_blk.msi->status_tx_quick_consumer_index2);
 printk("sblk->status_tx_quick_consumer_index3 %d\n",bnapi->status_blk.msi->status_tx_quick_consumer_index3);
 printk("sblk->status_rx_quick_consumer_index0 %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index0);
 printk("sblk->status_rx_quick_consumer_index1 %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index1);
 printk("sblk->status_rx_quick_consumer_index2 %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index2);
 printk("sblk->status_rx_quick_consumer_index3 %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index3);
 printk("sblk->status_rx_quick_consumer_index4 %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index4);
 printk("sblk->status_rx_quick_consumer_index5 %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index5);
 printk("sblk->status_rx_quick_consumer_index6 %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index6);
 printk("sblk->status_rx_quick_consumer_index7 %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index7);
 printk("sblk->status_rx_quick_consumer_index8 %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index8);
 printk("sblk->status_rx_quick_consumer_index9 %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index9);
 printk("sblk->status_rx_quick_consumer_index10 %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index10);
 printk("sblk->status_rx_quick_consumer_index11 %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index11);
 printk("sblk->status_rx_quick_consumer_index12 %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index12);
 printk("sblk->status_rx_quick_consumer_index13 %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index13);
 printk("sblk->status_rx_quick_consumer_index14 %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index14);
 printk("sblk->status_rx_quick_consumer_index15 %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index15);
 printk("sblk->status_completion_producer_index %d\n",bnapi->status_blk.msi->status_completion_producer_index);
 printk("sblk->status_cmd_consumer_index %d\n",bnapi->status_blk.msi->status_cmd_consumer_index);
 printk("sblk->status_idx %d\n",bnapi->status_blk.msi->status_idx);
 printk("sblk->status_unused %d\n",bnapi->status_blk.msi->status_unused);
 printk("sblk->status_blk_num %d\n",bnapi->status_blk.msi->status_blk_num);
 is_timedout = 1;
 for (i = 0; i < bp->irq_nvecs; i++) {
 bnapi = &bp->bnx2_napi[i];
 bnx2_tx_int(bp, bnapi, 0);
 }
 return;
 }
 -----------------

 -------------FTQ log in /var/log/message
 ------------[ cut here ]------------
 WARNING: at net/sched/sch_generic.c:261 dev_watchdog+0x105/0x16a()
 Hardware name: Tecal RH2285
 Modules linked in: iptable_filter ip_tables nfs fscache nfs_acl auth_rpcgss bridge stp llc autofs4 ipmi_devintf ipmi_si ipmi_msghandler lockd sunrpc ipv6 xenfs dm_multipath fuse xen_netback xen_blkback blktap blkback_pagemap loop nbd video output sbs sbshc parport_pc lp parport snd_seq_dummy snd_seq_oss snd_seq_midi_event snd_seq snd_seq_device snd_pcm_oss snd_mixer_oss bnx2 serio_raw snd_pcm snd_timer snd soundcore snd_page_alloc i2c_i801 iTCO_wdt iTCO_vendor_support i2c_core pata_acpi ata_generic pcspkr ata_piix shpchp mptsas mptscsih mptbase [last unloaded: freq_table]
 Pid: 0, comm: swapper Not tainted 2.6.32.36xen #1
 Call Trace:
 <IRQ> [<ffffffff813ba154>] ? dev_watchdog+0x105/0x16a
 [<ffffffff81056666>] warn_slowpath_common+0x7c/0x94
 [<ffffffff81056738>] warn_slowpath_fmt+0xa4/0xa6
 [<ffffffff81080bfa>] ? clockevents_program_event+0x78/0x81
 [<ffffffff81081fce>] ? tick_program_event+0x2a/0x2c
 [<ffffffff813b951d>] ? __netif_tx_lock+0x1b/0x24
 [<ffffffff813b95a8>] ? netif_tx_lock+0x46/0x6e
 [<ffffffff813a3ed1>] ? netdev_drivername+0x48/0x4f
 [<ffffffff813ba154>] dev_watchdog+0x105/0x16a
 [<ffffffff81063d98>] run_timer_softirq+0x156/0x1f8
 [<ffffffff813ba04f>] ? dev_watchdog+0x0/0x16a
 [<ffffffff8105d6f0>] __do_softirq+0xd7/0x19e
 [<ffffffff81013eac>] call_softirq+0x1c/0x30
 [<ffffffff8101564b>] do_softirq+0x46/0x87
 [<ffffffff8105d575>] irq_exit+0x3b/0x7a
 [<ffffffff8128dcfe>] xen_evtchn_do_upcall+0x38/0x46
 [<ffffffff81013efe>] xen_do_hypervisor_callback+0x1e/0x30
 <EOI> [<ffffffff8103f642>] ? pick_next_task_idle+0x18/0x22
 [<ffffffff810093aa>] ? hypercall_page+0x3aa/0x1000
 [<ffffffff810093aa>] ? hypercall_page+0x3aa/0x1000
 [<ffffffff8100f1bb>] ? xen_safe_halt+0x10/0x1a
 [<ffffffff81019e14>] ? default_idle+0x39/0x56
 [<ffffffff81011cd0>] ? cpu_idle+0x5d/0x8c
 [<ffffffff8143375d>] ? cpu_bringup_and_idle+0x13/0x15
 ---[ end trace 867bb8f6cd959b03 ]---
 bnx2: <--- start FTQ dump on peth0 --->
 bnx2: peth0: BNX2_RV2P_PFTQ_CTL 10000
 bnx2: peth0: BNX2_RV2P_TFTQ_CTL 20000
 bnx2: peth0: BNX2_RV2P_MFTQ_CTL 4000
 bnx2: peth0: BNX2_TBDR_FTQ_CTL 1004002
 bnx2: peth0: BNX2_TDMA_FTQ_CTL 4010002
 bnx2: peth0: BNX2_TXP_FTQ_CTL 2410002
 bnx2: peth0: BNX2_TPAT_FTQ_CTL 10002
 bnx2: peth0: BNX2_RXP_CFTQ_CTL 8000
 bnx2: peth0: BNX2_RXP_FTQ_CTL 100000
 bnx2: peth0: BNX2_COM_COMXQ_FTQ_CTL 10000
 bnx2: peth0: BNX2_COM_COMTQ_FTQ_CTL 20000
 bnx2: peth0: BNX2_COM_COMQ_FTQ_CTL 10000
 bnx2: peth0: BNX2_CP_CPQ_FTQ_CTL 4000
 bnx2: peth0: TXP mode b84c state 80005000 evt_mask 500 pc 8000d60 pc 8000d60 instr 8f860000
 bnx2: peth0: TPAT mode b84c state 80009000 evt_mask 500 pc 8000a5c pc 8000a5c instr 10400016
 bnx2: peth0: RXP mode b84c state 80001000 evt_mask 500 pc 8004c14 pc 8004c14 instr 10e00088
 bnx2: peth0: COM mode b8cc state 80000000 evt_mask 500 pc 8000b28 pc 8000a9c instr 8c530000
 bnx2: peth0: CP mode b8cc state 80000000 evt_mask 500 pc 8000c50 pc 8000c58 instr 8ca50020
 bnx2: <--- end FTQ dump on peth0 --->
 bnx2: peth0 DEBUG: intr_sem[0]
 bnx2: peth0 DEBUG: intr_sem[0] PCI_CMD[20100406]
 bnx2: peth0 DEBUG: PCI_PM[19002008] PCI_MISC_CFG[92000088]
 bnx2: peth0 DEBUG: EMAC_TX_STATUS[00000008] EMAC_RX_STATUS[00000000]
 bnx2: peth0 RPM_MGMT_PKT_CTRL[40000088]
 bnx2: peth0 DEBUG: MCP_STATE_P0[0007e10e] MCP_STATE_P1[0003e00e]
 bnx2: peth0 DEBUG: HC_STATS_INTERRUPT_STATUS[01ff0000]
 bnx2: peth0 DEBUG: PBA[00000000]
 BNX2_PCICFG_INT_ACK_CMD[00013ce1]
 bnx2: peth0: prevent chip reset during tx timeout
 last status idx 2426
 hw_tx_cons 32474, txr->hw_tx_conds 32474 txr->tx_prod 32641 txr->tx_cons 32474
 hw_rx_cons 19665, txr->hw_rx_conds 19665
 sblk->status_attn_bits 1
 sblk->status_attn_bits_ack 1
 bnx2_tx_avail 88
 sblk->status_tx_quick_consumer_index0 32474
 sblk->status_tx_quick_consumer_index1 0
 sblk->status_tx_quick_consumer_index2 0
 sblk->status_tx_quick_consumer_index3 0
 sblk->status_rx_quick_consumer_index0 19665
 sblk->status_rx_quick_consumer_index1 0
 sblk->status_rx_quick_consumer_index2 0
 sblk->status_rx_quick_consumer_index3 0
 sblk->status_rx_quick_consumer_index4 0
 sblk->status_rx_quick_consumer_index5 0
 sblk->status_rx_quick_consumer_index6 0
 sblk->status_rx_quick_consumer_index7 0
 sblk->status_rx_quick_consumer_index8 0
 sblk->status_rx_quick_consumer_index9 0
 sblk->status_rx_quick_consumer_index10 0
 sblk->status_rx_quick_consumer_index11 0
 sblk->status_rx_quick_consumer_index12 0
 sblk->status_rx_quick_consumer_index13 0
 sblk->status_rx_quick_consumer_index14 0
 sblk->status_rx_quick_consumer_index15 0
 sblk->status_completion_producer_index 0
 sblk->status_cmd_consumer_index 0
 sblk->status_idx 2426
 sblk->status_unused 0
 sblk->status_blk_num 0
 hw_cons 32474 sw_cons 32474 ffff8801d27f85c0 bnapi
 return hw_cons 32474 sw_cons 32474 ffff8801d27f85c0 bnapi
 hw_cons 3628 sw_cons 3625 ffff8801d27f8bc0 bnapi
 return hw_cons 3628 sw_cons 3625 ffff8801d27f8bc0 bnapi
 hw_cons 62094 sw_cons 62090 ffff8801d27f91c0 bnapi
 return hw_cons 62094 sw_cons 62090 ffff8801d27f91c0 bnapi
 hw_cons 3184 sw_cons 3173 ffff8801d27f97c0 bnapi
 return hw_cons 3184 sw_cons 3173 ffff8801d27f97c0 bnapi
 hw_cons 0 sw_cons 0 ffff8801d27f9dc0 bnapi
 return hw_cons 0 sw_cons 0 ffff8801d27f9dc0 bnapi 		 	   		  
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ