Message-ID: <d8b21ff4-422b-28ee-fc49-f8d9612ebccf@linux.vnet.ibm.com>
Date: Mon, 25 Jul 2022 15:13:23 -0700
From: David Christensen <drc@...ux.vnet.ibm.com>
To: davem@...emloft.net, Ariel Elior <aelior@...vell.com>,
Sudarsana Reddy Kalluru <skalluru@...vell.com>,
GR-everest-linux-l2@...vell.com
Cc: Netdev <netdev@...r.kernel.org>
Subject: EEH Error With NAPI_STATE_SCHED Set in BNX2X Results in RTNL Held
While Sleeping
While performing EEH error recovery tests, our internal test team
encountered an issue where the bnx2x driver holds the RTNL lock while
sleeping in napi_disable() on a SLES 15 SP3 system (kernel
5.3.18-150300.59.68) running network stress on multiple adapters, even
with a previous fix ("bnx2x: fix napi API usage sequence") applied to
the bnx2x driver.
crash> foreach UN bt
PID: 213 TASK: c000000004ba9f80 CPU: 9 COMMAND: "eehd"
#0 [c000000004c277e0] __schedule at c000000000c756a8
#1 [c000000004c278b0] schedule at c000000000c75d80
#2 [c000000004c278e0] schedule_timeout at c000000000c7bc8c
#3 [c000000004c279c0] msleep at c000000000212bec
#4 [c000000004c279f0] napi_disable at c000000000a0b2e8
#5 [c000000004c27a30] bnx2x_netif_stop at c0080000015cbbf4 [bnx2x]
#6 [c000000004c27a60] bnx2x_io_slot_reset at c00800000159555c [bnx2x]
#7 [c000000004c27b20] eeh_report_reset at c00000000004ca3c
#8 [c000000004c27b90] eeh_pe_report at c00000000004d228
#9 [c000000004c27c40] eeh_handle_normal_event at c00000000004dae4
#10 [c000000004c27d00] eeh_event_handler at c00000000004e3b0
#11 [c000000004c27db0] kthread at c000000000175f9c
#12 [c000000004c27e20] ret_from_kernel_thread at c00000000000cda8
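For reference, napi_disable() in this kernel family looks roughly like
the following (paraphrased from net/core/dev.c, not a verbatim quote);
eehd is msleep()ing in one of these loops and can never make progress
while the bit it is waiting on stays set:

void napi_disable(struct napi_struct *n)
{
        might_sleep();
        set_bit(NAPI_STATE_DISABLE, &n->state);

        /* eehd sleeps here until SCHED (and then NPSVC) can be
         * acquired; in this scenario nothing appears to clear them.
         */
        while (test_and_set_bit(NAPI_STATE_SCHED, &n->state))
                msleep(1);
        while (test_and_set_bit(NAPI_STATE_NPSVC, &n->state))
                msleep(1);

        hrtimer_cancel(&n->timer);

        clear_bit(NAPI_STATE_DISABLE, &n->state);
}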
What's interesting to me in this case is that the failure only occurs
when stress testing multiple adapters simultaneously and injecting the
EEH error to an upstream PCIe switch. If errors are injected to the
switch while running stress tests on a single NIC, no problem is observed.
Reviewing the bnx2x driver code did not reveal any obvious problems,
but reviewing the NAPI state machine suggested to me that there is no
practical way for a driver to abort a scheduled poll (i.e. when
NAPI_STATE_SCHED is set), so there is no obvious mechanism for the
driver to recover when the NIC registers are inaccessible due to the
EEH freeze.
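For context, in the normal flow NAPI_STATE_SCHED is only cleared by
napi_complete_done(), which the driver's ->poll() routine calls once the
hardware has been serviced. Roughly (a paraphrase of the tail of that
function, details omitted):

        do {
                val = READ_ONCE(n->state);

                WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED));

                new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED);

                /* A pending MISSED leaves SCHED set so the poll is
                 * re-run instead of being dropped.
                 */
                if (val & NAPIF_STATE_MISSED)
                        new |= NAPIF_STATE_SCHED;
        } while (cmpxchg(&n->state, val, new) != val);

If the scheduled poll never reaches that point, nothing else will drop
SCHED for the driver. The napi_err_cancel() attempt below follows the
same pattern, but drops MISSED instead of re-arming.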
My attempt to introduce such a mechanism, shown below, hasn't resolved
the problem. The NIC receives a hardware interrupt while the driver is
initializing, which results in a NULL pointer dereference, suggesting
I've missed some required step.
bool napi_err_cancel(struct napi_struct *n)
{
        unsigned long flags, new, val;

        /* Flush anything GRO is still holding for this NAPI instance. */
        if (n->gro_bitmask)
                napi_gro_flush(n, true);

        gro_normal_list(n);

        if (unlikely(!list_empty(&n->poll_list))) {
                /* If n->poll_list is not empty, we need to mask irqs */
                local_irq_save(flags);
                list_del_init(&n->poll_list);
                local_irq_restore(flags);
        }

        /* Clear SCHED (and any pending MISSED) so napi_disable() can
         * make progress; unlike napi_complete_done(), do not reschedule.
         */
        do {
                val = READ_ONCE(n->state);
                WARN_ON(test_bit(NAPI_STATE_DISABLE, &val));
                new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED);
        } while (cmpxchg(&n->state, val, new) != val);

        return val & NAPIF_STATE_SCHED;
}
EXPORT_SYMBOL(napi_err_cancel);
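For illustration, roughly the kind of call site I had in mind (a sketch
only; for_each_eth_queue() and bnx2x_fp() are the driver's existing
helpers, bnx2x_napi_cancel() is a name invented for this sketch):

/* Cancel any outstanding poll on every queue before bnx2x_netif_stop()
 * blocks in napi_disable() during slot reset.
 */
static void bnx2x_napi_cancel(struct bnx2x *bp)
{
        int i;

        for_each_eth_queue(bp, i)
                napi_err_cancel(&bnx2x_fp(bp, i, napi));
}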
My question: Is a new function required to abort a scheduled poll, and
if so, what else is required? I reviewed several other drivers that
support EEH and did not see any that appear to handle this situation.
Dave
P.S. A few additional details on the failure.
crash> net
NET_DEVICE NAME IP ADDRESS(ES)
c000000ef3bd3000 lo 127.0.0.1
c000000eef1f3000 eth0 9.3.233.22
c000000002490000 eth8 106.1.233.22 (tg3)
c000000002498000 eth1 107.1.233.22 (bnx2x)
c0000000024a4000 eth9 106.2.233.22 (tg3)
c0000000024b4000 eth10 109.1.233.22 (tg3)
c000000002494000 eth5 110.1.233.22 (e1000e)
c0000000024a8000 eth2 107.2.233.22 (bnx2x)
c0000000024b8000 eth6 109.2.233.22 (tg3)
c0000000024bc000 eth7 110.2.233.22 (e1000e)
c000000eb3000000 eth16 111.1.233.22 (mlx5)
c000000eab100000 eth15 111.2.233.22 (mlx5)
# Get address of bnx2x structure for eth1
crash> struct -ox net_device
struct net_device {
...
}
SIZE: 0x980
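# The 0x980 offset is just the standard netdev_priv() layout:
#   priv = (char *)dev + ALIGN(sizeof(struct net_device), NETDEV_ALIGN)
# so the bnx2x private data for eth1 starts at
#   c000000002498000 + 0x980 = c000000002498980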
crash> struct -x bnx2x c000000002498980
struct bnx2x {
fp = 0xc000000002520000,
...
}
# Observe NAPI struct in per-queue fastpath[0]
crash> struct -x bnx2x_fastpath c000000002520000
struct bnx2x_fastpath {
bp = 0xc000000002498980,
napi = {
poll_list = {
next = 0xc000000002520008,
prev = 0xc000000002520008
},
state = 13,
weight = 64,
defer_hard_irqs_count = 0,
gro_bitmask = 0,
poll = 0xc0080000015c8a88 <bnx2x_poll>,
poll_owner = -1,
dev = 0xc000000002498000,
gro_hash = {{
list = {
next = 0xc000000002520048,
prev = 0xc000000002520048
},
count = 0
}, {
list = {
next = 0xc000000002520060,
prev = 0xc000000002520060
},
count = 0
}, {
list = {
next = 0xc000000002520078,
prev = 0xc000000002520078
},
count = 0
}, {
list = {
next = 0xc000000002520090,
prev = 0xc000000002520090
},
count = 0
}, {
list = {
next = 0xc0000000025200a8,
prev = 0xc0000000025200a8
},
count = 0
}, {
list = {
next = 0xc0000000025200c0,
prev = 0xc0000000025200c0
},
count = 0
}, {
list = {
next = 0xc0000000025200d8,
prev = 0xc0000000025200d8
},
count = 0
}, {
list = {
next = 0xc0000000025200f0,
prev = 0xc0000000025200f0
},
count = 0
}},
skb = 0x0,
rx_list = {
next = 0xc000000002520110,
prev = 0xc000000002520110
},
rx_count = 0,
timer = {
node = {
node = {
__rb_parent_color = 13835058055321092392,
rb_right = 0x0,
rb_left = 0x0
},
expires = 0
},
_softexpires = 0,
function = 0xc000000000a0d2d0 <napi_watchdog>,
base = 0xc000000efce32380,
state = 0 '\000',
is_rel = 0 '\000',
is_soft = 0 '\000'
},
dev_list = {
next = 0xc000000002520168,
prev = 0xc000000002520168
},
napi_hash_node = {
next = 0x0,
pprev = 0x5deadbeef0000122
},
napi_id = 2284
},
...
}
crash> struct -ox bnx2x_fastpath
struct bnx2x_fastpath {
...
}
SIZE: 0x2f0
# ... and NAPI struct in fastpath[1]
crash> struct -x bnx2x_fastpath c0000000025202f0
struct bnx2x_fastpath {
bp = 0xc000000002498980,
napi = {
poll_list = {
next = 0xc0000000025202f8,
prev = 0xc0000000025202f8
},
state = 0x9,
weight = 0x40,
defer_hard_irqs_count = 0x0,
gro_bitmask = 0x0,
poll = 0xc0080000015c8a88 <bnx2x_poll>,
poll_owner = 0xffffffff,
dev = 0xc000000002498000,
gro_hash = {{
list = {
next = 0xc000000002520338,
prev = 0xc000000002520338
},
count = 0x0
}, {
list = {
next = 0xc000000002520350,
prev = 0xc000000002520350
},
count = 0x0
}, {
list = {
next = 0xc000000002520368,
prev = 0xc000000002520368
},
count = 0x0
}, {
list = {
next = 0xc000000002520380,
prev = 0xc000000002520380
},
count = 0x0
}, {
list = {
next = 0xc000000002520398,
prev = 0xc000000002520398
},
count = 0x0
}, {
list = {
next = 0xc0000000025203b0,
prev = 0xc0000000025203b0
},
count = 0x0
}, {
list = {
next = 0xc0000000025203c8,
prev = 0xc0000000025203c8
},
count = 0x0
}, {
list = {
next = 0xc0000000025203e0,
prev = 0xc0000000025203e0
},
count = 0x0
}},
skb = 0x0,
rx_list = {
next = 0xc000000002520400,
prev = 0xc000000002520400
},
rx_count = 0x0,
timer = {
node = {
node = {
__rb_parent_color = 0xc000000002520418,
rb_right = 0x0,
rb_left = 0x0
},
expires = 0x0
},
_softexpires = 0x0,
function = 0xc000000000a0d2d0 <napi_watchdog>,
base = 0xc000000efce32380,
state = 0x0,
is_rel = 0x0,
is_soft = 0x0
},
dev_list = {
next = 0xc000000002520458,
prev = 0xc000000002520458
},
napi_hash_node = {
next = 0x0,
pprev = 0x5deadbeef0000122
},
napi_id = 0x8ed
},
...
}
# ... fastpath[2..X] structs also show NAPI state as 9
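Decoding the state values, assuming the mainline NAPI bit layout (bit 0
SCHED, bit 1 MISSED, bit 2 DISABLE, bit 3 NPSVC; this distro kernel
carries backports, so that may not hold exactly):

#   fastpath[0]     state = 13  (0b1101) -> SCHED | DISABLE | NPSVC
#   fastpath[1..X]  state = 0x9 (0b1001) -> SCHED | NPSVC
#
# SCHED is set on every queue, and DISABLE only on fastpath[0], which is
# consistent with the eehd backtrace being stuck in napi_disable() on
# the first queue.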