[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230922173626.23790-2-asmaa@nvidia.com>
Date: Fri, 22 Sep 2023 13:36:24 -0400
From: Asmaa Mnebhi <asmaa@...dia.com>
To: <davem@...emloft.net>, <edumazet@...gle.com>, <kuba@...nel.org>,
<pabeni@...hat.com>, <olteanv@...il.com>
CC: Asmaa Mnebhi <asmaa@...dia.com>, <netdev@...r.kernel.org>,
<davthompson@...dia.com>
Subject: [PATCH v3 1/3] mlxbf_gige: Fix kernel panic at shutdown
There is a race condition happening during shutdown due to pending napi transactions.
Since mlxbf_gige_poll is still running, it tries to access a NULL pointer and as a
result causes a kernel panic:
[ 284.074822] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000070
...
[ 284.322326] Call trace:
[ 284.324757] mlxbf_gige_handle_tx_complete+0xc8/0x170 [mlxbf_gige]
[ 284.330924] mlxbf_gige_poll+0x54/0x160 [mlxbf_gige]
[ 284.335876] __napi_poll+0x40/0x1c8
[ 284.339353] net_rx_action+0x314/0x3a0
[ 284.343086] __do_softirq+0x128/0x334
[ 284.346734] run_ksoftirqd+0x54/0x6c
[ 284.350294] smpboot_thread_fn+0x14c/0x190
[ 284.354375] kthread+0x10c/0x110
[ 284.357588] ret_from_fork+0x10/0x20
[ 284.361150] Code: 8b070000 f9000ea0 f95056c0 f86178a1 (b9407002)
[ 284.367227] ---[ end trace a18340bbb9ea2fa7 ]---
To fix this, invoke mlxbf_gige_remove to disable and dequeue napi during shutdown,
and also return in the case where "priv" is NULL in the poll function.
Fixes: f92e1869d74e ("Add Mellanox BlueField Gigabit Ethernet driver")
Signed-off-by: Asmaa Mnebhi <asmaa@...dia.com>
Reviewed-by: David Thompson <davthompson@...dia.com>
---
v2->v3:
- Add the logic to clean the port to the remove() function
v1-v2:
- make mlxbf_gige_shutdown() the same as the mlxbf_gige_remove()
.../mellanox/mlxbf_gige/mlxbf_gige_main.c | 21 ++++++++-----------
.../mellanox/mlxbf_gige/mlxbf_gige_rx.c | 3 +++
2 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
index 694de9513b9f..74185b02daa0 100644
--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
+++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
@@ -471,24 +471,21 @@ static int mlxbf_gige_probe(struct platform_device *pdev)
return err;
}
-static int mlxbf_gige_remove(struct platform_device *pdev)
+static void mlxbf_gige_remove(struct platform_device *pdev)
{
struct mlxbf_gige *priv = platform_get_drvdata(pdev);
+ if (!priv)
+ return;
+
+ writeq(0, priv->base + MLXBF_GIGE_INT_EN);
+ mlxbf_gige_clean_port(priv);
unregister_netdev(priv->netdev);
phy_disconnect(priv->netdev->phydev);
mlxbf_gige_mdio_remove(priv);
- platform_set_drvdata(pdev, NULL);
-
- return 0;
-}
-
-static void mlxbf_gige_shutdown(struct platform_device *pdev)
-{
- struct mlxbf_gige *priv = platform_get_drvdata(pdev);
-
writeq(0, priv->base + MLXBF_GIGE_INT_EN);
mlxbf_gige_clean_port(priv);
+ platform_set_drvdata(pdev, NULL);
}
static const struct acpi_device_id __maybe_unused mlxbf_gige_acpi_match[] = {
@@ -499,8 +496,8 @@ MODULE_DEVICE_TABLE(acpi, mlxbf_gige_acpi_match);
static struct platform_driver mlxbf_gige_driver = {
.probe = mlxbf_gige_probe,
- .remove = mlxbf_gige_remove,
- .shutdown = mlxbf_gige_shutdown,
+ .remove_new = mlxbf_gige_remove,
+ .shutdown = mlxbf_gige_remove,
.driver = {
.name = KBUILD_MODNAME,
.acpi_match_table = ACPI_PTR(mlxbf_gige_acpi_match),
diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c
index 0d5a41a2ae01..cfb8fb957f0c 100644
--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c
+++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c
@@ -298,6 +298,9 @@ int mlxbf_gige_poll(struct napi_struct *napi, int budget)
priv = container_of(napi, struct mlxbf_gige, napi);
+ if (!priv)
+ return 0;
+
mlxbf_gige_handle_tx_complete(priv);
do {
--
2.30.1
Powered by blists - more mailing lists