[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20221014103443.138574-1-ihuguet@redhat.com>
Date: Fri, 14 Oct 2022 12:34:43 +0200
From: Íñigo Huguet <ihuguet@...hat.com>
To: irusskikh@...vell.com, dbogdanov@...vell.com
Cc: davem@...emloft.net, edumazet@...gle.com, kuba@...nel.org,
pabeni@...hat.com, netdev@...r.kernel.org,
Íñigo Huguet <ihuguet@...hat.com>,
Li Liang <liali@...hat.com>
Subject: [PATCH net] atlantic: fix deadlock at aq_nic_stop
NIC is stopped with rtnl_lock held, and during the stop it cancels the
'service_task' work and free irqs.
However, if CONFIG_MACSEC is set, rtnl_lock is acquired both from
aq_nic_service_task and aq_linkstate_threaded_isr. Then a deadlock
happens if aq_nic_stop tries to cancel/disable them when they've already
started their execution.
As the deadlock is caused by rtnl_lock, it causes many other processes
to stall, not only atlantic related stuff.
Fix trying to acquire rtnl_lock at the beginning of those functions, and
returning if NIC closing is ongoing. Also do the "linkstate" stuff in a
workqueue instead than in a threaded irq, where sleeping or waiting a
mutex for a long time is discouraged.
The issue appeared repeteadly attaching and deattaching the NIC to a
bond interface. Doing that after this patch I cannot reproduce the bug.
Fixes: 62c1c2e606f6 ("net: atlantic: MACSec offload skeleton")
Reported-by: Li Liang <liali@...hat.com>
Signed-off-by: Íñigo Huguet <ihuguet@...hat.com>
---
.../ethernet/aquantia/atlantic/aq_macsec.c | 7 +--
.../net/ethernet/aquantia/atlantic/aq_nic.c | 59 ++++++++++++++++---
.../net/ethernet/aquantia/atlantic/aq_nic.h | 1 +
3 files changed, 55 insertions(+), 12 deletions(-)
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c b/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c
index 3d0e16791e1c..5759eba89db9 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c
@@ -1458,7 +1458,7 @@ int aq_macsec_enable(struct aq_nic_s *nic)
if (!nic->macsec_cfg)
return 0;
- rtnl_lock();
+ ASSERT_RTNL();
if (nic->aq_fw_ops->send_macsec_req) {
struct macsec_cfg_request cfg = { 0 };
@@ -1507,7 +1507,6 @@ int aq_macsec_enable(struct aq_nic_s *nic)
ret = aq_apply_macsec_cfg(nic);
unlock:
- rtnl_unlock();
return ret;
}
@@ -1519,9 +1518,9 @@ void aq_macsec_work(struct aq_nic_s *nic)
if (!netif_carrier_ok(nic->ndev))
return;
- rtnl_lock();
+ ASSERT_RTNL();
+
aq_check_txsa_expiration(nic);
- rtnl_unlock();
}
int aq_macsec_rx_sa_cnt(struct aq_nic_s *nic)
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index 06508eebb585..5cb7d165dd21 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -40,6 +40,8 @@ static unsigned int aq_itr_rx;
module_param_named(aq_itr_rx, aq_itr_rx, uint, 0644);
MODULE_PARM_DESC(aq_itr_rx, "RX interrupt throttle rate");
+#define AQ_TASK_RETRY_MS 50
+
static void aq_nic_update_ndev_stats(struct aq_nic_s *self);
static void aq_nic_rss_init(struct aq_nic_s *self, unsigned int num_rss_queues)
@@ -210,19 +212,41 @@ static int aq_nic_update_link_status(struct aq_nic_s *self)
return 0;
}
-static irqreturn_t aq_linkstate_threaded_isr(int irq, void *private)
+static irqreturn_t aq_linkstate_isr(int irq, void *private)
{
struct aq_nic_s *self = private;
if (!self)
return IRQ_NONE;
+ if (!aq_utils_obj_test(&self->flags, AQ_NIC_FLAG_CLOSING))
+ aq_ndev_schedule_work(&self->linkstate_task);
+
+ return IRQ_HANDLED;
+}
+
+static void aq_nic_linkstate_task(struct work_struct *work)
+{
+ struct aq_nic_s *self = container_of(work, struct aq_nic_s,
+ linkstate_task);
+
+#if IS_ENABLED(CONFIG_MACSEC)
+ /* avoid deadlock at aq_nic_stop -> cancel_work_sync */
+ while (!rtnl_trylock()) {
+ if (aq_utils_obj_test(&self->flags, AQ_NIC_FLAG_CLOSING))
+ return;
+ msleep(AQ_TASK_RETRY_MS);
+ }
+#endif
+
aq_nic_update_link_status(self);
+#if IS_ENABLED(CONFIG_MACSEC)
+ rtnl_unlock();
+#endif
+
self->aq_hw_ops->hw_irq_enable(self->aq_hw,
BIT(self->aq_nic_cfg.link_irq_vec));
-
- return IRQ_HANDLED;
}
static void aq_nic_service_task(struct work_struct *work)
@@ -236,12 +260,23 @@ static void aq_nic_service_task(struct work_struct *work)
if (aq_utils_obj_test(&self->flags, AQ_NIC_FLAGS_IS_NOT_READY))
return;
+#if IS_ENABLED(CONFIG_MACSEC)
+ /* avoid deadlock at aq_nic_stop -> cancel_work_sync */
+ while (!rtnl_trylock()) {
+ if (aq_utils_obj_test(&self->flags, AQ_NIC_FLAG_CLOSING))
+ return;
+ msleep(AQ_TASK_RETRY_MS);
+ }
+#endif
+
err = aq_nic_update_link_status(self);
if (err)
return;
#if IS_ENABLED(CONFIG_MACSEC)
aq_macsec_work(self);
+
+ rtnl_unlock();
#endif
mutex_lock(&self->fwreq_mutex);
@@ -505,6 +540,7 @@ int aq_nic_start(struct aq_nic_s *self)
if (err)
goto err_exit;
+ INIT_WORK(&self->linkstate_task, aq_nic_linkstate_task);
INIT_WORK(&self->service_task, aq_nic_service_task);
timer_setup(&self->service_timer, aq_nic_service_timer_cb, 0);
@@ -531,10 +567,9 @@ int aq_nic_start(struct aq_nic_s *self)
if (cfg->link_irq_vec) {
int irqvec = pci_irq_vector(self->pdev,
cfg->link_irq_vec);
- err = request_threaded_irq(irqvec, NULL,
- aq_linkstate_threaded_isr,
- IRQF_SHARED | IRQF_ONESHOT,
- self->ndev->name, self);
+ err = request_irq(irqvec, aq_linkstate_isr,
+ IRQF_SHARED | IRQF_ONESHOT,
+ self->ndev->name, self);
if (err < 0)
goto err_exit;
self->msix_entry_mask |= (1 << cfg->link_irq_vec);
@@ -1380,11 +1415,15 @@ int aq_nic_set_loopback(struct aq_nic_s *self)
int aq_nic_stop(struct aq_nic_s *self)
{
unsigned int i = 0U;
+ int ret;
+
+ aq_utils_obj_set(&self->flags, AQ_NIC_FLAG_CLOSING);
netif_tx_disable(self->ndev);
netif_carrier_off(self->ndev);
del_timer_sync(&self->service_timer);
+ cancel_work_sync(&self->linkstate_task);
cancel_work_sync(&self->service_task);
self->aq_hw_ops->hw_irq_disable(self->aq_hw, AQ_CFG_IRQ_MASK);
@@ -1401,7 +1440,11 @@ int aq_nic_stop(struct aq_nic_s *self)
aq_ptp_ring_stop(self);
- return self->aq_hw_ops->hw_stop(self->aq_hw);
+ ret = self->aq_hw_ops->hw_stop(self->aq_hw);
+
+ aq_utils_obj_clear(&self->flags, AQ_NIC_FLAG_CLOSING);
+
+ return ret;
}
void aq_nic_set_power(struct aq_nic_s *self)
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
index 935ba889bd9a..a114b66990a9 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
@@ -140,6 +140,7 @@ struct aq_nic_s {
const struct aq_fw_ops *aq_fw_ops;
struct aq_nic_cfg_s aq_nic_cfg;
struct timer_list service_timer;
+ struct work_struct linkstate_task;
struct work_struct service_task;
struct timer_list polling_timer;
struct aq_hw_link_status_s link_status;
--
2.34.1
Powered by blists - more mailing lists