lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20221014103443.138574-1-ihuguet@redhat.com>
Date:   Fri, 14 Oct 2022 12:34:43 +0200
From:   Íñigo Huguet <ihuguet@...hat.com>
To:     irusskikh@...vell.com, dbogdanov@...vell.com
Cc:     davem@...emloft.net, edumazet@...gle.com, kuba@...nel.org,
        pabeni@...hat.com, netdev@...r.kernel.org,
        Íñigo Huguet <ihuguet@...hat.com>,
        Li Liang <liali@...hat.com>
Subject: [PATCH net] atlantic: fix deadlock at aq_nic_stop

NIC is stopped with rtnl_lock held, and during the stop it cancels the
'service_task' work and free irqs.

However, if CONFIG_MACSEC is set, rtnl_lock is acquired both from
aq_nic_service_task and aq_linkstate_threaded_isr. Then a deadlock
happens if aq_nic_stop tries to cancel/disable them when they've already
started their execution.

As the deadlock is caused by rtnl_lock, it causes many other processes
to stall, not only atlantic related stuff.

Fix trying to acquire rtnl_lock at the beginning of those functions, and
returning if NIC closing is ongoing. Also do the "linkstate" stuff in a
workqueue instead than in a threaded irq, where sleeping or waiting a
mutex for a long time is discouraged.

The issue appeared repeteadly attaching and deattaching the NIC to a
bond interface. Doing that after this patch I cannot reproduce the bug.

Fixes: 62c1c2e606f6 ("net: atlantic: MACSec offload skeleton")
Reported-by: Li Liang <liali@...hat.com>
Signed-off-by: Íñigo Huguet <ihuguet@...hat.com>
---
 .../ethernet/aquantia/atlantic/aq_macsec.c    |  7 +--
 .../net/ethernet/aquantia/atlantic/aq_nic.c   | 59 ++++++++++++++++---
 .../net/ethernet/aquantia/atlantic/aq_nic.h   |  1 +
 3 files changed, 55 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c b/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c
index 3d0e16791e1c..5759eba89db9 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c
@@ -1458,7 +1458,7 @@ int aq_macsec_enable(struct aq_nic_s *nic)
 	if (!nic->macsec_cfg)
 		return 0;
 
-	rtnl_lock();
+	ASSERT_RTNL();
 
 	if (nic->aq_fw_ops->send_macsec_req) {
 		struct macsec_cfg_request cfg = { 0 };
@@ -1507,7 +1507,6 @@ int aq_macsec_enable(struct aq_nic_s *nic)
 	ret = aq_apply_macsec_cfg(nic);
 
 unlock:
-	rtnl_unlock();
 	return ret;
 }
 
@@ -1519,9 +1518,9 @@ void aq_macsec_work(struct aq_nic_s *nic)
 	if (!netif_carrier_ok(nic->ndev))
 		return;
 
-	rtnl_lock();
+	ASSERT_RTNL();
+
 	aq_check_txsa_expiration(nic);
-	rtnl_unlock();
 }
 
 int aq_macsec_rx_sa_cnt(struct aq_nic_s *nic)
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index 06508eebb585..5cb7d165dd21 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -40,6 +40,8 @@ static unsigned int aq_itr_rx;
 module_param_named(aq_itr_rx, aq_itr_rx, uint, 0644);
 MODULE_PARM_DESC(aq_itr_rx, "RX interrupt throttle rate");
 
+#define AQ_TASK_RETRY_MS	50
+
 static void aq_nic_update_ndev_stats(struct aq_nic_s *self);
 
 static void aq_nic_rss_init(struct aq_nic_s *self, unsigned int num_rss_queues)
@@ -210,19 +212,41 @@ static int aq_nic_update_link_status(struct aq_nic_s *self)
 	return 0;
 }
 
-static irqreturn_t aq_linkstate_threaded_isr(int irq, void *private)
+static irqreturn_t aq_linkstate_isr(int irq, void *private)
 {
 	struct aq_nic_s *self = private;
 
 	if (!self)
 		return IRQ_NONE;
 
+	if (!aq_utils_obj_test(&self->flags, AQ_NIC_FLAG_CLOSING))
+		aq_ndev_schedule_work(&self->linkstate_task);
+
+	return IRQ_HANDLED;
+}
+
+static void aq_nic_linkstate_task(struct work_struct *work)
+{
+	struct aq_nic_s *self = container_of(work, struct aq_nic_s,
+					     linkstate_task);
+
+#if IS_ENABLED(CONFIG_MACSEC)
+	/* avoid deadlock at aq_nic_stop -> cancel_work_sync */
+	while (!rtnl_trylock()) {
+		if (aq_utils_obj_test(&self->flags, AQ_NIC_FLAG_CLOSING))
+			return;
+		msleep(AQ_TASK_RETRY_MS);
+	}
+#endif
+
 	aq_nic_update_link_status(self);
 
+#if IS_ENABLED(CONFIG_MACSEC)
+	rtnl_unlock();
+#endif
+
 	self->aq_hw_ops->hw_irq_enable(self->aq_hw,
 				       BIT(self->aq_nic_cfg.link_irq_vec));
-
-	return IRQ_HANDLED;
 }
 
 static void aq_nic_service_task(struct work_struct *work)
@@ -236,12 +260,23 @@ static void aq_nic_service_task(struct work_struct *work)
 	if (aq_utils_obj_test(&self->flags, AQ_NIC_FLAGS_IS_NOT_READY))
 		return;
 
+#if IS_ENABLED(CONFIG_MACSEC)
+	/* avoid deadlock at aq_nic_stop -> cancel_work_sync */
+	while (!rtnl_trylock()) {
+		if (aq_utils_obj_test(&self->flags, AQ_NIC_FLAG_CLOSING))
+			return;
+		msleep(AQ_TASK_RETRY_MS);
+	}
+#endif
+
 	err = aq_nic_update_link_status(self);
 	if (err)
 		return;
 
 #if IS_ENABLED(CONFIG_MACSEC)
 	aq_macsec_work(self);
+
+	rtnl_unlock();
 #endif
 
 	mutex_lock(&self->fwreq_mutex);
@@ -505,6 +540,7 @@ int aq_nic_start(struct aq_nic_s *self)
 	if (err)
 		goto err_exit;
 
+	INIT_WORK(&self->linkstate_task, aq_nic_linkstate_task);
 	INIT_WORK(&self->service_task, aq_nic_service_task);
 
 	timer_setup(&self->service_timer, aq_nic_service_timer_cb, 0);
@@ -531,10 +567,9 @@ int aq_nic_start(struct aq_nic_s *self)
 		if (cfg->link_irq_vec) {
 			int irqvec = pci_irq_vector(self->pdev,
 						    cfg->link_irq_vec);
-			err = request_threaded_irq(irqvec, NULL,
-						   aq_linkstate_threaded_isr,
-						   IRQF_SHARED | IRQF_ONESHOT,
-						   self->ndev->name, self);
+			err = request_irq(irqvec, aq_linkstate_isr,
+					  IRQF_SHARED | IRQF_ONESHOT,
+					  self->ndev->name, self);
 			if (err < 0)
 				goto err_exit;
 			self->msix_entry_mask |= (1 << cfg->link_irq_vec);
@@ -1380,11 +1415,15 @@ int aq_nic_set_loopback(struct aq_nic_s *self)
 int aq_nic_stop(struct aq_nic_s *self)
 {
 	unsigned int i = 0U;
+	int ret;
+
+	aq_utils_obj_set(&self->flags, AQ_NIC_FLAG_CLOSING);
 
 	netif_tx_disable(self->ndev);
 	netif_carrier_off(self->ndev);
 
 	del_timer_sync(&self->service_timer);
+	cancel_work_sync(&self->linkstate_task);
 	cancel_work_sync(&self->service_task);
 
 	self->aq_hw_ops->hw_irq_disable(self->aq_hw, AQ_CFG_IRQ_MASK);
@@ -1401,7 +1440,11 @@ int aq_nic_stop(struct aq_nic_s *self)
 
 	aq_ptp_ring_stop(self);
 
-	return self->aq_hw_ops->hw_stop(self->aq_hw);
+	ret = self->aq_hw_ops->hw_stop(self->aq_hw);
+
+	aq_utils_obj_clear(&self->flags, AQ_NIC_FLAG_CLOSING);
+
+	return ret;
 }
 
 void aq_nic_set_power(struct aq_nic_s *self)
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
index 935ba889bd9a..a114b66990a9 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
@@ -140,6 +140,7 @@ struct aq_nic_s {
 	const struct aq_fw_ops *aq_fw_ops;
 	struct aq_nic_cfg_s aq_nic_cfg;
 	struct timer_list service_timer;
+	struct work_struct linkstate_task;
 	struct work_struct service_task;
 	struct timer_list polling_timer;
 	struct aq_hw_link_status_s link_status;
-- 
2.34.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ