[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1747829342-1018757-2-git-send-email-tariqt@nvidia.com>
Date: Wed, 21 May 2025 15:08:58 +0300
From: Tariq Toukan <tariqt@...dia.com>
To: "David S. Miller" <davem@...emloft.net>, Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>, Eric Dumazet <edumazet@...gle.com>, "Andrew
Lunn" <andrew+netdev@...n.ch>
CC: Jason Gunthorpe <jgg@...pe.ca>, Leon Romanovsky <leon@...nel.org>, "Saeed
Mahameed" <saeedm@...dia.com>, Tariq Toukan <tariqt@...dia.com>, "Richard
Cochran" <richardcochran@...il.com>, Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>, Jesper Dangaard Brouer
<hawk@...nel.org>, John Fastabend <john.fastabend@...il.com>,
<linux-rdma@...r.kernel.org>, <linux-kernel@...r.kernel.org>,
<netdev@...r.kernel.org>, <bpf@...r.kernel.org>, Moshe Shemesh
<moshe@...dia.com>, Mark Bloch <mbloch@...dia.com>, Gal Pressman
<gal@...dia.com>, Cosmin Ratiu <cratiu@...dia.com>
Subject: [PATCH net-next 1/5] IB/IPoIB: Enqueue separate work_structs for each flushed interface
From: Cosmin Ratiu <cratiu@...dia.com>
Previously, flushing a netdevice involved first flushing all child
devices from the flush task itself. That requires holding the lock that
protects the list for the entire duration of the flush.
This poses a problem when converting from vlan_rwsem to the netdev
instance lock (next patch), because holding the parent lock while
trying to acquire a child lock makes lockdep unhappy, rightfully.
Fix this by splitting a big flush task into individual flush tasks
(all are already created in their respective ipoib_dev_priv structs)
and defining a helper function to enqueue all of them while holding the
list lock.
In ipoib_set_mac, the function is not used and the task is enqueued
directly, because in the subsequent patches locking is changed and this
function may be called with the netdev instance lock held.
This is effectively a noop, the wq is single-threaded and ordered and
will execute the same flush operations in the same order as before.
Furthermore, there should be no new races because
ipoib_parent_unregister_pre() calls flush_workqueue() after stopping new
work generation to wait for pending work to complete. flush_workqueue()
waits for all currently enqueued work to finish before returning.
Signed-off-by: Cosmin Ratiu <cratiu@...dia.com>
Reviewed-by: Carolina Jubran <cjubran@...dia.com>
Reviewed-by: Leon Romanovsky <leonro@...dia.com>
Signed-off-by: Tariq Toukan <tariqt@...dia.com>
---
drivers/infiniband/ulp/ipoib/ipoib.h | 2 +
drivers/infiniband/ulp/ipoib/ipoib_ib.c | 46 ++++++++++++++--------
drivers/infiniband/ulp/ipoib/ipoib_main.c | 10 ++++-
drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 8 ++--
4 files changed, 44 insertions(+), 22 deletions(-)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index abe0522b7df4..2e05e9c9317d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -512,6 +512,8 @@ int ipoib_intf_init(struct ib_device *hca, u32 port, const char *format,
void ipoib_ib_dev_flush_light(struct work_struct *work);
void ipoib_ib_dev_flush_normal(struct work_struct *work);
void ipoib_ib_dev_flush_heavy(struct work_struct *work);
+void ipoib_queue_work(struct ipoib_dev_priv *priv,
+ enum ipoib_flush_level level);
void ipoib_ib_tx_timeout_work(struct work_struct *work);
void ipoib_ib_dev_cleanup(struct net_device *dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 5cde275daa94..e0e7f600097d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -1172,24 +1172,11 @@ static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv)
}
static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
- enum ipoib_flush_level level,
- int nesting)
+ enum ipoib_flush_level level)
{
- struct ipoib_dev_priv *cpriv;
struct net_device *dev = priv->dev;
int result;
- down_read_nested(&priv->vlan_rwsem, nesting);
-
- /*
- * Flush any child interfaces too -- they might be up even if
- * the parent is down.
- */
- list_for_each_entry(cpriv, &priv->child_intfs, list)
- __ipoib_ib_dev_flush(cpriv, level, nesting + 1);
-
- up_read(&priv->vlan_rwsem);
-
if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) &&
level != IPOIB_FLUSH_HEAVY) {
/* Make sure the dev_addr is set even if not flushing */
@@ -1280,7 +1267,7 @@ void ipoib_ib_dev_flush_light(struct work_struct *work)
struct ipoib_dev_priv *priv =
container_of(work, struct ipoib_dev_priv, flush_light);
- __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_LIGHT, 0);
+ __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_LIGHT);
}
void ipoib_ib_dev_flush_normal(struct work_struct *work)
@@ -1288,7 +1275,7 @@ void ipoib_ib_dev_flush_normal(struct work_struct *work)
struct ipoib_dev_priv *priv =
container_of(work, struct ipoib_dev_priv, flush_normal);
- __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_NORMAL, 0);
+ __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_NORMAL);
}
void ipoib_ib_dev_flush_heavy(struct work_struct *work)
@@ -1297,10 +1284,35 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work)
container_of(work, struct ipoib_dev_priv, flush_heavy);
rtnl_lock();
- __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY, 0);
+ __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY);
rtnl_unlock();
}
+void ipoib_queue_work(struct ipoib_dev_priv *priv,
+ enum ipoib_flush_level level)
+{
+ if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
+ struct ipoib_dev_priv *cpriv;
+
+ down_read(&priv->vlan_rwsem);
+ list_for_each_entry(cpriv, &priv->child_intfs, list)
+ ipoib_queue_work(cpriv, level);
+ up_read(&priv->vlan_rwsem);
+ }
+
+ switch (level) {
+ case IPOIB_FLUSH_LIGHT:
+ queue_work(ipoib_workqueue, &priv->flush_light);
+ break;
+ case IPOIB_FLUSH_NORMAL:
+ queue_work(ipoib_workqueue, &priv->flush_normal);
+ break;
+ case IPOIB_FLUSH_HEAVY:
+ queue_work(ipoib_workqueue, &priv->flush_heavy);
+ break;
+ }
+}
+
void ipoib_ib_dev_cleanup(struct net_device *dev)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 3b463db8ce39..55b1f3cbee17 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -2415,6 +2415,14 @@ static int ipoib_set_mac(struct net_device *dev, void *addr)
set_base_guid(priv, (union ib_gid *)(ss->__data + 4));
+ if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
+ struct ipoib_dev_priv *cpriv;
+
+ down_read(&priv->vlan_rwsem);
+ list_for_each_entry(cpriv, &priv->child_intfs, list)
+ queue_work(ipoib_workqueue, &cpriv->flush_light);
+ up_read(&priv->vlan_rwsem);
+ }
queue_work(ipoib_workqueue, &priv->flush_light);
return 0;
@@ -2526,7 +2534,7 @@ static struct net_device *ipoib_add_port(const char *format,
ib_register_event_handler(&priv->event_handler);
/* call event handler to ensure pkey in sync */
- queue_work(ipoib_workqueue, &priv->flush_heavy);
+ ipoib_queue_work(priv, IPOIB_FLUSH_HEAVY);
ndev->rtnl_link_ops = ipoib_get_link_ops();
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 368e5d77416d..86983080d28b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -280,15 +280,15 @@ void ipoib_event(struct ib_event_handler *handler,
dev_name(&record->device->dev), record->element.port_num);
if (record->event == IB_EVENT_CLIENT_REREGISTER) {
- queue_work(ipoib_workqueue, &priv->flush_light);
+ ipoib_queue_work(priv, IPOIB_FLUSH_LIGHT);
} else if (record->event == IB_EVENT_PORT_ERR ||
record->event == IB_EVENT_PORT_ACTIVE ||
record->event == IB_EVENT_LID_CHANGE) {
- queue_work(ipoib_workqueue, &priv->flush_normal);
+ ipoib_queue_work(priv, IPOIB_FLUSH_NORMAL);
} else if (record->event == IB_EVENT_PKEY_CHANGE) {
- queue_work(ipoib_workqueue, &priv->flush_heavy);
+ ipoib_queue_work(priv, IPOIB_FLUSH_HEAVY);
} else if (record->event == IB_EVENT_GID_CHANGE &&
!test_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags)) {
- queue_work(ipoib_workqueue, &priv->flush_light);
+ ipoib_queue_work(priv, IPOIB_FLUSH_LIGHT);
}
}
--
2.31.1
Powered by blists - more mailing lists