[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20230509100939.760867-4-jiri@resnulli.us>
Date: Tue, 9 May 2023 12:09:39 +0200
From: Jiri Pirko <jiri@...nulli.us>
To: netdev@...r.kernel.org
Cc: kuba@...nel.org,
pabeni@...hat.com,
davem@...emloft.net,
edumazet@...gle.com,
jacob.e.keller@...el.com,
saeedm@...dia.com,
moshe@...dia.com
Subject: [patch net 3/3] devlink: change port event netdev notifier to be per-net and following netdev
From: Jiri Pirko <jiri@...dia.com>
The commit 565b4824c39f ("devlink: change port event netdev notifier
from per-net to global") changed original per-net notifier to be global
which fixed the issue of non-receiving events of netdev uninit if that
moved to a different namespace. That worked fine in -net tree.
However, later on when commit ee75f1fc44dd ("net/mlx5e: Create
separate devlink instance for ethernet auxiliary device") and
commit 72ed5d5624af ("net/mlx5: Suspend auxiliary devices only in
case of PCI device suspend") were merged, a deadlock was introduced
when removing a namespace with devlink instance with another nested
instance.
Here there is the bad flow example resulting in deadlock with mlx5:
net_cleanup_work -> cleanup_net (takes down_read(&pernet_ops_rwsem) ->
devlink_pernet_pre_exit() -> devlink_reload() ->
mlx5_devlink_reload_down() -> mlx5_unload_one_devl_locked() ->
mlx5_detach_device() -> del_adev() -> mlx5e_remove() ->
mlx5e_destroy_devlink() -> devlink_free() ->
unregister_netdevice_notifier() (takes down_write(&pernet_ops_rwsem)
Steps to reproduce:
$ modprobe mlx5_core
$ ip netns add ns1
$ devlink dev reload pci/0000:08:00.0 netns ns1
$ ip netns del ns1
Resolve this by converting the notifier to per per-net again.
But this time also per-devlink_port and setting to follow the netdev
to different namespace when spotted, ensured by
netdev_net_notifier_follow().
Note what a tree needs this fix only in case all of the cited fixes
commits are present.
Reported-by: Moshe Shemesh <moshe@...dia.com>
Fixes: 565b4824c39f ("devlink: change port event netdev notifier from per-net to global")
Fixes: ee75f1fc44dd ("net/mlx5e: Create separate devlink instance for ethernet auxiliary device")
Fixes: 72ed5d5624af ("net/mlx5: Suspend auxiliary devices only in case of PCI device suspend")
Signed-off-by: Jiri Pirko <jiri@...dia.com>
---
include/net/devlink.h | 1 +
net/devlink/leftover.c | 24 +++++++++++++++---------
2 files changed, 16 insertions(+), 9 deletions(-)
diff --git a/include/net/devlink.h b/include/net/devlink.h
index d0a0d1ce7db4..f252da446264 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -150,6 +150,7 @@ struct devlink_port {
struct devlink_rate *devlink_rate;
struct devlink_linecard *linecard;
struct notifier_block netdevice_nb;
+ struct netdev_net_notifier netdevice_nn;
};
struct devlink_port_new_attrs {
diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c
index 4b1627cb2b83..9e11db6528ce 100644
--- a/net/devlink/leftover.c
+++ b/net/devlink/leftover.c
@@ -6874,7 +6874,8 @@ int devl_port_register(struct devlink *devlink,
INIT_LIST_HEAD(&devlink_port->reporter_list);
devlink_port->netdevice_nb.notifier_call = devlink_port_netdevice_event;
- err = register_netdevice_notifier(&devlink_port->netdevice_nb);
+ err = register_netdevice_notifier_net(devlink_net(devlink),
+ &devlink_port->netdevice_nb);
if (err)
return err;
@@ -6888,7 +6889,8 @@ int devl_port_register(struct devlink *devlink,
return 0;
err_xa_insert:
- unregister_netdevice_notifier(&devlink_port->netdevice_nb);
+ unregister_netdevice_notifier_net(devlink_net(devlink),
+ &devlink_port->netdevice_nb);
return err;
}
EXPORT_SYMBOL_GPL(devl_port_register);
@@ -6928,13 +6930,16 @@ EXPORT_SYMBOL_GPL(devlink_port_register);
*/
void devl_port_unregister(struct devlink_port *devlink_port)
{
- lockdep_assert_held(&devlink_port->devlink->lock);
+ struct devlink *devlink = devlink_port->devlink;
+
+ lockdep_assert_held(&devlink->lock);
WARN_ON(devlink_port->type != DEVLINK_PORT_TYPE_NOTSET);
devlink_port_type_warn_cancel(devlink_port);
devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL);
- xa_erase(&devlink_port->devlink->ports, devlink_port->index);
- WARN_ON_ONCE(unregister_netdevice_notifier(&devlink_port->netdevice_nb));
+ xa_erase(&devlink->ports, devlink_port->index);
+ WARN_ON_ONCE(unregister_netdevice_notifier_net(devlink_net(devlink),
+ &devlink_port->netdevice_nb));
WARN_ON(!list_empty(&devlink_port->reporter_list));
devlink_port->registered = false;
}
@@ -7099,11 +7104,11 @@ static int devlink_port_netdevice_event(struct notifier_block *nb,
*/
__devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_ETH,
NULL);
+ netdev_net_notifier_follow(netdev, nb,
+ &devlink_port->netdevice_nn);
break;
case NETDEV_REGISTER:
case NETDEV_CHANGENAME:
- if (devlink_net(devlink_port->devlink) != dev_net(netdev))
- return NOTIFY_OK;
/* Set the netdev on top of previously set type. Note this
* event happens also during net namespace change so here
* we take into account netdev pointer appearing in this
@@ -7113,8 +7118,6 @@ static int devlink_port_netdevice_event(struct notifier_block *nb,
netdev);
break;
case NETDEV_UNREGISTER:
- if (devlink_net(devlink_port->devlink) != dev_net(netdev))
- return NOTIFY_OK;
/* Clear netdev pointer, but not the type. This event happens
* also during net namespace change so we need to clear
* pointer to netdev that is going to another net namespace.
@@ -7128,6 +7131,9 @@ static int devlink_port_netdevice_event(struct notifier_block *nb,
*/
__devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_NOTSET,
NULL);
+ netdev_net_notifier_unfollow(netdev,
+ &devlink_port->netdevice_nn,
+ devlink_net(devlink_port->devlink));
break;
}
--
2.39.2
Powered by blists - more mailing lists