[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20201112192424.2742-7-parav@nvidia.com>
Date: Thu, 12 Nov 2020 21:24:16 +0200
From: Parav Pandit <parav@...dia.com>
To: <netdev@...r.kernel.org>, <linux-rdma@...r.kernel.org>,
<gregkh@...uxfoundation.org>
CC: <jiri@...dia.com>, <jgg@...dia.com>, <dledford@...hat.com>,
<leonro@...dia.com>, <saeedm@...dia.com>, <kuba@...nel.org>,
<davem@...emloft.net>, Parav Pandit <parav@...dia.com>
Subject: [PATCH net-next 06/13] devlink: Introduce devlink refcount to reduce scope of global devlink_mutex
Currently the global devlink_mutex is held while a doit() operation is
in progress. This brings a limitation.
A driver cannot perform devlink_register()/unregister() calls
during devlink doit() callback functions.
This is typically required when a port state change callback described in
RFC [1] wants to delete an active SF port or wants to
activate an SF port, which results in unregistering or registering a
devlink instance on a different bus such as the ancillary bus.
An example flow:
devlink_predoit()
mutex_lock(&devlink_mutex); <- First lock acquire
devlink_reload()
driver->reload_down(inactive)
adev->remove();
mlx5_adev_remove(ancillary_dev);
devlink_unregister(ancillary_dev->devlink_instance);
mutex_lock(&devlink_mutex); <- Second lock acquire
This patch is a preparation patch to enable drivers to achieve this.
It achieves this by maintaining a per devlink instance refcount to
prevent devlink device unregistration while user commands are in progress
or while the devlink device is migrating to the init_net net namespace.
devlink_nl_family continues to remain registered with parallel_ops
disabled. So even after removing devlink_mutex during doit commands,
it doesn't enable userspace to run multiple devlink commands concurrently
for one or multiple devlink instances.
[1] https://lore.kernel.org/netdev/20200519092258.GF4655@nanopsycho
Signed-off-by: Parav Pandit <parav@...dia.com>
Reviewed-by: Jiri Pirko <jiri@...dia.com>
---
include/net/devlink.h | 5 +++
net/core/devlink.c | 84 +++++++++++++++++++++++++++++++------------
2 files changed, 67 insertions(+), 22 deletions(-)
diff --git a/include/net/devlink.h b/include/net/devlink.h
index ef487b8ed17b..c8eab814c234 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -53,6 +53,11 @@ struct devlink {
* port, sb, dpipe, resource, params, region, traps and more.
*/
struct mutex reload_lock; /* Protects reload operation */
+ struct list_head reload_list;
+ refcount_t refcount; /* Serializes user doit commands and netns command
+ * with device unregistration.
+ */
+ struct completion unregister_complete;
u8 reload_failed:1,
reload_enabled:1,
registered:1;
diff --git a/net/core/devlink.c b/net/core/devlink.c
index c7c6f274d392..84f3ec12b3e8 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -96,9 +96,8 @@ static LIST_HEAD(devlink_list);
/* devlink_mutex
*
- * An overall lock guarding every operation coming from userspace.
- * It also guards devlink devices list and it is taken when
- * driver registers/unregisters it.
+ * An overall lock guarding devlink devices list during operations coming from
+ * userspace and when driver registers/unregisters devlink device.
*/
static DEFINE_MUTEX(devlink_mutex);
@@ -121,6 +120,18 @@ void devlink_net_set(struct devlink *devlink, struct net *net)
}
EXPORT_SYMBOL_GPL(devlink_net_set);
+static inline bool
+devlink_try_get(struct devlink *devlink)
+{
+ return refcount_inc_not_zero(&devlink->refcount);
+}
+
+static void devlink_put(struct devlink *devlink)
+{
+ if (refcount_dec_and_test(&devlink->refcount))
+ complete(&devlink->unregister_complete);
+}
+
static struct devlink *devlink_get_from_attrs(struct net *net,
struct nlattr **attrs)
{
@@ -139,7 +150,7 @@ static struct devlink *devlink_get_from_attrs(struct net *net,
list_for_each_entry(devlink, &devlink_list, list) {
if (strcmp(devlink->dev->bus->name, busname) == 0 &&
strcmp(dev_name(devlink->dev), devname) == 0 &&
- net_eq(devlink_net(devlink), net))
+ net_eq(devlink_net(devlink), net) && devlink_try_get(devlink))
return devlink;
}
@@ -411,7 +422,7 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id)
/* The per devlink instance lock is taken by default in the pre-doit
* operation, yet several commands do not require this. The global
- * devlink lock is taken and protects from disruption by user-calls.
+ * devlink lock is taken and protects from disruption by dumpit user-calls.
*/
#define DEVLINK_NL_FLAG_NO_LOCK BIT(2)
@@ -424,10 +435,10 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops,
mutex_lock(&devlink_mutex);
devlink = devlink_get_from_info(info);
- if (IS_ERR(devlink)) {
- mutex_unlock(&devlink_mutex);
+ mutex_unlock(&devlink_mutex);
+
+ if (IS_ERR(devlink))
return PTR_ERR(devlink);
- }
if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
mutex_lock(&devlink->lock);
info->user_ptr[0] = devlink;
@@ -448,7 +459,7 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops,
unlock:
if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
mutex_unlock(&devlink->lock);
- mutex_unlock(&devlink_mutex);
+ devlink_put(devlink);
return err;
}
@@ -460,7 +471,7 @@ static void devlink_nl_post_doit(const struct genl_ops *ops,
devlink = info->user_ptr[0];
if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
mutex_unlock(&devlink->lock);
- mutex_unlock(&devlink_mutex);
+ devlink_put(devlink);
}
static struct genl_family devlink_nl_family;
@@ -8122,6 +8133,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
mutex_init(&devlink->lock);
mutex_init(&devlink->reporters_lock);
mutex_init(&devlink->reload_lock);
+ init_completion(&devlink->unregister_complete);
return devlink;
}
EXPORT_SYMBOL_GPL(devlink_alloc);
@@ -8136,6 +8148,7 @@ int devlink_register(struct devlink *devlink, struct device *dev)
{
devlink->dev = dev;
devlink->registered = true;
+ refcount_set(&devlink->refcount, 1);
mutex_lock(&devlink_mutex);
list_add_tail(&devlink->list, &devlink_list);
devlink_notify(devlink, DEVLINK_CMD_NEW);
@@ -8151,12 +8164,23 @@ EXPORT_SYMBOL_GPL(devlink_register);
*/
void devlink_unregister(struct devlink *devlink)
{
+ /* Remove from the list first, so that no new users can get it */
mutex_lock(&devlink_mutex);
- WARN_ON(devlink_reload_supported(devlink->ops) &&
- devlink->reload_enabled);
devlink_notify(devlink, DEVLINK_CMD_DEL);
list_del(&devlink->list);
mutex_unlock(&devlink_mutex);
+
+ /* Balances with refcount_set in devlink_register(). */
+ devlink_put(devlink);
+ /* Wait for any existing users to stop using the devlink device */
+ wait_for_completion(&devlink->unregister_complete);
+
+ /* At this point there are no active users working on the devlink instance;
+ * also net ns exit operation (if any) is also completed.
+ * devlink is out of global list, hence no users can acquire reference to this devlink
+ * instance anymore. Hence, it is safe to proceed with unregistration.
+ */
+ WARN_ON(devlink_reload_supported(devlink->ops) && devlink->reload_enabled);
}
EXPORT_SYMBOL_GPL(devlink_unregister);
@@ -10472,6 +10496,8 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net)
{
struct devlink *devlink;
u32 actions_performed;
+ LIST_HEAD(local_list);
+ struct devlink *tmp;
int err;
/* In case network namespace is getting destroyed, reload
@@ -10479,18 +10505,32 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net)
*/
mutex_lock(&devlink_mutex);
list_for_each_entry(devlink, &devlink_list, list) {
- if (net_eq(devlink_net(devlink), net)) {
- if (WARN_ON(!devlink_reload_supported(devlink->ops)))
- continue;
- err = devlink_reload(devlink, &init_net,
- DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
- DEVLINK_RELOAD_LIMIT_UNSPEC,
- &actions_performed, NULL);
- if (err && err != -EOPNOTSUPP)
- pr_warn("Failed to reload devlink instance into init_net\n");
- }
+ if (!net_eq(devlink_net(devlink), net))
+ continue;
+
+ if (WARN_ON(!devlink_reload_supported(devlink->ops)))
+ continue;
+
+ /* Hold the reference to devlink instance so that it doesn't get unregistered
+ * once global devlink_mutex is unlocked.
+ * Store the devlink to a shadow list so that if devlink unregistration is
+ * started, it can be still found in the shadow list.
+ */
+ if (devlink_try_get(devlink))
+ list_add_tail(&devlink->reload_list, &local_list);
}
mutex_unlock(&devlink_mutex);
+
+ list_for_each_entry_safe(devlink, tmp, &local_list, reload_list) {
+ list_del_init(&devlink->reload_list);
+ err = devlink_reload(devlink, &init_net,
+ DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
+ DEVLINK_RELOAD_LIMIT_UNSPEC,
+ &actions_performed, NULL);
+ if (err && err != -EOPNOTSUPP)
+ pr_warn("Failed to reload devlink instance into init_net\n");
+ devlink_put(devlink);
+ }
}
static struct pernet_operations devlink_pernet_ops __net_initdata = {
--
2.26.2
Powered by blists - more mailing lists