lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Thu, 12 Nov 2020 21:24:16 +0200
From:   Parav Pandit <parav@...dia.com>
To:     <netdev@...r.kernel.org>, <linux-rdma@...r.kernel.org>,
        <gregkh@...uxfoundation.org>
CC:     <jiri@...dia.com>, <jgg@...dia.com>, <dledford@...hat.com>,
        <leonro@...dia.com>, <saeedm@...dia.com>, <kuba@...nel.org>,
        <davem@...emloft.net>, Parav Pandit <parav@...dia.com>
Subject: [PATCH net-next 06/13] devlink: Introduce devlink refcount to reduce scope of global devlink_mutex

Currently global devlink_mutex is held while a doit() operation is
progress. This brings a limitation.

A Driver cannot perform devlink_register()/unregister() calls
during devlink doit() callback functions.
This is typically required when a port state change described in
RFC [1] callback wants to delete an active SF port or wants to
activate a SF port that results into unregistering or registering a
devlink instance on different bus such as ancillary bus.

An example flow:

devlink_predoit()
  mutex_lock(&devlink_mutex); <- First lock acquire
  devlink_reload()
    driver->reload_down(inactive)
        adev->remove();
           mlx5_adev_remove(ancillary_dev);
             devlink_unregister(ancillary_dev->devlink_instance);
               mutex_lock(&devlink_mutex); <- Second lock acquire

This patch is preparation patch to enable drivers to achieve this.

It achieves this by maintaining a per devlink instance refcount to
prevent devlink device unregistration while user command are in progress
or while devlink device is migration to init_net net namespace.

devlink_nl_family continue to remain registered with parallel_ops
disabled. So even after removing devlink_mutex during doit commands,
it doesn't enable userspace to run multiple devlink commands for one
or multiple devlink instance.

[1] https://lore.kernel.org/netdev/20200519092258.GF4655@nanopsycho

Signed-off-by: Parav Pandit <parav@...dia.com>
Reviewed-by: Jiri Pirko <jiri@...dia.com>
---
 include/net/devlink.h |  5 +++
 net/core/devlink.c    | 84 +++++++++++++++++++++++++++++++------------
 2 files changed, 67 insertions(+), 22 deletions(-)

diff --git a/include/net/devlink.h b/include/net/devlink.h
index ef487b8ed17b..c8eab814c234 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -53,6 +53,11 @@ struct devlink {
 			    * port, sb, dpipe, resource, params, region, traps and more.
 			    */
 	struct mutex reload_lock; /* Protects reload operation */
+	struct list_head reload_list;
+	refcount_t refcount; /* Serializes user doit commands and netns command
+			      * with device unregistration.
+			      */
+	struct completion unregister_complete;
 	u8 reload_failed:1,
 	   reload_enabled:1,
 	   registered:1;
diff --git a/net/core/devlink.c b/net/core/devlink.c
index c7c6f274d392..84f3ec12b3e8 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -96,9 +96,8 @@ static LIST_HEAD(devlink_list);
 
 /* devlink_mutex
  *
- * An overall lock guarding every operation coming from userspace.
- * It also guards devlink devices list and it is taken when
- * driver registers/unregisters it.
+ * An overall lock guarding devlink devices list during operations coming from
+ * userspace and when driver registers/unregisters devlink device.
  */
 static DEFINE_MUTEX(devlink_mutex);
 
@@ -121,6 +120,18 @@ void devlink_net_set(struct devlink *devlink, struct net *net)
 }
 EXPORT_SYMBOL_GPL(devlink_net_set);
 
+static inline bool
+devlink_try_get(struct devlink *devlink)
+{
+	return refcount_inc_not_zero(&devlink->refcount);
+}
+
+static void devlink_put(struct devlink *devlink)
+{
+	if (refcount_dec_and_test(&devlink->refcount))
+		complete(&devlink->unregister_complete);
+}
+
 static struct devlink *devlink_get_from_attrs(struct net *net,
 					      struct nlattr **attrs)
 {
@@ -139,7 +150,7 @@ static struct devlink *devlink_get_from_attrs(struct net *net,
 	list_for_each_entry(devlink, &devlink_list, list) {
 		if (strcmp(devlink->dev->bus->name, busname) == 0 &&
 		    strcmp(dev_name(devlink->dev), devname) == 0 &&
-		    net_eq(devlink_net(devlink), net))
+		    net_eq(devlink_net(devlink), net) && devlink_try_get(devlink))
 			return devlink;
 	}
 
@@ -411,7 +422,7 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id)
 
 /* The per devlink instance lock is taken by default in the pre-doit
  * operation, yet several commands do not require this. The global
- * devlink lock is taken and protects from disruption by user-calls.
+ * devlink lock is taken and protects from disruption by dumpit user-calls.
  */
 #define DEVLINK_NL_FLAG_NO_LOCK			BIT(2)
 
@@ -424,10 +435,10 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops,
 
 	mutex_lock(&devlink_mutex);
 	devlink = devlink_get_from_info(info);
-	if (IS_ERR(devlink)) {
-		mutex_unlock(&devlink_mutex);
+	mutex_unlock(&devlink_mutex);
+
+	if (IS_ERR(devlink))
 		return PTR_ERR(devlink);
-	}
 	if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
 		mutex_lock(&devlink->lock);
 	info->user_ptr[0] = devlink;
@@ -448,7 +459,7 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops,
 unlock:
 	if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
 		mutex_unlock(&devlink->lock);
-	mutex_unlock(&devlink_mutex);
+	devlink_put(devlink);
 	return err;
 }
 
@@ -460,7 +471,7 @@ static void devlink_nl_post_doit(const struct genl_ops *ops,
 	devlink = info->user_ptr[0];
 	if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
 		mutex_unlock(&devlink->lock);
-	mutex_unlock(&devlink_mutex);
+	devlink_put(devlink);
 }
 
 static struct genl_family devlink_nl_family;
@@ -8122,6 +8133,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
 	mutex_init(&devlink->lock);
 	mutex_init(&devlink->reporters_lock);
 	mutex_init(&devlink->reload_lock);
+	init_completion(&devlink->unregister_complete);
 	return devlink;
 }
 EXPORT_SYMBOL_GPL(devlink_alloc);
@@ -8136,6 +8148,7 @@ int devlink_register(struct devlink *devlink, struct device *dev)
 {
 	devlink->dev = dev;
 	devlink->registered = true;
+	refcount_set(&devlink->refcount, 1);
 	mutex_lock(&devlink_mutex);
 	list_add_tail(&devlink->list, &devlink_list);
 	devlink_notify(devlink, DEVLINK_CMD_NEW);
@@ -8151,12 +8164,23 @@ EXPORT_SYMBOL_GPL(devlink_register);
  */
 void devlink_unregister(struct devlink *devlink)
 {
+	/* Remove from the list first, so that no new users can get it */
 	mutex_lock(&devlink_mutex);
-	WARN_ON(devlink_reload_supported(devlink->ops) &&
-		devlink->reload_enabled);
 	devlink_notify(devlink, DEVLINK_CMD_DEL);
 	list_del(&devlink->list);
 	mutex_unlock(&devlink_mutex);
+
+	/* Balances with refcount_set in devlink_register(). */
+	devlink_put(devlink);
+	/* Wait for any existing users to stop using the devlink device */
+	wait_for_completion(&devlink->unregister_complete);
+
+	/* At this point there are no active users working on the devlink instance;
+	 * also net ns exit operation (if any) is also completed.
+	 * devlink is out of global list, hence no users can acquire reference to this devlink
+	 * instance anymore. Hence, it is safe to proceed with unregistration.
+	 */
+	WARN_ON(devlink_reload_supported(devlink->ops) && devlink->reload_enabled);
 }
 EXPORT_SYMBOL_GPL(devlink_unregister);
 
@@ -10472,6 +10496,8 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net)
 {
 	struct devlink *devlink;
 	u32 actions_performed;
+	LIST_HEAD(local_list);
+	struct devlink *tmp;
 	int err;
 
 	/* In case network namespace is getting destroyed, reload
@@ -10479,18 +10505,32 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net)
 	 */
 	mutex_lock(&devlink_mutex);
 	list_for_each_entry(devlink, &devlink_list, list) {
-		if (net_eq(devlink_net(devlink), net)) {
-			if (WARN_ON(!devlink_reload_supported(devlink->ops)))
-				continue;
-			err = devlink_reload(devlink, &init_net,
-					     DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
-					     DEVLINK_RELOAD_LIMIT_UNSPEC,
-					     &actions_performed, NULL);
-			if (err && err != -EOPNOTSUPP)
-				pr_warn("Failed to reload devlink instance into init_net\n");
-		}
+		if (!net_eq(devlink_net(devlink), net))
+			continue;
+
+		if (WARN_ON(!devlink_reload_supported(devlink->ops)))
+			continue;
+
+		/* Hold the reference to devlink instance so that it doesn't get unregistered
+		 * once global devlink_mutex is unlocked.
+		 * Store the devlink to a shadow list so that if devlink unregistration is
+		 * started, it can be still found in the shadow list.
+		 */
+		if (devlink_try_get(devlink))
+			list_add_tail(&devlink->reload_list, &local_list);
 	}
 	mutex_unlock(&devlink_mutex);
+
+	list_for_each_entry_safe(devlink, tmp, &local_list, reload_list) {
+		list_del_init(&devlink->reload_list);
+		err = devlink_reload(devlink, &init_net,
+				     DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
+				     DEVLINK_RELOAD_LIMIT_UNSPEC,
+				     &actions_performed, NULL);
+		if (err && err != -EOPNOTSUPP)
+			pr_warn("Failed to reload devlink instance into init_net\n");
+		devlink_put(devlink);
+	}
 }
 
 static struct pernet_operations devlink_pernet_ops __net_initdata = {
-- 
2.26.2

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ