Date:   Wed, 16 Aug 2023 10:45:18 -0500
From:   Stuart Hayes <stuart.w.hayes@...il.com>
To:     linux-kernel@...r.kernel.org,
        Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
        "Rafael J . Wysocki" <rafael@...nel.org>,
        Tanjore Suresh <tansuresh@...gle.com>,
        Martin Belanger <Martin.Belanger@...l.com>,
        Oliver O'Halloran <oohall@...il.com>,
        Daniel Wagner <dwagner@...e.de>,
        Keith Busch <kbusch@...nel.org>, Lukas Wunner <lukas@...ner.de>
Cc:     Stuart Hayes <stuart.w.hayes@...il.com>
Subject: [PATCH] driver core: shut down devices asynchronously

Attempt to shut down devices asynchronously by building a tree of devices with
associated work and completion structs, ensuring that child devices are shut
down before their parents.
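
For illustration, with a simple topology the scheduled work items would behave
roughly like this (the device names below are made up):

    root_bridge
        nvme_ctrl
            nvme_ns

    work(nvme_ns):     shut down nvme_ns, then complete(nvme_ns)
    work(nvme_ctrl):   wait_for_completion(nvme_ns), shut down nvme_ctrl,
                       then complete(nvme_ctrl)
    work(root_bridge): wait_for_completion(nvme_ctrl), shut down root_bridge,
                       then complete(root_bridge)

Work items for sibling subtrees run in parallel, which is where the time
savings come from.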

This can dramatically reduce system shutdown/reboot time on systems that have
devices that take many seconds to shut down, such as some NVMe drives.  On one
system tested, the shutdown time went from 11 minutes without the patch to 55
seconds with it.

The code could be simplified by adding the work and completion structs to
struct device, but it seems better not to burden struct device with them when
there is likely enough memory to allocate these structures at shutdown time;
if there isn't, the code simply falls back to the current synchronous shutdown.
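
For reference, that alternative would mean embedding something like the
following in struct device (hypothetical field names, not part of this patch),
at the cost of a little extra memory in every device:

	/*
	 * Hypothetical alternative (not in this patch): embed the shutdown
	 * bookkeeping directly in struct device, so nothing needs to be
	 * allocated at shutdown time.
	 */
	struct device {
		/* ... existing fields ... */
		struct work_struct	shutdown_work;	/* per-device shutdown work item */
		struct completion	shutdown_done;	/* signaled once shut down */
		/* ... existing fields ... */
	};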

Signed-off-by: Stuart Hayes <stuart.w.hayes@...il.com>
---
 drivers/base/core.c | 216 ++++++++++++++++++++++++++++++++++++++------
 1 file changed, 189 insertions(+), 27 deletions(-)

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 3dff5037943e..fec571f56843 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -4709,6 +4709,187 @@ int device_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid)
 }
 EXPORT_SYMBOL_GPL(device_change_owner);
 
+static void shutdown_device(struct device *dev, struct device *parent)
+{
+	/* hold lock to avoid race with probe/release */
+	if (parent)
+		device_lock(parent);
+	device_lock(dev);
+
+	/* Don't allow any more runtime suspends */
+	pm_runtime_get_noresume(dev);
+	pm_runtime_barrier(dev);
+
+	if (dev->class && dev->class->shutdown_pre) {
+		if (initcall_debug)
+			dev_info(dev, "shutdown_pre\n");
+		dev->class->shutdown_pre(dev);
+	}
+	if (dev->bus && dev->bus->shutdown) {
+		if (initcall_debug)
+			dev_info(dev, "shutdown\n");
+		dev->bus->shutdown(dev);
+	} else if (dev->driver && dev->driver->shutdown) {
+		if (initcall_debug)
+			dev_info(dev, "shutdown\n");
+		dev->driver->shutdown(dev);
+	}
+
+	device_unlock(dev);
+	if (parent)
+		device_unlock(parent);
+}
+
+struct shutdown_work {
+	struct list_head node;		/* entry in parent's children list */
+	struct device *dev;		/* device to shut down (NULL for the root node) */
+	struct work_struct work;	/* work item that shuts down @dev */
+	struct completion complete;	/* signaled once @dev (and its children) are shut down */
+	struct list_head children;	/* shutdown_work for @dev's child devices */
+};
+
+static void shutdown_dev_work(struct work_struct *work)
+{
+	struct shutdown_work *sd_work = container_of(work, struct shutdown_work, work);
+	struct shutdown_work *child_sd_work;
+	struct device *dev = sd_work->dev;
+
+	/*
+	 * wait for child devices to finish shutdown
+	 */
+	list_for_each_entry(child_sd_work, &sd_work->children, node) {
+		wait_for_completion(&child_sd_work->complete);
+	}
+
+	if (dev) {
+		/*
+		 * Make sure the device is off the kset list, in the
+		 * event that dev->*->shutdown() doesn't remove it.
+		 */
+		spin_lock(&devices_kset->list_lock);
+		list_del_init(&dev->kobj.entry);
+		spin_unlock(&devices_kset->list_lock);
+
+		shutdown_device(dev, dev->parent);
+	}
+
+	complete(&sd_work->complete);
+}
+
+static void schedule_shutdown_work(struct shutdown_work *dev_shutdown_work)
+{
+	struct shutdown_work *child;
+
+	/*
+	 * schedule children to be shutdown before parents
+	 */
+	list_for_each_entry(child, &dev_shutdown_work->children, node) {
+		schedule_shutdown_work(child);
+	}
+
+	schedule_work(&dev_shutdown_work->work);
+}
+
+static void free_shutdown_tree(struct shutdown_work *tree)
+{
+	struct shutdown_work *childitem, *tmp;
+
+	if (tree) {
+		list_for_each_entry_safe(childitem, tmp, &tree->children, node) {
+			list_del(&childitem->node);
+			free_shutdown_tree(childitem);
+		}
+		put_device(tree->dev);
+		kfree(tree);
+	}
+}
+
+static struct shutdown_work *create_shutdown_tree(struct device *dev,
+						  struct shutdown_work *parent)
+{
+	struct klist_iter i;
+	struct shutdown_work *dev_sdwork;
+	int error = 0;
+
+	/*
+	 * alloc & init shutdown_work for this device
+	 */
+	dev_sdwork = kzalloc(sizeof(*dev_sdwork), GFP_KERNEL);
+	if (!dev_sdwork)
+		goto fail;
+
+	if (dev) {
+		dev_sdwork->dev = dev;
+		get_device(dev);
+	}
+	INIT_WORK(&dev_sdwork->work, shutdown_dev_work);
+	INIT_LIST_HEAD(&dev_sdwork->children);
+	INIT_LIST_HEAD(&dev_sdwork->node);
+	init_completion(&dev_sdwork->complete);
+
+	if (parent) {
+		/*
+		 * add shutdown_work for a device's children
+		 */
+		klist_iter_init(&dev_sdwork->dev->p->klist_children, &i);
+		while (!error && (dev = next_device(&i)))
+			error = !create_shutdown_tree(dev, dev_sdwork);
+		klist_iter_exit(&i);
+
+		if (error)
+			goto fail;
+
+		list_add_tail(&dev_sdwork->node, &parent->children);
+		return dev_sdwork;
+	}
+
+	/*
+	 * add shutdown_work for top level devices
+	 */
+	spin_lock(&devices_kset->list_lock);
+	list_for_each_entry(dev, &devices_kset->list, kobj.entry) {
+		if (!dev->parent)
+			error = !create_shutdown_tree(dev, dev_sdwork);
+		if (error)
+			break;
+	}
+	spin_unlock(&devices_kset->list_lock);
+
+	if (error)
+		goto fail;
+
+	return dev_sdwork;
+
+fail:
+	free_shutdown_tree(dev_sdwork);
+	return NULL;
+}
+
+/**
+ * device_shutdown_async - schedule ->shutdown() on each device so that devices
+ * are shut down asynchronously, ensuring each device's children are shut down
+ * before the device itself
+ */
+static int device_shutdown_async(void)
+{
+	struct shutdown_work *shutdown_work_tree;
+
+	/*
+	 * build tree with devices to be shut down
+	 */
+	shutdown_work_tree = create_shutdown_tree(NULL, NULL);
+	if (!shutdown_work_tree)
+		return -ENOMEM;
+
+	/*
+	 * schedule the work to run & wait for it to finish
+	 */
+	schedule_shutdown_work(shutdown_work_tree);
+	wait_for_completion(&shutdown_work_tree->complete);
+	free_shutdown_tree(shutdown_work_tree);
+	return 0;
+}
+
 /**
  * device_shutdown - call ->shutdown() on each device to shutdown.
  */
@@ -4721,6 +4902,13 @@ void device_shutdown(void)
 
 	cpufreq_suspend();
 
+	if (initcall_debug)
+		pr_info("attempting asynchronous device shutdown\n");
+	if (!device_shutdown_async())
+		return;
+	if (initcall_debug)
+		pr_info("starting synchronous device shutdown\n");
+
 	spin_lock(&devices_kset->list_lock);
 	/*
 	 * Walk the devices list backward, shutting down each in turn.
@@ -4745,33 +4933,7 @@ void device_shutdown(void)
 		list_del_init(&dev->kobj.entry);
 		spin_unlock(&devices_kset->list_lock);
 
-		/* hold lock to avoid race with probe/release */
-		if (parent)
-			device_lock(parent);
-		device_lock(dev);
-
-		/* Don't allow any more runtime suspends */
-		pm_runtime_get_noresume(dev);
-		pm_runtime_barrier(dev);
-
-		if (dev->class && dev->class->shutdown_pre) {
-			if (initcall_debug)
-				dev_info(dev, "shutdown_pre\n");
-			dev->class->shutdown_pre(dev);
-		}
-		if (dev->bus && dev->bus->shutdown) {
-			if (initcall_debug)
-				dev_info(dev, "shutdown\n");
-			dev->bus->shutdown(dev);
-		} else if (dev->driver && dev->driver->shutdown) {
-			if (initcall_debug)
-				dev_info(dev, "shutdown\n");
-			dev->driver->shutdown(dev);
-		}
-
-		device_unlock(dev);
-		if (parent)
-			device_unlock(parent);
+		shutdown_device(dev, parent);
 
 		put_device(dev);
 		put_device(parent);
-- 
2.39.3
