lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Wed, 3 Mar 2010 19:25:29 -0800
From:	"Hugh Daschbach" <hdasch@...adcom.com>
To:	"Alan Stern" <stern@...land.harvard.edu>,
	"Greg KH" <gregkh@...e.de>
cc:	"Kay Sievers" <kay.sievers@...y.org>,
	"Jan Blunck" <jblunck@...e.de>,
	"David Vrabel" <david.vrabel@....com>,
	"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
	"linux-scsi@...r.kernel.org" <linux-scsi@...r.kernel.org>,
	"james Bottomley" <James.Bottomley@...e.de>,
	"James Smart" <james.smart@...lex.com>
Subject: RE: System reboot hangs due to race against devices_kset->list
 triggered by SCSI FC workqueue

Alan Stern [mailto:stern@...land.harvard.edu] writes:

> On Wed, 3 Mar 2010, Hugh Daschbach wrote:
>
>> > Can't we just protect the list?  What is wanting to write to the list
>> > while shutdown is happening?
>> 
>> Indeed, Alan suggested holding the kset spinlock while iterating the
>> list.  Unfortunately, the device shutdown routines may sleep.  At least
>> the SCSI sd_shutdown routine issues I/O to the device as part of
>> flushing device caches.  I would guess other subsystems sleep as well.
>
> What I meant was that you should hold the spinlock while finding and 
> unlinking the last device on the list.  Clearly you shouldn't hold it 
> while calling the device shutdown routine.

I misunderstood.  But I believe insertion and deletion is properly
serliaized.  It looks to me like the list structure is intact.  It's the
iterator that's been driven off into the weeds.

>> I'll try klist.  That looks like a good mediator between traversal and
>> removal.
>
> Yes, it removes a lot of difficulties.
>
> Alan Stern

Just to be clear, the list we're talking about is "list" in "struct
kset"  And the nodes of the list are chained by "entry" in "struct
kobject".  I just want to make sure that this is what's intended before
I get too far down the road.

At a minimum the change looks something like the patch below.  This
doesn't work yet.  And I'll need extensive testing in device plug and
unplug.  So I'm not looking for a detailed review.  But if I'm obviously
way off base, I'd like to know earlier rather than later.

Thanks for the guidance.

Hugh

diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index 58ae8e0..2c9b14a 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -18,6 +18,7 @@
 
 #include <linux/types.h>
 #include <linux/list.h>
+#include <linux/klist.h>
 #include <linux/sysfs.h>
 #include <linux/compiler.h>
 #include <linux/spinlock.h>
@@ -58,7 +59,7 @@ enum kobject_action {
 
 struct kobject {
 	const char		*name;
-	struct list_head	entry;
+	struct klist_node	entry;
 	struct kobject		*parent;
 	struct kset		*kset;
 	struct kobj_type	*ktype;
@@ -152,7 +153,7 @@ extern struct sysfs_ops kobj_sysfs_ops;
  * desired.
  */
 struct kset {
-	struct list_head list;
+	struct klist list;
 	spinlock_t list_lock;
 	struct kobject kobj;
 	struct kset_uevent_ops *uevent_ops;
diff --git a/include/linux/klist.h b/include/linux/klist.h
index e91a4e5..274fa91 100644
--- a/include/linux/klist.h
+++ b/include/linux/klist.h
@@ -64,5 +64,6 @@ extern void klist_iter_init_node(struct klist *k, struct klist_iter *i,
 				 struct klist_node *n);
 extern void klist_iter_exit(struct klist_iter *i);
 extern struct klist_node *klist_next(struct klist_iter *i);
+extern struct klist_node *klist_prev(struct klist_iter *i);
 
 #endif
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 07851e9..4ea25b0 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -1726,15 +1726,24 @@ out:
 }
 EXPORT_SYMBOL_GPL(device_move);
 
+static struct device *klist_node_to_dev(struct klist_node *node)
+{
+	struct kobject *kobj = container_of(node, struct kobject, entry);
+	return container_of(kobj, struct device, kobj);
+}
+
 /**
  * device_shutdown - call ->shutdown() on each device to shutdown.
  */
 void device_shutdown(void)
 {
-	struct device *dev, *devn;
+	struct device *dev;
+	struct klist_iter iter;
+	struct klist_node *node;
 
-	list_for_each_entry_safe_reverse(dev, devn, &devices_kset->list,
-				kobj.entry) {
+	klist_iter_init(&devices_kset->list, &iter);
+	while ((node = klist_prev(&iter)) != NULL) {
+		dev = klist_node_to_dev(node);
 		if (dev->bus && dev->bus->shutdown) {
 			dev_dbg(dev, "shutdown\n");
 			dev->bus->shutdown(dev);
@@ -1743,6 +1751,7 @@ retry:
 			dev_dbg(dev, "shutdown\n");
 			dev->driver->shutdown(dev);
 		}
 	}
+	klist_iter_exit(&iter);
 	async_synchronize_full();
 }
diff --git a/drivers/base/sys.c b/drivers/base/sys.c
index 0d90390..5d77600 100644
--- a/drivers/base/sys.c
+++ b/drivers/base/sys.c
@@ -160,6 +160,30 @@ EXPORT_SYMBOL_GPL(sysdev_class_unregister);
 
 static DEFINE_MUTEX(sysdev_drivers_lock);
 
+static struct sysdev_class *sysdev_next_class(struct klist_iter *iter)
+{
+	struct klist_node *node = klist_next(iter);
+	return node
+		? container_of(node, struct sysdev_class, kset.kobj.entry)
+		: NULL;
+}
+
+static struct sysdev_class *sysdev_prev_class(struct klist_iter *iter)
+{
+	struct klist_node *node = klist_prev(iter);
+	return node
+		? container_of(node, struct sysdev_class, kset.kobj.entry)
+		: NULL;
+}
+
+static struct sys_device *sysdev_next_sysdev(struct klist_iter *iter)
+{
+	struct klist_node *node = klist_prev(iter);
+	return node
+		? container_of(node, struct sys_device, kobj.entry)
+		: NULL;
+}
+
 /**
  *	sysdev_driver_register - Register auxillary driver
  *	@cls:	Device class driver belongs to.
@@ -193,8 +217,12 @@ int sysdev_driver_register(struct sysdev_class *cls, struct sysdev_driver *drv)
 		/* If devices of this class already exist, tell the driver */
 		if (drv->add) {
 			struct sys_device *dev;
-			list_for_each_entry(dev, &cls->kset.list, kobj.entry)
+			struct klist_iter device_iter;
+
+			klist_iter_init(&cls->kset.list, &device_iter);
+			while ((dev = sysdev_next_sysdev(&device_iter)) != NULL)
 				drv->add(dev);
+			klist_iter_exit(&device_iter);
 		}
 	} else {
 		err = -EINVAL;
@@ -218,8 +246,12 @@ void sysdev_driver_unregister(struct sysdev_class *cls,
 	if (cls) {
 		if (drv->remove) {
 			struct sys_device *dev;
-			list_for_each_entry(dev, &cls->kset.list, kobj.entry)
+			struct klist_iter device_iter;
+
+			klist_iter_init(&cls->kset.list, &device_iter);
+			while ((dev = sysdev_next_sysdev(&device_iter)) != NULL)
 				drv->remove(dev);
+			klist_iter_exit(&device_iter);
 		}
 		kset_put(&cls->kset);
 	}
@@ -312,18 +344,23 @@ void sysdev_unregister(struct sys_device *sysdev)
  */
 void sysdev_shutdown(void)
 {
+	struct klist_iter class_iter;
 	struct sysdev_class *cls;
 
 	pr_debug("Shutting Down System Devices\n");
 
 	mutex_lock(&sysdev_drivers_lock);
-	list_for_each_entry_reverse(cls, &system_kset->list, kset.kobj.entry) {
+
+	klist_iter_init(&system_kset->list, &class_iter);
+	while ((cls = sysdev_prev_class(&class_iter)) != NULL) {
+		struct klist_iter device_iter;
 		struct sys_device *sysdev;
 
 		pr_debug("Shutting down type '%s':\n",
 			 kobject_name(&cls->kset.kobj));
 
-		list_for_each_entry(sysdev, &cls->kset.list, kobj.entry) {
+		klist_iter_init(&cls->kset.list, &device_iter);
+		while ((sysdev = sysdev_next_sysdev(&device_iter)) != NULL) {
 			struct sysdev_driver *drv;
 			pr_debug(" %s\n", kobject_name(&sysdev->kobj));
 
@@ -337,7 +374,9 @@ void sysdev_shutdown(void)
 			if (cls->shutdown)
 				cls->shutdown(sysdev);
 		}
+		klist_iter_exit(&device_iter);
 	}
+	klist_iter_exit(&class_iter);
 	mutex_unlock(&sysdev_drivers_lock);
 }
 
@@ -375,6 +414,9 @@ static void __sysdev_resume(struct sys_device *dev)
  */
 int sysdev_suspend(pm_message_t state)
 {
+	struct klist_iter class_iter;
+	struct klist_iter device_iter;
+	struct klist_iter err_device_iter;
 	struct sysdev_class *cls;
 	struct sys_device *sysdev, *err_dev;
 	struct sysdev_driver *drv, *err_drv;
@@ -392,15 +434,17 @@ int sysdev_suspend(pm_message_t state)
 
 	pr_debug("Suspending System Devices\n");
 
-	list_for_each_entry_reverse(cls, &system_kset->list, kset.kobj.entry) {
+	klist_iter_init(&system_kset->list, &class_iter);
+	while ((cls = sysdev_prev_class(&class_iter)) != NULL) {
 		pr_debug("Suspending type '%s':\n",
 			 kobject_name(&cls->kset.kobj));
 
-		list_for_each_entry(sysdev, &cls->kset.list, kobj.entry) {
+		klist_iter_init(&cls->kset.list, &device_iter);
+		while ((sysdev = sysdev_next_sysdev(&device_iter)) != NULL) {
 			pr_debug(" %s\n", kobject_name(&sysdev->kobj));
 
 			/* Call auxillary drivers first */
-			list_for_each_entry(drv, &cls->drivers, entry) {
+			list_for_each_entry (drv, &cls->drivers, entry) {
 				if (drv->suspend) {
 					ret = drv->suspend(sysdev, state);
 					if (ret)
@@ -421,7 +465,9 @@ int sysdev_suspend(pm_message_t state)
 					cls->suspend);
 			}
 		}
+		klist_iter_exit(&device_iter);
 	}
+	klist_iter_exit(&class_iter);
 	return 0;
 	/* resume current sysdev */
 cls_driver:
@@ -441,20 +487,26 @@ aux_driver:
 	}
 
 	/* resume other sysdevs in current class */
-	list_for_each_entry(err_dev, &cls->kset.list, kobj.entry) {
+	klist_iter_init(&cls->kset.list, &err_device_iter);
+	while ((err_dev = sysdev_next_sysdev(&err_device_iter)) != NULL) {
 		if (err_dev == sysdev)
 			break;
 		pr_debug(" %s\n", kobject_name(&err_dev->kobj));
 		__sysdev_resume(err_dev);
 	}
+	klist_iter_exit(&err_device_iter);
 
 	/* resume other classes */
-	list_for_each_entry_continue(cls, &system_kset->list, kset.kobj.entry) {
-		list_for_each_entry(err_dev, &cls->kset.list, kobj.entry) {
+	while ((cls = sysdev_next_class(&class_iter)) != NULL) {
+		klist_iter_init(&cls->kset.list, &err_device_iter);
+		while ((err_dev = sysdev_next_sysdev(&err_device_iter))) {
 			pr_debug(" %s\n", kobject_name(&err_dev->kobj));
 			__sysdev_resume(err_dev);
 		}
+		klist_iter_exit(&err_device_iter);
 	}
+	klist_iter_exit(&device_iter);
+	klist_iter_exit(&class_iter);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(sysdev_suspend);
@@ -469,6 +521,8 @@ EXPORT_SYMBOL_GPL(sysdev_suspend);
  */
 int sysdev_resume(void)
 {
+	struct klist_iter class_iter;
+	struct klist_iter device_iter;
 	struct sysdev_class *cls;
 
 	WARN_ONCE(!irqs_disabled(),
@@ -476,18 +530,21 @@ int sysdev_resume(void)
 
 	pr_debug("Resuming System Devices\n");
 
-	list_for_each_entry(cls, &system_kset->list, kset.kobj.entry) {
+	while ((cls = sysdev_next_class(&class_iter)) != NULL) {
 		struct sys_device *sysdev;
 
 		pr_debug("Resuming type '%s':\n",
 			 kobject_name(&cls->kset.kobj));
 
-		list_for_each_entry(sysdev, &cls->kset.list, kobj.entry) {
+		klist_iter_init(&cls->kset.list, &device_iter);
+		while ((sysdev = sysdev_next_sysdev(&device_iter)) != NULL) {
 			pr_debug(" %s\n", kobject_name(&sysdev->kobj));
 
 			__sysdev_resume(sysdev);
 		}
+		klist_iter_exit(&device_iter);
 	}
+	klist_iter_exit(&class_iter);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(sysdev_resume);
diff --git a/lib/klist.c b/lib/klist.c
index 573d606..fcb19c8 100644
--- a/lib/klist.c
+++ b/lib/klist.c
@@ -363,3 +363,44 @@ struct klist_node *klist_next(struct klist_iter *i)
 	return i->i_cur;
 }
 EXPORT_SYMBOL_GPL(klist_next);
+
+/**
+ * klist_prev - Ante up next previous in list.
+ * @i: Iterator structure.
+ *
+ * First grab list lock. Decrement the reference count of the last iterated
+ * node, if there was one. Grab the previous node, increment its reference
+ * count, drop the lock, and return that previous node.
+ */
+struct klist_node *klist_prev(struct klist_iter *i)
+{
+	void (*put)(struct klist_node *) = i->i_klist->put;
+	struct klist_node *last = i->i_cur;
+	struct klist_node *prev;
+
+	spin_lock(&i->i_klist->k_lock);
+
+	if (last) {
+		prev = to_klist_node(last->n_node.prev);
+		if (!klist_dec_and_del(last))
+			put = NULL;
+	} else
+		prev = to_klist_node(i->i_klist->k_list.prev);
+
+	i->i_cur = NULL;
+	while (prev != to_klist_node(&i->i_klist->k_list)) {
+		if (likely(!knode_dead(prev))) {
+			kref_get(&prev->n_ref);
+			i->i_cur = prev;
+			break;
+		}
+		prev = to_klist_node(prev->n_node.prev);
+	}
+
+	spin_unlock(&i->i_klist->k_lock);
+
+	if (put && last)
+		put(last);
+	return i->i_cur;
+}
+EXPORT_SYMBOL_GPL(klist_prev);
diff --git a/lib/kobject.c b/lib/kobject.c
index b512b74..96808c3 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -126,7 +126,7 @@ static void kobj_kset_join(struct kobject *kobj)
 
 	kset_get(kobj->kset);
 	spin_lock(&kobj->kset->list_lock);
-	list_add_tail(&kobj->entry, &kobj->kset->list);
+	klist_add_tail(&kobj->entry, &kobj->kset->list);
 	spin_unlock(&kobj->kset->list_lock);
 }
 
@@ -137,7 +137,7 @@ static void kobj_kset_leave(struct kobject *kobj)
 		return;
 
 	spin_lock(&kobj->kset->list_lock);
-	list_del_init(&kobj->entry);
+	klist_del(&kobj->entry);
 	spin_unlock(&kobj->kset->list_lock);
 	kset_put(kobj->kset);
 }
@@ -147,7 +147,6 @@ static void kobject_init_internal(struct kobject *kobj)
 	if (!kobj)
 		return;
 	kref_init(&kobj->kref);
-	INIT_LIST_HEAD(&kobj->entry);
 	kobj->state_in_sysfs = 0;
 	kobj->state_add_uevent_sent = 0;
 	kobj->state_remove_uevent_sent = 0;
@@ -671,7 +670,7 @@ EXPORT_SYMBOL_GPL(kobject_create_and_add);
 void kset_init(struct kset *k)
 {
 	kobject_init_internal(&k->kobj);
-	INIT_LIST_HEAD(&k->list);
+	klist_init(&k->list, NULL, NULL);
 	spin_lock_init(&k->list_lock);
 }
 
@@ -735,6 +734,14 @@ void kset_unregister(struct kset *k)
 	kobject_put(&k->kobj);
 }
 
+static struct kobject *kset_next_kobject(struct klist_iter *iter)
+{
+	struct klist_node *node = klist_next(iter);
+	return node
+		? container_of(node, struct kobject, entry)
+		: NULL;
+}
+
 /**
  * kset_find_obj - search for object in kset.
  * @kset: kset we're looking in.
@@ -746,17 +753,18 @@ void kset_unregister(struct kset *k)
  */
 struct kobject *kset_find_obj(struct kset *kset, const char *name)
 {
+	struct klist_iter i;
 	struct kobject *k;
 	struct kobject *ret = NULL;
 
-	spin_lock(&kset->list_lock);
-	list_for_each_entry(k, &kset->list, entry) {
+	klist_iter_init(&kset->list, &i);
+	while ((k = kset_next_kobject(&i))) {
 		if (kobject_name(k) && !strcmp(kobject_name(k), name)) {
 			ret = kobject_get(k);
 			break;
 		}
 	}
-	spin_unlock(&kset->list_lock);
+	klist_iter_exit(&i);
 	return ret;
 }
 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ