lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Fri, 26 Jan 2024 10:11:20 -0500
From: Rodrigo Vivi <rodrigo.vivi@...el.com>
To: <linux-kernel@...r.kernel.org>
CC: Rodrigo Vivi <rodrigo.vivi@...el.com>, Mukesh Ojha
	<quic_mojha@...cinc.com>, Johannes Berg <johannes@...solutions.net>, "Greg
 Kroah-Hartman" <gregkh@...uxfoundation.org>, "Rafael J . Wysocki"
	<rafael@...nel.org>, Jose Souza <jose.souza@...el.com>
Subject: [PATCH 2/2] devcoredump: Remove the mutex serialization

Commit 01daccf74832 ("devcoredump : Serialize devcd_del work")
introduced the mutex to protect against the case where mod_delayed_work
could be called before the delayed work even existed.

Instead, we can simply initialize the delayed work before the device
is added, so the race condition doesn't exist in the first place.

The mutex_unlock is very problematic here. Although mod_delayed_work
is async, we have no guarantee that the work has not finished before
the mutex_unlock(&devcd->mutex), and if that happens 'devcd' is used
after being freed.

Cc: Mukesh Ojha <quic_mojha@...cinc.com>
Cc: Johannes Berg <johannes@...solutions.net>
Cc: Greg Kroah-Hartman <gregkh@...uxfoundation.org>
Cc: Rafael J. Wysocki <rafael@...nel.org>
Cc: Jose Souza <jose.souza@...el.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@...el.com>
---
 drivers/base/devcoredump.c | 97 +++-----------------------------------
 1 file changed, 6 insertions(+), 91 deletions(-)

diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c
index 678ecc2fa242..0e26b1273920 100644
--- a/drivers/base/devcoredump.c
+++ b/drivers/base/devcoredump.c
@@ -25,47 +25,6 @@ struct devcd_entry {
 	struct device devcd_dev;
 	void *data;
 	size_t datalen;
-	/*
-	 * Here, mutex is required to serialize the calls to del_wk work between
-	 * user/kernel space which happens when devcd is added with device_add()
-	 * and that sends uevent to user space. User space reads the uevents,
-	 * and calls to devcd_data_write() which try to modify the work which is
-	 * not even initialized/queued from devcoredump.
-	 *
-	 *
-	 *
-	 *        cpu0(X)                                 cpu1(Y)
-	 *
-	 *        dev_coredump() uevent sent to user space
-	 *        device_add()  ======================> user space process Y reads the
-	 *                                              uevents writes to devcd fd
-	 *                                              which results into writes to
-	 *
-	 *                                             devcd_data_write()
-	 *                                               mod_delayed_work()
-	 *                                                 try_to_grab_pending()
-	 *                                                   del_timer()
-	 *                                                     debug_assert_init()
-	 *       INIT_DELAYED_WORK()
-	 *       schedule_delayed_work()
-	 *
-	 *
-	 * Also, mutex alone would not be enough to avoid scheduling of
-	 * del_wk work after it get flush from a call to devcd_free()
-	 * mentioned as below.
-	 *
-	 *	disabled_store()
-	 *        devcd_free()
-	 *          mutex_lock()             devcd_data_write()
-	 *          flush_delayed_work()
-	 *          mutex_unlock()
-	 *                                   mutex_lock()
-	 *                                   mod_delayed_work()
-	 *                                   mutex_unlock()
-	 * So, delete_work flag is required.
-	 */
-	struct mutex mutex;
-	bool delete_work;
 	struct module *owner;
 	ssize_t (*read)(char *buffer, loff_t offset, size_t count,
 			void *data, size_t datalen);
@@ -125,13 +84,8 @@ static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj,
 	struct device *dev = kobj_to_dev(kobj);
 	struct devcd_entry *devcd = dev_to_devcd(dev);
 
-	mutex_lock(&devcd->mutex);
-	if (!devcd->delete_work) {
-		devcd->delete_work = true;
-		mod_delayed_work(system_wq, &devcd->del_wk, 0);
-	}
-	mutex_unlock(&devcd->mutex);
-
+	/* This file needs to be closed before devcd can be deleted */
+	mod_delayed_work(system_wq, &devcd->del_wk, 0);
 	return count;
 }
 
@@ -158,12 +112,7 @@ static int devcd_free(struct device *dev, void *data)
 {
 	struct devcd_entry *devcd = dev_to_devcd(dev);
 
-	mutex_lock(&devcd->mutex);
-	if (!devcd->delete_work)
-		devcd->delete_work = true;
-
 	flush_delayed_work(&devcd->del_wk);
-	mutex_unlock(&devcd->mutex);
 	return 0;
 }
 
@@ -173,30 +122,6 @@ static ssize_t disabled_show(const struct class *class, const struct class_attri
 	return sysfs_emit(buf, "%d\n", devcd_disabled);
 }
 
-/*
- *
- *	disabled_store()                                	worker()
- *	 class_for_each_device(&devcd_class,
- *		NULL, NULL, devcd_free)
- *         ...
- *         ...
- *	   while ((dev = class_dev_iter_next(&iter))
- *                                                             devcd_del()
- *                                                               device_del()
- *                                                                 put_device() <- last reference
- *             error = fn(dev, data)                           devcd_dev_release()
- *             devcd_free(dev, data)                           kfree(devcd)
- *             mutex_lock(&devcd->mutex);
- *
- *
- * In the above diagram, It looks like disabled_store() would be racing with parallely
- * running devcd_del() and result in memory abort while acquiring devcd->mutex which
- * is called after kfree of devcd memory  after dropping its last reference with
- * put_device(). However, this will not happens as fn(dev, data) runs
- * with its own reference to device via klist_node so it is not its last reference.
- * so, above situation would not occur.
- */
-
 static ssize_t disabled_store(const struct class *class, const struct class_attribute *attr,
 			      const char *buf, size_t count)
 {
@@ -308,13 +233,7 @@ static void devcd_remove(void *data)
 {
 	struct devcd_entry *devcd = data;
 
-	mutex_lock(&devcd->mutex);
-	if (!devcd->delete_work) {
-		devcd->delete_work = true;
-		/* XXX: Cannot flush otherwise the mutex below will hit a UAF */
-		mod_delayed_work(system_wq, &devcd->del_wk, 0);
-	}
-	mutex_unlock(&devcd->mutex);
+	flush_delayed_work(&devcd->del_wk);
 }
 
 /**
@@ -365,16 +284,15 @@ void dev_coredumpm(struct device *dev, struct module *owner,
 	devcd->read = read;
 	devcd->free = free;
 	devcd->failing_dev = get_device(dev);
-	devcd->delete_work = false;
 
-	mutex_init(&devcd->mutex);
 	device_initialize(&devcd->devcd_dev);
 
 	dev_set_name(&devcd->devcd_dev, "devcd%d",
 		     atomic_inc_return(&devcd_count));
 	devcd->devcd_dev.class = &devcd_class;
 
-	mutex_lock(&devcd->mutex);
+	INIT_DELAYED_WORK(&devcd->del_wk, devcd_del);
+	schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT);
 	dev_set_uevent_suppress(&devcd->devcd_dev, true);
 	if (device_add(&devcd->devcd_dev))
 		goto put_device;
@@ -392,15 +310,12 @@ void dev_coredumpm(struct device *dev, struct module *owner,
 
 	dev_set_uevent_suppress(&devcd->devcd_dev, false);
 	kobject_uevent(&devcd->devcd_dev.kobj, KOBJ_ADD);
-	INIT_DELAYED_WORK(&devcd->del_wk, devcd_del);
-	schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT);
 	if (devm_add_action(dev, devcd_remove, devcd))
 		dev_warn(dev, "devcoredump managed auto-removal registration failed\n");
-	mutex_unlock(&devcd->mutex);
 	return;
  put_device:
+	cancel_delayed_work(&devcd->del_wk);
 	put_device(&devcd->devcd_dev);
-	mutex_unlock(&devcd->mutex);
  put_module:
 	module_put(owner);
  free:
-- 
2.43.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ