lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <66f229fc4daa9_2a86294ec@dwillia2-xfh.jf.intel.com.notmuch>
Date: Mon, 23 Sep 2024 19:54:52 -0700
From: Dan Williams <dan.j.williams@...el.com>
To: Dan Williams <dan.j.williams@...el.com>, Greg Kroah-Hartman
	<gregkh@...uxfoundation.org>, Linux regressions mailing list
	<regressions@...ts.linux.dev>
CC: Dan Williams <dan.j.williams@...el.com>, LKML
	<linux-kernel@...r.kernel.org>, "stable@...r.kernel.org"
	<stable@...r.kernel.org>
Subject: Re: [regression] frozen usb mouse pointer at boot

Dan Williams wrote:
> Greg Kroah-Hartman wrote:
> [..]
> > 
> > This is odd.
> > 
> > Does the latest 6.10.y release also show this problem?
> > 
> > I can't duplicate this here, and it's the first I've heard of it (given
> > that USB mice are pretty popular, I would suspect others would have hit
> > it as well...)
> 
> Sorry for missing this earlier. One thought is that userspace has a
> dependency on uevent_show() flushing device probing. In other words the
> side effect of taking the device_lock() in uevent_show() is that udev
> might enjoy some occasions where the reading the uevent flushes probing
> before the udev rule runs. With this change, uevent_show() no longer
> waits for any inflight probes to complete.
> 
> One idea to fix this problem is to create a special case sysfs attribute
> type that takes the device_lock() before kernfs_get_active() to avoid
> the deadlock on attribute teardown.
> 
> I'll take a look. Thanks for forwarding the report Thorsten!

Ok, the following boots and passes the CXL unit tests, would appreciate
if the reporter can give this a try:

-- >8 --
Subject: driver core: Fix userspace expectations of uevent_show() as a probe barrier

From: Dan Williams <dan.j.williams@...el.com>

Commit "driver core: Fix uevent_show() vs driver detach race" [1]
attempted to fix a lockdep report in uevent_show() by making the lookup
of driver information for a given device lockless. It turns out that
userspace likely depends on the side-effect of uevent_show() flushing
device probing.

Introduce a new "locked" type for sysfs attributes that arranges for the
attribute to be called under the device-lock, but without setting up a
reverse locking order dependency with the kernfs_get_active() lock.

This new facility holds a reference on a device while any locked-sysfs
attribute of that device is open. It then takes the lock around sysfs
read/write operations in the following order:

    of->mutex
    of->op_mutex <= device_lock()
    kernfs_get_active()
    <operation>

Compare that to problematic locking order of:

    of->mutex
    kernfs_get_active()
    <operation>
        device_lock()

...which causes potential deadlocks with kernfs_drain() that may be
called while the device_lock() is held.

Fixes: 15fffc6a5624 ("driver core: Fix uevent_show() vs driver detach race") [1]
Cc: stable@...r.kernel.org
Link: https://bugzilla.kernel.org/show_bug.cgi?id=219244
Signed-off-by: Dan Williams <dan.j.williams@...el.com>
---
 drivers/base/core.c    |    2 +-
 fs/kernfs/file.c       |   24 +++++++++++++++++++-----
 fs/sysfs/file.c        |   47 +++++++++++++++++++++++++++++++++++++++++++++++
 fs/sysfs/group.c       |    4 ++--
 include/linux/device.h |    3 +++
 include/linux/kernfs.h |    1 +
 include/linux/sysfs.h  |   10 ++++++++++
 7 files changed, 83 insertions(+), 8 deletions(-)

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 8c0733d3aad8..1fd5a18cbb62 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -2772,7 +2772,7 @@ static ssize_t uevent_store(struct device *dev, struct device_attribute *attr,
 
 	return count;
 }
-static DEVICE_ATTR_RW(uevent);
+static DEVICE_ATTR_LOCKED_RW(uevent);
 
 static ssize_t online_show(struct device *dev, struct device_attribute *attr,
 			   char *buf)
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 8502ef68459b..eb5c2167beb9 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -142,6 +142,20 @@ static void kernfs_seq_stop_active(struct seq_file *sf, void *v)
 	kernfs_put_active(of->kn);
 }
 
+static void kernfs_open_file_lock(struct kernfs_open_file *of)
+{
+	mutex_lock(&of->mutex);
+	if (of->op_mutex)
+		mutex_lock(of->op_mutex);
+}
+
+static void kernfs_open_file_unlock(struct kernfs_open_file *of)
+{
+	if (of->op_mutex)
+		mutex_unlock(of->op_mutex);
+	mutex_unlock(&of->mutex);
+}
+
 static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos)
 {
 	struct kernfs_open_file *of = sf->private;
@@ -151,7 +165,7 @@ static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos)
 	 * @of->mutex nests outside active ref and is primarily to ensure that
 	 * the ops aren't called concurrently for the same open file.
 	 */
-	mutex_lock(&of->mutex);
+	kernfs_open_file_lock(of);
 	if (!kernfs_get_active(of->kn))
 		return ERR_PTR(-ENODEV);
 
@@ -193,7 +207,7 @@ static void kernfs_seq_stop(struct seq_file *sf, void *v)
 
 	if (v != ERR_PTR(-ENODEV))
 		kernfs_seq_stop_active(sf, v);
-	mutex_unlock(&of->mutex);
+	kernfs_open_file_unlock(of);
 }
 
 static int kernfs_seq_show(struct seq_file *sf, void *v)
@@ -322,9 +336,9 @@ static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter)
 	 * @of->mutex nests outside active ref and is used both to ensure that
 	 * the ops aren't called concurrently for the same open file.
 	 */
-	mutex_lock(&of->mutex);
+	kernfs_open_file_lock(of);
 	if (!kernfs_get_active(of->kn)) {
-		mutex_unlock(&of->mutex);
+		kernfs_open_file_unlock(of);
 		len = -ENODEV;
 		goto out_free;
 	}
@@ -336,7 +350,7 @@ static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter)
 		len = -EINVAL;
 
 	kernfs_put_active(of->kn);
-	mutex_unlock(&of->mutex);
+	kernfs_open_file_unlock(of);
 
 	if (len > 0)
 		iocb->ki_pos += len;
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index d1995e2d6c94..1bb878efcf00 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -15,6 +15,7 @@
 #include <linux/list.h>
 #include <linux/mutex.h>
 #include <linux/seq_file.h>
+#include <linux/device.h>
 #include <linux/mm.h>
 
 #include "sysfs.h"
@@ -189,6 +190,26 @@ static int sysfs_kf_bin_open(struct kernfs_open_file *of)
 	return 0;
 }
 
+/* locked attributes are always device attributes */
+static int sysfs_kf_setup_lock(struct kernfs_open_file *of)
+{
+	struct kobject *kobj = of->kn->parent->priv;
+	struct device *dev = kobj_to_dev(kobj);
+
+	get_device(dev);
+	of->op_mutex = &dev->mutex;
+
+	return 0;
+}
+
+static void sysfs_kf_free_lock(struct kernfs_open_file *of)
+{
+	struct kobject *kobj = of->kn->parent->priv;
+	struct device *dev = kobj_to_dev(kobj);
+
+	put_device(dev);
+}
+
 void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr)
 {
 	struct kernfs_node *kn = kobj->sd, *tmp;
@@ -227,6 +248,25 @@ static const struct kernfs_ops sysfs_file_kfops_rw = {
 	.write		= sysfs_kf_write,
 };
 
+static const struct kernfs_ops sysfs_locked_kfops_ro = {
+	.seq_show	= sysfs_kf_seq_show,
+	.open		= sysfs_kf_setup_lock,
+	.release	= sysfs_kf_free_lock,
+};
+
+static const struct kernfs_ops sysfs_locked_kfops_wo = {
+	.write		= sysfs_kf_write,
+	.open		= sysfs_kf_setup_lock,
+	.release	= sysfs_kf_free_lock,
+};
+
+static const struct kernfs_ops sysfs_locked_kfops_rw = {
+	.seq_show	= sysfs_kf_seq_show,
+	.write		= sysfs_kf_write,
+	.open		= sysfs_kf_setup_lock,
+	.release	= sysfs_kf_free_lock,
+};
+
 static const struct kernfs_ops sysfs_prealloc_kfops_ro = {
 	.read		= sysfs_kf_read,
 	.prealloc	= true,
@@ -287,6 +327,13 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent,
 			ops = &sysfs_prealloc_kfops_ro;
 		else if (sysfs_ops->store)
 			ops = &sysfs_prealloc_kfops_wo;
+	} else if (mode & SYSFS_LOCKED) {
+		if (sysfs_ops->show && sysfs_ops->store)
+			ops = &sysfs_locked_kfops_rw;
+		else if (sysfs_ops->show)
+			ops = &sysfs_locked_kfops_ro;
+		else if (sysfs_ops->store)
+			ops = &sysfs_locked_kfops_wo;
 	} else {
 		if (sysfs_ops->show && sysfs_ops->store)
 			ops = &sysfs_file_kfops_rw;
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index d22ad67a0f32..0158367866be 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -68,11 +68,11 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj,
 					continue;
 			}
 
-			WARN(mode & ~(SYSFS_PREALLOC | 0664),
+			WARN(mode & ~(SYSFS_PREALLOC | SYSFS_LOCKED | 0664),
 			     "Attribute %s: Invalid permissions 0%o\n",
 			     (*attr)->name, mode);
 
-			mode &= SYSFS_PREALLOC | 0664;
+			mode &= SYSFS_PREALLOC | SYSFS_LOCKED | 0664;
 			error = sysfs_add_file_mode_ns(parent, *attr, mode, uid,
 						       gid, NULL);
 			if (unlikely(error))
diff --git a/include/linux/device.h b/include/linux/device.h
index 34eb20f5966f..c38c33bed333 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -180,6 +180,9 @@ ssize_t device_show_string(struct device *dev, struct device_attribute *attr,
 #define DEVICE_ATTR_RW(_name) \
 	struct device_attribute dev_attr_##_name = __ATTR_RW(_name)
 
+#define DEVICE_ATTR_LOCKED_RW(_name) \
+	struct device_attribute dev_attr_##_name = __ATTR_LOCKED_RW(_name)
+
 /**
  * DEVICE_ATTR_ADMIN_RW - Define an admin-only read-write device attribute.
  * @_name: Attribute name.
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 87c79d076d6d..df6828a7cd3e 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -257,6 +257,7 @@ struct kernfs_open_file {
 
 	/* private fields, do not use outside kernfs proper */
 	struct mutex		mutex;
+	struct mutex		*op_mutex;
 	struct mutex		prealloc_mutex;
 	int			event;
 	struct list_head	list;
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index c4e64dc11206..981588c9c673 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -103,6 +103,7 @@ struct attribute_group {
 
 #define SYSFS_PREALLOC		010000
 #define SYSFS_GROUP_INVISIBLE	020000
+#define SYSFS_LOCKED		040000
 
 /*
  * DEFINE_SYSFS_GROUP_VISIBLE(name):
@@ -230,6 +231,13 @@ struct attribute_group {
 	.store	= _store,						\
 }
 
+#define __ATTR_LOCKED(_name, _mode, _show, _store) {			\
+	.attr = {.name = __stringify(_name),				\
+		 .mode = SYSFS_LOCKED | VERIFY_OCTAL_PERMISSIONS(_mode) },\
+	.show	= _show,						\
+	.store	= _store,						\
+}
+
 #define __ATTR_RO(_name) {						\
 	.attr	= { .name = __stringify(_name), .mode = 0444 },		\
 	.show	= _name##_show,						\
@@ -255,6 +263,8 @@ struct attribute_group {
 
 #define __ATTR_RW(_name) __ATTR(_name, 0644, _name##_show, _name##_store)
 
+#define __ATTR_LOCKED_RW(_name) __ATTR_LOCKED(_name, 0644, _name##_show, _name##_store)
+
 #define __ATTR_NULL { .attr = { .name = NULL } }
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ