lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20241112155713.269214-2-bigeasy@linutronix.de>
Date: Tue, 12 Nov 2024 16:52:38 +0100
From: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
To: cgroups@...r.kernel.org,
	linux-kernel@...r.kernel.org
Cc: Michal Koutný <mkoutny@...e.com>,
	"Paul E. McKenney" <paulmck@...nel.org>,
	Boqun Feng <boqun.feng@...il.com>,
	Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
	Hillf Danton <hdanton@...a.com>,
	Johannes Weiner <hannes@...xchg.org>,
	Marco Elver <elver@...gle.com>,
	Tejun Heo <tj@...nel.org>,
	Zefan Li <lizefan.x@...edance.com>,
	tglx@...utronix.de,
	Sebastian Andrzej Siewior <bigeasy@...utronix.de>
Subject: [PATCH v2 1/2] kernfs: Make it possible to use RCU for kernfs_node::name lookup.

Instead of using kernfs_rename_lock for lookups of ::name allow to use
RCU protection for lookup. Rely on kn's kernfs_root::kernfs_rwsem for
update synchronisation.

KERNFS_ROOT_SAME_PARENT is added to signal that the parent never
changes. kernfs_rename_ns() checks that flag and if it is seen, it
ensures that the parent is the same and then does not acquire
kernfs_rename_lock during parent/ name assignment and updates only the
name attribute. Without the flag, the update is performed as always.

kernfs_name_rcu() is a copy of kernfs_name() which is using RCU
protection while accessing the kernfs_node::name. Both functions
validate the KERNFS_ROOT_SAME_PARENT flag. The same is true for
kernfs_path_from_node() and kernfs_path_from_node_rcu().

Suggested-by: Tejun Heo <tj@...nel.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
---
 fs/kernfs/dir.c        | 151 ++++++++++++++++++++++++++++++-----------
 include/linux/kernfs.h |  23 ++++++-
 2 files changed, 133 insertions(+), 41 deletions(-)

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 458519e416fe7..41c87ee76aa70 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -51,14 +51,6 @@ static bool kernfs_lockdep(struct kernfs_node *kn)
 #endif
 }
 
-static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen)
-{
-	if (!kn)
-		return strscpy(buf, "(null)", buflen);
-
-	return strscpy(buf, kn->parent ? kn->name : "/", buflen);
-}
-
 /* kernfs_node_depth - compute depth from @from to @to */
 static size_t kernfs_depth(struct kernfs_node *from, struct kernfs_node *to)
 {
@@ -168,10 +160,13 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
 
 	/* Calculate how many bytes we need for the rest */
 	for (i = depth_to - 1; i >= 0; i--) {
+		const char *name;
+
 		for (kn = kn_to, j = 0; j < i; j++)
 			kn = kn->parent;
 
-		len += scnprintf(buf + len, buflen - len, "/%s", kn->name);
+		name = rcu_dereference_check(kn->name_rcu, lockdep_is_held(&kernfs_rename_lock));
+		len += scnprintf(buf + len, buflen - len, "/%s", name);
 	}
 
 	return len;
@@ -184,7 +179,8 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
  * @buflen: size of @buf
  *
  * Copies the name of @kn into @buf of @buflen bytes.  The behavior is
- * similar to strscpy().
+ * similar to strscpy(). The root node must not be initialized with
+ * KERNFS_ROOT_SAME_PARENT.
  *
  * Fills buffer with "(null)" if @kn is %NULL.
  *
@@ -195,13 +191,47 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
  */
 int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
 {
-	unsigned long flags;
-	int ret;
+	struct kernfs_root *root;
 
-	read_lock_irqsave(&kernfs_rename_lock, flags);
-	ret = kernfs_name_locked(kn, buf, buflen);
-	read_unlock_irqrestore(&kernfs_rename_lock, flags);
-	return ret;
+	guard(read_lock_irqsave)(&kernfs_rename_lock);
+	if (kn) {
+		root = kernfs_root(kn);
+		if (WARN_ON_ONCE(root->flags & KERNFS_ROOT_SAME_PARENT))
+			kn = NULL;
+	}
+
+	if (!kn)
+		return strscpy(buf, "(null)", buflen);
+
+	return strscpy(buf, kn->parent ? kn->name : "/", buflen);
+}
+
+/**
+ * kernfs_name_rcu - obtain the name of a given node
+ * @kn: kernfs_node of interest
+ * @buf: buffer to copy @kn's name into
+ * @buflen: size of @buf
+ *
+ * Same as kernfs_name except it uses RCU for name lookup. The root node must
+ * be with KERNFS_ROOT_SAME_PARENT.
+ *
+ * This function can be called from any context.
+ */
+
+int kernfs_name_rcu(struct kernfs_node *kn, char *buf, size_t buflen)
+{
+	struct kernfs_root *root;
+
+	if (kn) {
+		root = kernfs_root(kn);
+		if (WARN_ON_ONCE(!(root->flags & KERNFS_ROOT_SAME_PARENT)))
+			kn = NULL;
+	}
+	if (!kn)
+		return strscpy(buf, "(null)", buflen);
+
+	guard(rcu)();
+	return strscpy(buf, kn->parent ? rcu_dereference(kn->name_rcu) : "/", buflen);
 }
 
 /**
@@ -214,7 +244,8 @@ int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
  * Builds @to's path relative to @from in @buf. @from and @to must
  * be on the same kernfs-root. If @from is not parent of @to, then a relative
  * path (which includes '..'s) as needed to reach from @from to @to is
- * returned.
+ * returned. The root node must not be initialized with
+ * KERNFS_ROOT_SAME_PARENT.
  *
  * Return: the length of the constructed path.  If the path would have been
  * greater than @buflen, @buf contains the truncated path with the trailing
@@ -223,16 +254,42 @@ int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
 int kernfs_path_from_node(struct kernfs_node *to, struct kernfs_node *from,
 			  char *buf, size_t buflen)
 {
-	unsigned long flags;
-	int ret;
+	struct kernfs_root *root;
 
-	read_lock_irqsave(&kernfs_rename_lock, flags);
-	ret = kernfs_path_from_node_locked(to, from, buf, buflen);
-	read_unlock_irqrestore(&kernfs_rename_lock, flags);
-	return ret;
+	guard(read_lock_irqsave)(&kernfs_rename_lock);
+	if (to) {
+		root = kernfs_root(to);
+		if (WARN_ON_ONCE(root->flags & KERNFS_ROOT_SAME_PARENT))
+			to = NULL;
+	}
+	return kernfs_path_from_node_locked(to, from, buf, buflen);
 }
 EXPORT_SYMBOL_GPL(kernfs_path_from_node);
 
+/**
+ * kernfs_path_from_node_rcu - build path of node @to relative to @from.
+ * @from: parent kernfs_node relative to which we need to build the path
+ * @to: kernfs_node of interest
+ * @buf: buffer to copy @to's path into
+ * @buflen: size of @buf
+ *
+ * Same as kernfs_path_from_node. Uses RCU for the name lookup. The root node
+ * must be initialized with KERNFS_ROOT_SAME_PARENT.
+ */
+int kernfs_path_from_node_rcu(struct kernfs_node *to, struct kernfs_node *from,
+			      char *buf, size_t buflen)
+{
+	struct kernfs_root *root;
+
+	if (to) {
+		root = kernfs_root(to);
+		if (WARN_ON_ONCE(!(root->flags & KERNFS_ROOT_SAME_PARENT)))
+			to = NULL;
+	}
+	guard(rcu)();
+	return kernfs_path_from_node_locked(to, from, buf, buflen);
+}
+
 /**
  * pr_cont_kernfs_name - pr_cont name of a kernfs_node
  * @kn: kernfs_node of interest
@@ -1717,7 +1774,8 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
 {
 	struct kernfs_node *old_parent;
 	struct kernfs_root *root;
-	const char *old_name = NULL;
+	bool rcu_name = false;
+	const char *kn_name;
 	int error;
 
 	/* can't move or rename root */
@@ -1732,9 +1790,18 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
 	    (new_parent->flags & KERNFS_EMPTY_DIR))
 		goto out;
 
+	if (root->flags & KERNFS_ROOT_SAME_PARENT) {
+		error = -EINVAL;
+		if (WARN_ON_ONCE(kn->parent != new_parent))
+			goto out;
+		rcu_name = true;
+	}
+
 	error = 0;
+	kn_name = rcu_dereference_check(kn->name_rcu,
+					lockdep_is_held(&root->kernfs_rwsem));
 	if ((kn->parent == new_parent) && (kn->ns == new_ns) &&
-	    (strcmp(kn->name, new_name) == 0))
+	    (strcmp(kn_name, new_name) == 0))
 		goto out;	/* nothing to rename */
 
 	error = -EEXIST;
@@ -1742,7 +1809,7 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
 		goto out;
 
 	/* rename kernfs_node */
-	if (strcmp(kn->name, new_name) != 0) {
+	if (strcmp(kn_name, new_name) != 0) {
 		error = -ENOMEM;
 		new_name = kstrdup_const(new_name, GFP_KERNEL);
 		if (!new_name)
@@ -1755,27 +1822,33 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
 	 * Move to the appropriate place in the appropriate directories rbtree.
 	 */
 	kernfs_unlink_sibling(kn);
-	kernfs_get(new_parent);
 
 	/* rename_lock protects ->parent and ->name accessors */
-	write_lock_irq(&kernfs_rename_lock);
+	if (!rcu_name) {
+		write_lock_irq(&kernfs_rename_lock);
 
-	old_parent = kn->parent;
-	kn->parent = new_parent;
+		kernfs_get(new_parent);
+		old_parent = kn->parent;
+		kn->parent = new_parent;
 
-	kn->ns = new_ns;
-	if (new_name) {
-		old_name = kn->name;
-		kn->name = new_name;
+		kn->ns = new_ns;
+		if (new_name)
+			kn->name = new_name;
+
+		write_unlock_irq(&kernfs_rename_lock);
+		kernfs_put(old_parent);
+	} else {
+		/* name assignment is RCU protected, parent is the same */
+		kn->ns = new_ns;
+		if (new_name)
+			rcu_assign_pointer(kn->name_rcu, new_name);
 	}
 
-	write_unlock_irq(&kernfs_rename_lock);
-
-	kn->hash = kernfs_name_hash(kn->name, kn->ns);
+	kn->hash = kernfs_name_hash(new_name ?: kn_name, kn->ns);
 	kernfs_link_sibling(kn);
 
-	kernfs_put(old_parent);
-	kfree_const(old_name);
+	if (new_name && !is_kernel_rodata((unsigned long)kn_name))
+		kfree_rcu_mightsleep(kn_name);
 
 	error = 0;
  out:
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 87c79d076d6d7..b52393f1045c6 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -147,6 +147,11 @@ enum kernfs_root_flag {
 	 * Support user xattrs to be written to nodes rooted at this root.
 	 */
 	KERNFS_ROOT_SUPPORT_USER_XATTR		= 0x0008,
+
+	/*
+	 * Renames must not change the parent node.
+	 */
+	KERNFS_ROOT_SAME_PARENT			= 0x0010,
 };
 
 /* type-specific structures for kernfs_node union members */
@@ -200,7 +205,10 @@ struct kernfs_node {
 	 * parent directly.
 	 */
 	struct kernfs_node	*parent;
-	const char		*name;
+	union {
+		const char		__rcu *name_rcu;
+		const char		*name;
+	};
 
 	struct rb_node		rb;
 
@@ -395,8 +403,11 @@ static inline bool kernfs_ns_enabled(struct kernfs_node *kn)
 }
 
 int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen);
-int kernfs_path_from_node(struct kernfs_node *root_kn, struct kernfs_node *kn,
+int kernfs_name_rcu(struct kernfs_node *kn, char *buf, size_t buflen);
+int kernfs_path_from_node(struct kernfs_node *kn_to, struct kernfs_node *kn_from,
 			  char *buf, size_t buflen);
+int kernfs_path_from_node_rcu(struct kernfs_node *kn_to, struct kernfs_node *kn_from,
+			      char *buf, size_t buflen);
 void pr_cont_kernfs_name(struct kernfs_node *kn);
 void pr_cont_kernfs_path(struct kernfs_node *kn);
 struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn);
@@ -475,11 +486,19 @@ static inline bool kernfs_ns_enabled(struct kernfs_node *kn)
 static inline int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
 { return -ENOSYS; }
 
+static inline int kernfs_name_rcu(struct kernfs_node *kn, char *buf, size_t buflen)
+{ return -ENOSYS; }
+
 static inline int kernfs_path_from_node(struct kernfs_node *root_kn,
 					struct kernfs_node *kn,
 					char *buf, size_t buflen)
 { return -ENOSYS; }
 
+static inline int kernfs_path_from_node_rcu(struct kernfs_node *root_kn,
+					    struct kernfs_node *kn,
+					    char *buf, size_t buflen)
+{ return -ENOSYS; }
+
 static inline void pr_cont_kernfs_name(struct kernfs_node *kn) { }
 static inline void pr_cont_kernfs_path(struct kernfs_node *kn) { }
 
-- 
2.45.2


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ