lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Wed,  8 Sep 2021 20:15:12 +0800
From:   Yi Tao <escape@...ux.alibaba.com>
To:     gregkh@...uxfoundation.org, tj@...nel.org, lizefan.x@...edance.com,
        hannes@...xchg.org, mcgrof@...nel.org, keescook@...omium.org,
        yzaikin@...gle.com
Cc:     linux-kernel@...r.kernel.org, cgroups@...r.kernel.org,
        linux-fsdevel@...r.kernel.org, shanpeic@...ux.alibaba.com
Subject: [RFC PATCH 1/2] add pinned flags for kernfs node

This patch prepares for the implementation of the cgroup pool. If a
kernfs node is set to pinned, the data of this node will no longer be
protected by kernfs internally. When any of the following actions is
performed on it, the area normally protected by kernfs_rwsem will
instead be protected by the specific spinlock:
	1. rename this node
	2. remove this node
	3. create a child node

Suggested-by: Shanpei Chen <shanpeic@...ux.alibaba.com>
Signed-off-by: Yi Tao <escape@...ux.alibaba.com>
---
 fs/kernfs/dir.c        | 74 ++++++++++++++++++++++++++++++++++++--------------
 include/linux/kernfs.h | 14 ++++++++++
 2 files changed, 68 insertions(+), 20 deletions(-)

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index ba581429bf7b..68b05b5bc1a2 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -26,7 +26,6 @@
 
 static bool kernfs_active(struct kernfs_node *kn)
 {
-	lockdep_assert_held(&kernfs_rwsem);
 	return atomic_read(&kn->active) >= 0;
 }
 
@@ -461,10 +460,9 @@ static void kernfs_drain(struct kernfs_node *kn)
 {
 	struct kernfs_root *root = kernfs_root(kn);
 
-	lockdep_assert_held_write(&kernfs_rwsem);
 	WARN_ON_ONCE(kernfs_active(kn));
 
-	up_write(&kernfs_rwsem);
+	kernfs_unlock(kn);
 
 	if (kernfs_lockdep(kn)) {
 		rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_);
@@ -483,7 +481,7 @@ static void kernfs_drain(struct kernfs_node *kn)
 
 	kernfs_drain_open_files(kn);
 
-	down_write(&kernfs_rwsem);
+	kernfs_lock(kn);
 }
 
 /**
@@ -722,7 +720,7 @@ int kernfs_add_one(struct kernfs_node *kn)
 	bool has_ns;
 	int ret;
 
-	down_write(&kernfs_rwsem);
+	kernfs_lock(parent);
 
 	ret = -EINVAL;
 	has_ns = kernfs_ns_enabled(parent);
@@ -753,7 +751,7 @@ int kernfs_add_one(struct kernfs_node *kn)
 		ps_iattr->ia_mtime = ps_iattr->ia_ctime;
 	}
 
-	up_write(&kernfs_rwsem);
+	kernfs_unlock(parent);
 
 	/*
 	 * Activate the new node unless CREATE_DEACTIVATED is requested.
@@ -767,7 +765,7 @@ int kernfs_add_one(struct kernfs_node *kn)
 	return 0;
 
 out_unlock:
-	up_write(&kernfs_rwsem);
+	kernfs_unlock(parent);
 	return ret;
 }
 
@@ -788,8 +786,6 @@ static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent,
 	bool has_ns = kernfs_ns_enabled(parent);
 	unsigned int hash;
 
-	lockdep_assert_held(&kernfs_rwsem);
-
 	if (has_ns != (bool)ns) {
 		WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
 		     has_ns ? "required" : "invalid", parent->name, name);
@@ -1242,8 +1238,6 @@ static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
 {
 	struct rb_node *rbn;
 
-	lockdep_assert_held_write(&kernfs_rwsem);
-
 	/* if first iteration, visit leftmost descendant which may be root */
 	if (!pos)
 		return kernfs_leftmost_descendant(root);
@@ -1299,8 +1293,6 @@ static void __kernfs_remove(struct kernfs_node *kn)
 {
 	struct kernfs_node *pos;
 
-	lockdep_assert_held_write(&kernfs_rwsem);
-
 	/*
 	 * Short-circuit if non-root @kn has already finished removal.
 	 * This is for kernfs_remove_self() which plays with active ref
@@ -1369,9 +1361,9 @@ static void __kernfs_remove(struct kernfs_node *kn)
  */
 void kernfs_remove(struct kernfs_node *kn)
 {
-	down_write(&kernfs_rwsem);
+	kernfs_lock(kn);
 	__kernfs_remove(kn);
-	up_write(&kernfs_rwsem);
+	kernfs_unlock(kn);
 }
 
 /**
@@ -1525,13 +1517,13 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
 		return -ENOENT;
 	}
 
-	down_write(&kernfs_rwsem);
+	kernfs_lock(parent);
 
 	kn = kernfs_find_ns(parent, name, ns);
 	if (kn)
 		__kernfs_remove(kn);
 
-	up_write(&kernfs_rwsem);
+	kernfs_unlock(parent);
 
 	if (kn)
 		return 0;
@@ -1557,7 +1549,9 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
 	if (!kn->parent)
 		return -EINVAL;
 
-	down_write(&kernfs_rwsem);
+	/* if parent is pinned, parent->lock protects rename */
+	if (!kn->parent->pinned)
+		down_write(&kernfs_rwsem);
 
 	error = -ENOENT;
 	if (!kernfs_active(kn) || !kernfs_active(new_parent) ||
@@ -1576,7 +1570,8 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
 	/* rename kernfs_node */
 	if (strcmp(kn->name, new_name) != 0) {
 		error = -ENOMEM;
-		new_name = kstrdup_const(new_name, GFP_KERNEL);
+		/* use GFP_ATOMIC to avoid sleep */
+		new_name = kstrdup_const(new_name, GFP_ATOMIC);
 		if (!new_name)
 			goto out;
 	} else {
@@ -1611,10 +1606,49 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
 
 	error = 0;
  out:
-	up_write(&kernfs_rwsem);
+	if (!kn->parent->pinned)
+		up_write(&kernfs_rwsem);
 	return error;
 }
 
+/* Traverse all descendants and set pinned */
+void kernfs_set_pinned(struct kernfs_node *kn, spinlock_t *lock)
+{
+	struct kernfs_node *pos = NULL;
+
+	while ((pos = kernfs_next_descendant_post(pos, kn))) {
+		pos->pinned = true;
+		pos->lock = lock;
+	}
+}
+
+/* Traverse all descendants and clear pinned */
+void kernfs_clear_pinned(struct kernfs_node *kn)
+{
+	struct kernfs_node *pos = NULL;
+
+	while ((pos = kernfs_next_descendant_post(pos, kn))) {
+		pos->pinned = false;
+		pos->lock = NULL;
+	}
+}
+
+void kernfs_lock(struct kernfs_node *kn)
+{
+	if (!kn->pinned)
+		down_write(&kernfs_rwsem);
+	else
+		spin_lock(kn->lock);
+}
+
+void kernfs_unlock(struct kernfs_node *kn)
+{
+	if (!kn->pinned)
+		up_write(&kernfs_rwsem);
+	else
+		spin_unlock(kn->lock);
+}
+
 /* Relationship between mode and the DT_xxx types */
 static inline unsigned char dt_type(struct kernfs_node *kn)
 {
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 1093abf7c28c..a70d96308c51 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -161,6 +161,13 @@ struct kernfs_node {
 	unsigned short		flags;
 	umode_t			mode;
 	struct kernfs_iattrs	*iattr;
+
+	/*
+	 * If pinned is true, use lock to protect remove, rename this kernfs
+	 * node or create child kernfs node.
+	 */
+	bool			pinned;
+	spinlock_t		*lock;
 };
 
 /*
@@ -415,6 +422,11 @@ int kernfs_xattr_set(struct kernfs_node *kn, const char *name,
 
 struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root,
 						   u64 id);
+
+void kernfs_set_pinned(struct kernfs_node *kn, spinlock_t *lock);
+void kernfs_clear_pinned(struct kernfs_node *kn);
+void kernfs_lock(struct kernfs_node *kn);
+void kernfs_unlock(struct kernfs_node *kn);
 #else	/* CONFIG_KERNFS */
 
 static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn)
@@ -528,6 +540,8 @@ static inline void kernfs_kill_sb(struct super_block *sb) { }
 
 static inline void kernfs_init(void) { }
 
+inline void kernfs_set_pinned(struct kernfs_node *kn, spinlock_t *lock) {}
+inline void kernfs_clear_pinned(struct kernfs_node *kn) {}
 #endif	/* CONFIG_KERNFS */
 
 /**
-- 
1.8.3.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ