lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Message-ID: <20240717192937.527246-1-mjguzik@gmail.com>
Date: Wed, 17 Jul 2024 21:29:36 +0200
From: Mateusz Guzik <mjguzik@...il.com>
To: paul@...l-moore.com
Cc: linux-security-module@...r.kernel.org,
	linux-kernel@...r.kernel.org,
	Mateusz Guzik <mjguzik@...il.com>
Subject: [PATCH] cred: separate the refcount from frequently read fields

Refcount is modified a lot, for example every time a file is closed or
opened. Then this avoidably dirties the same cacheline which contains
uid, gid and other frequently read fields.

Also tidy up non_rcu handling -- it only transitions true->false once,
and even then only for a rare case (access(2) system call which had to
duplicate creds). As such the field belongs in the read-mostly area.

In order to avoid growing the struct fill in the space with keyring.

No change in size on typical configs on 64-bit architectures.

Validated with a simple test where one thread opens and closes a file,
while another keeps re-reading another file in a loop (ops/s):
before:	4353763
after:	4742792 (+9%)

Signed-off-by: Mateusz Guzik <mjguzik@...il.com>
---

Back in May I sent a patch to plug a small hole in the struct. I did not
get any traffic, but just in case I'll note it should be dropped in
favor of this one.

Also note this does not solve real scalability problems, but I'm going
to get to it at some point(tm).

 fs/open.c            |  2 +-
 include/linux/cred.h | 31 +++++++++++++++----------------
 kernel/cred.c        |  6 +++---
 3 files changed, 19 insertions(+), 20 deletions(-)

diff --git a/fs/open.c b/fs/open.c
index 22adbef7ecc2..930e22fe8dba 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -455,7 +455,7 @@ static const struct cred *access_override_creds(void)
 	 * cred accesses will keep things non-racy to avoid RCU
 	 * freeing.
 	 */
-	override_cred->non_rcu = 1;
+	override_cred->non_rcu = true;
 
 	old_cred = override_creds(override_cred);
 
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 2976f534a7a3..3127eaad4140 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -110,7 +110,16 @@ static inline int groups_search(const struct group_info *group_info, kgid_t grp)
  */
 struct cred {
 	atomic_long_t	usage;
-	kuid_t		uid;		/* real UID of the task */
+	struct rcu_head	rcu;		/* RCU deletion hook */
+#ifdef CONFIG_KEYS
+	unsigned char	jit_keyring;	/* default keyring to attach requested
+					 * keys to */
+	struct key	*session_keyring; /* keyring inherited over fork */
+	struct key	*process_keyring; /* keyring private to this process */
+	struct key	*thread_keyring; /* keyring private to this thread */
+	struct key	*request_key_auth; /* assumed request_key authority */
+#endif
+	kuid_t		uid ____cacheline_aligned_in_smp;/* real UID of the task */
 	kgid_t		gid;		/* real GID of the task */
 	kuid_t		suid;		/* saved UID of the task */
 	kgid_t		sgid;		/* saved GID of the task */
@@ -119,19 +128,12 @@ struct cred {
 	kuid_t		fsuid;		/* UID for VFS ops */
 	kgid_t		fsgid;		/* GID for VFS ops */
 	unsigned	securebits;	/* SUID-less security management */
+	bool		non_rcu;	/* Can we skip RCU deletion? */
 	kernel_cap_t	cap_inheritable; /* caps our children can inherit */
 	kernel_cap_t	cap_permitted;	/* caps we're permitted */
 	kernel_cap_t	cap_effective;	/* caps we can actually use */
 	kernel_cap_t	cap_bset;	/* capability bounding set */
 	kernel_cap_t	cap_ambient;	/* Ambient capability set */
-#ifdef CONFIG_KEYS
-	unsigned char	jit_keyring;	/* default keyring to attach requested
-					 * keys to */
-	struct key	*session_keyring; /* keyring inherited over fork */
-	struct key	*process_keyring; /* keyring private to this process */
-	struct key	*thread_keyring; /* keyring private to this thread */
-	struct key	*request_key_auth; /* assumed request_key authority */
-#endif
 #ifdef CONFIG_SECURITY
 	void		*security;	/* LSM security */
 #endif
@@ -139,11 +141,6 @@ struct cred {
 	struct user_namespace *user_ns; /* user_ns the caps and keyrings are relative to. */
 	struct ucounts *ucounts;
 	struct group_info *group_info;	/* supplementary groups for euid/fsgid */
-	/* RCU deletion */
-	union {
-		int non_rcu;			/* Can we skip RCU deletion? */
-		struct rcu_head	rcu;		/* RCU deletion hook */
-	};
 } __randomize_layout;
 
 extern void __put_cred(struct cred *);
@@ -217,7 +214,8 @@ static inline const struct cred *get_cred_many(const struct cred *cred, int nr)
 	struct cred *nonconst_cred = (struct cred *) cred;
 	if (!cred)
 		return cred;
-	nonconst_cred->non_rcu = 0;
+	if (unlikely(nonconst_cred->non_rcu))
+		WRITE_ONCE(nonconst_cred->non_rcu, false);
 	return get_new_cred_many(nonconst_cred, nr);
 }
 
@@ -242,7 +240,8 @@ static inline const struct cred *get_cred_rcu(const struct cred *cred)
 		return NULL;
 	if (!atomic_long_inc_not_zero(&nonconst_cred->usage))
 		return NULL;
-	nonconst_cred->non_rcu = 0;
+	if (unlikely(nonconst_cred->non_rcu))
+		WRITE_ONCE(nonconst_cred->non_rcu, false);
 	return cred;
 }
 
diff --git a/kernel/cred.c b/kernel/cred.c
index 075cfa7c896f..23b73ee2ec63 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -104,7 +104,7 @@ void __put_cred(struct cred *cred)
 	BUG_ON(cred == current->cred);
 	BUG_ON(cred == current->real_cred);
 
-	if (cred->non_rcu)
+	if (unlikely(cred->non_rcu))
 		put_cred_rcu(&cred->rcu);
 	else
 		call_rcu(&cred->rcu, put_cred_rcu);
@@ -218,7 +218,7 @@ struct cred *prepare_creds(void)
 	old = task->cred;
 	memcpy(new, old, sizeof(struct cred));
 
-	new->non_rcu = 0;
+	new->non_rcu = false;
 	atomic_long_set(&new->usage, 1);
 	get_group_info(new->group_info);
 	get_uid(new->user);
@@ -643,7 +643,7 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon)
 	old = get_task_cred(daemon);
 
 	*new = *old;
-	new->non_rcu = 0;
+	new->non_rcu = false;
 	atomic_long_set(&new->usage, 1);
 	get_uid(new->user);
 	get_user_ns(new->user_ns);
-- 
2.43.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ