lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20260203194433.1738162-1-longman@redhat.com>
Date: Tue,  3 Feb 2026 14:44:33 -0500
From: Waiman Long <longman@...hat.com>
To: Paul Moore <paul@...l-moore.com>,
	Eric Paris <eparis@...hat.com>,
	Christian Brauner <brauner@...nel.org>,
	Al Viro <viro@...iv.linux.org.uk>
Cc: linux-kernel@...r.kernel.org,
	audit@...r.kernel.org,
	Richard Guy Briggs <rgb@...hat.com>,
	Ricardo Robaina <rrobaina@...hat.com>,
	Waiman Long <longman@...hat.com>
Subject: [PATCH v2] audit: Avoid excessive dput/dget in audit_context setup and reset paths

It is found that on large SMP systems with a large number of CPUs and
auditing enabled, workloads that generate a massive amount of syscalls
(like open/close) in parallel on the same working directory can cause
significant spinlock contention of the lockref.lock of the working
directory's dentry.

One possible way to reduce such spinlock contention scenario is to
keep the pwd references in audit_free_names() and reuse it in the next
audit_alloc_name() call if there is no change in working directory.

A new pwd_reset field is added to audit_context to indicate, if set,
that the pwd has been reset but still hold mount and dentry references.
A new fs_pwd_equal() helper is added to fs_struct.h to check if the
fs->pwd has been changed. When audit_alloc_name() is called with the
context->pwd still valid and the fs->pwd hasn't been changed, it can
return without calling get_fs_pwd().

By adding test code to count the number of effective audit_free_names()
and audit_alloc_name() calls and the number of relevant path_put() and
path_get() calls after rebooting a patched kernel with auditing enabled
on a 2-socket 96-CPU system, there were about 30k path_get()/path_put()
out of a total of about 1 million audit_free_names()/audit_alloc_name()
calls. It is about 3% of those before the patch for this particular case.

After resetting the counters and running a parallel kernel build,
the new figures were about 202k path_get()/path_put() out of about 56M
audit_free_names()/audit_alloc_name(). That is about 0.4%.

As auditing is increasingly used in production systems due to various
legal and commercial compliance requirements, it is important that we
should try to minimize performance overhead when auditing is enabled.

Signed-off-by: Waiman Long <longman@...hat.com>
---

 [v2] Merge 2 patches together & defer the path_put() call in
      audit_alloc_name() to workqueue to prevent might_sleep() in atomic
      context problem.

 include/linux/fs_struct.h | 11 ++++++++
 kernel/audit.h            |  7 +++++
 kernel/auditsc.c          | 56 +++++++++++++++++++++++++++++++++++----
 3 files changed, 69 insertions(+), 5 deletions(-)

diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h
index 0070764b790a..3a3dda8adc11 100644
--- a/include/linux/fs_struct.h
+++ b/include/linux/fs_struct.h
@@ -40,6 +40,17 @@ static inline void get_fs_pwd(struct fs_struct *fs, struct path *pwd)
 	read_sequnlock_excl(&fs->seq);
 }
 
+/* Return true if fs->pwd matches the given pwd */
+static inline bool fs_pwd_equal(struct fs_struct *fs, struct path *pwd)
+{
+	bool match;
+
+	read_seqlock_excl(&fs->seq);
+	match = (fs->pwd.dentry == pwd->dentry) && (fs->pwd.mnt == pwd->mnt);
+	read_sequnlock_excl(&fs->seq);
+	return match;
+}
+
 extern bool current_chrooted(void);
 
 static inline int current_umask(void)
diff --git a/kernel/audit.h b/kernel/audit.h
index 7c401729e21b..03f3539b10e7 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -133,6 +133,13 @@ struct audit_context {
 	int		    name_count; /* total records in names_list */
 	struct list_head    names_list;	/* struct audit_names->list anchor */
 	char		    *filterkey;	/* key for rule that triggered record */
+	/*
+	 * pwd_reset is set if audit_free_names() has been called from
+	 * audit_reset_context() to reset pwd, but pwd is still holding dentry
+	 * and mount references to be used in later audit action without
+	 * the need to reacqure the references again.
+	 */
+	int		    pwd_reset;
 	struct path	    pwd;
 	struct audit_aux_data *aux;
 	struct audit_aux_data *aux_pids;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index dd0563a8e0be..e2e8bdb06185 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -123,6 +123,11 @@ struct audit_nfcfgop_tab {
 	const char		*s;
 };
 
+struct audit_pwd_put {
+	struct work_struct	work;
+	struct path		pwd;
+};
+
 static const struct audit_nfcfgop_tab audit_nfcfgs[] = {
 	{ AUDIT_XT_OP_REGISTER,			"xt_register"		   },
 	{ AUDIT_XT_OP_REPLACE,			"xt_replace"		   },
@@ -931,6 +936,9 @@ static inline void audit_free_names(struct audit_context *context)
 {
 	struct audit_names *n, *next;
 
+	if (list_empty(&context->names_list))
+		return;	/* audit_alloc_name() has not been called */
+
 	list_for_each_entry_safe(n, next, &context->names_list, list) {
 		list_del(&n->list);
 		if (n->name)
@@ -939,9 +947,7 @@ static inline void audit_free_names(struct audit_context *context)
 			kfree(n);
 	}
 	context->name_count = 0;
-	path_put(&context->pwd);
-	context->pwd.dentry = NULL;
-	context->pwd.mnt = NULL;
+	context->pwd_reset = true;
 }
 
 static inline void audit_free_aux(struct audit_context *context)
@@ -1088,6 +1094,8 @@ static inline void audit_free_context(struct audit_context *context)
 	audit_reset_context(context);
 	audit_proctitle_free(context);
 	free_tree_refs(context);
+	if (context->pwd_reset)
+		path_put(&context->pwd);
 	kfree(context->filterkey);
 	kfree(context);
 }
@@ -1519,7 +1527,8 @@ static void audit_log_name(struct audit_context *context, struct audit_names *n,
 			/* name was specified as a relative path and the
 			 * directory component is the cwd
 			 */
-			if (context->pwd.dentry && context->pwd.mnt)
+			if (context->pwd.dentry && context->pwd.mnt &&
+			    !context->pwd_reset)
 				audit_log_d_path(ab, " name=", &context->pwd);
 			else
 				audit_log_format(ab, " name=(null)");
@@ -1767,7 +1776,7 @@ static void audit_log_exit(void)
 				  context->target_comm))
 		call_panic = 1;
 
-	if (context->pwd.dentry && context->pwd.mnt) {
+	if (context->pwd.dentry && context->pwd.mnt && !context->pwd_reset) {
 		ab = audit_log_start(context, GFP_KERNEL, AUDIT_CWD);
 		if (ab) {
 			audit_log_d_path(ab, "cwd=", &context->pwd);
@@ -2144,6 +2153,19 @@ static void handle_path(const struct dentry *dentry)
 	rcu_read_unlock();
 }
 
+static void audit_pwd_put_workfn(struct work_struct *work)
+{
+	struct audit_pwd_put *pp = container_of(work, struct audit_pwd_put, work);
+
+	path_put(&pp->pwd);
+	/*
+	 * The audit_pwd_put structure can be safely freed here without UAF
+	 * as all the workqueue related data have been copied out and processed
+	 * before this function is called.
+	 */
+	kfree(pp);
+}
+
 static struct audit_names *audit_alloc_name(struct audit_context *context,
 						unsigned char type)
 {
@@ -2164,6 +2186,30 @@ static struct audit_names *audit_alloc_name(struct audit_context *context,
 	list_add_tail(&aname->list, &context->names_list);
 
 	context->name_count++;
+	if (context->pwd_reset) {
+		struct audit_pwd_put *pp;
+
+		context->pwd_reset = false;
+		if (likely(fs_pwd_equal(current->fs, &context->pwd)))
+			return aname;
+
+		/*
+		 * Defer the path_put() call of the old pwd to workqueue as
+		 * we may be in an atomic context that cannot call path_put()
+		 * directly because of might_sleep().
+		 */
+		pp = kmalloc(sizeof(*pp), GFP_NOFS);
+		if (!pp) {
+			if (aname->should_free)
+				kfree(aname);
+			return NULL;
+		}
+		INIT_WORK(&pp->work, audit_pwd_put_workfn);
+		pp->pwd = context->pwd;
+		schedule_work(&pp->work);
+		context->pwd.dentry = NULL;
+		context->pwd.mnt = NULL;
+	}
 	if (!context->pwd.dentry)
 		get_fs_pwd(current->fs, &context->pwd);
 	return aname;
-- 
2.52.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ