Message-ID: <20250128160743.3142544-1-mjguzik@gmail.com>
Date: Tue, 28 Jan 2025 17:07:43 +0100
From: Mateusz Guzik <mjguzik@...il.com>
To: brauner@...nel.org,
	oleg@...hat.com,
	akpm@...ux-foundation.org
Cc: linux-mm@...ck.org,
	linux-kernel@...r.kernel.org,
	Mateusz Guzik <mjguzik@...il.com>
Subject: [PATCH v2] exit: perform randomness and pid work without tasklist_lock

Both add_device_randomness() and attach_pid()/detach_pid() have their
own synchronisation mechanisms.

The clone side calls them *without* the tasklist_lock held, meaning
parallel calls can contend on their locks.

The exit side calls them *with* the tasklist_lock held, which means the
hold time is avoidably extended by waiting on either of the two locks, in
turn exacerbating contention on tasklist_lock itself.

Postponing the work until after the lock is dropped bumps the thread
creation/destruction rate by 15% on a 24-core VM.
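
To illustrate the shape of the change, here is a minimal userspace sketch
of the pattern (an illustration only, not the kernel code; the actual
implementation is in the diff below): only the cheap unhooking stays under
the hot lock, the expensive cleanup runs after it is dropped.

#include <pthread.h>
#include <stdlib.h>

struct node {
	struct node *next;
};

static pthread_mutex_t hot_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *list_head;

/* Cheap: unhook the head node and hand it to the caller. */
static struct node *detach_head_return(void)
{
	struct node *n = list_head;

	if (n)
		list_head = n->next;
	return n;
}

static void release_head(void)
{
	struct node *deferred;

	pthread_mutex_lock(&hot_lock);
	deferred = detach_head_return();
	pthread_mutex_unlock(&hot_lock);

	/*
	 * free() stands in for free_pid()/add_device_randomness() here:
	 * it may contend on the allocator's own locks, but no longer
	 * while hot_lock is held.
	 */
	free(deferred);
}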

Bench (plop into will-it-scale):
$ cat tests/threadspawn1.c

#include <assert.h>
#include <pthread.h>

char *testcase_description = "Thread creation and teardown";

static void *worker(void *arg)
{
        return (NULL);
}

void testcase(unsigned long long *iterations, unsigned long nr)
{
        pthread_t thread;
        int error;

        while (1) {
                error = pthread_create(&thread, NULL, worker, NULL);
                assert(error == 0);
                error = pthread_join(thread, NULL);
                assert(error == 0);
                (*iterations)++;
        }
}

Run:
$ ./threadspawn1_processes -t 24
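
For reference, a rough standalone approximation of the same loop (a
hypothetical single-process harness with a fixed iteration count, not the
will-it-scale one, which runs a worker per task and samples shared
counters):

/* Build with: gcc -O2 -pthread threadspawn-standalone.c */
#include <assert.h>
#include <pthread.h>
#include <stdio.h>
#include <time.h>

static void *worker(void *arg)
{
	return NULL;
}

int main(void)
{
	const unsigned long iters = 100000;
	struct timespec a, b;
	unsigned long i;
	int error;

	clock_gettime(CLOCK_MONOTONIC, &a);
	for (i = 0; i < iters; i++) {
		pthread_t thread;

		error = pthread_create(&thread, NULL, worker, NULL);
		assert(error == 0);
		error = pthread_join(thread, NULL);
		assert(error == 0);
	}
	clock_gettime(CLOCK_MONOTONIC, &b);

	printf("%.0f spawn+join pairs/sec\n",
	       iters / ((b.tv_sec - a.tv_sec) +
			(b.tv_nsec - a.tv_nsec) / 1e9));
	return 0;
}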

Signed-off-by: Mateusz Guzik <mjguzik@...il.com>
---

v2:
- introduce a struct to collect work
- move out pids as well

There is more that can be pulled out.

This may look suspicious:
+	proc_flush_pid(p->thread_pid);

AFAICS p->thread_pid is constant for the duration of the task's lifetime,
so I don't think there is a problem.

 include/linux/pid.h |  1 +
 kernel/exit.c       | 53 +++++++++++++++++++++++++++++++++------------
 kernel/pid.c        | 23 +++++++++++++++-----
 3 files changed, 58 insertions(+), 19 deletions(-)

diff --git a/include/linux/pid.h b/include/linux/pid.h
index 98837a1ff0f3..6e9fcacd02cd 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -101,6 +101,7 @@ extern struct pid *get_task_pid(struct task_struct *task, enum pid_type type);
  * these helpers must be called with the tasklist_lock write-held.
  */
 extern void attach_pid(struct task_struct *task, enum pid_type);
+extern struct pid *detach_pid_return(struct task_struct *task, enum pid_type);
 extern void detach_pid(struct task_struct *task, enum pid_type);
 extern void change_pid(struct task_struct *task, enum pid_type,
 			struct pid *pid);
diff --git a/kernel/exit.c b/kernel/exit.c
index 1dcddfe537ee..4e452d3e3a89 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -122,14 +122,23 @@ static __init int kernel_exit_sysfs_init(void)
 late_initcall(kernel_exit_sysfs_init);
 #endif
 
-static void __unhash_process(struct task_struct *p, bool group_dead)
+/*
+ * For things release_task would like to do *after* tasklist_lock is released.
+ */
+struct release_task_post {
+	unsigned long long randomness;
+	struct pid *pids[PIDTYPE_MAX];
+};
+
+static void __unhash_process(struct release_task_post *post, struct task_struct *p,
+			     bool group_dead)
 {
 	nr_threads--;
-	detach_pid(p, PIDTYPE_PID);
+	post->pids[PIDTYPE_PID] = detach_pid_return(p, PIDTYPE_PID);
 	if (group_dead) {
-		detach_pid(p, PIDTYPE_TGID);
-		detach_pid(p, PIDTYPE_PGID);
-		detach_pid(p, PIDTYPE_SID);
+		post->pids[PIDTYPE_TGID] = detach_pid_return(p, PIDTYPE_TGID);
+		post->pids[PIDTYPE_PGID] = detach_pid_return(p, PIDTYPE_PGID);
+		post->pids[PIDTYPE_SID] = detach_pid_return(p, PIDTYPE_SID);
 
 		list_del_rcu(&p->tasks);
 		list_del_init(&p->sibling);
@@ -141,7 +150,8 @@ static void __unhash_process(struct task_struct *p, bool group_dead)
 /*
  * This function expects the tasklist_lock write-locked.
  */
-static void __exit_signal(struct task_struct *tsk)
+static void __exit_signal(struct release_task_post *post,
+			  struct task_struct *tsk)
 {
 	struct signal_struct *sig = tsk->signal;
 	bool group_dead = thread_group_leader(tsk);
@@ -174,8 +184,7 @@ static void __exit_signal(struct task_struct *tsk)
 			sig->curr_target = next_thread(tsk);
 	}
 
-	add_device_randomness((const void*) &tsk->se.sum_exec_runtime,
-			      sizeof(unsigned long long));
+	post->randomness = tsk->se.sum_exec_runtime;
 
 	/*
 	 * Accumulate here the counters for all threads as they die. We could
@@ -197,7 +206,7 @@ static void __exit_signal(struct task_struct *tsk)
 	task_io_accounting_add(&sig->ioac, &tsk->ioac);
 	sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
 	sig->nr_threads--;
-	__unhash_process(tsk, group_dead);
+	__unhash_process(post, tsk, group_dead);
 	write_sequnlock(&sig->stats_lock);
 
 	/*
@@ -240,9 +249,13 @@ void __weak release_thread(struct task_struct *dead_task)
 void release_task(struct task_struct *p)
 {
 	struct task_struct *leader;
-	struct pid *thread_pid;
 	int zap_leader;
+	struct release_task_post post;
 repeat:
+	memset(&post, 0, sizeof(post));
+
+	proc_flush_pid(p->thread_pid);
+
 	/* don't need to get the RCU readlock here - the process is dead and
 	 * can't be modifying its own credentials. But shut RCU-lockdep up */
 	rcu_read_lock();
@@ -253,8 +266,7 @@ void release_task(struct task_struct *p)
 
 	write_lock_irq(&tasklist_lock);
 	ptrace_release_task(p);
-	thread_pid = get_pid(p->thread_pid);
-	__exit_signal(p);
+	__exit_signal(&post, p);
 
 	/*
 	 * If we are the last non-leader member of the thread
@@ -276,8 +288,21 @@ void release_task(struct task_struct *p)
 	}
 
 	write_unlock_irq(&tasklist_lock);
-	proc_flush_pid(thread_pid);
-	put_pid(thread_pid);
+
+	/*
+	 * Process clean up deferred to after we drop the tasklist lock.
+	 */
+	add_device_randomness((const void*) &post.randomness,
+			      sizeof(unsigned long long));
+	if (post.pids[PIDTYPE_PID])
+		free_pid(post.pids[PIDTYPE_PID]);
+	if (post.pids[PIDTYPE_TGID])
+		free_pid(post.pids[PIDTYPE_TGID]);
+	if (post.pids[PIDTYPE_PGID])
+		free_pid(post.pids[PIDTYPE_PGID]);
+	if (post.pids[PIDTYPE_SID])
+		free_pid(post.pids[PIDTYPE_SID]);
+
 	release_thread(p);
 	put_task_struct_rcu_user(p);
 
diff --git a/kernel/pid.c b/kernel/pid.c
index 3a10a7b6fcf8..047cdbcef5cf 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -343,7 +343,7 @@ void attach_pid(struct task_struct *task, enum pid_type type)
 	hlist_add_head_rcu(&task->pid_links[type], &pid->tasks[type]);
 }
 
-static void __change_pid(struct task_struct *task, enum pid_type type,
+static struct pid *__change_pid(struct task_struct *task, enum pid_type type,
 			struct pid *new)
 {
 	struct pid **pid_ptr = task_pid_ptr(task, type);
@@ -362,20 +362,33 @@ static void __change_pid(struct task_struct *task, enum pid_type type,
 
 	for (tmp = PIDTYPE_MAX; --tmp >= 0; )
 		if (pid_has_task(pid, tmp))
-			return;
+			return NULL;
 
-	free_pid(pid);
+	return pid;
+}
+
+struct pid *detach_pid_return(struct task_struct *task, enum pid_type type)
+{
+	return __change_pid(task, type, NULL);
 }
 
 void detach_pid(struct task_struct *task, enum pid_type type)
 {
-	__change_pid(task, type, NULL);
+	struct pid *pid;
+
+	pid = detach_pid_return(task, type);
+	if (pid)
+		free_pid(pid);
 }
 
 void change_pid(struct task_struct *task, enum pid_type type,
 		struct pid *pid)
 {
-	__change_pid(task, type, pid);
+	struct pid *opid;
+
+	opid = __change_pid(task, type, pid);
+	if (opid)
+		free_pid(opid);
 	attach_pid(task, type);
 }
 
-- 
2.43.0

