lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250128182932.GC24845@redhat.com>
Date: Tue, 28 Jan 2025 19:29:33 +0100
From: Oleg Nesterov <oleg@...hat.com>
To: Mateusz Guzik <mjguzik@...il.com>
Cc: brauner@...nel.org, akpm@...ux-foundation.org, linux-mm@...ck.org,
	linux-kernel@...r.kernel.org,
	"Eric W. Biederman" <ebiederm@...ssion.com>
Subject: Re: [PATCH v2] exit: perform randomness and pid work without
 tasklist_lock

(Add Eric).


On 01/28, Mateusz Guzik wrote:
>
> Both add_device_randomness() and attach_pid()/detach_pid()

So afaics this patch does 2 different things, and I do think this needs
2 separate patches. Can you split this change please?

As for add_device_randomness(). I must have missed something, but I still can't
understand why we can't simply shift add_device_randomness(p->sum_exec_runtime)
to release_task() and avoid release_task_post->randomness.

You said:

	I wanted to keep the load where it was

but why??? Again, I must have missed something, but to me this simply adds the
unnecessary complications. Either way, ->sum_exec_runtime is not stable even if
task-to-release != current, so what is the point?

Oleg.

> have their
> own synchronisation mechanisms.
>
> The clone side calls them *without* the tasklist_lock held, meaning
> parallel calls can contend on their locks.
>
> The exit side calls them *with* the tasklist_lock held, which means the
> hold time is avoidably extended by waiting on either of the 2 locks, in
> turn exacerbating contention on tasklist_lock itself.
>
> Postponing the work until after the lock is dropped bumps thread
> creation/destruction rate by 15% on a 24 core vm.
>
> Bench (plop into will-it-scale):
> $ cat tests/threadspawn1.c
>
> char *testcase_description = "Thread creation and teardown";
>
> static void *worker(void *arg)
> {
>         return (NULL);
> }
>
> void testcase(unsigned long long *iterations, unsigned long nr)
> {
>         pthread_t thread;
>         int error;
>
>         while (1) {
>                 error = pthread_create(&thread, NULL, worker, NULL);
>                 assert(error == 0);
>                 error = pthread_join(thread, NULL);
>                 assert(error == 0);
>                 (*iterations)++;
>         }
> }
>
> Run:
> $ ./threadspawn1_processes -t 24
>
> Signed-off-by: Mateusz Guzik <mjguzik@...il.com>
> ---
>
> v2:
> - introduce a struct to collect work
> - move out pids as well
>
> there is more which can be pulled out
>
> this may look suspicious:
> +	proc_flush_pid(p->thread_pid);
>
> AFAICS this is constant for the duration of the lifetime, so i don't
> think there is a problem
>
>  include/linux/pid.h |  1 +
>  kernel/exit.c       | 53 +++++++++++++++++++++++++++++++++------------
>  kernel/pid.c        | 23 +++++++++++++++-----
>  3 files changed, 58 insertions(+), 19 deletions(-)
>
> diff --git a/include/linux/pid.h b/include/linux/pid.h
> index 98837a1ff0f3..6e9fcacd02cd 100644
> --- a/include/linux/pid.h
> +++ b/include/linux/pid.h
> @@ -101,6 +101,7 @@ extern struct pid *get_task_pid(struct task_struct *task, enum pid_type type);
>   * these helpers must be called with the tasklist_lock write-held.
>   */
>  extern void attach_pid(struct task_struct *task, enum pid_type);
> +extern struct pid *detach_pid_return(struct task_struct *task, enum pid_type);
>  extern void detach_pid(struct task_struct *task, enum pid_type);
>  extern void change_pid(struct task_struct *task, enum pid_type,
>  			struct pid *pid);
> diff --git a/kernel/exit.c b/kernel/exit.c
> index 1dcddfe537ee..4e452d3e3a89 100644
> --- a/kernel/exit.c
> +++ b/kernel/exit.c
> @@ -122,14 +122,23 @@ static __init int kernel_exit_sysfs_init(void)
>  late_initcall(kernel_exit_sysfs_init);
>  #endif
>
> -static void __unhash_process(struct task_struct *p, bool group_dead)
> +/*
> + * For things release_task would like to do *after* tasklist_lock is released.
> + */
> +struct release_task_post {
> +	unsigned long long randomness;
> +	struct pid *pids[PIDTYPE_MAX];
> +};
> +
> +static void __unhash_process(struct release_task_post *post, struct task_struct *p,
> +			     bool group_dead)
>  {
>  	nr_threads--;
> -	detach_pid(p, PIDTYPE_PID);
> +	post->pids[PIDTYPE_PID] = detach_pid_return(p, PIDTYPE_PID);
>  	if (group_dead) {
> -		detach_pid(p, PIDTYPE_TGID);
> -		detach_pid(p, PIDTYPE_PGID);
> -		detach_pid(p, PIDTYPE_SID);
> +		post->pids[PIDTYPE_TGID] = detach_pid_return(p, PIDTYPE_TGID);
> +		post->pids[PIDTYPE_PGID] = detach_pid_return(p, PIDTYPE_PGID);
> +		post->pids[PIDTYPE_SID] = detach_pid_return(p, PIDTYPE_SID);
>
>  		list_del_rcu(&p->tasks);
>  		list_del_init(&p->sibling);
> @@ -141,7 +150,8 @@ static void __unhash_process(struct task_struct *p, bool group_dead)
>  /*
>   * This function expects the tasklist_lock write-locked.
>   */
> -static void __exit_signal(struct task_struct *tsk)
> +static void __exit_signal(struct release_task_post *post,
> +			  struct task_struct *tsk)
>  {
>  	struct signal_struct *sig = tsk->signal;
>  	bool group_dead = thread_group_leader(tsk);
> @@ -174,8 +184,7 @@ static void __exit_signal(struct task_struct *tsk)
>  			sig->curr_target = next_thread(tsk);
>  	}
>
> -	add_device_randomness((const void*) &tsk->se.sum_exec_runtime,
> -			      sizeof(unsigned long long));
> +	post->randomness = tsk->se.sum_exec_runtime;
>
>  	/*
>  	 * Accumulate here the counters for all threads as they die. We could
> @@ -197,7 +206,7 @@ static void __exit_signal(struct task_struct *tsk)
>  	task_io_accounting_add(&sig->ioac, &tsk->ioac);
>  	sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
>  	sig->nr_threads--;
> -	__unhash_process(tsk, group_dead);
> +	__unhash_process(post, tsk, group_dead);
>  	write_sequnlock(&sig->stats_lock);
>
>  	/*
> @@ -240,9 +249,13 @@ void __weak release_thread(struct task_struct *dead_task)
>  void release_task(struct task_struct *p)
>  {
>  	struct task_struct *leader;
> -	struct pid *thread_pid;
>  	int zap_leader;
> +	struct release_task_post post;
>  repeat:
> +	memset(&post, 0, sizeof(post));
> +
> +	proc_flush_pid(p->thread_pid);
> +
>  	/* don't need to get the RCU readlock here - the process is dead and
>  	 * can't be modifying its own credentials. But shut RCU-lockdep up */
>  	rcu_read_lock();
> @@ -253,8 +266,7 @@ void release_task(struct task_struct *p)
>
>  	write_lock_irq(&tasklist_lock);
>  	ptrace_release_task(p);
> -	thread_pid = get_pid(p->thread_pid);
> -	__exit_signal(p);
> +	__exit_signal(&post, p);
>
>  	/*
>  	 * If we are the last non-leader member of the thread
> @@ -276,8 +288,21 @@ void release_task(struct task_struct *p)
>  	}
>
>  	write_unlock_irq(&tasklist_lock);
> -	proc_flush_pid(thread_pid);
> -	put_pid(thread_pid);
> +
> +	/*
> +	 * Process clean up deferred to after we drop the tasklist lock.
> +	 */
> +	add_device_randomness((const void*) &post.randomness,
> +			      sizeof(unsigned long long));
> +	if (post.pids[PIDTYPE_PID])
> +		free_pid(post.pids[PIDTYPE_PID]);
> +	if (post.pids[PIDTYPE_TGID])
> +		free_pid(post.pids[PIDTYPE_TGID]);
> +	if (post.pids[PIDTYPE_PGID])
> +		free_pid(post.pids[PIDTYPE_PGID]);
> +	if (post.pids[PIDTYPE_SID])
> +		free_pid(post.pids[PIDTYPE_SID]);
> +
>  	release_thread(p);
>  	put_task_struct_rcu_user(p);
>
> diff --git a/kernel/pid.c b/kernel/pid.c
> index 3a10a7b6fcf8..047cdbcef5cf 100644
> --- a/kernel/pid.c
> +++ b/kernel/pid.c
> @@ -343,7 +343,7 @@ void attach_pid(struct task_struct *task, enum pid_type type)
>  	hlist_add_head_rcu(&task->pid_links[type], &pid->tasks[type]);
>  }
>
> -static void __change_pid(struct task_struct *task, enum pid_type type,
> +static struct pid *__change_pid(struct task_struct *task, enum pid_type type,
>  			struct pid *new)
>  {
>  	struct pid **pid_ptr = task_pid_ptr(task, type);
> @@ -362,20 +362,33 @@ static void __change_pid(struct task_struct *task, enum pid_type type,
>
>  	for (tmp = PIDTYPE_MAX; --tmp >= 0; )
>  		if (pid_has_task(pid, tmp))
> -			return;
> +			return NULL;
>
> -	free_pid(pid);
> +	return pid;
> +}
> +
> +struct pid *detach_pid_return(struct task_struct *task, enum pid_type type)
> +{
> +	return __change_pid(task, type, NULL);
>  }
>
>  void detach_pid(struct task_struct *task, enum pid_type type)
>  {
> -	__change_pid(task, type, NULL);
> +	struct pid *pid;
> +
> +	pid = detach_pid_return(task, type);
> +	if (pid)
> +		free_pid(pid);
>  }
>
>  void change_pid(struct task_struct *task, enum pid_type type,
>  		struct pid *pid)
>  {
> -	__change_pid(task, type, pid);
> +	struct pid *opid;
> +
> +	opid = __change_pid(task, type, pid);
> +	if (opid)
> +		free_pid(opid);
>  	attach_pid(task, type);
>  }
>
> --
> 2.43.0
>


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ