lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Mon, 12 Dec 2022 09:51:06 +0800
From:   Ian Kent <ikent@...hat.com>
To:     Brian Foster <bfoster@...hat.com>, linux-mm@...ck.org,
        linux-fsdevel@...r.kernel.org, linux-kernel@...r.kernel.org
Cc:     onestero@...hat.com, willy@...radead.org, ebiederm@...hat.com
Subject: Re: [PATCH v3 4/5] pid: mark pids associated with group leader tasks

On 3/12/22 01:16, Brian Foster wrote:
> Searching the pid_namespace for group leader tasks is a fairly
> inefficient operation. Listing the root directory of a procfs mount
> performs a linear scan of allocated pids, checking each entry for an
> associated PIDTYPE_TGID task to determine whether to populate a
> directory entry. This can cause a significant increase in readdir()
> syscall latency when run in namespaces that might have one or more
> processes with significant thread counts.
>
> To facilitate improved TGID pid searches, mark the ids of pid
> entries that are likely to have an associated PIDTYPE_TGID task. To
> keep the code simple and avoid having to maintain synchronization
> between mark state and post-fork pid-task association changes, the
> mark is applied to all pids allocated for tasks cloned without
> CLONE_THREAD.
>
> This means that it is possible for a pid to remain marked in the
> xarray after being disassociated from the group leader task. For
> example, a process that does a setsid() followed by fork() and
> exit() (to daemonize) will remain associated with the original pid
> for the session, but link with the child pid as the group leader.
> OTOH, the only place other than fork() where a tgid association
> occurs is in the exec() path, which kills all other tasks in the
> group and associates the current task with the preexisting leader
> pid. Therefore, the semantics of the mark are that false positives
> (marked pids without PIDTYPE_TGID tasks) are possible, but false
> negatives (unmarked pids without PIDTYPE_TGID tasks) should never
> occur.
>
> This is an effective optimization because false negatives are fairly
> uncommon and don't add overhead (i.e. we already have to check
> pid_task() for marked entries), but still filters out thread pids
> that are guaranteed not to have TGID task association.
>
> Mark entries in the pid allocation path when the caller specifies
> that the pid associates with a new thread group. Since false
> negatives are not allowed, warn in the event that a PIDTYPE_TGID
> task is ever attached to an unmarked pid. Finally, create a helper
> to implement the task search based on the mark semantics defined
> above (based on search logic currently implemented by next_tgid() in
> procfs).

Yes, the tricky bit, but the analysis sounds thorough so it

should work for all cases ...


>
> Signed-off-by: Brian Foster <bfoster@...hat.com>


Reviewed-by: Ian Kent <raven@...maw.net>

> ---
>   include/linux/pid.h |  3 ++-
>   kernel/fork.c       |  2 +-
>   kernel/pid.c        | 44 +++++++++++++++++++++++++++++++++++++++++++-
>   3 files changed, 46 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/pid.h b/include/linux/pid.h
> index 343abf22092e..64caf21be256 100644
> --- a/include/linux/pid.h
> +++ b/include/linux/pid.h
> @@ -132,9 +132,10 @@ extern struct pid *find_vpid(int nr);
>    */
>   extern struct pid *find_get_pid(int nr);
>   extern struct pid *find_ge_pid(int nr, struct pid_namespace *);
> +struct task_struct *find_get_tgid_task(int *id, struct pid_namespace *);
>   
>   extern struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
> -			     size_t set_tid_size);
> +			     size_t set_tid_size, bool group_leader);
>   extern void free_pid(struct pid *pid);
>   extern void disable_pid_allocation(struct pid_namespace *ns);
>   
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 08969f5aa38d..1cf2644c642e 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -2267,7 +2267,7 @@ static __latent_entropy struct task_struct *copy_process(
>   
>   	if (pid != &init_struct_pid) {
>   		pid = alloc_pid(p->nsproxy->pid_ns_for_children, args->set_tid,
> -				args->set_tid_size);
> +				args->set_tid_size, !(clone_flags & CLONE_THREAD));
>   		if (IS_ERR(pid)) {
>   			retval = PTR_ERR(pid);
>   			goto bad_fork_cleanup_thread;
> diff --git a/kernel/pid.c b/kernel/pid.c
> index 53db06f9882d..d65f74c6186c 100644
> --- a/kernel/pid.c
> +++ b/kernel/pid.c
> @@ -66,6 +66,9 @@ int pid_max = PID_MAX_DEFAULT;
>   int pid_max_min = RESERVED_PIDS + 1;
>   int pid_max_max = PID_MAX_LIMIT;
>   
> +/* MARK_0 used by XA_FREE_MARK */
> +#define TGID_MARK	XA_MARK_1
> +
>   struct pid_namespace init_pid_ns = {
>   	.ns.count = REFCOUNT_INIT(2),
>   	.xa = XARRAY_INIT(init_pid_ns.xa, PID_XA_FLAGS),
> @@ -137,7 +140,7 @@ void free_pid(struct pid *pid)
>   }
>   
>   struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
> -		      size_t set_tid_size)
> +		      size_t set_tid_size, bool group_leader)
>   {
>   	struct pid *pid;
>   	enum pid_type type;
> @@ -257,6 +260,8 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
>   
>   		/* Make the PID visible to find_pid_ns. */
>   		__xa_store(&tmp->xa, upid->nr, pid, 0);
> +		if (group_leader)
> +			__xa_set_mark(&tmp->xa, upid->nr, TGID_MARK);
>   		tmp->pid_allocated++;
>   		xa_unlock_irq(&tmp->xa);
>   	}
> @@ -314,6 +319,11 @@ static struct pid **task_pid_ptr(struct task_struct *task, enum pid_type type)
>   void attach_pid(struct task_struct *task, enum pid_type type)
>   {
>   	struct pid *pid = *task_pid_ptr(task, type);
> +	struct pid_namespace *pid_ns = ns_of_pid(pid);
> +	pid_t pid_nr = pid_nr_ns(pid, pid_ns);
> +
> +	WARN_ON(type == PIDTYPE_TGID &&
> +		!xa_get_mark(&pid_ns->xa, pid_nr, TGID_MARK));
>   	hlist_add_head_rcu(&task->pid_links[type], &pid->tasks[type]);
>   }
>   
> @@ -506,6 +516,38 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
>   }
>   EXPORT_SYMBOL_GPL(find_ge_pid);
>   
> +/*
> + * Used by proc to find the first thread group leader task with an id greater
> + * than or equal to *id.
> + *
> + * Use the xarray mark as a hint to find the next best pid. The mark does not
> + * guarantee a linked group leader task exists, so retry until a suitable entry
> + * is found.
> + */
> +struct task_struct *find_get_tgid_task(int *id, struct pid_namespace *ns)
> +{
> +	struct pid *pid;
> +	struct task_struct *t;
> +	unsigned long nr = *id;
> +
> +	rcu_read_lock();
> +	do {
> +		pid = xa_find(&ns->xa, &nr, ULONG_MAX, TGID_MARK);
> +		if (!pid) {
> +			rcu_read_unlock();
> +			return NULL;
> +		}
> +		t = pid_task(pid, PIDTYPE_TGID);
> +		nr++;
> +	} while (!t);
> +
> +	*id = pid_nr_ns(pid, ns);
> +	get_task_struct(t);
> +	rcu_read_unlock();
> +
> +	return t;
> +}
> +
>   struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags)
>   {
>   	struct fd f;

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ