linux-kernel - Re: [PATCH] proc: Fix proc_tid_readdir so it handles the weird cases.

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20090318135718.GA32106@hawkmoon.kerlabs.com>
Date:	Wed, 18 Mar 2009 14:57:18 +0100
From:	Louis Rilling <Louis.Rilling@...labs.com>
To:	"Eric W. Biederman" <ebiederm@...ssion.com>
Cc:	Alexey Dobriyan <adobriyan@...il.com>,
	linux-kernel@...r.kernel.org, torvalds@...l.org,
	Zhang Le <r0bertz@...too.org>,
	Al Viro <viro@...IV.linux.org.uk>,
	Oleg Nesterov <oleg@...sign.ru>
Subject: Re: [PATCH] proc:  Fix proc_tid_readdir so it handles the weird
	cases.

On 18/03/09  6:39 -0700, Eric W. Biederman wrote:
> 
> Fix the logic in proc_task_readdir so that we provide the
> guarantee that in the sequence:
> opendir
> readdir
> readdir
> ....
> readdir
> closedir
> If a thread exists from the beginning until the end we are
> guaranteed to see it, even if some threads are created or
> worse are destroyed during the readdir.
> 
> This guarantee is provided by reusing the same logic used
> by proc_pid_readdir for the root directory of /proc.  The
> pid is encoded into the offset, and we walk the pid bitmap
> of all pids in the system, and test each of them to see if
> it belongs to the specified thread group.
> 
> If we seek to a bad location or if the task we say last
> exits it is ok because we can numerically find the next
> task we would have returned.
> 
> This is slower that the previous version, but it should
> not be too slow as this techinique is already use for
> the root directory of /proc without problems.

This makes 'ps -T x' an O(n^2) thing, compared to O(n) before the patch, right?
It would be good to, at least, check for thread_group_empty().

Thanks,

Louis

> 
> Signed-off-by: Eric Biederman <ebiederm@...ssion.com>
> ---
>  fs/proc/base.c |  141 +++++++++++++++++++++-----------------------------------
>  1 files changed, 53 insertions(+), 88 deletions(-)
> 
> diff --git a/fs/proc/base.c b/fs/proc/base.c
> index beaa0ce..2d8e9c9 100644
> --- a/fs/proc/base.c
> +++ b/fs/proc/base.c
> @@ -2754,13 +2754,13 @@ retry:
>  	if (pid) {
>  		iter.tgid = pid_nr_ns(pid, ns);
>  		iter.task = pid_task(pid, PIDTYPE_PID);
> -		/* What we to know is if the pid we have find is the
> -		 * pid of a thread_group_leader.  Testing for task
> -		 * being a thread_group_leader is the obvious thing
> -		 * todo but there is a window when it fails, due to
> -		 * the pid transfer logic in de_thread.
> +		/* What we want to know is if the pid we have found is
> +		 * the pid of a thread_group_leader.  Testing for task
> +		 * being a thread_group_leader is the obvious thing to
> +		 * do but there is a window when it fails, due to the
> +		 * pid transfer logic in de_thread.
>  		 *
> -		 * So we perform the straight forward test of seeing
> +		 * So we perform the straight forward test, of seeing
>  		 * if the pid we have found is the pid of a thread
>  		 * group leader, and don't worry if the task we have
>  		 * found doesn't happen to be a thread group leader.
> @@ -2978,82 +2978,54 @@ out_no_task:
>  	return result;
>  }
>  
> +
>  /*
> - * Find the first tid of a thread group to return to user space.
> - *
> - * Usually this is just the thread group leader, but if the users
> - * buffer was too small or there was a seek into the middle of the
> - * directory we have more work todo.
> - *
> - * In the case of a short read we start with find_task_by_pid.
> + * Find the next task in the thread group with tid >= tid.
> + * Return iter.task == NULL if ther is an error or no next task.
>   *
> - * In the case of a seek we start with the leader and walk nr
> - * threads past it.
> + * The reference to the input task_struct is released.
>   */
> -static struct task_struct *first_tid(struct task_struct *leader,
> -		int tid, int nr, struct pid_namespace *ns)
> +struct tid_iter {
> +	struct task_struct *task;
> +	unsigned int tid;
> +};
> +static struct tid_iter next_tid(struct pid_namespace *ns, struct pid *tgid,
> +				struct tid_iter iter)
>  {
> -	struct task_struct *pos;
> +	struct pid *pid;
>  
> +	if (iter.task)
> +		put_task_struct(iter.task);
>  	rcu_read_lock();
> -	/* Attempt to start with the pid of a thread */
> -	if (tid && (nr > 0)) {
> -		pos = find_task_by_pid_ns(tid, ns);
> -		if (pos && (pos->group_leader == leader))
> -			goto found;
> -	}
> -
> -	/* If nr exceeds the number of threads there is nothing todo */
> -	pos = NULL;
> -	if (nr && nr >= get_nr_threads(leader))
> -		goto out;
> -
> -	/* If we haven't found our starting place yet start
> -	 * with the leader and walk nr threads forward.
> -	 */
> -	for (pos = leader; nr > 0; --nr) {
> -		pos = next_thread(pos);
> -		if (pos == leader) {
> -			pos = NULL;
> -			goto out;
> +retry:
> +	iter.task = NULL;
> +	pid = find_ge_pid(iter.tid, ns);
> +	if (pid) {
> +		iter.tid = pid_nr_ns(pid, ns);
> +		iter.task = pid_task(pid, PIDTYPE_PID);
> +		/* As the tgid does not change during de_thread
> +		 * this test if a task is in a thread group
> +		 * should always be accurate.
> +		 */
> +		if (!iter.task || task_tgid(iter.task) != tgid) {
> +			iter.tid += 1;
> +			goto retry;
>  		}
> +		get_task_struct(iter.task);
>  	}
> -found:
> -	get_task_struct(pos);
> -out:
>  	rcu_read_unlock();
> -	return pos;
> +	return iter;
>  }
>  
> -/*
> - * Find the next thread in the thread list.
> - * Return NULL if there is an error or no next thread.
> - *
> - * The reference to the input task_struct is released.
> - */
> -static struct task_struct *next_tid(struct task_struct *start)
> -{
> -	struct task_struct *pos = NULL;
> -	rcu_read_lock();
> -	if (pid_alive(start)) {
> -		pos = next_thread(start);
> -		if (thread_group_leader(pos))
> -			pos = NULL;
> -		else
> -			get_task_struct(pos);
> -	}
> -	rcu_read_unlock();
> -	put_task_struct(start);
> -	return pos;
> -}
> +#define TID_OFFSET 2
>  
>  static int proc_task_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
> -	struct task_struct *task, int tid)
> +	struct tid_iter iter)
>  {
>  	char name[PROC_NUMBUF];
> -	int len = snprintf(name, sizeof(name), "%d", tid);
> +	int len = snprintf(name, sizeof(name), "%d", iter.tid);
>  	return proc_fill_cache(filp, dirent, filldir, name, len,
> -				proc_task_instantiate, task, NULL);
> +				proc_task_instantiate, iter.task, NULL);
>  }
>  
>  /* for the /proc/TGID/task/ directories */
> @@ -3061,24 +3033,22 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
>  {
>  	struct dentry *dentry = filp->f_path.dentry;
>  	struct inode *inode = dentry->d_inode;
> -	struct task_struct *leader = NULL;
> +	struct pid *tgid = NULL;
>  	struct task_struct *task;
>  	int retval = -ENOENT;
>  	ino_t ino;
> -	int tid;
> +	struct tid_iter iter;
>  	struct pid_namespace *ns;
>  
>  	task = get_proc_task(inode);
>  	if (!task)
>  		goto out_no_task;
>  	rcu_read_lock();
> -	if (pid_alive(task)) {
> -		leader = task->group_leader;
> -		get_task_struct(leader);
> -	}
> +	if (pid_alive(task))
> +		tgid = get_pid(task_tgid(task));
>  	rcu_read_unlock();
>  	put_task_struct(task);
> -	if (!leader)
> +	if (!tgid)
>  		goto out_no_task;
>  	retval = 0;
>  
> @@ -3097,26 +3067,21 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
>  		/* fall through */
>  	}
>  
> -	/* f_version caches the tgid value that the last readdir call couldn't
> -	 * return. lseek aka telldir automagically resets f_version to 0.
> -	 */
>  	ns = filp->f_dentry->d_sb->s_fs_info;
> -	tid = (int)filp->f_version;
> -	filp->f_version = 0;
> -	for (task = first_tid(leader, tid, filp->f_pos - 2, ns);
> -	     task;
> -	     task = next_tid(task), filp->f_pos++) {
> -		tid = task_pid_nr_ns(task, ns);
> -		if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) {
> -			/* returning this tgid failed, save it as the first
> -			 * pid for the next readir call */
> -			filp->f_version = (u64)tid;
> +	iter.task = NULL;
> +	iter.tid = filp->f_pos - TID_OFFSET;
> +	for (iter = next_tid(ns, tgid, iter);
> +	     iter.task;
> +	     iter.tid += 1, iter = next_tid(ns, tgid, iter)) {
> +		filp->f_pos = iter.tid + TID_OFFSET;
> +		if (proc_task_fill_cache(filp, dirent, filldir, iter) < 0) {
>  			put_task_struct(task);
> -			break;
> +			goto out;
>  		}
>  	}
> +	filp->f_pos = PID_MAX_LIMIT + TID_OFFSET;
>  out:
> -	put_task_struct(leader);
> +	put_pid(tgid);
>  out_no_task:
>  	return retval;
>  }
> -- 
> 1.6.1.2.350.g88cc
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@...r.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

-- 
Dr Louis Rilling			Kerlabs
Skype: louis.rilling			Batiment Germanium
Phone: (+33|0) 6 80 89 08 23		80 avenue des Buttes de Coesmes
http://www.kerlabs.com/			35700 Rennes

Download attachment "signature.asc" of type "application/pgp-signature" (190 bytes)