lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <87sji0fdc2.fsf@xmission.com>
Date:	Fri, 24 Feb 2012 07:41:49 -0800
From:	ebiederm@...ssion.com (Eric W. Biederman)
To:	Christoph Lameter <cl@...ux.com>
Cc:	Dave Hansen <dave@...ux.vnet.ibm.com>,
	linux-kernel@...r.kernel.org, linux-mm@...ck.org
Subject: Re: [RFC][PATCH] fix move/migrate_pages() race on task struct

Christoph Lameter <cl@...ux.com> writes:

> On Thu, 23 Feb 2012, Eric W. Biederman wrote:
>
>> > The bug in migrate_pages() is that we do a rcu_unlock and a rcu_lock. If
>> > we drop those then we should be safe if the use of a task pointer within a
>> > rcu section is safe without taking a refcount.
>>
>> Yes the user of a task_struct pointer found via a userspace pid is valid
>> for the life of an rcu critical section, and the bug is indeed that we
>> drop the rcu_lock and somehow expect the task to remain valid.
>>
>> The guarantee comes from release_task.  In release_task we call
>> __exit_signal which calls __unhash_process, and then we call
>> delayed_put_task to guarantee that the task lives until the end of the
>> rcu interval.
>
> Ah. Ok. Great.
>
>> In migrate_pages we have a lot of task accesses outside of the rcu
>> critical section, and without a reference count on task.
>
> Yes but that is only of interesting for setup and verification of
> permissions. What matters during migration is that the mm_struct does not
> go away and we take a refcount on that one.
>
>> I tell you the truth trying to figure out what that code needs to be
>> correct if task != current makes my head hurt.
>
> Hmm...

Especially because I was looking at move_pages...

>> I think we need to grab a reference on task_struct, to stop the task
>> from going away, and in addition we need to hold task_lock.  To keep
>> task->mm from changing (see exec_mmap).  But we can't do that and sleep
>> so I think the entire function needs to be rewritten, and the need for
>> task deep in the migrate_pages path needs to be removed as even with the
>> reference count held we can race with someone calling exec.
>
> We dont need the task during migration. We only need the mm. The task
> is safe until rcu_read_unlock therefore maybe the following should fix
> migrate pages:
>
>
> Subject: migration: Do not do rcu_read_unlock until the last time we need the task_struct pointer
>
> Migration functions perform the rcu_read_unlock too early. As a result the
> task pointed to may change. Bugs were introduced when adding security checks
> because rcu_unlock/lock sequences were inserted. Plus the security checks
> and do_move_pages used the task_struct pointer after rcu_unlock.
>
> Fix those issues by removing the unlock/lock sequences and moving the
> rcu_read_unlock after the last use of the task struct pointer.
>
> Signed-off-by: Christoph Lameter <cl@...ux.com>

Acked-by: "Eric W. Biederman" <ebiederm@...ssion.com>

Somehow in my skimming through the code earlier I thought the
situation was worse.  On the assumption there are not any
sleeping functions along the path this looks like a good fix.

> ---
>  mm/mempolicy.c |   22 +++++++++++-----------
>  mm/migrate.c   |   28 +++++++++++++++-------------
>  2 files changed, 26 insertions(+), 24 deletions(-)
>
> Index: linux-2.6/mm/mempolicy.c
> ===================================================================
> --- linux-2.6.orig/mm/mempolicy.c	2012-01-13 04:04:36.229807226 -0600
> +++ linux-2.6/mm/mempolicy.c	2012-02-24 03:11:44.913710625 -0600
> @@ -1318,16 +1318,14 @@ SYSCALL_DEFINE4(migrate_pages, pid_t, pi
>  	rcu_read_lock();
>  	task = pid ? find_task_by_vpid(pid) : current;
>  	if (!task) {
> -		rcu_read_unlock();
>  		err = -ESRCH;
> -		goto out;
> +		goto unlock_out;
>  	}
>  	mm = get_task_mm(task);
> -	rcu_read_unlock();
>
>  	err = -EINVAL;
>  	if (!mm)
> -		goto out;
> +		goto unlock_out;
>
>  	/*
>  	 * Check if this process has the right to modify the specified
> @@ -1335,33 +1333,31 @@ SYSCALL_DEFINE4(migrate_pages, pid_t, pi
>  	 * capabilities, superuser privileges or the same
>  	 * userid as the target process.
>  	 */
> -	rcu_read_lock();
>  	tcred = __task_cred(task);
>  	if (cred->euid != tcred->suid && cred->euid != tcred->uid &&
>  	    cred->uid  != tcred->suid && cred->uid  != tcred->uid &&
>  	    !capable(CAP_SYS_NICE)) {
> -		rcu_read_unlock();
>  		err = -EPERM;
> -		goto out;
> +		goto unlock_out;
>  	}
> -	rcu_read_unlock();
>
>  	task_nodes = cpuset_mems_allowed(task);
>  	/* Is the user allowed to access the target nodes? */
>  	if (!nodes_subset(*new, task_nodes) && !capable(CAP_SYS_NICE)) {
>  		err = -EPERM;
> -		goto out;
> +		goto unlock_out;
>  	}
>
>  	if (!nodes_subset(*new, node_states[N_HIGH_MEMORY])) {
>  		err = -EINVAL;
> -		goto out;
> +		goto unlock_out;
>  	}
>
>  	err = security_task_movememory(task);
>  	if (err)
> -		goto out;
> +		goto unlock_out;
>
> +	rcu_read_unlock();
>  	err = do_migrate_pages(mm, old, new,
>  		capable(CAP_SYS_NICE) ? MPOL_MF_MOVE_ALL : MPOL_MF_MOVE);
>  out:
> @@ -1370,6 +1366,10 @@ out:
>  	NODEMASK_SCRATCH_FREE(scratch);
>
>  	return err;
> +
> +unlock_out:
> +	rcu_read_unlock();
> +	goto out;
>  }
>
>
> Index: linux-2.6/mm/migrate.c
> ===================================================================
> --- linux-2.6.orig/mm/migrate.c	2012-02-06 04:25:35.857094372 -0600
> +++ linux-2.6/mm/migrate.c	2012-02-24 03:18:55.569698851 -0600
> @@ -1176,20 +1176,17 @@ set_status:
>   * Migrate an array of page address onto an array of nodes and fill
>   * the corresponding array of status.
>   */
> -static int do_pages_move(struct mm_struct *mm, struct task_struct *task,
> +static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
>  			 unsigned long nr_pages,
>  			 const void __user * __user *pages,
>  			 const int __user *nodes,
>  			 int __user *status, int flags)
>  {
>  	struct page_to_node *pm;
> -	nodemask_t task_nodes;
>  	unsigned long chunk_nr_pages;
>  	unsigned long chunk_start;
>  	int err;
>
> -	task_nodes = cpuset_mems_allowed(task);
> -
>  	err = -ENOMEM;
>  	pm = (struct page_to_node *)__get_free_page(GFP_KERNEL);
>  	if (!pm)
> @@ -1351,6 +1348,7 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid,
>  	struct task_struct *task;
>  	struct mm_struct *mm;
>  	int err;
> +	nodemask_t task_nodes;
>
>  	/* Check flags */
>  	if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
> @@ -1367,10 +1365,11 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid,
>  		return -ESRCH;
>  	}
>  	mm = get_task_mm(task);
> -	rcu_read_unlock();
>
> -	if (!mm)
> +	if (!mm) {
> +		rcu_read_unlock();
>  		return -EINVAL;
> +	}
>
>  	/*
>  	 * Check if this process has the right to modify the specified
> @@ -1378,24 +1377,23 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid,
>  	 * capabilities, superuser privileges or the same
>  	 * userid as the target process.
>  	 */
> -	rcu_read_lock();
>  	tcred = __task_cred(task);
>  	if (cred->euid != tcred->suid && cred->euid != tcred->uid &&
>  	    cred->uid  != tcred->suid && cred->uid  != tcred->uid &&
>  	    !capable(CAP_SYS_NICE)) {
> -		rcu_read_unlock();
>  		err = -EPERM;
> -		goto out;
> +		goto unlock_out;
>  	}
> -	rcu_read_unlock();
>
>   	err = security_task_movememory(task);
>   	if (err)
> -		goto out;
> +		goto unlock_out;
>
> +	task_nodes = cpuset_mems_allowed(task);
> +	rcu_read_unlock();
>  	if (nodes) {
> -		err = do_pages_move(mm, task, nr_pages, pages, nodes, status,
> -				    flags);
> +		err = do_pages_move(mm, task_nodes, nr_pages, pages, nodes,
> +				status, flags);
>  	} else {
>  		err = do_pages_stat(mm, nr_pages, pages, status);
>  	}
> @@ -1403,6 +1401,10 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid,
>  out:
>  	mmput(mm);
>  	return err;
> +
> +unlock_out:
> +	rcu_read_unlock();
> +	goto out;
>  }
>
>  /*
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ