lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Date:	Mon, 25 May 2015 20:46:02 +0300
From:	Kirill Tkhai <ktkhai@...n.com>
To:	<linux-kernel@...r.kernel.org>
CC:	Oleg Nesterov <oleg@...hat.com>,
	Andrew Morton <akpm@...ux-foundation.org>,
	Ingo Molnar <mingo@...hat.com>,
	"Peter Zijlstra" <peterz@...radead.org>,
	Michal Hocko <mhocko@...e.cz>,
	"Rik van Riel" <riel@...hat.com>,
	Ionut Alexa <ionut.m.alexa@...il.com>,
	Peter Hurley <peter@...leysoftware.com>,
	Kirill Tkhai <tkhai@...dex.ru>
Subject: [PATCH RFC 11/13] exit: Synchronize on kin_lock while
 do_notify_parent()

Note: do_wait() (working on tsk) may catch its child, which is being traced
by a thread from tsk's thread group. For proper synchronization, we must
hold both parent and real_parent locks for calling do_notify_parent().

Also remove the dependence on tasklist_lock in ptrace_attach() etc., because
everything is already synchronized on kin_lock (due to previous patches).

Signed-off-by: Kirill Tkhai <ktkhai@...n.com>
---
 kernel/exit.c   |   42 ++++++++++++++++++++++++++++--------------
 kernel/ptrace.c |   20 ++++++--------------
 kernel/signal.c |   11 ++---------
 3 files changed, 36 insertions(+), 37 deletions(-)

diff --git a/kernel/exit.c b/kernel/exit.c
index bb9d165..aeded00 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -183,6 +183,7 @@ void release_task(struct task_struct *p)
 	write_parent_and_real_parent_lock(p);
 	ptrace_release_task(p);
 	__exit_signal(p);
+	write_unlock(&tasklist_lock);
 
 	/*
 	 * If we are the last non-leader member of the thread
@@ -203,8 +204,7 @@ void release_task(struct task_struct *p)
 			leader->exit_state = EXIT_DEAD;
 	}
 
-	write_parent_and_real_parent_unlock(p);
-	write_unlock_irq(&tasklist_lock);
+	write_parent_and_real_parent_unlock_irq(p);
 	release_thread(p);
 	call_rcu(&p->rcu, delayed_put_task_struct);
 
@@ -1016,7 +1016,7 @@ static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p,
 
 /*
  * Handle sys_wait4 work for one task in state EXIT_ZOMBIE.  We hold
- * read_lock(&tasklist_lock) on entry.  If we return zero, we still hold
+ * read_lock(wo->held_lock) on entry.  If we return zero, we still hold
  * the lock and this task is uninteresting.  If we return nonzero, we have
  * released the lock and the system call should return.
  */
@@ -1036,6 +1036,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 
 		get_task_struct(p);
 		read_unlock(wo->held_lock);
+		rcu_read_unlock();
 		sched_annotate_sleep();
 
 		if ((exit_code & 0x7f) == 0) {
@@ -1058,6 +1059,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 	 * We own this thread, nobody else can reap it.
 	 */
 	read_unlock(wo->held_lock);
+	rcu_read_unlock();
 	sched_annotate_sleep();
 
 	/*
@@ -1152,16 +1154,21 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 		retval = pid;
 
 	if (state == EXIT_TRACE) {
-		write_lock_irq(&tasklist_lock);
+		struct task_struct *old_parent;
+		write_parent_and_real_parent_lock_irq(p);
+		old_parent = p->parent;
 		/* We dropped tasklist, ptracer could die and untrace */
 		ptrace_unlink(p);
 
+		if (p->parent != old_parent)
+			write_unlock(&old_parent->kin_lock);
+
 		/* If parent wants a zombie, don't release it now */
 		state = EXIT_ZOMBIE;
 		if (do_notify_parent(p, p->exit_signal))
 			state = EXIT_DEAD;
 		p->exit_state = state;
-		write_unlock_irq(&tasklist_lock);
+		write_parent_unlock_irq(p);
 	}
 	if (state == EXIT_DEAD)
 		release_task(p);
@@ -1191,13 +1198,13 @@ static int *task_stopped_code(struct task_struct *p, bool ptrace)
  * Handle sys_wait4() work for %p in state %TASK_STOPPED or %TASK_TRACED.
  *
  * CONTEXT:
- * read_lock(&tasklist_lock), which is released if return value is
+ * read_lock(wo->held_lock), which is released if return value is
  * non-zero.  Also, grabs and releases @p->sighand->siglock.
  *
  * RETURNS:
  * 0 if wait condition didn't exist and search for other wait conditions
  * should continue.  Non-zero return, -errno on failure and @p's pid on
- * success, implies that tasklist_lock is released and wait condition
+ * success, implies that wo->held_lock is released and wait condition
  * search should terminate.
  */
 static int wait_task_stopped(struct wait_opts *wo,
@@ -1241,13 +1248,14 @@ static int wait_task_stopped(struct wait_opts *wo,
 	 * Now we are pretty sure this task is interesting.
 	 * Make sure it doesn't get reaped out from under us while we
 	 * give up the lock and then examine it below.  We don't want to
-	 * keep holding onto the tasklist_lock while we call getrusage and
+	 * keep holding onto wo->held_lock while we call getrusage and
 	 * possibly take page faults for user memory.
 	 */
 	get_task_struct(p);
 	pid = task_pid_vnr(p);
 	why = ptrace ? CLD_TRAPPED : CLD_STOPPED;
 	read_unlock(wo->held_lock);
+	rcu_read_unlock();
 	sched_annotate_sleep();
 
 	if (unlikely(wo->wo_flags & WNOWAIT))
@@ -1281,7 +1289,7 @@ static int wait_task_stopped(struct wait_opts *wo,
 
 /*
  * Handle do_wait work for one task in a live, non-stopped state.
- * read_lock(&tasklist_lock) on entry.  If we return zero, we still hold
+ * read_lock(wo->held_lock) on entry.  If we return zero, we still hold
  * the lock and this task is uninteresting.  If we return nonzero, we have
  * released the lock and the system call should return.
  */
@@ -1311,6 +1319,7 @@ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)
 	pid = task_pid_vnr(p);
 	get_task_struct(p);
 	read_unlock(wo->held_lock);
+	rcu_read_unlock();
 	sched_annotate_sleep();
 
 	if (!wo->wo_info) {
@@ -1334,7 +1343,7 @@ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)
  * Consider @p for a wait by @parent.
  *
  * -ECHILD should be in ->notask_error before the first call.
- * Returns nonzero for a final return, when we have unlocked tasklist_lock.
+ * Returns nonzero for a final return, when we have unlocked wo->held_lock.
  * Returns zero if the search for a child should continue;
  * then ->notask_error is 0 if @p is an eligible child,
  * or another error from security_task_wait(), or still -ECHILD.
@@ -1392,6 +1401,9 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace,
 		 * If a ptracer wants to distinguish these two events for its
 		 * own children it should create a separate process which takes
 		 * the role of real parent.
+		 *
+		 * Since we call do_notify_parent() under both the parent's and
+		 * the real_parent's kin_locks, we are protected from this race.
 		 */
 		if (!ptrace_reparented(p))
 			ptrace = 1;
@@ -1460,7 +1472,7 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace,
  * Do the work of do_wait() for one thread in the group, @tsk.
  *
  * -ECHILD should be in ->notask_error before the first call.
- * Returns nonzero for a final return, when we have unlocked tasklist_lock.
+ * Returns nonzero for a final return, when we have unlocked wo->held_lock.
  * Returns zero if the search for a child should continue; then
  * ->notask_error is 0 if there were any eligible children,
  * or another error from security_task_wait(), or still -ECHILD.
@@ -1538,9 +1550,10 @@ static long do_wait(struct wait_opts *wo)
 		goto notask;
 
 	set_current_state(TASK_INTERRUPTIBLE);
-	read_lock(&tasklist_lock);
-	wo->held_lock = &tasklist_lock;
+	rcu_read_lock();
 	for_each_thread(current, tsk) {
+		read_lock(&tsk->kin_lock);
+		wo->held_lock = &tsk->kin_lock;
 		retval = do_wait_thread(wo, tsk);
 		if (retval)
 			goto end;
@@ -1548,11 +1561,12 @@ static long do_wait(struct wait_opts *wo)
 		retval = ptrace_do_wait(wo, tsk);
 		if (retval)
 			goto end;
+		read_unlock(&tsk->kin_lock);
 
 		if (wo->wo_flags & __WNOTHREAD)
 			break;
 	}
-	read_unlock(&tasklist_lock);
+	rcu_read_unlock();
 
 notask:
 	retval = wo->notask_error;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 817288d..6785f66 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -180,7 +180,6 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state)
 	 * we are sure that this is our traced child and that can only
 	 * be changed by us so it's not changing right after this.
 	 */
-	read_lock(&tasklist_lock);
 	read_parent_lock(child);
 	if (child->ptrace && child->parent == current) {
 		WARN_ON(child->state == __TASK_TRACED);
@@ -192,7 +191,6 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state)
 			ret = 0;
 	}
 	read_parent_unlock(child);
-	read_unlock(&tasklist_lock);
 
 	if (!ret && !ignore_state) {
 		if (!wait_task_inactive(child, __TASK_TRACED)) {
@@ -314,8 +312,7 @@ static int ptrace_attach(struct task_struct *task, long request,
 	if (retval)
 		goto unlock_creds;
 
-	write_lock_irq(&tasklist_lock);
-	write_parent_and_given_lock(task, current);
+	write_parent_and_given_lock_irq(task, current);
 	old_parent = task->parent;
 	retval = -EPERM;
 	if (unlikely(task->exit_state) || task->ptrace) {
@@ -365,8 +362,7 @@ static int ptrace_attach(struct task_struct *task, long request,
 
 	retval = 0;
 unlock_current:
-	write_unlock(&current->kin_lock);
-	write_unlock_irq(&tasklist_lock);
+	write_unlock_irq(&current->kin_lock);
 unlock_creds:
 	mutex_unlock(&task->signal->cred_guard_mutex);
 out:
@@ -389,8 +385,7 @@ static int ptrace_traceme(void)
 {
 	int ret = -EPERM;
 
-	write_lock_irq(&tasklist_lock);
-	write_parent_lock(current);
+	write_parent_lock_irq(current);
 	/* Are we already being traced? */
 	if (!current->ptrace) {
 		ret = security_ptrace_traceme(current->parent);
@@ -405,8 +400,7 @@ static int ptrace_traceme(void)
 			__ptrace_link(current, current->real_parent);
 		}
 	}
-	write_parent_unlock(current);
-	write_unlock_irq(&tasklist_lock);
+	write_parent_unlock_irq(current);
 
 	return ret;
 }
@@ -474,8 +468,7 @@ static int ptrace_detach(struct task_struct *child, unsigned int data)
 	ptrace_disable(child);
 	clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
 
-	write_lock_irq(&tasklist_lock);
-	write_parent_and_real_parent_lock(child);
+	write_parent_and_real_parent_lock_irq(child);
 	old_parent = child->parent;
 	/*
 	 * We rely on ptrace_freeze_traced(). It can't be killed and
@@ -490,8 +483,7 @@ static int ptrace_detach(struct task_struct *child, unsigned int data)
 	__ptrace_detach(current, child);
 	if (old_parent != child->real_parent)
 		write_unlock(&old_parent->kin_lock);
-	write_real_parent_unlock(child);
-	write_unlock_irq(&tasklist_lock);
+	write_real_parent_unlock_irq(child);
 
 	proc_ptrace_connector(child, PTRACE_DETACH);
 
diff --git a/kernel/signal.c b/kernel/signal.c
index 8288019..2e7b622 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1732,6 +1732,8 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
 	struct sighand_struct *sighand;
 	cputime_t utime, stime;
 
+	BUG_ON(!same_thread_group(tsk, current));
+
 	if (for_ptracer) {
 		parent = tsk->parent;
 	} else {
@@ -1881,7 +1883,6 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info)
 	task_clear_jobctl_trapping(current);
 
 	spin_unlock_irq(&current->sighand->siglock);
-	read_lock(&tasklist_lock);
 	read_parent_and_real_parent_lock(current);
 	if (may_ptrace_stop()) {
 		/*
@@ -1906,7 +1907,6 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info)
 		 */
 		preempt_disable();
 		read_parent_and_real_parent_unlock(current);
-		read_unlock(&tasklist_lock);
 		preempt_enable_no_resched();
 		freezable_schedule();
 	} else {
@@ -1928,7 +1928,6 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info)
 		if (clear_code)
 			current->exit_code = 0;
 		read_parent_and_real_parent_unlock(current);
-		read_unlock(&tasklist_lock);
 	}
 
 	/*
@@ -2081,11 +2080,9 @@ static bool do_signal_stop(int signr)
 		 * TASK_TRACED.
 		 */
 		if (notify) {
-			read_lock(&tasklist_lock);
 			read_parent_and_real_parent_lock(current);
 			do_notify_parent_cldstop(current, false, notify);
 			read_parent_and_real_parent_unlock(current);
-			read_unlock(&tasklist_lock);
 		}
 
 		/* Now we don't run again until woken by SIGCONT or SIGKILL */
@@ -2229,7 +2226,6 @@ int get_signal(struct ksignal *ksig)
 		 * the ptracer of the group leader too unless it's gonna be
 		 * a duplicate.
 		 */
-		read_lock(&tasklist_lock);
 		read_parent_and_real_parent_lock(current->group_leader);
 		do_notify_parent_cldstop(current, false, why);
 
@@ -2237,7 +2233,6 @@ int get_signal(struct ksignal *ksig)
 			do_notify_parent_cldstop(current->group_leader,
 						true, why);
 		read_parent_and_real_parent_unlock(current->group_leader);
-		read_unlock(&tasklist_lock);
 
 		goto relock;
 	}
@@ -2482,11 +2477,9 @@ void exit_signals(struct task_struct *tsk)
 	 * should always go to the real parent of the group leader.
 	 */
 	if (unlikely(group_stop)) {
-		read_lock(&tasklist_lock);
 		read_parent_and_real_parent_lock(tsk);
 		do_notify_parent_cldstop(tsk, false, group_stop);
 		read_parent_and_real_parent_unlock(tsk);
-		read_unlock(&tasklist_lock);
 	}
 }
 



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ