lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20090617194813.GA26428@redhat.com>
Date:	Wed, 17 Jun 2009 21:48:13 +0200
From:	Oleg Nesterov <oleg@...hat.com>
To:	Roland McGrath <roland@...hat.com>
Cc:	Andrew Morton <akpm@...ux-foundation.org>,
	Peter Zijlstra <a.p.zijlstra@...llo.nl>,
	Stanislaw Gruszka <sgruszka@...hat.com>,
	Vitaly Mayatskikh <vmayatsk@...hat.com>,
	linux-kernel@...r.kernel.org
Subject: [rfc] ptrace: wait_task_zombie: fix the racy EXIT_ZOMBIE setting

I didn't try to test this patch, just for early review. I think we need
a helper or two for "if (noreap)" branches.

>From now EXIT_DEAD really means the task is dead and should be ignored.
Fixes the race with ->real_parent doing do_wait().


Problem: if we untrace but do not set EXIT_DEAD, ->real_parent can release
the task before getrusage(). In that case k_getrusage() silently fails, and
we fill ->wo_rusage with zeros.

Do you see any solution? Or can we ignore this minor (I think) problem?

On 06/15, Roland McGrath wrote:
>
> ACK, but I think it warrants a comment explaining that task_detached() here
> always means "ptrace'd but not reparented".

Yes. And the name of "int traced" is very bad and confusing.

But I am wondering, shouldn't we always untrace the traced chils, regardless
of ptrace_reparented() ? This looks more clean to me.

Oleg.

--- PTRACE/kernel/exit.c~3_ZOMBIE_DEAD	2009-06-15 23:06:45.000000000 +0200
+++ PTRACE/kernel/exit.c	2009-06-17 21:24:23.000000000 +0200
@@ -1153,7 +1153,7 @@ static int wait_noreap_copyout(struct wa
 static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 {
 	unsigned long state;
-	int retval, status, traced;
+	int retval, status, noreap;
 	pid_t pid = task_pid_vnr(p);
 	uid_t uid = __task_cred(p)->uid;
 	struct siginfo __user *infop;
@@ -1161,11 +1161,12 @@ static int wait_task_zombie(struct wait_
 	if (!likely(wo->wo_flags & WEXITED))
 		return 0;
 
+	get_task_struct(p);
+
 	if (unlikely(wo->wo_flags & WNOWAIT)) {
 		int exit_code = p->exit_code;
 		int why, status;
 
-		get_task_struct(p);
 		read_unlock(&tasklist_lock);
 		if ((exit_code & 0x7f) == 0) {
 			why = CLD_EXITED;
@@ -1177,84 +1178,105 @@ static int wait_task_zombie(struct wait_
 		return wait_noreap_copyout(wo, p, pid, uid, why, status);
 	}
 
-	/*
-	 * Try to move the task's state to DEAD
-	 * only one thread is allowed to do this:
-	 */
-	state = xchg(&p->exit_state, EXIT_DEAD);
-	if (state != EXIT_ZOMBIE) {
-		BUG_ON(state != EXIT_DEAD);
-		return 0;
-	}
-
-	traced = ptrace_reparented(p);
+	status = (p->signal->flags & SIGNAL_GROUP_EXIT)
+		? p->signal->group_exit_code : p->exit_code;
 
-	if (likely(!traced) && likely(!task_detached(p))) {
-		struct signal_struct *psig;
-		struct signal_struct *sig;
+	noreap = ptrace_reparented(p);
+	if (unlikely(noreap)) {
+		read_unlock(&tasklist_lock);
 
+		retval = -EAGAIN;
+		write_lock_irq(&tasklist_lock);
+		if (task_ptrace(p)) {
+			BUG_ON(p->exit_state != EXIT_ZOMBIE);
+			__ptrace_unlink(p);
+			/*
+			 * If this is not a detached task, notify the parent.
+			 * If it's still not detached after that, don't release
+			 * it now.
+			 */
+			if (!task_detached(p))
+				do_notify_parent(p, p->exit_signal);
+			if (task_detached(p)) {
+				p->exit_state = EXIT_DEAD;
+				noreap = 0;
+			}
+			retval = 0;
+		}
+		write_unlock_irq(&tasklist_lock);
+		if (unlikely(retval))
+			goto out;
+	} else {
 		/*
-		 * The resource counters for the group leader are in its
-		 * own task_struct.  Those for dead threads in the group
-		 * are in its signal_struct, as are those for the child
-		 * processes it has previously reaped.  All these
-		 * accumulate in the parent's signal_struct c* fields.
-		 *
-		 * We don't bother to take a lock here to protect these
-		 * p->signal fields, because they are only touched by
-		 * __exit_signal, which runs with tasklist_lock
-		 * write-locked anyway, and so is excluded here.  We do
-		 * need to protect the access to parent->signal fields,
-		 * as other threads in the parent group can be right
-		 * here reaping other children at the same time.
+		 * Try to move the task's state to DEAD
+		 * only one thread is allowed to do this:
 		 */
-		spin_lock_irq(&p->real_parent->sighand->siglock);
-		psig = p->real_parent->signal;
-		sig = p->signal;
-		psig->cutime =
-			cputime_add(psig->cutime,
-			cputime_add(p->utime,
-			cputime_add(sig->utime,
-				    sig->cutime)));
-		psig->cstime =
-			cputime_add(psig->cstime,
-			cputime_add(p->stime,
-			cputime_add(sig->stime,
-				    sig->cstime)));
-		psig->cgtime =
-			cputime_add(psig->cgtime,
-			cputime_add(p->gtime,
-			cputime_add(sig->gtime,
-				    sig->cgtime)));
-		psig->cmin_flt +=
-			p->min_flt + sig->min_flt + sig->cmin_flt;
-		psig->cmaj_flt +=
-			p->maj_flt + sig->maj_flt + sig->cmaj_flt;
-		psig->cnvcsw +=
-			p->nvcsw + sig->nvcsw + sig->cnvcsw;
-		psig->cnivcsw +=
-			p->nivcsw + sig->nivcsw + sig->cnivcsw;
-		psig->cinblock +=
-			task_io_get_inblock(p) +
-			sig->inblock + sig->cinblock;
-		psig->coublock +=
-			task_io_get_oublock(p) +
-			sig->oublock + sig->coublock;
-		task_io_accounting_add(&psig->ioac, &p->ioac);
-		task_io_accounting_add(&psig->ioac, &sig->ioac);
-		spin_unlock_irq(&p->real_parent->sighand->siglock);
-	}
+		state = xchg(&p->exit_state, EXIT_DEAD);
+		if (unlikely(state != EXIT_ZOMBIE)) {
+			BUG_ON(state != EXIT_DEAD);
+			goto out;
+		}
 
-	/*
-	 * Now we are sure this task is interesting, and no other
-	 * thread can reap it because we set its state to EXIT_DEAD.
-	 */
-	read_unlock(&tasklist_lock);
+		if (likely(!task_detached(p))) {
+			struct signal_struct *psig;
+			struct signal_struct *sig;
+
+			/*
+			 * The resource counters for the group leader are in its
+			 * own task_struct.  Those for dead threads in the group
+			 * are in its signal_struct, as are those for the child
+			 * processes it has previously reaped.  All these
+			 * accumulate in the parent's signal_struct c* fields.
+			 *
+			 * We don't bother to take a lock here to protect these
+			 * p->signal fields, because they are only touched by
+			 * __exit_signal, which runs with tasklist_lock
+			 * write-locked anyway, and so is excluded here.  We do
+			 * need to protect the access to parent->signal fields,
+			 * as other threads in the parent group can be right
+			 * here reaping other children at the same time.
+			 */
+			spin_lock_irq(&p->real_parent->sighand->siglock);
+			psig = p->real_parent->signal;
+			sig = p->signal;
+			psig->cutime =
+				cputime_add(psig->cutime,
+				cputime_add(p->utime,
+				cputime_add(sig->utime,
+					    sig->cutime)));
+			psig->cstime =
+				cputime_add(psig->cstime,
+				cputime_add(p->stime,
+				cputime_add(sig->stime,
+					    sig->cstime)));
+			psig->cgtime =
+				cputime_add(psig->cgtime,
+				cputime_add(p->gtime,
+				cputime_add(sig->gtime,
+					    sig->cgtime)));
+			psig->cmin_flt +=
+				p->min_flt + sig->min_flt + sig->cmin_flt;
+			psig->cmaj_flt +=
+				p->maj_flt + sig->maj_flt + sig->cmaj_flt;
+			psig->cnvcsw +=
+				p->nvcsw + sig->nvcsw + sig->cnvcsw;
+			psig->cnivcsw +=
+				p->nivcsw + sig->nivcsw + sig->cnivcsw;
+			psig->cinblock +=
+				task_io_get_inblock(p) +
+				sig->inblock + sig->cinblock;
+			psig->coublock +=
+				task_io_get_oublock(p) +
+				sig->oublock + sig->coublock;
+			task_io_accounting_add(&psig->ioac, &p->ioac);
+			task_io_accounting_add(&psig->ioac, &sig->ioac);
+			spin_unlock_irq(&p->real_parent->sighand->siglock);
+		}
+		read_unlock(&tasklist_lock);
+	}
 
 	retval = wo->wo_rusage
 		? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
-	status = (p->signal->flags & SIGNAL_GROUP_EXIT)
-		? p->signal->group_exit_code : p->exit_code;
 	if (!retval && wo->wo_stat)
 		retval = put_user(status, wo->wo_stat);
 
@@ -1284,27 +1306,10 @@ static int wait_task_zombie(struct wait_
 	if (!retval)
 		retval = pid;
 
-	if (traced) {
-		write_lock_irq(&tasklist_lock);
-		/* We dropped tasklist, ptracer could die and untrace */
-		ptrace_unlink(p);
-		/*
-		 * If this is not a detached task, notify the parent.
-		 * If it's still not detached after that, don't release
-		 * it now.
-		 */
-		if (!task_detached(p)) {
-			do_notify_parent(p, p->exit_signal);
-			if (!task_detached(p)) {
-				p->exit_state = EXIT_ZOMBIE;
-				p = NULL;
-			}
-		}
-		write_unlock_irq(&tasklist_lock);
-	}
-	if (p != NULL)
+	if (likely(!noreap))
 		release_task(p);
-
+out:
+	put_task_struct(p);
 	return retval;
 }
 
@@ -1587,8 +1592,11 @@ repeat:
 			goto end;
 
 		retval = ptrace_do_wait(wo, tsk);
-		if (retval)
+		if (retval) {
+			if (unlikely(retval == -EAGAIN))
+				goto repeat;
 			goto end;
+		}
 
 		if (wo->wo_flags & __WNOTHREAD)
 			break;

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ