Message-Id: <20220518225355.784371-16-ebiederm@xmission.com>
Date: Wed, 18 May 2022 17:53:55 -0500
From: "Eric W. Biederman" <ebiederm@...ssion.com>
To: linux-kernel@...r.kernel.org
Cc: rjw@...ysocki.net, Oleg Nesterov <oleg@...hat.com>,
mingo@...nel.org, vincent.guittot@...aro.org,
dietmar.eggemann@....com, rostedt@...dmis.org, mgorman@...e.de,
bigeasy@...utronix.de, Will Deacon <will@...nel.org>,
tj@...nel.org, linux-pm@...r.kernel.org,
Peter Zijlstra <peterz@...radead.org>,
Richard Weinberger <richard@....at>,
Anton Ivanov <anton.ivanov@...bridgegreys.com>,
Johannes Berg <johannes@...solutions.net>,
linux-um@...ts.infradead.org, Chris Zankel <chris@...kel.net>,
Max Filippov <jcmvbkbc@...il.com>,
linux-xtensa@...ux-xtensa.org, Kees Cook <keescook@...omium.org>,
Jann Horn <jannh@...gle.com>, linux-ia64@...r.kernel.org,
Robert OCallahan <roc@...nos.co>, Kyle Huey <khuey@...nos.co>,
Richard Henderson <rth@...ddle.net>,
Ivan Kokshaysky <ink@...assic.park.msu.ru>,
Matt Turner <mattst88@...il.com>,
Jason Wessel <jason.wessel@...driver.com>,
Daniel Thompson <daniel.thompson@...aro.org>,
Douglas Anderson <dianders@...omium.org>,
Douglas Miller <dougmill@...ux.vnet.ibm.com>,
Michael Ellerman <mpe@...erman.id.au>,
Benjamin Herrenschmidt <benh@...nel.crashing.org>,
Paul Mackerras <paulus@...ba.org>,
"Eric W. Biederman" <ebiederm@...ssion.com>
Subject: [PATCH 16/16] signal: Always call do_notify_parent_cldstop with siglock held

Now that siglock keeps tsk->parent and tsk->real_parent constant,
require that do_notify_parent_cldstop is called with tsk->siglock held
instead of the tasklist_lock.

As all of the callers of do_notify_parent_cldstop previously had to
drop the siglock and take tasklist_lock, this simplifies every caller.
This removes one reason for taking tasklist_lock.
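
For illustration, the notification pattern in the callers goes roughly
from (a simplified sketch, not a verbatim excerpt of the diff below):

	spin_unlock_irq(&current->sighand->siglock);
	read_lock(&tasklist_lock);
	do_notify_parent_cldstop(current, false, why);
	read_unlock(&tasklist_lock);

to:

	lock_parents_siglocks(false);
	do_notify_parent_cldstop(current, false, why);
	unlock_parents_siglocks(false);
	spin_unlock_irq(&current->sighand->siglock);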

This should make ptrace_stop work correctly and reliably with
PREEMPT_RT enabled and CONFIG_CGROUPS disabled.  The remaining
challenge is that cgroup_enter_frozen takes a spin_lock after __state
has been set to TASK_TRACED, which on PREEMPT_RT means the code can
sleep and change __state.  Not only that, but it means that
wait_task_inactive could detect the code scheduling away at that point
and fail, causing ptrace_check_attach to fail.
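
The remaining problematic ordering in ptrace_stop (a simplified sketch
of the code as it stands after this patch) is roughly:

	set_special_state(TASK_TRACED);
	...
	preempt_disable();
	spin_unlock_irq(&current->sighand->siglock);
	cgroup_enter_frozen();	/* takes a spin_lock; may sleep on PREEMPT_RT */
	preempt_enable_no_resched();
	freezable_schedule();
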
Signed-off-by: "Eric W. Biederman" <ebiederm@...ssion.com>
---
kernel/signal.c | 262 ++++++++++++++++++++++++++++++++++--------------
1 file changed, 189 insertions(+), 73 deletions(-)
diff --git a/kernel/signal.c b/kernel/signal.c
index 2cc45e8448e2..d4956be51939 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1994,6 +1994,129 @@ int send_sigqueue(struct sigqueue *q, struct pid *pid, enum pid_type type)
return ret;
}
+/**
+ * lock_parents_siglocks - Take current, real_parent, and parent's siglock
+ * @lock_tracer: The tracer's siglock is needed.
+ *
+ * There is no natural ordering to these locks so they must be sorted
+ * before being taken.
+ *
+ * There are two complicating factors here:
+ * - The locks live in sighand and sighand can be arbitrarily shared
+ * - parent and real_parent can change when current's siglock is unlocked.
+ *
+ * To deal with this, first all of the sighand pointers are
+ * gathered under current's siglock, and the sighand pointers are
+ * sorted. As siglock lives inside of sighand this also sorts the
+ * siglocks by address.
+ *
+ * Then the siglocks are taken in order dropping current's siglock if
+ * necessary.
+ *
+ * Finally, if parent and real_parent have not changed, return.
+ * If either parent has changed, drop their locks and try again.
+ *
+ * Changing sighand is an infrequent and somewhat expensive operation
+ * (unshare or exec) and so even in the worst case this loop
+ * should not iterate too many times before all of the proper locks are
+ * taken in order.
+ *
+ * CONTEXT:
+ * Must be called with @current->sighand->siglock held
+ *
+ * RETURNS:
+ * current's, real_parent's, and parent's siglock held.
+ */
+static void lock_parents_siglocks(bool lock_tracer)
+ __releases(&current->sighand->siglock)
+ __acquires(&current->sighand->siglock)
+ __acquires(&current->real_parent->sighand->siglock)
+ __acquires(&current->parent->sighand->siglock)
+{
+ struct task_struct *me = current;
+ struct sighand_struct *m_sighand = me->sighand;
+
+ lockdep_assert_held(&m_sighand->siglock);
+
+ rcu_read_lock();
+ for (;;) {
+ struct task_struct *parent, *tracer;
+ struct sighand_struct *p_sighand, *t_sighand, *s1, *s2, *s3;
+
+ parent = me->real_parent;
+ tracer = ptrace_parent(me);
+ if (!tracer || !lock_tracer)
+ tracer = parent;
+
+ p_sighand = rcu_dereference(parent->sighand);
+ t_sighand = rcu_dereference(tracer->sighand);
+
+ /* Sort the sighands so that s1 <= s2 <= s3 */
+ s1 = m_sighand;
+ s2 = p_sighand;
+ s3 = t_sighand;
+ if (s1 > s2)
+ swap(s1, s2);
+ if (s1 > s3)
+ swap(s1, s3);
+ if (s2 > s3)
+ swap(s2, s3);
+
+ /* Take the locks in order */
+ if (s1 != m_sighand) {
+ spin_unlock(&m_sighand->siglock);
+ spin_lock(&s1->siglock);
+ }
+ if (s1 != s2)
+ spin_lock_nested(&s2->siglock, 1);
+ if (s2 != s3)
+ spin_lock_nested(&s3->siglock, 2);
+
+ /* Verify the proper locks are held */
+ if (likely((s1 == m_sighand) ||
+ ((me->real_parent == parent) &&
+ (me->parent == tracer) &&
+ (parent->sighand == p_sighand) &&
+ (tracer->sighand == t_sighand)))) {
+ break;
+ }
+
+ /* Drop all but current's siglock */
+ if (p_sighand != m_sighand)
+ spin_unlock(&p_sighand->siglock);
+ if (t_sighand != p_sighand)
+ spin_unlock(&t_sighand->siglock);
+
+ /*
+ * Since [pt]_sighand will likely change if we go
+ * around, and m_sighand is the only one held, make sure
+ * it is subclass-0, since the above 's1 != m_sighand'
+ * clause very much relies on that.
+ */
+ lock_set_subclass(&m_sighand->siglock.dep_map, 0, _RET_IP_);
+ }
+ rcu_read_unlock();
+}
+
+static void unlock_parents_siglocks(bool unlock_tracer)
+ __releases(&current->real_parent->sighand->siglock)
+ __releases(&current->parent->sighand->siglock)
+{
+ struct task_struct *me = current;
+ struct task_struct *parent = me->real_parent;
+ struct task_struct *tracer = ptrace_parent(me);
+ struct sighand_struct *m_sighand = me->sighand;
+ struct sighand_struct *p_sighand = parent->sighand;
+
+ if (p_sighand != m_sighand)
+ spin_unlock(&p_sighand->siglock);
+ if (tracer && unlock_tracer) {
+ struct sighand_struct *t_sighand = tracer->sighand;
+ if (t_sighand != p_sighand)
+ spin_unlock(&t_sighand->siglock);
+ }
+}
+
static void do_notify_pidfd(struct task_struct *task)
{
struct pid *pid;
@@ -2125,11 +2248,12 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
bool for_ptracer, int why)
{
struct kernel_siginfo info;
- unsigned long flags;
struct task_struct *parent;
struct sighand_struct *sighand;
u64 utime, stime;
+ lockdep_assert_held(&tsk->sighand->siglock);
+
if (for_ptracer) {
parent = tsk->parent;
} else {
@@ -2137,6 +2261,8 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
parent = tsk->real_parent;
}
+ lockdep_assert_held(&parent->sighand->siglock);
+
clear_siginfo(&info);
info.si_signo = SIGCHLD;
info.si_errno = 0;
@@ -2168,7 +2294,6 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
}
sighand = parent->sighand;
- spin_lock_irqsave(&sighand->siglock, flags);
if (sighand->action[SIGCHLD-1].sa.sa_handler != SIG_IGN &&
!(sighand->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP))
send_signal_locked(SIGCHLD, &info, parent, PIDTYPE_TGID);
@@ -2176,7 +2301,6 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
* Even if SIGCHLD is not generated, we must wake up wait4 calls.
*/
__wake_up_parent(tsk, parent);
- spin_unlock_irqrestore(&sighand->siglock, flags);
}
/*
@@ -2208,14 +2332,18 @@ static void ptrace_stop(int exit_code, int why, unsigned long message,
spin_lock_irq(&current->sighand->siglock);
}
+ lock_parents_siglocks(true);
/*
* After this point ptrace_signal_wake_up or signal_wake_up
* will clear TASK_TRACED if ptrace_unlink happens or a fatal
* signal comes in. Handle previous ptrace_unlinks and fatal
* signals here to prevent ptrace_stop sleeping in schedule.
*/
- if (!current->ptrace || __fatal_signal_pending(current))
+
+ if (!current->ptrace || __fatal_signal_pending(current)) {
+ unlock_parents_siglocks(true);
return;
+ }
set_special_state(TASK_TRACED);
current->jobctl |= JOBCTL_TRACED;
@@ -2254,16 +2382,6 @@ static void ptrace_stop(int exit_code, int why, unsigned long message,
if (why == CLD_STOPPED && (current->jobctl & JOBCTL_STOP_PENDING))
gstop_done = task_participate_group_stop(current);
- /* any trap clears pending STOP trap, STOP trap clears NOTIFY */
- task_clear_jobctl_pending(current, JOBCTL_TRAP_STOP);
- if (info && info->si_code >> 8 == PTRACE_EVENT_STOP)
- task_clear_jobctl_pending(current, JOBCTL_TRAP_NOTIFY);
-
- /* entering a trap, clear TRAPPING */
- task_clear_jobctl_trapping(current);
-
- spin_unlock_irq(&current->sighand->siglock);
- read_lock(&tasklist_lock);
/*
* Notify parents of the stop.
*
@@ -2279,14 +2397,25 @@ static void ptrace_stop(int exit_code, int why, unsigned long message,
if (gstop_done && (!current->ptrace || ptrace_reparented(current)))
do_notify_parent_cldstop(current, false, why);
+ unlock_parents_siglocks(true);
+
+ /* any trap clears pending STOP trap, STOP trap clears NOTIFY */
+ task_clear_jobctl_pending(current, JOBCTL_TRAP_STOP);
+ if (info && info->si_code >> 8 == PTRACE_EVENT_STOP)
+ task_clear_jobctl_pending(current, JOBCTL_TRAP_NOTIFY);
+
+ /* entering a trap, clear TRAPPING */
+ task_clear_jobctl_trapping(current);
+
/*
* Don't want to allow preemption here, because
* sys_ptrace() needs this task to be inactive.
*
- * XXX: implement read_unlock_no_resched().
+ * XXX: implement spin_unlock_no_resched().
*/
preempt_disable();
- read_unlock(&tasklist_lock);
+ spin_unlock_irq(&current->sighand->siglock);
+
cgroup_enter_frozen();
preempt_enable_no_resched();
freezable_schedule();
@@ -2361,8 +2490,8 @@ int ptrace_notify(int exit_code, unsigned long message)
* on %true return.
*
* RETURNS:
- * %false if group stop is already cancelled or ptrace trap is scheduled.
- * %true if participated in group stop.
+ * %false if group stop is already cancelled.
+ * %true otherwise (as lock_parents_siglocks may have dropped siglock).
*/
static bool do_signal_stop(int signr)
__releases(&current->sighand->siglock)
@@ -2425,36 +2554,24 @@ static bool do_signal_stop(int signr)
}
}
+ lock_parents_siglocks(false);
+ /* Recheck JOBCTL_STOP_PENDING after unlock+lock of siglock */
+ if (unlikely(!(current->jobctl & JOBCTL_STOP_PENDING)))
+ goto out;
if (likely(!current->ptrace)) {
- int notify = 0;
-
/*
* If there are no other threads in the group, or if there
* is a group stop in progress and we are the last to stop,
- * report to the parent.
+ * report to the real_parent.
*/
if (task_participate_group_stop(current))
- notify = CLD_STOPPED;
+ do_notify_parent_cldstop(current, false, CLD_STOPPED);
+ unlock_parents_siglocks(false);
current->jobctl |= JOBCTL_STOPPED;
set_special_state(TASK_STOPPED);
spin_unlock_irq(&current->sighand->siglock);
- /*
- * Notify the parent of the group stop completion. Because
- * we're not holding either the siglock or tasklist_lock
- * here, ptracer may attach inbetween; however, this is for
- * group stop and should always be delivered to the real
- * parent of the group leader. The new ptracer will get
- * its notification when this task transitions into
- * TASK_TRACED.
- */
- if (notify) {
- read_lock(&tasklist_lock);
- do_notify_parent_cldstop(current, false, notify);
- read_unlock(&tasklist_lock);
- }
-
/* Now we don't run again until woken by SIGCONT or SIGKILL */
cgroup_enter_frozen();
freezable_schedule();
@@ -2465,8 +2582,11 @@ static bool do_signal_stop(int signr)
* Schedule it and let the caller deal with it.
*/
task_set_jobctl_pending(current, JOBCTL_TRAP_STOP);
- return false;
}
+out:
+ unlock_parents_siglocks(false);
+ spin_unlock_irq(&current->sighand->siglock);
+ return true;
}
/**
@@ -2624,32 +2744,30 @@ bool get_signal(struct ksignal *ksig)
if (unlikely(signal->flags & SIGNAL_CLD_MASK)) {
int why;
- if (signal->flags & SIGNAL_CLD_CONTINUED)
- why = CLD_CONTINUED;
- else
- why = CLD_STOPPED;
+ lock_parents_siglocks(true);
+ /* Recheck signal->flags after unlock+lock of siglock */
+ if (likely(signal->flags & SIGNAL_CLD_MASK)) {
+ if (signal->flags & SIGNAL_CLD_CONTINUED)
+ why = CLD_CONTINUED;
+ else
+ why = CLD_STOPPED;
- signal->flags &= ~SIGNAL_CLD_MASK;
+ signal->flags &= ~SIGNAL_CLD_MASK;
- spin_unlock_irq(&sighand->siglock);
-
- /*
- * Notify the parent that we're continuing. This event is
- * always per-process and doesn't make whole lot of sense
- * for ptracers, who shouldn't consume the state via
- * wait(2) either, but, for backward compatibility, notify
- * the ptracer of the group leader too unless it's gonna be
- * a duplicate.
- */
- read_lock(&tasklist_lock);
- do_notify_parent_cldstop(current, false, why);
-
- if (ptrace_reparented(current->group_leader))
- do_notify_parent_cldstop(current->group_leader,
- true, why);
- read_unlock(&tasklist_lock);
-
- goto relock;
+ /*
+ * Notify the parent that we're continuing. This event is
+ * always per-process and doesn't make whole lot of sense
+ * for ptracers, who shouldn't consume the state via
+ * wait(2) either, but, for backward compatibility, notify
+ * the ptracer of the group leader too unless it's gonna be
+ * a duplicate.
+ */
+ do_notify_parent_cldstop(current, false, why);
+ if (ptrace_reparented(current->group_leader))
+ do_notify_parent_cldstop(current->group_leader,
+ true, why);
+ }
+ unlock_parents_siglocks(true);
}
for (;;) {
@@ -2906,7 +3024,6 @@ static void retarget_shared_pending(struct task_struct *tsk, sigset_t *which)
void exit_signals(struct task_struct *tsk)
{
- int group_stop = 0;
sigset_t unblocked;
/*
@@ -2937,21 +3054,20 @@ void exit_signals(struct task_struct *tsk)
signotset(&unblocked);
retarget_shared_pending(tsk, &unblocked);
- if (unlikely(tsk->jobctl & JOBCTL_STOP_PENDING) &&
- task_participate_group_stop(tsk))
- group_stop = CLD_STOPPED;
-out:
- spin_unlock_irq(&tsk->sighand->siglock);
-
/*
* If group stop has completed, deliver the notification. This
* should always go to the real parent of the group leader.
*/
- if (unlikely(group_stop)) {
- read_lock(&tasklist_lock);
- do_notify_parent_cldstop(tsk, false, group_stop);
- read_unlock(&tasklist_lock);
+ if (unlikely(tsk->jobctl & JOBCTL_STOP_PENDING)) {
+ lock_parents_siglocks(false);
+ /* Recheck JOBCTL_STOP_PENDING after unlock+lock of siglock */
+ if ((tsk->jobctl & JOBCTL_STOP_PENDING) &&
+ task_participate_group_stop(tsk))
+ do_notify_parent_cldstop(tsk, false, CLD_STOPPED);
+ unlock_parents_siglocks(false);
}
+out:
+ spin_unlock_irq(&tsk->sighand->siglock);
}
/*
--
2.35.3