Message-ID: <YW14cgrrRWSAzmYn@hirez.programming.kicks-ass.net>
Date:   Mon, 18 Oct 2021 15:36:50 +0200
From:   Peter Zijlstra <peterz@...radead.org>
To:     rjw@...ysocki.net, oleg@...hat.com, mingo@...nel.org,
        vincent.guittot@...aro.org, dietmar.eggemann@....com,
        rostedt@...dmis.org, mgorman@...e.de, Will Deacon <will@...nel.org>
Cc:     linux-kernel@...r.kernel.org, tj@...nel.org,
        linux-pm@...r.kernel.org
Subject: Re: [PATCH v3 6/6] freezer,sched: Rewrite core freezer logic

On Sat, Oct 09, 2021 at 12:08:00PM +0200, Peter Zijlstra wrote:

> +static inline unsigned int __can_freeze(struct task_struct *p)
> +{
> +	unsigned int state = READ_ONCE(p->__state);
> +
> +	if (!(state & (TASK_FREEZABLE | __TASK_STOPPED | __TASK_TRACED)))
> +		return 0;
> +
> +	/*
> +	 * Only TASK_NORMAL can be augmented with TASK_FREEZABLE, since they
> +	 * can suffer spurious wakeups.
> +	 */
> +	if (state & TASK_FREEZABLE)
> +		WARN_ON_ONCE(!(state & TASK_NORMAL));
> +
> +#ifdef CONFIG_LOCKDEP
> +	/*
> +	 * It's dangerous to freeze with locks held; there be dragons there.
> +	 */
> +	if (!(state & __TASK_FREEZABLE_UNSAFE))
> +		WARN_ON_ONCE(debug_locks && p->lockdep_depth);
> +#endif
> +
> +	return TASK_FROZEN;
> +}
> +
> +/* See task_cond_set_special_state(); serializes against ttwu() */
> +static bool __freeze_task(struct task_struct *p)
> +{
> +	return task_cond_set_special_state(p, __can_freeze(p));
> +}

Will found an issue with this: task_cond_set_special_state() only takes
->pi_lock, and as such doesn't serialize against __schedule(), which
then yields the following fun scenario:


	__schedule()					__freeze_task()


	prev_state = READ_ONCE(prev->__state); // INTERRUPTIBLE

							task_cond_set_special_state()
							  ...
							  WRITE_ONCE(prev->__state, TASK_FROZEN);

	if (signal_pending_state(prev_state, prev)) // SIGPENDING
	  WRITE_ONCE(prev->__state, TASK_RUNNING)




And *whoopsie*, the freezer thinks we're frozen, but we're back in the game.
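
For reference, the write gets lost in this bit of __schedule() (roughly
paraphrased from kernel/sched/core.c, not verbatim), all of which runs
with rq->lock held:

	prev_state = READ_ONCE(prev->__state);
	if (!(sched_mode & SM_MASK_PREEMPT) && prev_state) {
		if (signal_pending_state(prev_state, prev)) {
			/*
			 * Uses the stale prev_state (INTERRUPTIBLE); if
			 * __freeze_task() wrote TASK_FROZEN in between,
			 * that write is overwritten here.
			 */
			WRITE_ONCE(prev->__state, TASK_RUNNING);
		} else {
			deactivate_task(rq, prev, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK);
		}
	}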


AFAICT the below, which uses the brand-spanking-new task_call_func()
(currently sitting in tip/sched/core) to also serialize against
rq->lock, should avoid this scenario.
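
For completeness, task_call_func() (paraphrasing what's queued in
tip/sched/core, not verbatim) pins the task by taking ->pi_lock
unconditionally and, whenever the task is runnable/queued/running,
rq->lock as well, before invoking the callback:

	raw_spin_lock_irqsave(&p->pi_lock, rf.flags);

	state = READ_ONCE(p->__state);
	smp_rmb(); /* order against p->on_rq; see ttwu() */

	if (state == TASK_RUNNING || state == TASK_WAKING || p->on_rq)
		rq = __task_rq_lock(p, &rf);

	/*
	 * The task is now pinned; either:
	 *  - blocked, and we're holding off wakeups      (pi_lock)
	 *  - woken,   and we're holding off enqueue      (rq->lock)
	 *  - queued,  and we're holding off schedule     (rq->lock)
	 *  - running, and we're holding off de-schedule  (rq->lock)
	 */
	ret = func(p, arg);

	if (rq)
		rq_unlock(rq, &rf);

	raw_spin_unlock_irqrestore(&p->pi_lock, rf.flags);

So with __set_task_frozen() running from that callback, __schedule()
can no longer observe a stale ->__state and overwrite TASK_FROZEN.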


--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -233,25 +233,6 @@ struct task_group;
 	} while (0)
 
 /*
- * task_cond_set_special_state() is a cmpxchg like operation on task->state.
- *
- * This operation isn't safe in general and should only be used to transform
- * one (special) blocked state into another, such as:
- *   TASK_STOPPED <-> TASK_FROZEN.
- */
-#define task_cond_set_special_state(task, cond_state)			\
-	({								\
-		struct task_struct *__p = (task);			\
-		unsigned long __flags; /* may shadow */			\
-		unsigned int __state;					\
-		raw_spin_lock_irqsave(&__p->pi_lock, __flags);		\
-		if ((__state = (cond_state)))				\
-			WRITE_ONCE(__p->__state, __state);		\
-		raw_spin_unlock_irqrestore(&__p->pi_lock, __flags);	\
-		!!__state;						\
-	})
-
-/*
  * PREEMPT_RT specific variants for "sleeping" spin/rwlocks
  *
  * RT's spin/rwlock substitutions are state preserving. The state of the
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -101,7 +101,7 @@ static void fake_signal_wake_up(struct t
 	}
 }
 
-static inline unsigned int __can_freeze(struct task_struct *p)
+static int __set_task_frozen(struct task_struct *p, void *arg)
 {
 	unsigned int state = READ_ONCE(p->__state);
 
@@ -123,13 +123,14 @@ static inline unsigned int __can_freeze(
 		WARN_ON_ONCE(debug_locks && p->lockdep_depth);
 #endif
 
+	WRITE_ONCE(p->__state, TASK_FROZEN);
 	return TASK_FROZEN;
 }
 
-/* See task_cond_set_special_state(); serializes against ttwu() */
 static bool __freeze_task(struct task_struct *p)
 {
-	return task_cond_set_special_state(p, __can_freeze(p));
+	/* TASK_FREEZABLE|TASK_STOPPED|TASK_TRACED -> TASK_FROZEN */
+	return task_call_func(p, __set_task_frozen, NULL);
 }
 
 /**
@@ -169,7 +170,7 @@ bool freeze_task(struct task_struct *p)
  * reflects that and the below will refuse to restore the special state and
  * instead issue the wakeup.
  */
-static inline unsigned int __thaw_special(struct task_struct *p)
+static int __set_task_special(struct task_struct *p, void *arg)
 {
 	unsigned int state = 0;
 
@@ -188,6 +189,9 @@ static inline unsigned int __thaw_specia
 		state = TASK_STOPPED;
 	}
 
+	if (state)
+		WRITE_ONCE(p->__state, state);
+
 	return state;
 }
 
@@ -200,7 +204,8 @@ void __thaw_task(struct task_struct *p)
 		goto unlock;
 
 	if (lock_task_sighand(p, &flags2)) {
-		bool ret = task_cond_set_special_state(p, __thaw_special(p));
+		/* TASK_FROZEN -> TASK_{STOPPED,TRACED} */
+		bool ret = task_call_func(p, __set_task_special, NULL);
 		unlock_task_sighand(p, &flags2);
 		if (ret)
 			goto unlock;
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -193,6 +193,17 @@ static bool looks_like_a_spurious_pid(st
 	return true;
 }
 
+static int __set_task_traced(struct task_struct *task, void *arg)
+{
+	unsigned int *state = arg;
+
+	if (!(task->__state & __TASK_TRACED))
+		return 0;
+
+	WRITE_ONCE(task->__state, *state);
+	return *state;
+}
+
 /* Ensure that nothing can wake it up, even SIGKILL */
 static bool ptrace_freeze_traced(struct task_struct *task)
 {
@@ -205,10 +216,12 @@ static bool ptrace_freeze_traced(struct
 	spin_lock_irq(&task->sighand->siglock);
 	if (task_is_traced(task) && !looks_like_a_spurious_pid(task) &&
 	    !__fatal_signal_pending(task)) {
+		unsigned int state = __TASK_TRACED;
+
 		task->ptrace &= ~PT_STOPPED_MASK;
 		task->ptrace |= PT_STOPPED;
 		/* *TASK_TRACED -> __TASK_TRACED */
-		task_cond_set_special_state(task, !!(task->__state & __TASK_TRACED) * __TASK_TRACED);
+		task_call_func(task, __set_task_traced, &state);
 		ret = true;
 	}
 	spin_unlock_irq(&task->sighand->siglock);
@@ -233,9 +246,11 @@ static void ptrace_unfreeze_traced(struc
 			task->ptrace &= ~PT_STOPPED_MASK;
 			wake_up_state(task, __TASK_TRACED);
 		} else {
+			unsigned int state = TASK_TRACED;
+
 			task->ptrace |= PT_STOPPED_MASK;
 			/* *TASK_TRACED -> TASK_TRACED */
-			task_cond_set_special_state(task, !!(task->__state & __TASK_TRACED) * TASK_TRACED);
+			task_call_func(task, __set_task_traced, &state);
 		}
 	}
 	spin_unlock_irq(&task->sighand->siglock);
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3181,7 +3181,7 @@ int migrate_swap(struct task_struct *cur
 }
 #endif /* CONFIG_NUMA_BALANCING */
 
-static inline __wti_match(struct task_struct *p, unsigned int match_state)
+static inline bool __wti_match(struct task_struct *p, unsigned int match_state)
 {
 	unsigned int state = READ_ONCE(p->__state);
 
