Message-ID: <20250317164356.GB6888@noisy.programming.kicks-ass.net>
Date: Mon, 17 Mar 2025 17:43:56 +0100
From: Peter Zijlstra <peterz@...radead.org>
To: John Stultz <jstultz@...gle.com>
Cc: LKML <linux-kernel@...r.kernel.org>,
	Joel Fernandes <joelagnelf@...dia.com>,
	Qais Yousef <qyousef@...alina.io>, Ingo Molnar <mingo@...hat.com>,
	Juri Lelli <juri.lelli@...hat.com>,
	Vincent Guittot <vincent.guittot@...aro.org>,
	Dietmar Eggemann <dietmar.eggemann@....com>,
	Valentin Schneider <vschneid@...hat.com>,
	Steven Rostedt <rostedt@...dmis.org>,
	Ben Segall <bsegall@...gle.com>,
	Zimuzo Ezeozue <zezeozue@...gle.com>, Mel Gorman <mgorman@...e.de>,
	Will Deacon <will@...nel.org>, Waiman Long <longman@...hat.com>,
	Boqun Feng <boqun.feng@...il.com>,
	"Paul E. McKenney" <paulmck@...nel.org>,
	Metin Kaya <Metin.Kaya@....com>,
	Xuewen Yan <xuewen.yan94@...il.com>,
	K Prateek Nayak <kprateek.nayak@....com>,
	Thomas Gleixner <tglx@...utronix.de>,
	Daniel Lezcano <daniel.lezcano@...aro.org>,
	Suleiman Souhlal <suleiman@...gle.com>, kernel-team@...roid.com,
	Valentin Schneider <valentin.schneider@....com>,
	Connor O'Brien <connoro@...gle.com>
Subject: Re: [RFC PATCH v15 7/7] sched: Start blocked_on chain processing in
 find_proxy_task()

On Wed, Mar 12, 2025 at 03:11:37PM -0700, John Stultz wrote:
> @@ -6668,47 +6676,138 @@ static bool proxy_deactivate(struct rq *rq, struct task_struct *donor)
>  }
>  
>  /*
> + * Find runnable lock owner to proxy for mutex blocked donor
> + *
> + * Follow the blocked-on relation:
> + *   task->blocked_on -> mutex->owner -> task...
> + *
> + * Lock order:
> + *
> + *   p->pi_lock
> + *     rq->lock
> + *       mutex->wait_lock
> + *
> + * Returns the task that is going to be used as execution context (the one
> + * that is actually going to be run on cpu_of(rq)).
>   */
>  static struct task_struct *
>  find_proxy_task(struct rq *rq, struct task_struct *donor, struct rq_flags *rf)
>  {
> +	struct task_struct *owner = NULL;
> +	struct task_struct *ret = NULL;
> +	int this_cpu = cpu_of(rq);
> +	struct task_struct *p;
>  	struct mutex *mutex;
>  
> +	/* Follow blocked_on chain. */
> +	for (p = donor; task_is_blocked(p); p = owner) {
> +		mutex = p->blocked_on;
> +		/* Something changed in the chain, so pick again */
> +		if (!mutex)
> +			return NULL;
>  		/*
> +		 * By taking mutex->wait_lock we hold off concurrent mutex_unlock()
> +		 * and ensure @owner sticks around.
>  		 */
> +		raw_spin_lock(&mutex->wait_lock);

This comment is only true if you kill __mutex_unlock_fast(), which I
don't think you did in the previous patches.
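
For reference, roughly what the unlock fast path looks like today
(simplified from kernel/locking/mutex.c; the real thing has a
CONFIG_DEBUG_LOCK_ALLOC ifdef around it):

	/*
	 * Fast-path unlock: clears lock->owner with a bare cmpxchg and
	 * never touches mutex->wait_lock, so holding wait_lock does not
	 * serialize against this path.
	 */
	static __always_inline bool __mutex_unlock_fast(struct mutex *lock)
	{
		unsigned long curr = (unsigned long)current;

		return atomic_long_try_cmpxchg_release(&lock->owner, &curr, 0UL);
	}

	void __sched mutex_unlock(struct mutex *lock)
	{
		if (__mutex_unlock_fast(lock))
			return;		/* wait_lock was never taken */
		__mutex_unlock_slowpath(lock, _RET_IP_);
	}

That is, wait_lock only pins @owner once every unlock has been forced
through the slow path.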

> +
> +		/* Check again that p is blocked with wait_lock held */
> +		if (mutex != __get_task_blocked_on(p)) {
> +			/*
> +			 * Something changed in the blocked_on chain and
> +			 * we don't know if only at this level. So, let's
> +			 * just bail out completely and let __schedule
> +			 * figure things out (pick_again loop).
> +			 */
> +			goto out;
> +		}
> +
> +		owner = __mutex_owner(mutex);
> +		if (!owner) {
> +			__clear_task_blocked_on(p, mutex);
> +			ret = p;
> +			goto out;
> +		}
> +
> +		if (task_cpu(owner) != this_cpu) {
> +			/* XXX Don't handle migrations yet */
> +			if (!proxy_deactivate(rq, donor))
> +				goto deactivate_failed;
> +			goto out;
> +		}
> +
> +		if (task_on_rq_migrating(owner)) {
> +			/*
> +			 * One of the chain of mutex owners is currently migrating to this
> +			 * CPU, but has not yet been enqueued because we are holding the
> +			 * rq lock. As a simple solution, just schedule rq->idle to give
> +			 * the migration a chance to complete. Much like the migrate_task
> +			 * case we should end up back in find_proxy_task(), this time
> +			 * hopefully with all relevant tasks already enqueued.
> +			 */
> +			raw_spin_unlock(&mutex->wait_lock);
> +			return proxy_resched_idle(rq);
> +		}
> +
> +		if (!owner->on_rq) {
> +			/* XXX Don't handle blocked owners yet */
> +			if (!proxy_deactivate(rq, donor))
> +				goto deactivate_failed;
> +			goto out;
> +		}
> +
> +		if (owner->se.sched_delayed) {
> +			/* XXX Don't handle delayed dequeue yet */
> +			if (!proxy_deactivate(rq, donor))
> +				goto deactivate_failed;
> +			goto out;
> +		}
> +
> +		if (owner == p) {
> +			/*
> +			 * It's possible we interleave with mutex_unlock like:
> +			 *
> +			 *				lock(&rq->lock);
> +			 *				  find_proxy_task()
> +			 * mutex_unlock()
> +			 *   lock(&wait_lock);
> +			 *   donor(owner) = current->blocked_donor;
> +			 *   unlock(&wait_lock);
> +			 *
> +			 *   wake_up_q();
> +			 *     ...
> +			 *       ttwu_runnable()
> +			 *         __task_rq_lock()
> +			 *				  lock(&wait_lock);
> +			 *				  owner == p
> +			 *
> +			 * Which leaves us to finish the ttwu_runnable() and make it go.
> +			 *
> +			 * So schedule rq->idle so that ttwu_runnable can get the rq lock
> +			 * and mark owner as running.
> +			 */
> +			raw_spin_unlock(&mutex->wait_lock);
> +			return proxy_resched_idle(rq);
> +		}
>  
>  		/*
> +		 * OK, now we're absolutely sure @owner is on this
> +		 * rq, therefore holding @rq->lock is sufficient to
> +		 * guarantee its existence, as per ttwu_remote().
>  		 */
>  		raw_spin_unlock(&mutex->wait_lock);
>  	}
> +
> +	WARN_ON_ONCE(owner && !owner->on_rq);
> +	return owner;
> +
> +deactivate_failed:
> +	/*
> +	 * XXX: For now, if deactivation failed, set donor
> +	 * as unblocked, as we aren't doing proxy-migrations
> +	 * yet (more logic will be needed then).
> +	 */
> +	donor->blocked_on = NULL; /* XXX not following locking rules :( */
>  out:
>  	raw_spin_unlock(&mutex->wait_lock);
>  	return NULL; /* do pick_next_task again */


Also, something like the below might be a cleanup -- I didn't check your
full series.

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6653,7 +6653,7 @@ proxy_resched_idle(struct rq *rq)
 	return rq->idle;
 }
 
-static bool proxy_deactivate(struct rq *rq, struct task_struct *donor)
+static bool __proxy_deactivate(struct rq *rq, struct task_struct *donor)
 {
 	unsigned long state = READ_ONCE(donor->__state);
 
@@ -6673,6 +6673,19 @@ static bool proxy_deactivate(struct rq *
 	return try_to_block_task(rq, donor, state, true);
 }
 
+static struct task_struct *proxy_deactivate(struct rq *rq, struct task_struct *donor)
+{
+	if (!__proxy_deactivate(rq, donor)) {
+		/*
+		 * XXX: For now, if deactivation failed, set donor
+		 * as unblocked, as we aren't doing proxy-migrations
+		 * yet (more logic will be needed then).
+		 */
+		donor->blocked_on = NULL;
+	}
+	return NULL;
+}
+
 /*
  * Find runnable lock owner to proxy for mutex blocked donor
  *
@@ -6691,23 +6704,17 @@ static bool proxy_deactivate(struct rq *
 static struct task_struct *
 find_proxy_task(struct rq *rq, struct task_struct *donor, struct rq_flags *rf)
 {
-	struct task_struct *owner = NULL;
-	struct task_struct *ret = NULL;
+	struct task_struct *owner, *p;
 	int this_cpu = cpu_of(rq);
-	struct task_struct *p;
 	struct mutex *mutex;
 
 	/* Follow blocked_on chain. */
-	for (p = donor; task_is_blocked(p); p = owner) {
-		mutex = p->blocked_on;
-		/* Something changed in the chain, so pick again */
-		if (!mutex)
-			return NULL;
+	for (p = donor; (mutex = p->blocked_on); p = owner) {
 		/*
 		 * By taking mutex->wait_lock we hold off concurrent mutex_unlock()
 		 * and ensure @owner sticks around.
 		 */
-		raw_spin_lock(&mutex->wait_lock);
+		guard(raw_spinlock)(&mutex->wait_lock);
 
 		/* Check again that p is blocked with wait_lock held */
 		if (mutex != __get_task_blocked_on(p)) {
@@ -6717,22 +6724,17 @@ find_proxy_task(struct rq *rq, struct ta
 			 * just bail out completely and let __schedule
 			 * figure things out (pick_again loop).
 			 */
-			goto out;
+			return NULL;
 		}
 
 		owner = __mutex_owner(mutex);
 		if (!owner) {
 			__clear_task_blocked_on(p, mutex);
-			ret = p;
-			goto out;
+			return p;
 		}
 
-		if (task_cpu(owner) != this_cpu) {
-			/* XXX Don't handle migrations yet */
-			if (!proxy_deactivate(rq, donor))
-				goto deactivate_failed;
-			goto out;
-		}
+		if (task_cpu(owner) != this_cpu)
+			return proxy_deactivate(rq, donor);
 
 		if (task_on_rq_migrating(owner)) {
 			/*
@@ -6743,22 +6745,17 @@ find_proxy_task(struct rq *rq, struct ta
 			 * case we should end up back in find_proxy_task(), this time
 			 * hopefully with all relevant tasks already enqueued.
 			 */
-			raw_spin_unlock(&mutex->wait_lock);
-			return proxy_resched_idle(rq);
+			goto ret_idle;
 		}
 
 		if (!owner->on_rq) {
 			/* XXX Don't handle blocked owners yet */
-			if (!proxy_deactivate(rq, donor))
-				goto deactivate_failed;
-			goto out;
+			return proxy_deactivate(rq, donor);
 		}
 
 		if (owner->se.sched_delayed) {
 			/* XXX Don't handle delayed dequeue yet */
-			if (!proxy_deactivate(rq, donor))
-				goto deactivate_failed;
-			goto out;
+			return proxy_deactivate(rq, donor);
 		}
 
 		if (owner == p) {
@@ -6784,8 +6781,7 @@ find_proxy_task(struct rq *rq, struct ta
 			 * So schedule rq->idle so that ttwu_runnable can get the rq lock
 			 * and mark owner as running.
 			 */
-			raw_spin_unlock(&mutex->wait_lock);
-			return proxy_resched_idle(rq);
+			goto ret_idle;
 		}
 
 		/*
@@ -6793,22 +6789,13 @@ find_proxy_task(struct rq *rq, struct ta
 		 * rq, therefore holding @rq->lock is sufficient to
 		 * guarantee its existence, as per ttwu_remote().
 		 */
-		raw_spin_unlock(&mutex->wait_lock);
 	}
 
 	WARN_ON_ONCE(owner && !owner->on_rq);
 	return owner;
 
-deactivate_failed:
-	/*
-	 * XXX: For now, if deactivation failed, set donor
-	 * as unblocked, as we aren't doing proxy-migrations
-	 * yet (more logic will be needed then).
-	 */
-	donor->blocked_on = NULL; /* XXX not following locking rules :( */
-out:
-	raw_spin_unlock(&mutex->wait_lock);
-	return NULL; /* do pick_next_task again */
+ret_idle:
+	return proxy_resched_idle(rq);
 }
 #else /* SCHED_PROXY_EXEC */
 static struct task_struct *
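
(Aside, for anyone who hasn't met it yet: guard(raw_spinlock) above comes
from <linux/cleanup.h> and attaches the unlock to scope exit, so every
return and the loop back-edge drop mutex->wait_lock automatically. A
simplified sketch of the machinery, following the DEFINE_CLASS() naming
in cleanup.h rather than the exact macro expansion:

	/* Sketch only; the real guard is generated by DEFINE_LOCK_GUARD_1(). */
	typedef struct { raw_spinlock_t *lock; } class_raw_spinlock_t;

	static inline class_raw_spinlock_t
	class_raw_spinlock_constructor(raw_spinlock_t *l)
	{
		class_raw_spinlock_t _t = { .lock = l };
		raw_spin_lock(_t.lock);
		return _t;
	}

	static inline void class_raw_spinlock_destructor(class_raw_spinlock_t *_T)
	{
		raw_spin_unlock(_T->lock);
	}

	/*
	 * guard(raw_spinlock)(&mutex->wait_lock) then declares a local with
	 * __attribute__((cleanup(class_raw_spinlock_destructor))), so the
	 * unlock runs on every path out of the loop body: each return, the
	 * goto ret_idle, and the next loop iteration.
	 */

Which is what makes dropping all the explicit raw_spin_unlock() calls in
find_proxy_task() safe.)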
