Date:   Tue, 28 Apr 2020 00:02:40 -0500
From:   Scott Wood <swood@...hat.com>
To:     Steven Rostedt <rostedt@...dmis.org>,
        Ingo Molnar <mingo@...hat.com>,
        Peter Zijlstra <peterz@...radead.org>,
        Vincent Guittot <vincent.guittot@...aro.org>
Cc:     Dietmar Eggemann <dietmar.eggemann@....com>,
        Rik van Riel <riel@...riel.com>,
        Mel Gorman <mgorman@...e.de>, linux-kernel@...r.kernel.org,
        linux-rt-users <linux-rt-users@...r.kernel.org>,
        Scott Wood <swood@...hat.com>
Subject: [RFC PATCH 1/3] sched/fair: Call newidle_balance() from finish_task_switch()

Move the call to newidle_balance() out of the pick_next_task() path and
into finish_task_switch(), where it runs once the CPU has switched to
the idle task, after the runqueue lock has been released and interrupts
have been re-enabled. Thus, newidle_balance() is entered with interrupts
enabled, which allows (in the next patch) enabling interrupts when the
lock is dropped.

Signed-off-by: Scott Wood <swood@...hat.com>
---
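An outline of the caller side, simplified from the first hunk below:
pick_next_task() no longer needs the RETRY_TASK/restart protocol, since
the pick path no longer drops the runqueue lock; the balance instead
runs once the switch to the idle task has completed:

	static struct rq *finish_task_switch(struct task_struct *prev)
	{
		/*
		 * ... existing finish_task_switch() work; by this point
		 * rq->lock has been released and IRQs re-enabled ...
		 */
		tick_nohz_task_switch();

		if (is_idle_task(current))
			newidle_balance();

		return rq;
	}
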
 kernel/sched/core.c  |  7 ++++---
 kernel/sched/fair.c  | 47 ++++++++++++++++++-----------------------------
 kernel/sched/sched.h |  6 ++----
 3 files changed, 24 insertions(+), 36 deletions(-)
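
An outline of the callee side, simplified from the fair.c hunks below
(the balancing loop itself and the early-exit paths are unchanged):
newidle_balance() now establishes its own context rather than
inheriting the caller's pinned rq lock:

	int newidle_balance(void)
	{
		struct rq *this_rq;
		int pulled_task = 0;

		preempt_disable();	/* stay on this CPU across the balance */
		this_rq = this_rq();
		local_bh_disable();
		raw_spin_lock_irq(&this_rq->lock);

		update_rq_clock(this_rq);

		/* ... attempt to pull tasks from other CPUs ... */

		raw_spin_unlock_irq(&this_rq->lock);
		local_bh_enable();
		preempt_enable();

		return pulled_task;
	}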

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9a2fbf98fd6f..0294beb8d16c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3241,6 +3241,10 @@ static struct rq *finish_task_switch(struct task_struct *prev)
 	}
 
 	tick_nohz_task_switch();
+
+	if (is_idle_task(current))
+		newidle_balance();
+
 	return rq;
 }
 
@@ -3919,8 +3923,6 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 		   rq->nr_running == rq->cfs.h_nr_running)) {
 
 		p = pick_next_task_fair(rq, prev, rf);
-		if (unlikely(p == RETRY_TASK))
-			goto restart;
 
 		/* Assumes fair_sched_class->next == idle_sched_class */
 		if (!p) {
@@ -3931,7 +3933,6 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 		return p;
 	}
 
-restart:
 #ifdef CONFIG_SMP
 	/*
 	 * We must do the balancing pass before put_next_task(), such
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 02f323b85b6d..74c3c5280d6b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6758,8 +6758,8 @@ balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 {
 	if (rq->nr_running)
 		return 1;
-
-	return newidle_balance(rq, rf) != 0;
+
+	return 0;
 }
 #endif /* CONFIG_SMP */
 
@@ -6934,9 +6932,7 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
 	struct cfs_rq *cfs_rq = &rq->cfs;
 	struct sched_entity *se;
 	struct task_struct *p;
-	int new_tasks;
 
-again:
 	if (!sched_fair_runnable(rq))
 		goto idle;
 
@@ -7050,19 +7046,6 @@ done: __maybe_unused;
 	if (!rf)
 		return NULL;
 
-	new_tasks = newidle_balance(rq, rf);
-
-	/*
-	 * Because newidle_balance() releases (and re-acquires) rq->lock, it is
-	 * possible for any higher priority task to appear. In that case we
-	 * must re-start the pick_next_entity() loop.
-	 */
-	if (new_tasks < 0)
-		return RETRY_TASK;
-
-	if (new_tasks > 0)
-		goto again;
-
 	/*
 	 * rq is about to be idle, check if we need to update the
 	 * lost_idle_time of clock_pelt
@@ -10425,14 +10408,23 @@ static inline void nohz_newidle_balance(struct rq *this_rq) { }
  *     0 - failed, no new tasks
  *   > 0 - success, new (fair) tasks present
  */
-int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
+int newidle_balance(void)
 {
 	unsigned long next_balance = jiffies + HZ;
-	int this_cpu = this_rq->cpu;
+	int this_cpu;
 	struct sched_domain *sd;
+	struct rq *this_rq;
 	int pulled_task = 0;
 	u64 curr_cost = 0;
 
+	preempt_disable();
+	this_rq = this_rq();
+	this_cpu = this_rq->cpu;
+	local_bh_disable();
+	raw_spin_lock_irq(&this_rq->lock);
+
+	update_rq_clock(this_rq);
+
 	update_misfit_status(NULL, this_rq);
 	/*
 	 * We must set idle_stamp _before_ calling idle_balance(), such that we
@@ -10444,15 +10436,7 @@ int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
 	 * Do not pull tasks towards !active CPUs...
 	 */
 	if (!cpu_active(this_cpu))
-		return 0;
-
-	/*
-	 * This is OK, because current is on_cpu, which avoids it being picked
-	 * for load-balance and preemption/IRQs are still disabled avoiding
-	 * further scheduler activity on it and we're being very careful to
-	 * re-start the picking loop.
-	 */
-	rq_unpin_lock(this_rq, rf);
+		goto out_unlock;
 
 	if (this_rq->avg_idle < sysctl_sched_migration_cost ||
 	    !READ_ONCE(this_rq->rd->overload)) {
@@ -10534,7 +10518,10 @@ int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
 	if (pulled_task)
 		this_rq->idle_stamp = 0;
 
-	rq_repin_lock(this_rq, rf);
+out_unlock:
+	raw_spin_unlock_irq(&this_rq->lock);
+	local_bh_enable();
+	preempt_enable();
 
 	return pulled_task;
 }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index db3a57675ccf..3d97c51544d7 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1504,13 +1504,13 @@ static inline void unregister_sched_domain_sysctl(void)
 }
 #endif
 
-extern int newidle_balance(struct rq *this_rq, struct rq_flags *rf);
+extern int newidle_balance(void);
 
 #else
 
 static inline void sched_ttwu_pending(void) { }
 
-static inline int newidle_balance(struct rq *this_rq, struct rq_flags *rf) { return 0; }
+static inline int newidle_balance(void) { return 0; }
 
 #endif /* CONFIG_SMP */
 
@@ -1742,8 +1742,6 @@ extern const u32		sched_prio_to_wmult[40];
 #define ENQUEUE_MIGRATED	0x00
 #endif
 
-#define RETRY_TASK		((void *)-1UL)
-
 struct sched_class {
 	const struct sched_class *next;
 
-- 
2.18.2
