Message-Id: <20190506044836.2914-4-luca.abeni@santannapisa.it>
Date: Mon, 6 May 2019 06:48:33 +0200
From: Luca Abeni <luca.abeni@...tannapisa.it>
To: linux-kernel@...r.kernel.org
Cc: Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
"Rafael J . Wysocki" <rafael@...nel.org>,
Ingo Molnar <mingo@...hat.com>,
Peter Zijlstra <peterz@...radead.org>,
Vincent Guittot <vincent.guittot@...aro.org>,
"Paul E . McKenney" <paulmck@...ux.ibm.com>,
Joel Fernandes <joel@...lfernandes.org>,
Quentin Perret <quentin.perret@....com>,
Luc Van Oostenryck <luc.vanoostenryck@...il.com>,
Morten Rasmussen <morten.rasmussen@....com>,
Juri Lelli <juri.lelli@...hat.com>,
Daniel Bristot de Oliveira <bristot@...hat.com>,
Patrick Bellasi <patrick.bellasi@....com>,
Tommaso Cucinotta <tommaso.cucinotta@...tannapisa.it>,
luca abeni <luca.abeni@...tannapisa.it>
Subject: [RFC PATCH 3/6] sched/dl: Try better placement even for deadline tasks that do not block
From: luca abeni <luca.abeni@...tannapisa.it>
Currently, the scheduler tries to find a proper placement for
SCHED_DEADLINE tasks only when they are pushed out of a core or when
they wake up. Hence, a SCHED_DEADLINE task that never blocks (and
therefore never goes through the wakeup path) is never migrated to a
more appropriate CPU core, but keeps executing on its original core.
This commit addresses the issue by trying to migrate a SCHED_DEADLINE
task (searching for an appropriate CPU core) the first time it is
throttled.
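To make the scenario concrete, something like the following userspace
program (a minimal sketch, not part of this patch; the reservation
parameters and the print interval are arbitrary example values) creates
exactly such a task: it is CPU-bound, so it gets throttled at the end of
every period but never blocks, and therefore never goes through the
wakeup placement path. With this patch applied, the first throttling
event also triggers a search for a better-suited core.

/*
 * Hypothetical reproducer, not part of this patch: a CPU-bound
 * SCHED_DEADLINE task that never blocks.  The 10ms/100ms reservation
 * is an arbitrary example; the sched_attr layout follows the
 * sched_setattr(2) man page.
 */
#define _GNU_SOURCE
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef SCHED_DEADLINE
#define SCHED_DEADLINE	6
#endif

struct sched_attr {
	uint32_t size;
	uint32_t sched_policy;
	uint64_t sched_flags;
	int32_t  sched_nice;
	uint32_t sched_priority;
	uint64_t sched_runtime;
	uint64_t sched_deadline;
	uint64_t sched_period;
};

int main(void)
{
	struct sched_attr attr = {
		.size           = sizeof(attr),
		.sched_policy   = SCHED_DEADLINE,
		.sched_runtime  = 10 * 1000 * 1000,	/*  10ms */
		.sched_deadline = 100 * 1000 * 1000,	/* 100ms */
		.sched_period   = 100 * 1000 * 1000,	/* 100ms */
	};
	unsigned long spins = 0;

	if (syscall(SYS_sched_setattr, 0, &attr, 0)) {
		perror("sched_setattr");
		return 1;
	}

	/* Never block: the task is throttled every period instead. */
	for (;;) {
		if (++spins % 100000000UL == 0)
			printf("running on CPU %d\n", sched_getcpu());
	}
}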
Signed-off-by: luca abeni <luca.abeni@...tannapisa.it>
---
include/linux/sched.h | 1 +
kernel/sched/deadline.c | 53 ++++++++++++++++++++++++++++++++++++-----
kernel/sched/sched.h | 2 ++
3 files changed, 50 insertions(+), 6 deletions(-)
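Note for reviewers (not part of the patch): the deferred migration added
below relies on the existing balance-callback machinery. Roughly,
paraphrasing queue_balance_callback() from kernel/sched/sched.h in the
current tree:

static inline void
queue_balance_callback(struct rq *rq,
		       struct callback_head *head,
		       void (*func)(struct rq *rq))
{
	lockdep_assert_held(&rq->lock);

	/* Already queued: nothing to do. */
	if (unlikely(head->next))
		return;

	head->func = (void (*)(struct callback_head *))func;
	head->next = rq->balance_callback;
	rq->balance_callback = head;
}

The queued callback is invoked from balance_callback() at the end of
__schedule(), where migrate_dl_task() can safely use
double_lock_balance() (which may temporarily drop the rq lock),
something that could not be done directly from update_curr_dl().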
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 863f70843875..5e322c8a94e0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -560,6 +560,7 @@ struct sched_dl_entity {
unsigned int dl_yielded : 1;
unsigned int dl_non_contending : 1;
unsigned int dl_overrun : 1;
+ unsigned int dl_adjust : 1;
/*
* Bandwidth enforcement timer. Each -deadline task has its
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 3436f3d8fa8f..db471889196b 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -515,6 +515,7 @@ static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
return dl_task(prev);
}
+static DEFINE_PER_CPU(struct callback_head, dl_migrate_head);
static DEFINE_PER_CPU(struct callback_head, dl_push_head);
static DEFINE_PER_CPU(struct callback_head, dl_pull_head);
@@ -1149,6 +1150,32 @@ static u64 grub_reclaim(u64 delta, struct rq *rq, struct sched_dl_entity *dl_se)
return (delta * u_act) >> BW_SHIFT;
}
+#ifdef CONFIG_SMP
+static int find_later_rq(struct task_struct *task);
+
+static void migrate_dl_task(struct rq *rq)
+{
+ struct task_struct *t = rq->migrating_task;
+ struct sched_dl_entity *dl_se = &t->dl;
+ int cpu = find_later_rq(t);
+
+ if ((cpu != -1) && (cpu != rq->cpu)) {
+ struct rq *later_rq;
+
+ later_rq = cpu_rq(cpu);
+
+ double_lock_balance(rq, later_rq);
+ sub_running_bw(&t->dl, &rq->dl);
+ sub_rq_bw(&t->dl, &rq->dl);
+ set_task_cpu(t, later_rq->cpu);
+ add_rq_bw(&t->dl, &later_rq->dl);
+ add_running_bw(&t->dl, &later_rq->dl);
+ double_unlock_balance(rq, later_rq);
+ }
+ rq->migrating_task = NULL;
+ dl_se->dl_adjust = 0;
+}
+#endif
/*
* Update the current task's runtime statistics (provided it is still
* a -deadline task and has not been removed from the dl_rq).
@@ -1223,8 +1250,17 @@ static void update_curr_dl(struct rq *rq)
dl_se->dl_overrun = 1;
__dequeue_task_dl(rq, curr, 0);
- if (unlikely(dl_se->dl_boosted || !start_dl_timer(curr)))
+ if (unlikely(dl_se->dl_boosted || !start_dl_timer(curr))) {
enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
+#ifdef CONFIG_SMP
+ } else if (dl_se->dl_adjust) {
+ if (rq->migrating_task == NULL) {
+ queue_balance_callback(rq, &per_cpu(dl_migrate_head, rq->cpu), migrate_dl_task);
+ rq->migrating_task = current;
+ } else
+ printk_deferred("Throttled task before migrating the previous one???\n");
+#endif
+ }
if (!is_leftmost(curr, &rq->dl))
resched_curr(rq);
@@ -1573,13 +1609,12 @@ static void yield_task_dl(struct rq *rq)
#ifdef CONFIG_SMP
-static int find_later_rq(struct task_struct *task);
-
static int
select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
{
struct task_struct *curr;
struct rq *rq;
+ bool het;
if (sd_flag != SD_BALANCE_WAKE)
goto out;
@@ -1591,6 +1626,7 @@ select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
rcu_read_lock();
curr = READ_ONCE(rq->curr); /* unlocked access */
+ het = static_branch_unlikely(&sched_asym_cpucapacity);
/*
* If we are dealing with a -deadline task, we must
@@ -1604,15 +1640,17 @@ select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
if ((unlikely(dl_task(curr)) &&
(curr->nr_cpus_allowed < 2 ||
!dl_entity_preempt(&p->dl, &curr->dl)) &&
- (p->nr_cpus_allowed > 1)) ||
- static_branch_unlikely(&sched_asym_cpucapacity)) {
+ (p->nr_cpus_allowed > 1)) || het) {
int target = find_later_rq(p);
if (target != -1 &&
(dl_time_before(p->dl.deadline,
cpu_rq(target)->dl.earliest_dl.curr) ||
- (cpu_rq(target)->dl.dl_nr_running == 0)))
+ (cpu_rq(target)->dl.dl_nr_running == 0))) {
+ if (het && (target != cpu))
+ p->dl.dl_adjust = 1;
cpu = target;
+ }
}
rcu_read_unlock();
@@ -2369,6 +2407,9 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
else
resched_curr(rq);
}
+
+ if (static_branch_unlikely(&sched_asym_cpucapacity))
+ p->dl.dl_adjust = 1;
}
/*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e5f9fd3aee80..1a8f75338ac2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -963,6 +963,8 @@ struct rq {
/* This is used to determine avg_idle's max value */
u64 max_idle_balance_cost;
+
+ struct task_struct *migrating_task;
#endif
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
--
2.20.1