Message-Id: <11bd7179ece42536f81101414ef6564f6792cb51.1583332765.git.vpillai@digitalocean.com>
Date: Wed, 4 Mar 2020 16:59:56 +0000
From: vpillai <vpillai@...italocean.com>
To: Nishanth Aravamudan <naravamudan@...italocean.com>,
Julien Desfossez <jdesfossez@...italocean.com>,
Peter Zijlstra <peterz@...radead.org>,
Tim Chen <tim.c.chen@...ux.intel.com>, mingo@...nel.org,
tglx@...utronix.de, pjt@...gle.com, torvalds@...ux-foundation.org
Cc: linux-kernel@...r.kernel.org, fweisbec@...il.com,
keescook@...omium.org, kerrnel@...gle.com,
Phil Auld <pauld@...hat.com>, Aaron Lu <aaron.lwe@...il.com>,
Aubrey Li <aubrey.intel@...il.com>, aubrey.li@...ux.intel.com,
Valentin Schneider <valentin.schneider@....com>,
Mel Gorman <mgorman@...hsingularity.net>,
Pawan Gupta <pawan.kumar.gupta@...ux.intel.com>,
Paolo Bonzini <pbonzini@...hat.com>,
Joel Fernandes <joelaf@...gle.com>, joel@...lfernandes.org
Subject: [RFC PATCH 06/13] sched: Update core scheduler queue when taking cpu online/offline
From: Tim Chen <tim.c.chen@...ux.intel.com>
When we bring a CPU online and enable the core scheduler, tasks that need
core scheduling must be placed in the core's core scheduling queue.
Likewise, when we take a CPU offline or disable core scheduling on a
core, tasks in the core's core scheduling queue need to be removed.
Without such mechanisms, the core scheduler can Oops due to a task's
core scheduling state becoming inconsistent.

Implement such enqueue and dequeue mechanisms according to a CPU's change
in core scheduling status. Switching a core's core scheduling mode, and
enqueueing/dequeueing tasks on that core's queue as a result of the mode
change, have to run in a separate context, as they cannot be done in the
context that takes the CPU online/offline.
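
Schematically, the deferral takes the following shape (an illustrative
sketch only, not the patch code itself; update_core_queues() is a
hypothetical stand-in for the stop_machine() callbacks and the per-class
core_sched_activate/deactivate hooks added below):

#include <linux/workqueue.h>
#include <linux/stop_machine.h>
#include <linux/slab.h>
#include <linux/topology.h>

struct core_sched_cpu_work {
	struct work_struct work;
	cpumask_t smt_mask;
};

/*
 * Hypothetical stand-in: the real callbacks flip rq->core_enabled and
 * walk each sched class to enqueue/dequeue cookied tasks.
 */
static int update_core_queues(void *data)
{
	return 0;
}

static void core_sched_update_fn(struct work_struct *work)
{
	struct core_sched_cpu_work *w =
		container_of(work, struct core_sched_cpu_work, work);

	/* Workqueue context: blocking in stop_machine() is allowed here. */
	stop_machine(update_core_queues, &w->smt_mask, NULL);
	kfree(w);
}

static void core_sched_cpu_update(unsigned int cpu)
{
	struct core_sched_cpu_work *w = kmalloc(sizeof(*w), GFP_ATOMIC);

	if (!w)
		return;

	/* Snapshot the SMT siblings and defer the queue update. */
	INIT_WORK(&w->work, core_sched_update_fn);
	cpumask_copy(&w->smt_mask, cpu_smt_mask(cpu));
	queue_work(system_highpri_wq, &w->work);
}

Running the update under stop_machine() keeps all CPUs quiescent while
rq->core_enabled is flipped and tasks are added to or removed from the
core's queue.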
Signed-off-by: Tim Chen <tim.c.chen@...ux.intel.com>
---
 kernel/sched/core.c     | 156 ++++++++++++++++++++++++++++++++++++----
 kernel/sched/deadline.c |  35 +++++++++
 kernel/sched/fair.c     |  38 ++++++++++
 kernel/sched/rt.c       |  43 +++++++++++
 kernel/sched/sched.h    |   7 ++
5 files changed, 264 insertions(+), 15 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 452ce5bb9321..445f0d519336 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -75,6 +75,11 @@ int sysctl_sched_rt_runtime = 950000;
#ifdef CONFIG_SCHED_CORE
+struct core_sched_cpu_work {
+ struct work_struct work;
+ cpumask_t smt_mask;
+};
+
DEFINE_STATIC_KEY_FALSE(__sched_core_enabled);
/* kernel prio, less is more */
@@ -183,6 +188,18 @@ static void sched_core_dequeue(struct rq *rq, struct task_struct *p)
rb_erase(&p->core_node, &rq->core_tree);
}
+void sched_core_add(struct rq *rq, struct task_struct *p)
+{
+ if (p->core_cookie && task_on_rq_queued(p))
+ sched_core_enqueue(rq, p);
+}
+
+void sched_core_remove(struct rq *rq, struct task_struct *p)
+{
+ if (sched_core_enqueued(p))
+ sched_core_dequeue(rq, p);
+}
+
/*
* Find left-most (aka, highest priority) task matching @cookie.
*/
@@ -270,10 +287,132 @@ void sched_core_put(void)
mutex_unlock(&sched_core_mutex);
}
+enum cpu_action {
+ CPU_ACTIVATE = 1,
+ CPU_DEACTIVATE = 2
+};
+
+static int __activate_cpu_core_sched(void *data);
+static int __deactivate_cpu_core_sched(void *data);
+static void core_sched_cpu_update(unsigned int cpu, enum cpu_action action);
+
+static int activate_cpu_core_sched(struct core_sched_cpu_work *work)
+{
+ if (static_branch_unlikely(&__sched_core_enabled))
+ stop_machine(__activate_cpu_core_sched, (void *) work, NULL);
+
+ return 0;
+}
+
+static int deactivate_cpu_core_sched(struct core_sched_cpu_work *work)
+{
+ if (static_branch_unlikely(&__sched_core_enabled))
+ stop_machine(__deactivate_cpu_core_sched, (void *) work, NULL);
+
+ return 0;
+}
+
+static void core_sched_cpu_activate_fn(struct work_struct *work)
+{
+ struct core_sched_cpu_work *cpu_work;
+
+ cpu_work = container_of(work, struct core_sched_cpu_work, work);
+ activate_cpu_core_sched(cpu_work);
+ kfree(cpu_work);
+}
+
+static void core_sched_cpu_deactivate_fn(struct work_struct *work)
+{
+ struct core_sched_cpu_work *cpu_work;
+
+ cpu_work = container_of(work, struct core_sched_cpu_work, work);
+ deactivate_cpu_core_sched(cpu_work);
+ kfree(cpu_work);
+}
+
+static void core_sched_cpu_update(unsigned int cpu, enum cpu_action action)
+{
+ struct core_sched_cpu_work *work;
+
+ work = kmalloc(sizeof(struct core_sched_cpu_work), GFP_ATOMIC);
+ if (!work)
+ return;
+
+ if (action == CPU_ACTIVATE)
+ INIT_WORK(&work->work, core_sched_cpu_activate_fn);
+ else
+ INIT_WORK(&work->work, core_sched_cpu_deactivate_fn);
+
+ cpumask_copy(&work->smt_mask, cpu_smt_mask(cpu));
+
+ queue_work(system_highpri_wq, &work->work);
+}
+
+static int __activate_cpu_core_sched(void *data)
+{
+ struct core_sched_cpu_work *work = (struct core_sched_cpu_work *) data;
+ struct rq *rq;
+ int i;
+
+ if (cpumask_weight(&work->smt_mask) < 2)
+ return 0;
+
+ for_each_cpu(i, &work->smt_mask) {
+ const struct sched_class *class;
+
+ rq = cpu_rq(i);
+
+ if (rq->core_enabled)
+ continue;
+
+ for_each_class(class) {
+ if (!class->core_sched_activate)
+ continue;
+
+ if (cpu_online(i))
+ class->core_sched_activate(rq);
+ }
+
+ rq->core_enabled = true;
+ }
+ return 0;
+}
+
+static int __deactivate_cpu_core_sched(void *data)
+{
+ struct core_sched_cpu_work *work = (struct core_sched_cpu_work *) data;
+ struct rq *rq;
+ int i;
+
+ if (cpumask_weight(&work->smt_mask) > 2)
+ return 0;
+
+ for_each_cpu(i, &work->smt_mask) {
+ const struct sched_class *class;
+
+ rq = cpu_rq(i);
+
+ if (!rq->core_enabled)
+ continue;
+
+ for_each_class(class) {
+ if (!class->core_sched_deactivate)
+ continue;
+
+ if (cpu_online(i))
+ class->core_sched_deactivate(cpu_rq(i));
+ }
+
+ rq->core_enabled = false;
+ }
+ return 0;
+}
+
#else /* !CONFIG_SCHED_CORE */
static inline void sched_core_enqueue(struct rq *rq, struct task_struct *p) { }
static inline void sched_core_dequeue(struct rq *rq, struct task_struct *p) { }
+static inline void core_sched_cpu_update(unsigned int cpu, int action) { }
#endif /* CONFIG_SCHED_CORE */
@@ -6612,13 +6751,8 @@ int sched_cpu_activate(unsigned int cpu)
*/
if (cpumask_weight(cpu_smt_mask(cpu)) == 2) {
static_branch_inc_cpuslocked(&sched_smt_present);
-#ifdef CONFIG_SCHED_CORE
- if (static_branch_unlikely(&__sched_core_enabled)) {
- rq->core_enabled = true;
- }
-#endif
}
-
+ core_sched_cpu_update(cpu, CPU_ACTIVATE);
#endif
set_cpu_active(cpu, true);
@@ -6665,15 +6799,10 @@ int sched_cpu_deactivate(unsigned int cpu)
* When going down, decrement the number of cores with SMT present.
*/
if (cpumask_weight(cpu_smt_mask(cpu)) == 2) {
-#ifdef CONFIG_SCHED_CORE
- struct rq *rq = cpu_rq(cpu);
- if (static_branch_unlikely(&__sched_core_enabled)) {
- rq->core_enabled = false;
- }
-#endif
static_branch_dec_cpuslocked(&sched_smt_present);
}
+ core_sched_cpu_update(cpu, CPU_DEACTIVATE);
#endif
if (!sched_smp_initialized)
@@ -6748,9 +6877,6 @@ int sched_cpu_dying(unsigned int cpu)
update_max_interval();
nohz_balance_exit_idle(rq);
hrtick_clear(rq);
-#ifdef CONFIG_SCHED_CORE
- rq->core = NULL;
-#endif
return 0;
}
#endif
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index ee7fd8611ee4..e916bba0159c 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1773,6 +1773,37 @@ static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq,
return rb_entry(left, struct sched_dl_entity, rb_node);
}
+static void for_each_dl_task(struct rq *rq,
+ void (*fn)(struct rq *rq, struct task_struct *p))
+{
+ struct dl_rq *dl_rq = &rq->dl;
+ struct sched_dl_entity *dl_ent;
+ struct task_struct *task;
+ struct rb_node *rb_node;
+
+ rb_node = rb_first_cached(&dl_rq->root);
+ while (rb_node) {
+ dl_ent = rb_entry(rb_node, struct sched_dl_entity, rb_node);
+ task = dl_task_of(dl_ent);
+ fn(rq, task);
+ rb_node = rb_next(rb_node);
+ }
+}
+
+#ifdef CONFIG_SCHED_CORE
+
+static void core_sched_activate_dl(struct rq *rq)
+{
+ for_each_dl_task(rq, sched_core_add);
+}
+
+static void core_sched_deactivate_dl(struct rq *rq)
+{
+ for_each_dl_task(rq, sched_core_remove);
+}
+
+#endif
+
static struct task_struct *pick_task_dl(struct rq *rq)
{
struct sched_dl_entity *dl_se;
@@ -2460,6 +2491,10 @@ const struct sched_class dl_sched_class = {
.rq_online = rq_online_dl,
.rq_offline = rq_offline_dl,
.task_woken = task_woken_dl,
+#ifdef CONFIG_SCHED_CORE
+ .core_sched_activate = core_sched_activate_dl,
+ .core_sched_deactivate = core_sched_deactivate_dl,
+#endif
#endif
.task_tick = task_tick_dl,
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d6c932e8d554..a9eeef896c78 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -10249,6 +10249,40 @@ static void rq_offline_fair(struct rq *rq)
unthrottle_offline_cfs_rqs(rq);
}
+static void for_each_fair_task(struct rq *rq,
+ void (*fn)(struct rq *rq, struct task_struct *p))
+{
+ struct cfs_rq *cfs_rq, *pos;
+ struct sched_entity *se;
+ struct task_struct *task;
+
+ for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos) {
+ for (se = __pick_first_entity(cfs_rq);
+ se != NULL;
+ se = __pick_next_entity(se)) {
+
+ if (!entity_is_task(se))
+ continue;
+
+ task = task_of(se);
+ fn(rq, task);
+ }
+ }
+}
+
+#ifdef CONFIG_SCHED_CORE
+
+static void core_sched_activate_fair(struct rq *rq)
+{
+ for_each_fair_task(rq, sched_core_add);
+}
+
+static void core_sched_deactivate_fair(struct rq *rq)
+{
+ for_each_fair_task(rq, sched_core_remove);
+}
+
+#endif
#endif /* CONFIG_SMP */
/*
@@ -10769,6 +10803,10 @@ const struct sched_class fair_sched_class = {
.task_dead = task_dead_fair,
.set_cpus_allowed = set_cpus_allowed_common,
+#ifdef CONFIG_SCHED_CORE
+ .core_sched_activate = core_sched_activate_fair,
+ .core_sched_deactivate = core_sched_deactivate_fair,
+#endif
#endif
.task_tick = task_tick_fair,
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index d044baedc617..ccb585223fad 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1567,6 +1567,45 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq)
return rt_task_of(rt_se);
}
+static void for_each_rt_task(struct rq *rq,
+ void (*fn)(struct rq *rq, struct task_struct *p))
+{
+ rt_rq_iter_t iter;
+ struct rt_prio_array *array;
+ struct list_head *queue;
+ int i;
+ struct rt_rq *rt_rq = &rq->rt;
+ struct sched_rt_entity *rt_se = NULL;
+ struct task_struct *task;
+
+ for_each_rt_rq(rt_rq, iter, rq) {
+ array = &rt_rq->active;
+ for (i = 0; i < MAX_RT_PRIO; i++) {
+ queue = array->queue + i;
+ list_for_each_entry(rt_se, queue, run_list) {
+ if (rt_entity_is_task(rt_se)) {
+ task = rt_task_of(rt_se);
+ fn(rq, task);
+ }
+ }
+ }
+ }
+}
+
+#ifdef CONFIG_SCHED_CORE
+
+static void core_sched_activate_rt(struct rq *rq)
+{
+ for_each_rt_task(rq, sched_core_add);
+}
+
+static void core_sched_deactivate_rt(struct rq *rq)
+{
+ for_each_rt_task(rq, sched_core_remove);
+}
+
+#endif
+
static struct task_struct *pick_task_rt(struct rq *rq)
{
struct task_struct *p;
@@ -2384,6 +2423,10 @@ const struct sched_class rt_sched_class = {
.rq_offline = rq_offline_rt,
.task_woken = task_woken_rt,
.switched_from = switched_from_rt,
+#ifdef CONFIG_SCHED_CORE
+ .core_sched_activate = core_sched_activate_rt,
+ .core_sched_deactivate = core_sched_deactivate_rt,
+#endif
#endif
.task_tick = task_tick_rt,
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index a38ae770dfd6..03d502357599 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1052,6 +1052,9 @@ static inline raw_spinlock_t *rq_lockp(struct rq *rq)
return &rq->__lock;
}
+void sched_core_add(struct rq *rq, struct task_struct *p);
+void sched_core_remove(struct rq *rq, struct task_struct *p);
+
#else /* !CONFIG_SCHED_CORE */
static inline bool sched_core_enabled(struct rq *rq)
@@ -1823,6 +1826,10 @@ struct sched_class {
void (*rq_online)(struct rq *rq);
void (*rq_offline)(struct rq *rq);
+#ifdef CONFIG_SCHED_CORE
+ void (*core_sched_activate)(struct rq *rq);
+ void (*core_sched_deactivate)(struct rq *rq);
+#endif
#endif
void (*task_tick)(struct rq *rq, struct task_struct *p, int queued);
--
2.17.1