Message-ID: <20250220093257.9380-7-kprateek.nayak@amd.com>
Date: Thu, 20 Feb 2025 09:32:41 +0000
From: K Prateek Nayak <kprateek.nayak@....com>
To: Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...hat.com>,
Juri Lelli <juri.lelli@...hat.com>, Vincent Guittot
<vincent.guittot@...aro.org>, Valentin Schneider <vschneid@...hat.com>, "Ben
Segall" <bsegall@...gle.com>, Thomas Gleixner <tglx@...utronix.de>, "Andy
Lutomirski" <luto@...nel.org>, <linux-kernel@...r.kernel.org>
CC: Dietmar Eggemann <dietmar.eggemann@....com>, Steven Rostedt
<rostedt@...dmis.org>, Mel Gorman <mgorman@...e.de>, "Sebastian Andrzej
Siewior" <bigeasy@...utronix.de>, Clark Williams <clrkwllms@...nel.org>,
<linux-rt-devel@...ts.linux.dev>, Tejun Heo <tj@...nel.org>, "Frederic
Weisbecker" <frederic@...nel.org>, Barret Rhoden <brho@...gle.com>, "Petr
Mladek" <pmladek@...e.com>, Josh Don <joshdon@...gle.com>, Qais Yousef
<qyousef@...alina.io>, "Paul E. McKenney" <paulmck@...nel.org>, David Vernet
<dvernet@...a.com>, K Prateek Nayak <kprateek.nayak@....com>, "Gautham R.
Shenoy" <gautham.shenoy@....com>, Swapnil Sapkal <swapnil.sapkal@....com>
Subject: [RFC PATCH 06/22] sched/fair: Propagate the min_vruntime of kernel mode preempted entity

Propagate the min_vruntime of kernel mode preempted entities to the
root of the cfs_rq's rbtree. This will soon be used to pick among the
kernel mode entities on a throttled hierarchy using a min-heap approach
similar to the one pick_eevdf() currently implements.

Signed-off-by: K Prateek Nayak <kprateek.nayak@....com>
---
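Note (illustrative, not part of this patch): one way the propagated
min_kcs_vruntime could later drive the pick on a throttled hierarchy,
modeled on the heap traversal that pick_eevdf() uses for min_vruntime.
pick_kcs_entity() and its exact shape are assumptions sketched here for
illustration only:

	static struct sched_entity *pick_kcs_entity(struct cfs_rq *cfs_rq)
	{
		struct rb_node *node = cfs_rq->tasks_timeline.rb_root.rb_node;

		while (node) {
			struct sched_entity *se = __node_2_se(node);
			struct rb_node *left = node->rb_left;

			/* No kernel mode preempted entity in this subtree. */
			if (se->min_kcs_vruntime == LLONG_MAX)
				return NULL;

			/* The subtree minimum lives in the left subtree. */
			if (left && __node_2_se(left)->min_kcs_vruntime ==
			    se->min_kcs_vruntime) {
				node = left;
				continue;
			}

			/* This entity itself holds the subtree minimum. */
			if (se_in_kernel(se) &&
			    (s64)se->vruntime == se->min_kcs_vruntime)
				return se;

			node = node->rb_right;
		}

		return NULL;
	}
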
include/linux/sched.h | 6 ++++++
kernel/sched/fair.c | 47 ++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 52 insertions(+), 1 deletion(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 63f3f235a5c1..4bb7e45758f4 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -593,6 +593,12 @@ struct sched_entity {
*/
int kernel_cs_count;
/* hole */
+
+ /*
+ * min_vruntime of the kernel mode preempted entities
+ * in the subtree of this sched entity.
+ */
+ s64 min_kcs_vruntime;
#endif /* CONFIG_CFS_BANDWIDTH */
#endif /* CONFIG_FAIR_GROUP_SCHED */
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index cbb7a227afe7..ba1bd60ce433 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -828,6 +828,9 @@ static inline void __min_slice_update(struct sched_entity *se, struct rb_node *n
}
}
+static __always_inline void init_se_kcs_stats(struct sched_entity *se);
+static inline bool min_kcs_vruntime_update(struct sched_entity *se);
+
/*
* se->min_vruntime = min(se->vruntime, {left,right}->min_vruntime)
*/
@@ -836,6 +839,7 @@ static inline bool min_vruntime_update(struct sched_entity *se, bool exit)
u64 old_min_vruntime = se->min_vruntime;
u64 old_min_slice = se->min_slice;
struct rb_node *node = &se->run_node;
+ bool kcs_stats_unchanged = min_kcs_vruntime_update(se);
se->min_vruntime = se->vruntime;
__min_vruntime_update(se, node->rb_right);
@@ -846,7 +850,8 @@ static inline bool min_vruntime_update(struct sched_entity *se, bool exit)
__min_slice_update(se, node->rb_left);
return se->min_vruntime == old_min_vruntime &&
- se->min_slice == old_min_slice;
+ se->min_slice == old_min_slice &&
+ kcs_stats_unchanged;
}
RB_DECLARE_CALLBACKS(static, min_vruntime_cb, struct sched_entity,
@@ -858,6 +863,7 @@ RB_DECLARE_CALLBACKS(static, min_vruntime_cb, struct sched_entity,
static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
avg_vruntime_add(cfs_rq, se);
+ init_se_kcs_stats(se);
se->min_vruntime = se->vruntime;
se->min_slice = se->slice;
rb_add_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
@@ -6778,6 +6784,39 @@ static __always_inline void avg_kcs_vruntime_update(struct cfs_rq *cfs_rq, s64 d
cfs_rq->avg_kcs_vruntime -= cfs_rq->avg_kcs_load * delta;
}
+static __always_inline void init_se_kcs_stats(struct sched_entity *se)
+{
+ /*
+ * With the introduction of EEVDF, the vruntime of entities can go negative when
+ * a lagging entity joins a runqueue with avg_vruntime < vlag. Use LLONG_MAX as
+ * the upper bound to differentiate the case where no kernel mode preempted
+ * entities are queued on the subtree.
+ */
+ se->min_kcs_vruntime = (se_in_kernel(se)) ? se->vruntime : LLONG_MAX;
+}
+
+static inline void __min_kcs_vruntime_update(struct sched_entity *se, struct rb_node *node)
+{
+ if (node) {
+ struct sched_entity *rse = __node_2_se(node);
+
+ if (rse->min_kcs_vruntime < se->min_kcs_vruntime)
+ se->min_kcs_vruntime = rse->min_kcs_vruntime;
+ }
+}
+
+static inline bool min_kcs_vruntime_update(struct sched_entity *se)
+{
+ s64 old_min_kcs_vruntime = se->min_kcs_vruntime;
+ struct rb_node *node = &se->run_node;
+
+ init_se_kcs_stats(se);
+ __min_kcs_vruntime_update(se, node->rb_right);
+ __min_kcs_vruntime_update(se, node->rb_left);
+
+ return se->min_kcs_vruntime == old_min_kcs_vruntime;
+}
+
#ifdef CONFIG_NO_HZ_FULL
/* called from pick_next_task_fair() */
static void sched_fair_update_stop_tick(struct rq *rq, struct task_struct *p)
@@ -6853,6 +6892,12 @@ __always_inline void sched_notify_critical_section_exit(void) {}
static __always_inline void avg_kcs_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
static __always_inline void avg_kcs_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
static __always_inline void avg_kcs_vruntime_update(struct cfs_rq *cfs_rq, s64 delta) {}
+static __always_inline void init_se_kcs_stats(struct sched_entity *se) {}
+
+static inline bool min_kcs_vruntime_update(struct sched_entity *se)
+{
+ return true;
+}
#endif /* CONFIG_CFS_BANDWIDTH */
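
Note (illustrative): the augmented rbtree callbacks maintain, per node,

	se->min_kcs_vruntime = min(se_in_kernel(se) ? se->vruntime : LLONG_MAX,
				   left->min_kcs_vruntime,
				   right->min_kcs_vruntime);

and the field is s64 because, as the comment in init_se_kcs_stats()
notes, a lagging entity can be enqueued with a negative vruntime. A
signed compare keeps such entities below the LLONG_MAX sentinel, which
an unsigned field would invert:

	s64 kcs = -50;		/* lagging kernel mode preempted entity */
	s64 none = LLONG_MAX;	/* subtree without such entities */

	/* signed: min(kcs, none) == -50, as intended */
	/* if the field were u64, (u64)-50 would compare above everything */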
--
2.43.0