Message-ID: <20250929092221.10947-13-yurand2000@gmail.com>
Date: Mon, 29 Sep 2025 11:22:09 +0200
From: Yuri Andriaccio <yurand2000@...il.com>
To: Ingo Molnar <mingo@...hat.com>,
Peter Zijlstra <peterz@...radead.org>,
Juri Lelli <juri.lelli@...hat.com>,
Vincent Guittot <vincent.guittot@...aro.org>,
Dietmar Eggemann <dietmar.eggemann@....com>,
Steven Rostedt <rostedt@...dmis.org>,
Ben Segall <bsegall@...gle.com>,
Mel Gorman <mgorman@...e.de>,
Valentin Schneider <vschneid@...hat.com>
Cc: linux-kernel@...r.kernel.org,
Luca Abeni <luca.abeni@...tannapisa.it>,
Yuri Andriaccio <yuri.andriaccio@...tannapisa.it>
Subject: [RFC PATCH v3 12/24] sched/rt: Update task event callbacks for HCBS scheduling
Update wakeup_preempt_rt, switched_{from/to}_rt and prio_changed_rt with
the rt-cgroup specific preemption rules.
Add checks on whether an rt-task can be attached to an rt-cgroup.
Update task_is_throttled_rt for SCHED_CORE.
Co-developed-by: Alessio Balsini <a.balsini@...up.it>
Signed-off-by: Alessio Balsini <a.balsini@...up.it>
Co-developed-by: Andrea Parri <parri.andrea@...il.com>
Signed-off-by: Andrea Parri <parri.andrea@...il.com>
Co-developed-by: luca abeni <luca.abeni@...tannapisa.it>
Signed-off-by: luca abeni <luca.abeni@...tannapisa.it>
Signed-off-by: Yuri Andriaccio <yurand2000@...il.com>
---
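Note for reviewers: below is a minimal user-space sketch of the wakeup
preemption rules introduced in wakeup_preempt_rt(). The struct layouts and
the earlier-deadline helper are illustrative assumptions rather than kernel
APIs, and the global-runqueue path is reduced to a plain priority comparison
(the real code also handles RR timeslices and rq->donor).

#include <stdbool.h>
#include <stdio.h>

/* stand-in for a group's dl_server: only its absolute deadline matters here */
struct group { long long deadline; };
/* grp == NULL means the task runs on the global runqueue */
struct task { int prio; struct group *grp; };

/* simplified stand-in for dl_entity_preempt(): earlier deadline wins */
static bool dl_preempts(const struct group *a, const struct group *b)
{
	return a->deadline < b->deadline;
}

/* returns true when the woken task should trigger a reschedule of curr */
static bool should_resched(const struct task *waking, const struct task *curr)
{
	if (waking->grp && curr->grp) {
		if (waking->grp == curr->grp)	/* same cgroup: compare prios */
			return waking->prio < curr->prio;
		return dl_preempts(waking->grp, curr->grp); /* different cgroups */
	}
	if (waking->grp)	/* only the woken task is grouped: reschedule */
		return true;
	if (curr->grp)		/* only curr is grouped: do nothing */
		return false;
	return waking->prio < curr->prio;	/* both on the global runqueue */
}

int main(void)
{
	struct group g1 = { .deadline = 100 }, g2 = { .deadline = 200 };
	struct task curr  = { .prio = 50, .grp = &g2 };
	struct task woken = { .prio = 80, .grp = &g1 };

	/* different cgroups: g1 has the earlier deadline, so reschedule */
	printf("%d\n", should_resched(&woken, &curr));	/* prints 1 */
	return 0;
}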
kernel/sched/core.c | 2 +-
kernel/sched/rt.c | 88 ++++++++++++++++++++++++++++++++++++++---
kernel/sched/syscalls.c | 13 ++++++
3 files changed, 96 insertions(+), 7 deletions(-)
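A second small sketch, this time of the admission rule enforced by
sched_rt_can_attach() and reused from __sched_setscheduler(): an RT policy
(or an attach) is refused while the destination group has no runtime
reserved. The group/task structs and the set_rt_policy() helper are
hypothetical, purely for illustration.

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

struct group { long long dl_runtime; };	/* runtime reserved for the group, ns */
struct task { struct group *grp; };

static bool group_sched_enabled = true;	/* models rt_group_sched_enabled() */

/* mirrors sched_rt_can_attach(): no reserved runtime, no RT tasks */
static bool rt_can_attach(const struct group *g)
{
	return !group_sched_enabled || g->dl_runtime != 0;
}

/* simplified admission step of __sched_setscheduler() for an RT policy */
static int set_rt_policy(const struct task *t)
{
	return rt_can_attach(t->grp) ? 0 : -EPERM;
}

int main(void)
{
	struct group idle = { .dl_runtime = 0 };
	struct group funded = { .dl_runtime = 10000000 };	/* 10 ms */
	struct task a = { .grp = &idle }, b = { .grp = &funded };

	printf("no runtime reserved: %d\n", set_rt_policy(&a));	/* -EPERM */
	printf("10ms runtime reserved: %d\n", set_rt_policy(&b));	/* 0 */
	return 0;
}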
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e5b4facee24..2cfbe3b7b17 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -9346,7 +9346,7 @@ static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
goto scx_check;
cgroup_taskset_for_each(task, css, tset) {
- if (!sched_rt_can_attach(css_tg(css), task))
+ if (rt_task(task) && !sched_rt_can_attach(css_tg(css), task))
return -EINVAL;
}
scx_check:
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index d9442f64c6b..ce114823fe7 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -946,6 +946,50 @@ static void wakeup_preempt_rt(struct rq *rq, struct task_struct *p, int flags)
{
struct task_struct *donor = rq->donor;
+ if (!rt_group_sched_enabled())
+ goto no_group_sched;
+
+ /*
+ * Preemption checks differ depending on whether the waking task and
+ * the current task run on the global runqueue or inside a cgroup.
+ * The following rules apply:
+ * - dl-tasks (and equally dl_servers) always preempt FIFO/RR tasks.
+ * - if curr is inside a cgroup (i.e. it is run by a dl_server) and
+ * the waking task is not, do nothing.
+ * - if the waking task is in a cgroup but curr is not, always reschedule.
+ * - if both are on the global runqueue, run the standard code.
+ * - if both are in the same cgroup, compare the tasks' priorities.
+ * - if both are in a cgroup, but not the same one, check whether the
+ * woken task's dl_server preempts the current task's dl_server.
+ */
+ if (is_dl_group(rt_rq_of_se(&p->rt)) &&
+ is_dl_group(rt_rq_of_se(&rq->curr->rt))) {
+ struct sched_dl_entity *woken_dl_se, *curr_dl_se;
+
+ woken_dl_se = dl_group_of(rt_rq_of_se(&p->rt));
+ curr_dl_se = dl_group_of(rt_rq_of_se(&rq->curr->rt));
+
+ if (rt_rq_of_se(&p->rt)->tg == rt_rq_of_se(&rq->curr->rt)->tg) {
+ if (p->prio < rq->curr->prio)
+ resched_curr(rq);
+
+ return;
+ }
+
+ if (dl_entity_preempt(woken_dl_se, curr_dl_se))
+ resched_curr(rq);
+
+ return;
+
+ } else if (is_dl_group(rt_rq_of_se(&p->rt))) {
+ resched_curr(rq);
+ return;
+
+ } else if (is_dl_group(rt_rq_of_se(&rq->curr->rt))) {
+ return;
+ }
+
+no_group_sched:
if (p->prio < donor->prio) {
resched_curr(rq);
return;
@@ -1705,6 +1749,8 @@ static void rq_offline_rt(struct rq *rq)
*/
static void switched_from_rt(struct rq *rq, struct task_struct *p)
{
+ struct rt_rq *rt_rq = rt_rq_of_se(&p->rt);
+
/*
* If there are other RT tasks then we will reschedule
* and the scheduling of the other RT tasks will handle
@@ -1712,10 +1758,11 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p)
* we may need to handle the pulling of RT tasks
* now.
*/
- if (!task_on_rq_queued(p) || rq->rt.rt_nr_running)
+ if (!task_on_rq_queued(p) || rt_rq->rt_nr_running)
return;
- rt_queue_pull_task(rt_rq_of_se(&p->rt));
+ if (!IS_ENABLED(CONFIG_RT_GROUP_SCHED))
+ rt_queue_pull_task(rt_rq);
}
void __init init_sched_rt_class(void)
@@ -1750,8 +1797,14 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p)
* then see if we can move to another run queue.
*/
if (task_on_rq_queued(p)) {
+
+#ifndef CONFIG_RT_GROUP_SCHED
if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
rt_queue_push_tasks(rt_rq_of_se(&p->rt));
+#else
+ if (!rt_rq_of_se(&p->rt)->overloaded && p->prio < rq->curr->prio)
+ resched_curr(rq);
+#endif
if (p->prio < rq->donor->prio && cpu_online(cpu_of(rq)))
resched_curr(rq);
}
@@ -1764,6 +1820,8 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p)
static void
prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
{
+ struct rt_rq *rt_rq = rt_rq_of_se(&p->rt);
+
if (!task_on_rq_queued(p))
return;
@@ -1772,16 +1830,25 @@ prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
* If our priority decreases while running, we
* may need to pull tasks to this runqueue.
*/
- if (oldprio < p->prio)
- rt_queue_pull_task(rt_rq_of_se(&p->rt));
+ if (!IS_ENABLED(CONFIG_RT_GROUP_SCHED) && oldprio < p->prio)
+ rt_queue_pull_task(rt_rq);
/*
* If there's a higher priority task waiting to run
* then reschedule.
*/
- if (p->prio > rq->rt.highest_prio.curr)
+ if (p->prio > rt_rq->highest_prio.curr)
resched_curr(rq);
} else {
+ /*
+ * This task is not running, so check it against the currently
+ * running task for preemption. It can preempt only if both tasks
+ * are in the same cgroup or both are on the global runqueue.
+ */
+ if (IS_ENABLED(CONFIG_RT_GROUP_SCHED) &&
+ rt_rq_of_se(&p->rt)->tg != rt_rq_of_se(&rq->curr->rt)->tg)
+ return;
+
/*
* This task is not running, but if it is
* greater than the current running task
@@ -1876,7 +1943,16 @@ static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
#ifdef CONFIG_SCHED_CORE
static int task_is_throttled_rt(struct task_struct *p, int cpu)
{
+#ifdef CONFIG_RT_GROUP_SCHED
+ struct rt_rq *rt_rq;
+
+ rt_rq = task_group(p)->rt_rq[cpu];
+ WARN_ON(!rt_group_sched_enabled() && rt_rq->tg != &root_task_group);
+
+ return dl_group_of(rt_rq)->dl_throttled;
+#else
return 0;
+#endif
}
#endif /* CONFIG_SCHED_CORE */
@@ -2131,7 +2207,7 @@ static int sched_rt_global_constraints(void)
int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
{
/* Don't accept real-time tasks when there is no way for them to run */
- if (rt_group_sched_enabled() && rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
+ if (rt_group_sched_enabled() && tg->dl_bandwidth.dl_runtime == 0)
return 0;
return 1;
diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c
index 93a9c03b28e..71f20be6f29 100644
--- a/kernel/sched/syscalls.c
+++ b/kernel/sched/syscalls.c
@@ -626,6 +626,19 @@ int __sched_setscheduler(struct task_struct *p,
change:
if (user) {
+#ifdef CONFIG_RT_GROUP_SCHED
+ /*
+ * Do not allow real-time tasks into groups that have no runtime
+ * assigned.
+ */
+ if (rt_group_sched_enabled() &&
+ dl_bandwidth_enabled() && rt_policy(policy) &&
+ !sched_rt_can_attach(task_group(p), p) &&
+ !task_group_is_autogroup(task_group(p))) {
+ retval = -EPERM;
+ goto unlock;
+ }
+#endif
if (dl_bandwidth_enabled() && dl_policy(policy) &&
!(attr->sched_flags & SCHED_FLAG_SUGOV)) {
cpumask_t *span = rq->rd->span;
--
2.51.0