[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAO7JXPhWvLaaGqCGUZ_YCuja2T1ciWZoUnsUDnNPQ2b4yDB2Jw@mail.gmail.com>
Date: Fri, 21 Jun 2024 10:41:35 -0400
From: Vineeth Remanan Pillai <vineeth@...byteword.org>
To: Daniel Bristot de Oliveira <bristot@...nel.org>
Cc: Ingo Molnar <mingo@...hat.com>, Peter Zijlstra <peterz@...radead.org>,
Juri Lelli <juri.lelli@...hat.com>, Vincent Guittot <vincent.guittot@...aro.org>,
Dietmar Eggemann <dietmar.eggemann@....com>, Steven Rostedt <rostedt@...dmis.org>,
Ben Segall <bsegall@...gle.com>, Mel Gorman <mgorman@...e.de>,
Daniel Bristot de Oliveira <bristot@...hat.com>, Valentin Schneider <vschneid@...hat.com>, linux-kernel@...r.kernel.org,
Luca Abeni <luca.abeni@...tannapisa.it>,
Tommaso Cucinotta <tommaso.cucinotta@...tannapisa.it>, Thomas Gleixner <tglx@...utronix.de>,
Joel Fernandes <joel@...lfernandes.org>, Shuah Khan <skhan@...uxfoundation.org>,
Phil Auld <pauld@...hat.com>, Suleiman Souhlal <suleiman@...gle.com>,
Youssef Esmat <youssefesmat@...gle.com>
Subject: Re: [PATCH V7 0/9] SCHED_DEADLINE server infrastructure
Hi Daniel,
>
> This is v7 of Peter's SCHED_DEADLINE server infrastructure
> implementation [1].
>
Thanks for the v7 :-)
Sorry that I could not get to reviewing and testing this revision. In
v6 we had experienced a minor bug where suspend/resume had issues with
dlserver. Since suspend does not do dequeue, dlserver is not stopped
and this causes the premature wakeups. I haven't looked at v7 in
detail, but I think the issue might still be present. We have a
workaround patch for this in our 5.15 kernel based on v5 which I am
attaching for reference. This might not apply cleanly on v7 and
possibly not be the best solution, but thought of sharing it to give
an insight about the issue.
Thanks,
Vineeth
Attached patch
-----------------------
Subject: [PATCH] sched/dlserver: Freeze dlserver on system suspend.
dlserver is stopped only if a dequeue or cfs rq throttle results in no
runnable cfs tasks. But this doesn't happen during a system suspend and
can cause the dl server to stay active and break suspend/resume.
Freeze the dlserver on system suspend. Freezing is stopping the
dlserver, but maintaining the dl_server_active state so as to not
confuse the enqueue/dequeue path.
Signed-off-by: Vineeth Pillai (Google) <vineeth@...byteword.org>
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/5528103
Reviewed-by: Suleiman Souhlal <suleiman@...gle.com>
Tested-by: Vineeth Pillai <vineethrp@...gle.com>
Commit-Queue: Vineeth Pillai <vineethrp@...gle.com>
---
include/linux/sched.h | 27 +++++++++++++
kernel/power/suspend.c | 3 ++
kernel/sched/deadline.c | 87 +++++++++++++++++++++++++++++++++++++----
3 files changed, 110 insertions(+), 7 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 123ef7804d95..23beff5e48a2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -650,6 +650,15 @@ struct sched_dl_entity {
unsigned int dl_defer : 1;
unsigned int dl_defer_armed : 1;
unsigned int dl_server_active : 1;
+ /*
+ * dl_server is marked as frozen when the system suspends. Frozen
+ * means that dl_server is stopped, but the dl_server_active state
+ * is maintained so that the enqueue/dequeue path is not confused.
+ * We need this separate state other than dl_server_active because
+ * suspend doesn't dequeue the tasks and hence does not stop the
+ * dl_server during suspend. And this may lead to spurious resumes.
+ */
+ unsigned int dl_server_frozen : 1;
/*
* Bandwidth enforcement timer. Each -deadline task has its
@@ -690,6 +699,24 @@ struct sched_dl_entity {
#endif
};
+/*
+ * Power management related actions for dl_server
+ */
+enum dl_server_pm_action {
+ dl_server_pm_freeze = 0,
+ dl_server_pm_thaw = 1
+};
+extern void freeze_thaw_dl_server(enum dl_server_pm_action action);
+static inline void freeze_dl_server(void)
+{
+ freeze_thaw_dl_server(dl_server_pm_freeze);
+}
+static inline void thaw_dl_server(void)
+{
+ freeze_thaw_dl_server(dl_server_pm_thaw);
+}
+
+
#ifdef CONFIG_UCLAMP_TASK
/* Number of utilization clamp buckets (shorter alias) */
#define UCLAMP_BUCKETS CONFIG_UCLAMP_BUCKETS_COUNT
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 55235bf52c7e..a6d5f8f3072e 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -592,6 +592,8 @@ static int enter_state(suspend_state_t state)
if (error)
goto Unlock;
+ freeze_dl_server();
+
if (suspend_test(TEST_FREEZER))
goto Finish;
@@ -602,6 +604,7 @@ static int enter_state(suspend_state_t state)
pm_restore_gfp_mask();
Finish:
+ thaw_dl_server();
events_check_enabled = false;
pm_pr_dbg("Finishing wakeup.\n");
suspend_finish();
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 5fc40caf20d7..f95a375af329 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1478,10 +1478,11 @@ static void update_curr_dl_se(struct rq *rq,
struct sched_dl_entity *dl_se, s64
void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec)
{
- update_curr_dl_se(dl_se->rq, dl_se, delta_exec);
+ if (!dl_se->dl_server_frozen)
+ update_curr_dl_se(dl_se->rq, dl_se, delta_exec);
}
-void dl_server_start(struct sched_dl_entity *dl_se)
+static inline void __dl_server_start(struct sched_dl_entity *dl_se)
{
/*
* XXX: the apply do not work fine at the init phase for the
@@ -1500,25 +1501,97 @@ void dl_server_start(struct sched_dl_entity *dl_se)
setup_new_dl_entity(dl_se);
}
+ enqueue_dl_entity(dl_se, ENQUEUE_WAKEUP);
+}
+
+void dl_server_start(struct sched_dl_entity *dl_se)
+{
+ if (dl_se->dl_server_frozen)
+ goto set_active;
+
if (WARN_ON_ONCE(dl_se->dl_server_active))
return;
- enqueue_dl_entity(dl_se, ENQUEUE_WAKEUP);
+ __dl_server_start(dl_se);
+
+set_active:
dl_se->dl_server_active = 1;
}
-void dl_server_stop(struct sched_dl_entity *dl_se)
+static inline void __dl_server_stop(struct sched_dl_entity *dl_se)
{
- if (WARN_ON_ONCE(!dl_se->dl_server_active))
- return;
-
dequeue_dl_entity(dl_se, DEQUEUE_SLEEP);
hrtimer_try_to_cancel(&dl_se->dl_timer);
dl_se->dl_defer_armed = 0;
dl_se->dl_throttled = 0;
+}
+
+void dl_server_stop(struct sched_dl_entity *dl_se)
+{
+ if (dl_se->dl_server_frozen)
+ goto reset_active;
+
+ if (WARN_ON_ONCE(!dl_se->dl_server_active))
+ return;
+
+ __dl_server_stop(dl_se);
+
+reset_active:
dl_se->dl_server_active = 0;
}
+void dl_server_freeze(struct sched_dl_entity *dl_se)
+{
+ if (dl_se->dl_server_active) {
+ update_rq_clock(dl_se->rq);
+ __dl_server_stop(dl_se);
+ }
+ dl_se->dl_server_frozen = 1;
+}
+
+void dl_server_thaw(struct sched_dl_entity *dl_se)
+{
+ if (dl_se->dl_server_active) {
+ update_rq_clock(dl_se->rq);
+ __dl_server_start(dl_se);
+ }
+ dl_se->dl_server_frozen = 0;
+}
+
+void freeze_thaw_dl_server(enum dl_server_pm_action action)
+{
+ int cpu;
+
+ cpus_read_lock();
+ for_each_online_cpu(cpu) {
+ struct rq_flags rf;
+ struct rq *rq = cpu_rq(cpu);
+ struct sched_dl_entity *dl_se;
+
+ sched_clock_tick();
+ rq_lock_irqsave(rq, &rf);
+ dl_se = &rq->fair_server;
+ switch (action) {
+ case dl_server_pm_freeze:
+ if (WARN_ON_ONCE(dl_se->dl_server_frozen))
+ break;
+
+ dl_server_freeze(dl_se);
+ break;
+ case dl_server_pm_thaw:
+ if (WARN_ON_ONCE(!dl_se->dl_server_frozen))
+ break;
+
+
+ dl_server_thaw(dl_se);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ }
+ rq_unlock_irqrestore(rq, &rf);
+ }
+ cpus_read_unlock();
+}
+
void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
dl_server_has_tasks_f has_tasks,
dl_server_pick_f pick_next,
--
2.45.2.741.gdbec12cfda-goog
Powered by blists - more mailing lists