lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAO7JXPhWvLaaGqCGUZ_YCuja2T1ciWZoUnsUDnNPQ2b4yDB2Jw@mail.gmail.com>
Date: Fri, 21 Jun 2024 10:41:35 -0400
From: Vineeth Remanan Pillai <vineeth@...byteword.org>
To: Daniel Bristot de Oliveira <bristot@...nel.org>
Cc: Ingo Molnar <mingo@...hat.com>, Peter Zijlstra <peterz@...radead.org>, 
	Juri Lelli <juri.lelli@...hat.com>, Vincent Guittot <vincent.guittot@...aro.org>, 
	Dietmar Eggemann <dietmar.eggemann@....com>, Steven Rostedt <rostedt@...dmis.org>, 
	Ben Segall <bsegall@...gle.com>, Mel Gorman <mgorman@...e.de>, 
	Daniel Bristot de Oliveira <bristot@...hat.com>, Valentin Schneider <vschneid@...hat.com>, linux-kernel@...r.kernel.org, 
	Luca Abeni <luca.abeni@...tannapisa.it>, 
	Tommaso Cucinotta <tommaso.cucinotta@...tannapisa.it>, Thomas Gleixner <tglx@...utronix.de>, 
	Joel Fernandes <joel@...lfernandes.org>, Shuah Khan <skhan@...uxfoundation.org>, 
	Phil Auld <pauld@...hat.com>, Suleiman Souhlal <suleiman@...gle.com>, 
	Youssef Esmat <youssefesmat@...gle.com>
Subject: Re: [PATCH V7 0/9] SCHED_DEADLINE server infrastructure

Hi Daniel,


>
> This is v7 of Peter's SCHED_DEADLINE server infrastructure
> implementation [1].
>
Thanks for the v7 :-)

Sorry that I could not get to reviewing and testing this revision. In
v6 we had experienced a minor bug where suspend/resume had issues with
dlserver. Since suspend does not do dequeue, dlserver is not stopped
and this causes the premature wakeups. I haven't looked at v7 in
detail, but I think the issue might still be present. We have a
workaround patch for this in our 5.15 kernel based on v5 which I am
attaching for reference. This might not apply cleanly on v7 and
possibly not be the best solution, but thought of sharing it to give
an insight about the issue.

Thanks,
Vineeth

Attached patch
-----------------------

Subject: [PATCH] sched/dlserver: Freeze dlserver on system suspend.

dlserver is stopped only if a dequeue or cfs rq throttle results in no
runnable cfs tasks. But this doesn't happen during a system suspend and
can cause the dl server to stay active and break suspend/resume.

Freeze the dlserver on system suspend. Freezing is stopping the
dlserver, but maintaining the dl_server_active state so as to not
confuse the enqueue/dequeue path.

Signed-off-by: Vineeth Pillai (Google) <vineeth@...byteword.org>
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/5528103
Reviewed-by: Suleiman Souhlal <suleiman@...gle.com>
Tested-by: Vineeth Pillai <vineethrp@...gle.com>
Commit-Queue: Vineeth Pillai <vineethrp@...gle.com>
---
 include/linux/sched.h   | 27 +++++++++++++
 kernel/power/suspend.c  |  3 ++
 kernel/sched/deadline.c | 87 +++++++++++++++++++++++++++++++++++++----
 3 files changed, 110 insertions(+), 7 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 123ef7804d95..23beff5e48a2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -650,6 +650,15 @@ struct sched_dl_entity {
        unsigned int                    dl_defer          : 1;
        unsigned int                    dl_defer_armed    : 1;
        unsigned int                    dl_server_active  : 1;
+       /*
+        * dl_server is marked as frozen when the system suspends. Frozen
+        * means that dl_server is stopped, but the dl_server_active state
+        * is maintained so that the enqueue/dequeue path is not confused.
+        * We need this separate state other than dl_server_active because
+        * suspend doesn't dequeue the tasks and hence does not stop the
+        * dl_server during suspend. And this may lead to spurious resumes.
+        */
+       unsigned int                    dl_server_frozen  : 1;

        /*
         * Bandwidth enforcement timer. Each -deadline task has its
@@ -690,6 +699,24 @@ struct sched_dl_entity {
 #endif
 };

+/*
+ * Power management related actions for dl_server
+ */
+enum dl_server_pm_action {
+       dl_server_pm_freeze = 0,
+       dl_server_pm_thaw = 1
+};
+extern void freeze_thaw_dl_server(enum dl_server_pm_action action);
+static inline void freeze_dl_server(void)
+{
+       freeze_thaw_dl_server(dl_server_pm_freeze);
+}
+static inline void thaw_dl_server(void)
+{
+       freeze_thaw_dl_server(dl_server_pm_thaw);
+}
+
+
 #ifdef CONFIG_UCLAMP_TASK
 /* Number of utilization clamp buckets (shorter alias) */
 #define UCLAMP_BUCKETS CONFIG_UCLAMP_BUCKETS_COUNT
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 55235bf52c7e..a6d5f8f3072e 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -592,6 +592,8 @@ static int enter_state(suspend_state_t state)
        if (error)
                goto Unlock;

+       freeze_dl_server();
+
        if (suspend_test(TEST_FREEZER))
                goto Finish;

@@ -602,6 +604,7 @@ static int enter_state(suspend_state_t state)
        pm_restore_gfp_mask();

  Finish:
+       thaw_dl_server();
        events_check_enabled = false;
        pm_pr_dbg("Finishing wakeup.\n");
        suspend_finish();
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 5fc40caf20d7..f95a375af329 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1478,10 +1478,11 @@ static void update_curr_dl_se(struct rq *rq,
struct sched_dl_entity *dl_se, s64

 void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec)
 {
-       update_curr_dl_se(dl_se->rq, dl_se, delta_exec);
+       if (!dl_se->dl_server_frozen)
+               update_curr_dl_se(dl_se->rq, dl_se, delta_exec);
 }

-void dl_server_start(struct sched_dl_entity *dl_se)
+static inline void __dl_server_start(struct sched_dl_entity *dl_se)
 {
        /*
         * XXX: the apply do not work fine at the init phase for the
@@ -1500,25 +1501,97 @@ void dl_server_start(struct sched_dl_entity *dl_se)
                setup_new_dl_entity(dl_se);
        }

+       enqueue_dl_entity(dl_se, ENQUEUE_WAKEUP);
+}
+
+void dl_server_start(struct sched_dl_entity *dl_se)
+{
+       if (dl_se->dl_server_frozen)
+               goto set_active;
+
        if (WARN_ON_ONCE(dl_se->dl_server_active))
                return;

-       enqueue_dl_entity(dl_se, ENQUEUE_WAKEUP);
+       __dl_server_start(dl_se);
+
+set_active:
        dl_se->dl_server_active = 1;
 }

-void dl_server_stop(struct sched_dl_entity *dl_se)
+static inline void __dl_server_stop(struct sched_dl_entity *dl_se)
 {
-       if (WARN_ON_ONCE(!dl_se->dl_server_active))
-               return;
-
        dequeue_dl_entity(dl_se, DEQUEUE_SLEEP);
        hrtimer_try_to_cancel(&dl_se->dl_timer);
        dl_se->dl_defer_armed = 0;
        dl_se->dl_throttled = 0;
+}
+
+void dl_server_stop(struct sched_dl_entity *dl_se)
+{
+       if (dl_se->dl_server_frozen)
+               goto reset_active;
+
+       if (WARN_ON_ONCE(!dl_se->dl_server_active))
+               return;
+
+       __dl_server_stop(dl_se);
+
+reset_active:
        dl_se->dl_server_active = 0;
 }

+void dl_server_freeze(struct sched_dl_entity *dl_se)
+{
+       if (dl_se->dl_server_active) {
+               update_rq_clock(dl_se->rq);
+               __dl_server_stop(dl_se);
+       }
+       dl_se->dl_server_frozen = 1;
+}
+
+void dl_server_thaw(struct sched_dl_entity *dl_se)
+{
+       if (dl_se->dl_server_active) {
+               update_rq_clock(dl_se->rq);
+               __dl_server_start(dl_se);
+       }
+       dl_se->dl_server_frozen = 0;
+}
+
+void freeze_thaw_dl_server(enum dl_server_pm_action action)
+{
+       int cpu;
+
+       cpus_read_lock();
+       for_each_online_cpu(cpu) {
+               struct rq_flags rf;
+               struct rq *rq = cpu_rq(cpu);
+               struct sched_dl_entity *dl_se;
+
+               sched_clock_tick();
+               rq_lock_irqsave(rq, &rf);
+               dl_se = &rq->fair_server;
+               switch (action) {
+               case dl_server_pm_freeze:
+                       if (WARN_ON_ONCE(dl_se->dl_server_frozen))
+                               break;
+
+                       dl_server_freeze(dl_se);
+                       break;
+               case dl_server_pm_thaw:
+                       if (WARN_ON_ONCE(!dl_se->dl_server_frozen))
+                               break;
+
+
+                       dl_server_thaw(dl_se);
+                       break;
+               default:
+                       WARN_ON_ONCE(1);
+               }
+               rq_unlock_irqrestore(rq, &rf);
+       }
+       cpus_read_unlock();
+}
+
 void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
                    dl_server_has_tasks_f has_tasks,
                    dl_server_pick_f pick_next,
-- 
2.45.2.741.gdbec12cfda-goog

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ