Message-Id: <20230210140917.279062-4-frederic@kernel.org>
Date: Fri, 10 Feb 2023 15:09:14 +0100
From: Frederic Weisbecker <frederic@...nel.org>
To: LKML <linux-kernel@...r.kernel.org>
Cc: Frederic Weisbecker <frederic@...nel.org>,
Alexey Dobriyan <adobriyan@...il.com>,
Peter Zijlstra <peterz@...radead.org>,
Wei Li <liwei391@...wei.com>,
Mirsad Goran Todorovac <mirsad.todorovac@....unizg.hr>,
Thomas Gleixner <tglx@...utronix.de>,
Yu Liao <liaoyu15@...wei.com>, Hillf Danton <hdanton@...a.com>,
Ingo Molnar <mingo@...nel.org>
Subject: [PATCH 3/6] timers/nohz: Protect idle/iowait sleep time under seqcount

Reading idle/io sleep time (eg: from /proc/stat) can race with idle exit
updates because the state machine handling the stats is not atomic: readers
need a coherent snapshot of several fields (idle_active, idle_entrytime and
the idle/iowait sleeptime accumulators) that are updated together.

As a result, reading the sleep time may report inconsistent or even backward
values, eg: two successive reads of /proc/stat showing a CPU's idle time
decrease.

Fix this by protecting the simple state machine within a seqcount. This is
expected to be cheap enough not to add a measurable performance impact on
the idle path.
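
To illustrate, this is the usual seqcount write/read pairing (a minimal
sketch using the <linux/seqlock.h> API, not the exact patched functions;
field names match the patch below):

	/* Idle exit (writer), runs locally on the idle CPU */
	write_seqcount_begin(&ts->idle_sleeptime_seq);
	ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
	ts->idle_active = 0;
	write_seqcount_end(&ts->idle_sleeptime_seq);

	/* Sleep time reader (eg: /proc/stat), possibly remote; it retries
	 * until it gets a snapshot that didn't overlap a write.
	 */
	do {
		seq = read_seqcount_begin(&ts->idle_sleeptime_seq);
		idle = ts->idle_sleeptime;
	} while (read_seqcount_retry(&ts->idle_sleeptime_seq, seq));

The reader never blocks the idle path, it only loops if it raced with a
writer.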

Note this only partially fixes the reader vs writer race: a race remains
that involves remote updates of the CPU iowait task counter, which is
decremented from other CPUs outside the scope of this seqcount. It can
hardly be fixed.
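
For instance (hypothetical interleaving, for illustration only):

	READER (any CPU)                  CPU X
	----------------                  -----
	                                  idle; a task blocked on IO from X,
	                                  so nr_iowait_cpu(X) > 0
	reads iowait as
	iowait_sleeptime + delta
	                                  the blocked task wakes up on another
	                                  CPU: nr_iowait_cpu(X) drops to 0
	                                  idle exit: the whole delta is added
	                                  to idle_sleeptime

The delta previously reported as iowait never reaches iowait_sleeptime, so a
later read can see iowait going backward. The seqcount can not order the
remote nr_iowait decrement against this CPU's accounting.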
Reported-by: Yu Liao <liaoyu15@...wei.com>
Cc: Hillf Danton <hdanton@...a.com>
Cc: Yu Liao <liaoyu15@...wei.com>
Cc: Ingo Molnar <mingo@...nel.org>
Cc: Thomas Gleixner <tglx@...utronix.de>
Cc: Wei Li <liwei391@...wei.com>
Cc: Alexey Dobriyan <adobriyan@...il.com>
Cc: Mirsad Goran Todorovac <mirsad.todorovac@....unizg.hr>
Cc: Peter Zijlstra <peterz@...radead.org>
Signed-off-by: Frederic Weisbecker <frederic@...nel.org>
---
 kernel/time/tick-sched.c | 22 ++++++++++++++++------
 kernel/time/tick-sched.h |  1 +
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index db22342f8948..757e03ef4409 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -646,6 +646,7 @@ static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
 
 	delta = ktime_sub(now, ts->idle_entrytime);
 
+	write_seqcount_begin(&ts->idle_sleeptime_seq);
 	if (nr_iowait_cpu(smp_processor_id()) > 0)
 		ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta);
 	else
@@ -653,14 +654,18 @@ static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
 
 	ts->idle_entrytime = now;
 	ts->idle_active = 0;
+	write_seqcount_end(&ts->idle_sleeptime_seq);
 
 	sched_clock_idle_wakeup_event();
 }
 
 static void tick_nohz_start_idle(struct tick_sched *ts)
 {
+	write_seqcount_begin(&ts->idle_sleeptime_seq);
 	ts->idle_entrytime = ktime_get();
 	ts->idle_active = 1;
+	write_seqcount_end(&ts->idle_sleeptime_seq);
+
 	sched_clock_idle_sleep_event();
 }
 
@@ -669,6 +674,7 @@ static u64 get_cpu_sleep_time_us(int cpu, ktime_t *sleeptime,
 {
 	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
 	ktime_t now, idle;
+	unsigned int seq;
 
 	if (!tick_nohz_active)
 		return -1;
@@ -677,13 +683,17 @@ static u64 get_cpu_sleep_time_us(int cpu, ktime_t *sleeptime,
 	if (last_update_time)
 		*last_update_time = ktime_to_us(now);
 
-	if (ts->idle_active && !nr_iowait_cpu(cpu)) {
-		ktime_t delta = ktime_sub(now, ts->idle_entrytime);
+	do {
+		seq = read_seqcount_begin(&ts->idle_sleeptime_seq);
 
-		idle = ktime_add(*sleeptime, delta);
-	} else {
-		idle = *sleeptime;
-	}
+		if (ts->idle_active && !nr_iowait_cpu(cpu)) {
+			ktime_t delta = ktime_sub(now, ts->idle_entrytime);
+
+			idle = ktime_add(*sleeptime, delta);
+		} else {
+			idle = *sleeptime;
+		}
+	} while (read_seqcount_retry(&ts->idle_sleeptime_seq, seq));
 
 	return ktime_to_us(idle);
 }
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index c6663254d17d..5ed5a9d41d5a 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -75,6 +75,7 @@ struct tick_sched {
 	ktime_t				idle_waketime;
 
 	/* Idle entry */
+	seqcount_t			idle_sleeptime_seq;
 	ktime_t				idle_entrytime;
 
 	/* Tick stop */
--
2.34.1