[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <370700e7-6e93-d1ad-1215-81caccd50b5d@alu.unizg.hr>
Date: Tue, 7 Feb 2023 06:25:12 +0100
From: Mirsad Goran Todorovac <mirsad.todorovac@....unizg.hr>
To: Yu Liao <liaoyu15@...wei.com>, fweisbec@...il.com,
tglx@...utronix.de, mingo@...nel.org
Cc: liwei391@...wei.com, adobriyan@...il.com,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH RFC] tick/nohz: fix data races in get_cpu_idle_time_us()
Hi, Yu Liao,
On 28. 01. 2023. 03:00, Yu Liao wrote:
> selftest/proc/proc-uptime-001 complains:
> Euler:/mnt # while true; do ./proc-uptime-001; done
> proc-uptime-001: proc-uptime-001.c:41: main: Assertion `i1 >= i0' failed.
> proc-uptime-001: proc-uptime-001.c:41: main: Assertion `i1 >= i0' failed.
>
> /proc/uptime should be monotonically increasing. This occurs because of
> data races between get_cpu_idle_time_us() and
> tick_nohz_stop_idle()/tick_nohz_start_idle(), for example:
>
> CPU0 CPU1
> get_cpu_idle_time_us
>
> tick_nohz_idle_exit
> now = ktime_get();
> tick_nohz_stop_idle
> update_ts_time_stats
> delta = ktime_sub(now, ts->idle_entrytime);
> ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta)
> ts->idle_entrytime = now
>
> now = ktime_get();
> if (ts->idle_active && !nr_iowait_cpu(cpu)) {
> ktime_t delta = ktime_sub(now, ts->idle_entrytime);
> idle = ktime_add(ts->idle_sleeptime, delta);
> //idle is slightly greater than the actual value
> } else {
> idle = ts->idle_sleeptime;
> }
> ts->idle_active = 0
>
> After this, idle = idle_sleeptime(actual idle value) + now(CPU0) - now(CPU1).
> If get_cpu_idle_time_us() is called immediately after ts->idle_active = 0,
> only ts->idle_sleeptime is returned, which is smaller than the previously
> read one, resulting in a non-monotonically increasing idle time. In
> addition, there are other data race scenarios not listed here.
>
> This patch introduces a lock to prevent data races.
>
> Fixes: a130e8fbc7de ("fs/proc/uptime.c: Fix idle time reporting in /proc/uptime")
> Signed-off-by: Yu Liao <liaoyu15@...wei.com>
> ---
> kernel/time/tick-sched.c | 15 ++++++++++++++-
> kernel/time/tick-sched.h | 1 +
> 2 files changed, 15 insertions(+), 1 deletion(-)
>
> diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
> index b0e3c9205946..ad7d47098a58 100644
> --- a/kernel/time/tick-sched.c
> +++ b/kernel/time/tick-sched.c
> @@ -36,7 +36,9 @@
> /*
> * Per-CPU nohz control structure
> */
> -static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
> +static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched) = {
> + .idle_time_lock = __SPIN_LOCK_UNLOCKED(tick_cpu_sched.idle_time_lock),
> +};
>
> struct tick_sched *tick_get_tick_sched(int cpu)
> {
> @@ -661,16 +663,24 @@ update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_upda
>
> static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
> {
> + unsigned long flags;
> +
> + spin_lock_irqsave(&ts->idle_time_lock, flags);
> update_ts_time_stats(smp_processor_id(), ts, now, NULL);
> ts->idle_active = 0;
> + spin_unlock_irqrestore(&ts->idle_time_lock, flags);
>
> sched_clock_idle_wakeup_event();
> }
>
> static void tick_nohz_start_idle(struct tick_sched *ts)
> {
> + unsigned long flags;
> +
> + spin_lock_irqsave(&ts->idle_time_lock, flags);
> ts->idle_entrytime = ktime_get();
> ts->idle_active = 1;
> + spin_unlock_irqrestore(&ts->idle_time_lock, flags);
> sched_clock_idle_sleep_event();
> }
>
> @@ -691,12 +701,14 @@ static void tick_nohz_start_idle(struct tick_sched *ts)
> u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
> {
> struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
> + unsigned long flags;
> ktime_t now, idle;
>
> if (!tick_nohz_active)
> return -1;
>
> now = ktime_get();
> + spin_lock_irqsave(&ts->idle_time_lock, flags);
> if (last_update_time) {
> update_ts_time_stats(cpu, ts, now, last_update_time);
> idle = ts->idle_sleeptime;
> @@ -709,6 +721,7 @@ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
> idle = ts->idle_sleeptime;
> }
> }
> + spin_unlock_irqrestore(&ts->idle_time_lock, flags);
>
> return ktime_to_us(idle);
>
> diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
> index 504649513399..a64d4781e7af 100644
> --- a/kernel/time/tick-sched.h
> +++ b/kernel/time/tick-sched.h
> @@ -81,6 +81,7 @@ struct tick_sched {
> atomic_t tick_dep_mask;
> unsigned long last_tick_jiffies;
> unsigned int stalled_jiffies;
> + spinlock_t idle_time_lock;
> };
>
> extern struct tick_sched *tick_get_tick_sched(int cpu);
Thank you for the RFC patch.
I can confirm that v6.2-rc7 still exhibits the issue:
marvin@...vin-IdeaPad-3-15ITL6:~/linux/kernel/linux_torvalds/tools/testing/selftests/proc$ while true; do ./proc-uptime-001; done
proc-uptime-001: proc-uptime-001.c:39: main: Assertion `i1 >= i0' failed.
Aborted (core dumped)
proc-uptime-001: proc-uptime-001.c:39: main: Assertion `i1 >= i0' failed.
Aborted (core dumped)
proc-uptime-001: proc-uptime-001.c:39: main: Assertion `i1 >= i0' failed.
Aborted (core dumped)
^C
marvin@...vin-IdeaPad-3-15ITL6:~/linux/kernel/linux_torvalds/tools/testing/selftests/proc$ uname -rms
Linux 6.2.0-rc7-mglru-kmemlk x86_64
marvin@...vin-IdeaPad-3-15ITL6:~/linux/kernel/linux_torvalds/tools/testing/selftests/proc$
Your patch, however, apparently fixes the problem.
Rebooting with the patched kernel eliminates the assertion failure above, on the same hardware platform.
All the programs in tools/testing/selftests/proc now appear to pass:
make[2]: Entering directory '/home/marvin/linux/kernel/linux_torvalds/tools/testing/selftests/proc'
TAP version 13
1..21
# selftests: proc: fd-001-lookup
ok 1 selftests: proc: fd-001-lookup
# selftests: proc: fd-002-posix-eq
ok 2 selftests: proc: fd-002-posix-eq
# selftests: proc: fd-003-kthread
ok 3 selftests: proc: fd-003-kthread
# selftests: proc: proc-loadavg-001
ok 4 selftests: proc: proc-loadavg-001
# selftests: proc: proc-empty-vm
ok 5 selftests: proc: proc-empty-vm
# selftests: proc: proc-pid-vm
ok 6 selftests: proc: proc-pid-vm
# selftests: proc: proc-self-map-files-001
ok 7 selftests: proc: proc-self-map-files-001
# selftests: proc: proc-self-map-files-002
ok 8 selftests: proc: proc-self-map-files-002
# selftests: proc: proc-self-syscall
ok 9 selftests: proc: proc-self-syscall
# selftests: proc: proc-self-wchan
ok 10 selftests: proc: proc-self-wchan
# selftests: proc: proc-subset-pid
ok 11 selftests: proc: proc-subset-pid
# selftests: proc: proc-tid0
ok 12 selftests: proc: proc-tid0
# selftests: proc: proc-uptime-001
ok 13 selftests: proc: proc-uptime-001
# selftests: proc: proc-uptime-002
ok 14 selftests: proc: proc-uptime-002
# selftests: proc: read
ok 15 selftests: proc: read
# selftests: proc: self
ok 16 selftests: proc: self
# selftests: proc: setns-dcache
ok 17 selftests: proc: setns-dcache
# selftests: proc: setns-sysvipc
ok 18 selftests: proc: setns-sysvipc
# selftests: proc: thread-self
ok 19 selftests: proc: thread-self
# selftests: proc: proc-multiple-procfs
ok 20 selftests: proc: proc-multiple-procfs
# selftests: proc: proc-fsconfig-hidepid
ok 21 selftests: proc: proc-fsconfig-hidepid
make[2]: Leaving directory '/home/marvin/linux/kernel/linux_torvalds/tools/testing/selftests/proc'
What was applied was:
# git diff
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index b0e3c9205946..ad7d47098a58 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -36,7 +36,9 @@
/*
* Per-CPU nohz control structure
*/
-static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
+static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched) = {
+ .idle_time_lock = __SPIN_LOCK_UNLOCKED(tick_cpu_sched.idle_time_lock),
+};
struct tick_sched *tick_get_tick_sched(int cpu)
{
@@ -661,16 +663,24 @@ update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_upda
static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ts->idle_time_lock, flags);
update_ts_time_stats(smp_processor_id(), ts, now, NULL);
ts->idle_active = 0;
+ spin_unlock_irqrestore(&ts->idle_time_lock, flags);
sched_clock_idle_wakeup_event();
}
static void tick_nohz_start_idle(struct tick_sched *ts)
{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ts->idle_time_lock, flags);
ts->idle_entrytime = ktime_get();
ts->idle_active = 1;
+ spin_unlock_irqrestore(&ts->idle_time_lock, flags);
sched_clock_idle_sleep_event();
}
@@ -691,12 +701,14 @@ static void tick_nohz_start_idle(struct tick_sched *ts)
u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
{
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+ unsigned long flags;
ktime_t now, idle;
if (!tick_nohz_active)
return -1;
now = ktime_get();
+ spin_lock_irqsave(&ts->idle_time_lock, flags);
if (last_update_time) {
update_ts_time_stats(cpu, ts, now, last_update_time);
idle = ts->idle_sleeptime;
@@ -709,6 +721,7 @@ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
idle = ts->idle_sleeptime;
}
}
+ spin_unlock_irqrestore(&ts->idle_time_lock, flags);
return ktime_to_us(idle);
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index 504649513399..a64d4781e7af 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -81,6 +81,7 @@ struct tick_sched {
atomic_t tick_dep_mask;
unsigned long last_tick_jiffies;
unsigned int stalled_jiffies;
+ spinlock_t idle_time_lock;
};
extern struct tick_sched *tick_get_tick_sched(int cpu);
Feel free to add: Tested-by: Mirsad Goran Todorovac <mirsad.todorovac@....unizg.hr>
Regards,
Mirsad
--
Mirsad Goran Todorovac
Sistem inženjer
Grafički fakultet | Akademija likovnih umjetnosti
Sveučilište u Zagrebu
System engineer
Faculty of Graphic Arts | Academy of Fine Arts
University of Zagreb, Republic of Croatia
The European Union
Powered by blists - more mailing lists