Message-ID: <20250128063301.3879317-2-jstultz@google.com>
Date: Mon, 27 Jan 2025 22:32:53 -0800
From: John Stultz <jstultz@...gle.com>
To: LKML <linux-kernel@...r.kernel.org>
Cc: John Stultz <jstultz@...gle.com>, Anna-Maria Behnsen <anna-maria@...utronix.de>,
Frederic Weisbecker <frederic@...nel.org>, Ingo Molnar <mingo@...nel.org>,
Thomas Gleixner <tglx@...utronix.de>, Peter Zijlstra <peterz@...radead.org>,
Juri Lelli <juri.lelli@...hat.com>, Vincent Guittot <vincent.guittot@...aro.org>,
Dietmar Eggemann <dietmar.eggemann@....com>, Steven Rostedt <rostedt@...dmis.org>,
Ben Segall <bsegall@...gle.com>, Mel Gorman <mgorman@...e.de>,
Valentin Schneider <vschneid@...hat.com>, Stephen Boyd <sboyd@...nel.org>,
Yury Norov <yury.norov@...il.com>, Bitao Hu <yaoma@...ux.alibaba.com>,
Andrew Morton <akpm@...ux-foundation.org>, kernel-team@...roid.com
Subject: [RFC][PATCH 1/3] time/tick: Pipe tick count down through cputime accounting

In working up the dynHZ patch, I found that skipping ticks
would result in large latencies for itimers.

As I dug into it, I realized there is still some logic that
assumes we never miss ticks, resulting in late expiration of
cputime timers.

So this patch pipes the actual number of elapsed ticks down
through the cputime accounting.
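
As a rough illustration (assuming HZ=1000, so TICK_NSEC is 1ms): if
the tick handler catches up after four skipped ticks, the old code
still accounts a single tick of cputime even though five ticks of
wall time elapsed:

	cputime = TICK_NSEC;		/* old: 1ms accounted, 4ms lost */
	cputime = ticks * TICK_NSEC;	/* new: 5 * 1ms = 5ms accounted */

so cputime timers can expire several ticks later than requested.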
Cc: Anna-Maria Behnsen <anna-maria@...utronix.de>
Cc: Frederic Weisbecker <frederic@...nel.org>
Cc: Ingo Molnar <mingo@...nel.org>
Cc: Thomas Gleixner <tglx@...utronix.de>
Cc: Peter Zijlstra <peterz@...radead.org>
Cc: Juri Lelli <juri.lelli@...hat.com>
Cc: Vincent Guittot <vincent.guittot@...aro.org>
Cc: Dietmar Eggemann <dietmar.eggemann@....com>
Cc: Steven Rostedt <rostedt@...dmis.org>
Cc: Ben Segall <bsegall@...gle.com>
Cc: Mel Gorman <mgorman@...e.de>
Cc: Valentin Schneider <vschneid@...hat.com>
Cc: Stephen Boyd <sboyd@...nel.org>
Cc: Yury Norov <yury.norov@...il.com>
Cc: Bitao Hu <yaoma@...ux.alibaba.com>
Cc: Andrew Morton <akpm@...ux-foundation.org>
Cc: kernel-team@...roid.com
Signed-off-by: John Stultz <jstultz@...gle.com>
---
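Not part of the patch, just a sketch of the resulting calling
convention: strictly periodic handlers pass 1, while handlers that may
catch up after skipped ticks pass the count reported by the jiffies
update, which is 0 when jiffies were already current:

	/* Periodic tick path, never skips: */
	update_process_times(1, user_mode(get_irq_regs()));

	/* NOHZ path: account however many ticks jiffies advanced: */
	ticks = tick_do_update_jiffies64(now);
	update_process_times(ticks, user_mode(regs));
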
include/linux/kernel_stat.h | 4 ++--
kernel/sched/cputime.c | 7 ++++---
kernel/time/tick-common.c | 2 +-
kernel/time/tick-legacy.c | 2 +-
kernel/time/tick-sched.c | 29 ++++++++++++++++-------------
kernel/time/timekeeping.h | 2 +-
kernel/time/timer.c | 8 ++++----
7 files changed, 29 insertions(+), 25 deletions(-)
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index b97ce2df376f9..4b5169cd8db04 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -127,12 +127,12 @@ extern void account_idle_time(u64);
extern u64 get_idle_time(struct kernel_cpustat *kcs, int cpu);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
-static inline void account_process_tick(struct task_struct *tsk, int user)
+static inline void account_process_tick(struct task_struct *tsk, unsigned long ticks, int user)
{
vtime_flush(tsk);
}
#else
-extern void account_process_tick(struct task_struct *, int user);
+extern void account_process_tick(struct task_struct *, unsigned long ticks, int user);
#endif
extern void account_idle_ticks(unsigned long ticks);
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 0bed0fa1acd98..9948da0d80842 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -471,7 +471,8 @@ void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
* @p: the process that the CPU time gets accounted to
+ * @ticks: number of ticks that have elapsed since cputime was last accounted
* @user_tick: indicates if the tick is a user or a system tick
*/
-void account_process_tick(struct task_struct *p, int user_tick)
+void account_process_tick(struct task_struct *p, unsigned long ticks, int user_tick)
{
u64 cputime, steal;
@@ -479,11 +479,11 @@ void account_process_tick(struct task_struct *p, int user_tick)
return;
if (sched_clock_irqtime) {
- irqtime_account_process_tick(p, user_tick, 1);
+ irqtime_account_process_tick(p, user_tick, ticks);
return;
}
- cputime = TICK_NSEC;
+ cputime = ticks * TICK_NSEC;
steal = steal_account_process_time(ULONG_MAX);
if (steal >= cputime)
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index a47bcf71defcf..ae5c5befdc58b 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -98,7 +98,7 @@ static void tick_periodic(int cpu)
update_wall_time();
}
- update_process_times(user_mode(get_irq_regs()));
+ update_process_times(1, user_mode(get_irq_regs()));
profile_tick(CPU_PROFILING);
}
diff --git a/kernel/time/tick-legacy.c b/kernel/time/tick-legacy.c
index af225b32f5b37..dbc156e69802b 100644
--- a/kernel/time/tick-legacy.c
+++ b/kernel/time/tick-legacy.c
@@ -32,6 +32,6 @@ void legacy_timer_tick(unsigned long ticks)
raw_spin_unlock(&jiffies_lock);
update_wall_time();
}
- update_process_times(user_mode(get_irq_regs()));
+ update_process_times(ticks, user_mode(get_irq_regs()));
profile_tick(CPU_PROFILING);
}
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index fa058510af9c1..983790923aee9 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -54,7 +54,7 @@ static ktime_t last_jiffies_update;
/*
* Must be called with interrupts disabled !
*/
-static void tick_do_update_jiffies64(ktime_t now)
+static unsigned long tick_do_update_jiffies64(ktime_t now)
{
unsigned long ticks = 1;
ktime_t delta, nextp;
@@ -70,7 +70,7 @@ static void tick_do_update_jiffies64(ktime_t now)
*/
if (IS_ENABLED(CONFIG_64BIT)) {
if (ktime_before(now, smp_load_acquire(&tick_next_period)))
- return;
+ return 0;
} else {
unsigned int seq;
@@ -84,7 +84,7 @@ static void tick_do_update_jiffies64(ktime_t now)
} while (read_seqcount_retry(&jiffies_seq, seq));
if (ktime_before(now, nextp))
- return;
+ return 0;
}
/* Quick check failed, i.e. update is required. */
@@ -95,7 +95,7 @@ static void tick_do_update_jiffies64(ktime_t now)
*/
if (ktime_before(now, tick_next_period)) {
raw_spin_unlock(&jiffies_lock);
- return;
+ return 0;
}
write_seqcount_begin(&jiffies_seq);
@@ -147,6 +147,7 @@ static void tick_do_update_jiffies64(ktime_t now)
raw_spin_unlock(&jiffies_lock);
update_wall_time();
+ return ticks;
}
/*
@@ -203,10 +204,10 @@ static inline void tick_sched_flag_clear(struct tick_sched *ts,
#define MAX_STALLED_JIFFIES 5
-static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
+static unsigned long tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
{
int tick_cpu, cpu = smp_processor_id();
-
+ unsigned long ticks = 0;
/*
* Check if the do_timer duty was dropped. We don't care about
* concurrency: This happens only when the CPU in charge went
@@ -229,7 +230,7 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
/* Check if jiffies need an update */
if (tick_cpu == cpu)
- tick_do_update_jiffies64(now);
+ ticks = tick_do_update_jiffies64(now);
/*
* If the jiffies update stalled for too long (timekeeper in stop_machine()
@@ -240,7 +241,7 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
ts->last_tick_jiffies = READ_ONCE(jiffies);
} else {
if (++ts->stalled_jiffies == MAX_STALLED_JIFFIES) {
- tick_do_update_jiffies64(now);
+ ticks += tick_do_update_jiffies64(now);
ts->stalled_jiffies = 0;
ts->last_tick_jiffies = READ_ONCE(jiffies);
}
@@ -248,9 +249,10 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
if (tick_sched_flag_test(ts, TS_FLAG_INIDLE))
ts->got_idle_tick = 1;
+ return ticks;
}
-static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
+static void tick_sched_handle(struct tick_sched *ts, unsigned long ticks, struct pt_regs *regs)
{
/*
* When we are idle and the tick is stopped, we have to touch
@@ -264,7 +266,7 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
tick_sched_flag_test(ts, TS_FLAG_STOPPED)) {
touch_softlockup_watchdog_sched();
if (is_idle_task(current))
- ts->idle_jiffies++;
+ ts->idle_jiffies += ticks;
/*
* In case the current tick fired too early past its expected
* expiration, make sure we don't bypass the next clock reprogramming
@@ -273,7 +275,7 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
ts->next_tick = 0;
}
- update_process_times(user_mode(regs));
+ update_process_times(ticks, user_mode(regs));
profile_tick(CPU_PROFILING);
}
@@ -286,15 +288,16 @@ static enum hrtimer_restart tick_nohz_handler(struct hrtimer *timer)
struct tick_sched *ts = container_of(timer, struct tick_sched, sched_timer);
struct pt_regs *regs = get_irq_regs();
ktime_t now = ktime_get();
+ unsigned long ticks;
- tick_sched_do_timer(ts, now);
+ ticks = tick_sched_do_timer(ts, now);
/*
* Do not call when we are not in IRQ context and have
* no valid 'regs' pointer
*/
if (regs)
- tick_sched_handle(ts, regs);
+ tick_sched_handle(ts, ticks, regs);
else
ts->next_tick = 0;
diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h
index 543beba096c75..3f93af06ce5c3 100644
--- a/kernel/time/timekeeping.h
+++ b/kernel/time/timekeeping.h
@@ -22,7 +22,7 @@ static inline int sched_clock_suspend(void) { return 0; }
static inline void sched_clock_resume(void) { }
#endif
-extern void update_process_times(int user);
+extern void update_process_times(unsigned long ticks, int user);
extern void do_timer(unsigned long ticks);
extern void update_wall_time(void);
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index a5860bf6d16f9..335ed23d1cb2d 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -2509,12 +2509,12 @@ static void run_local_timers(void)
- * Called from the timer interrupt handler to charge one tick to the current
- * process. user_tick is 1 if the tick is user time, 0 for system.
+ * Called from the timer interrupt handler to charge the elapsed ticks to the
+ * current process. user_tick is 1 if the tick is user time, 0 for system.
*/
-void update_process_times(int user_tick)
+void update_process_times(unsigned long ticks, int user_tick)
{
struct task_struct *p = current;
/* Note: this timer irq context must be accounted for as well. */
- account_process_tick(p, user_tick);
+ account_process_tick(p, ticks, user_tick);
run_local_timers();
rcu_sched_clock_irq(user_tick);
#ifdef CONFIG_IRQ_WORK
--
2.48.1.262.g85cc9f2d1e-goog