lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Date:   Thu, 29 Oct 2020 15:16:58 +0800
From:   Aubrey Li <aubrey.li@...ux.intel.com>
To:     joel@...lfernandes.org, naravamudan@...italocean.com,
        jdesfossez@...italocean.com, peterz@...radead.org,
        tim.c.chen@...ux.intel.com, viremana@...ux.microsoft.com,
        aaron.lwe@...il.com, tglx@...utronix.de,
        linux-kernel@...r.kernel.org
Cc:     mingo@...nel.org, torvalds@...ux-foundation.org,
        fweisbec@...il.com, keescook@...omium.org, kerrnel@...gle.com,
        pauld@...hat.com, valentin.schneider@....com,
        mgorman@...hsingularity.net, pawan.kumar.gupta@...ux.intel.com,
        pbonzini@...hat.com, vineeth@...byteword.org, yu.c.chen@...el.com,
        christian.brauner@...ntu.com, agata.gruza@...el.com,
        antonio.gomez.iglesias@...el.com, graf@...zon.com,
        konrad.wilk@...cle.com, dfaggioli@...e.com, pjt@...gle.com,
        rostedt@...dmis.org, derkling@...gle.com, benbjiang@...cent.com,
        alexandre.chartre@...cle.com,
        James.Bottomley@...senpartnership.com, OWeisse@...ch.edu,
        dhaval.giani@...cle.com, junaids@...gle.com, jsbarnes@...gle.com,
        chris.hyser@...cle.com, paulmck@...nel.org,
        Aubrey Li <aubrey.li@...ux.intel.com>
Subject: [PATCH v1] coresched/proc: add forceidle report with coresched enabled

When a CPU is running a task with coresched enabled, its sibling is
forced idle if the sibling does not have a trusted task to run. Reporting
this forced-idle time is useful for understanding how tasks with different
cookies perform across the system.

forceidle is added as the last column of /proc/stat:

  $ cat /proc/stat
  cpu  102034 0 11992 8347016 1046 0 11 0 0 0 991
  cpu0 59 0 212 80364 59 0 0 0 0 0 0
  cpu1 72057 0 89 9102 0 0 0 0 0 0 90

So forceidle% can be computed by any user-space tool, for example:

  CPU	user%	system%	iowait%	forceidle%	idle%
  cpu53	24.75	0.00	0.00%	0.99%		74.26%
  CPU	user%	system%	iowait%	forceidle%	idle%
  cpu53	25.74	0.00	0.00%	0.99%		73.27%
  CPU	user%	system%	iowait%	forceidle%	idle%
  cpu53	24.75	0.00	0.00%	0.99%		74.26%
  CPU	user%	system%	iowait%	forceidle%	idle%
  cpu53	25.24	0.00	0.00%	3.88%		70.87%

Signed-off-by: Aubrey Li <aubrey.li@...ux.intel.com>
---
 fs/proc/stat.c              | 48 +++++++++++++++++++++++++++++++++++++
 include/linux/kernel_stat.h |  1 +
 include/linux/tick.h        |  2 ++
 kernel/time/tick-sched.c    | 48 +++++++++++++++++++++++++++++++++++++
 kernel/time/tick-sched.h    |  3 +++
 5 files changed, 102 insertions(+)

diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 46b3293015fe..b27ccac7b5a4 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -28,7 +28,11 @@ static u64 get_idle_time(struct kernel_cpustat *kcs, int cpu)
 	u64 idle;
 
 	idle = kcs->cpustat[CPUTIME_IDLE];
+#ifdef CONFIG_SCHED_CORE
+	if (cpu_online(cpu) && !nr_iowait_cpu(cpu) && !cpu_rq(cpu)->core->core_forceidle)
+#else
 	if (cpu_online(cpu) && !nr_iowait_cpu(cpu))
+#endif
 		idle += arch_idle_time(cpu);
 	return idle;
 }
@@ -43,6 +47,17 @@ static u64 get_iowait_time(struct kernel_cpustat *kcs, int cpu)
 	return iowait;
 }
 
+#ifdef CONFIG_SCHED_CORE
+static u64 get_forceidle_time(struct kernel_cpustat *kcs, int cpu)
+{
+	u64 forceidle;
+
+	forceidle = kcs->cpustat[CPUTIME_FORCEIDLE];
+	if (cpu_online(cpu) && cpu_rq(cpu)->core->core_forceidle)
+		forceidle += arch_idle_time(cpu);
+	return forceidle;
+}
+#endif
 #else
 
 static u64 get_idle_time(struct kernel_cpustat *kcs, int cpu)
@@ -77,6 +92,21 @@ static u64 get_iowait_time(struct kernel_cpustat *kcs, int cpu)
 	return iowait;
 }
 
+static u64 get_forceidle_time(struct kernel_cpustat *kcs, int cpu)
+{
+	u64 forceidle, forceidle_usecs = -1ULL;
+
+	if (cpu_online(cpu))
+		forceidle_usecs = get_cpu_forceidle_time_us(cpu, NULL);
+
+	if (forceidle_usecs == -1ULL)
+		/* !NO_HZ or cpu offline so we can rely on cpustat.forceidle */
+		forceidle = kcs->cpustat[CPUTIME_FORCEIDLE];
+	else
+		forceidle = forceidle_usecs * NSEC_PER_USEC;
+
+	return forceidle;
+}
 #endif
 
 static void show_irq_gap(struct seq_file *p, unsigned int gap)
@@ -111,12 +141,18 @@ static int show_stat(struct seq_file *p, void *v)
 	u64 guest, guest_nice;
 	u64 sum = 0;
 	u64 sum_softirq = 0;
+#ifdef CONFIG_SCHED_CORE
+	u64 forceidle;
+#endif
 	unsigned int per_softirq_sums[NR_SOFTIRQS] = {0};
 	struct timespec64 boottime;
 
 	user = nice = system = idle = iowait =
 		irq = softirq = steal = 0;
 	guest = guest_nice = 0;
+#ifdef CONFIG_SCHED_CORE
+	forceidle = 0;
+#endif
 	getboottime64(&boottime);
 
 	for_each_possible_cpu(i) {
@@ -130,6 +166,9 @@ static int show_stat(struct seq_file *p, void *v)
 		system		+= cpustat[CPUTIME_SYSTEM];
 		idle		+= get_idle_time(&kcpustat, i);
 		iowait		+= get_iowait_time(&kcpustat, i);
+#ifdef CONFIG_SCHED_CORE
+		forceidle	+= get_forceidle_time(&kcpustat, i);
+#endif
 		irq		+= cpustat[CPUTIME_IRQ];
 		softirq		+= cpustat[CPUTIME_SOFTIRQ];
 		steal		+= cpustat[CPUTIME_STEAL];
@@ -157,6 +196,9 @@ static int show_stat(struct seq_file *p, void *v)
 	seq_put_decimal_ull(p, " ", nsec_to_clock_t(steal));
 	seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest));
 	seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest_nice));
+#ifdef CONFIG_SCHED_CORE
+	seq_put_decimal_ull(p, " ", nsec_to_clock_t(forceidle));
+#endif
 	seq_putc(p, '\n');
 
 	for_each_online_cpu(i) {
@@ -171,6 +213,9 @@ static int show_stat(struct seq_file *p, void *v)
 		system		= cpustat[CPUTIME_SYSTEM];
 		idle		= get_idle_time(&kcpustat, i);
 		iowait		= get_iowait_time(&kcpustat, i);
+#ifdef CONFIG_SCHED_CORE
+		forceidle	= get_forceidle_time(&kcpustat, i);
+#endif
 		irq		= cpustat[CPUTIME_IRQ];
 		softirq		= cpustat[CPUTIME_SOFTIRQ];
 		steal		= cpustat[CPUTIME_STEAL];
@@ -187,6 +232,9 @@ static int show_stat(struct seq_file *p, void *v)
 		seq_put_decimal_ull(p, " ", nsec_to_clock_t(steal));
 		seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest));
 		seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest_nice));
+#ifdef CONFIG_SCHED_CORE
+		seq_put_decimal_ull(p, " ", nsec_to_clock_t(forceidle));
+#endif
 		seq_putc(p, '\n');
 	}
 	seq_put_decimal_ull(p, "intr ", (unsigned long long)sum);
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 89f0745c096d..c7ce4bfe757e 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -25,6 +25,7 @@ enum cpu_usage_stat {
 	CPUTIME_IRQ,
 	CPUTIME_IDLE,
 	CPUTIME_IOWAIT,
+	CPUTIME_FORCEIDLE,
 	CPUTIME_STEAL,
 	CPUTIME_GUEST,
 	CPUTIME_GUEST_NICE,
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 7340613c7eff..7fce78f46930 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -139,6 +139,7 @@ extern unsigned long tick_nohz_get_idle_calls(void);
 extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu);
 extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
 extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
+extern u64 get_cpu_forceidle_time_us(int cpu, u64 *last_update_time);
 
 static inline void tick_nohz_idle_stop_tick_protected(void)
 {
@@ -169,6 +170,7 @@ static inline ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
 }
 static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; }
 static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; }
+static inline u64 get_cpu_forceidle_time_us(int cpu, u64 *unused) { return -1; }
 
 static inline void tick_nohz_idle_stop_tick_protected(void) { }
 #endif /* !CONFIG_NO_HZ_COMMON */
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 1b734070f028..de94e5bab5a1 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -29,6 +29,7 @@
 #include <asm/irq_regs.h>
 
 #include "tick-internal.h"
+#include "../sched/sched.h"
 
 #include <trace/events/timer.h>
 
@@ -547,6 +548,10 @@ update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_upda
 		delta = ktime_sub(now, ts->idle_entrytime);
 		if (nr_iowait_cpu(cpu) > 0)
 			ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta);
+#ifdef CONFIG_SCHED_CORE
+		else if (cpu_rq(cpu)->core->core_forceidle)
+			ts->forceidle_sleeptime = ktime_add(ts->forceidle_sleeptime, delta);
+#endif
 		else
 			ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
 		ts->idle_entrytime = now;
@@ -653,6 +658,49 @@ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
 }
 EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);
 
+#ifdef CONFIG_SCHED_CORE
+/**
+ * get_cpu_forceidle_time_us - get the total force idle time of a CPU
+ * @cpu: CPU number to query
+ * @last_update_time: variable to store update time in. Do not update
+ * counters if NULL.
+ *
+ * Return the cumulative force idle time (since boot) for a given
+ * CPU, in microseconds.
+ *
+ * This time is measured via accounting rather than sampling,
+ * and is as accurate as ktime_get() is.
+ *
+ * This function returns -1 if NOHZ is not enabled.
+ */
+u64 get_cpu_forceidle_time_us(int cpu, u64 *last_update_time)
+{
+	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+	ktime_t now, forceidle;
+
+	if (!tick_nohz_active)
+		return -1;
+
+	now = ktime_get();
+	if (last_update_time) {
+		update_ts_time_stats(cpu, ts, now, last_update_time);
+		forceidle = ts->forceidle_sleeptime;
+	} else {
+		if (ts->idle_active && cpu_rq(cpu)->core->core_forceidle) {
+			ktime_t delta = ktime_sub(now, ts->idle_entrytime);
+
+			forceidle = ktime_add(ts->forceidle_sleeptime, delta);
+		} else {
+			forceidle = ts->forceidle_sleeptime;
+		}
+	}
+
+	return ktime_to_us(forceidle);
+
+}
+EXPORT_SYMBOL_GPL(get_cpu_forceidle_time_us);
+#endif
+
 static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
 {
 	hrtimer_cancel(&ts->sched_timer);
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index 4fb06527cf64..4c00c5399055 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -71,6 +71,9 @@ struct tick_sched {
 	ktime_t				idle_exittime;
 	ktime_t				idle_sleeptime;
 	ktime_t				iowait_sleeptime;
+#ifdef CONFIG_SCHED_CORE
+	ktime_t				forceidle_sleeptime;
+#endif
 	unsigned long			last_jiffies;
 	u64				timer_expires;
 	u64				timer_expires_base;
-- 
2.17.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ