Message-Id: <20191106030807.31091-6-frederic@kernel.org>
Date: Wed, 6 Nov 2019 04:08:03 +0100
From: Frederic Weisbecker <frederic@...nel.org>
To: Peter Zijlstra <peterz@...radead.org>,
Ingo Molnar <mingo@...nel.org>
Cc: LKML <linux-kernel@...r.kernel.org>,
Frederic Weisbecker <frederic@...nel.org>,
Jacek Anaszewski <jacek.anaszewski@...il.com>,
Wanpeng Li <wanpengli@...cent.com>,
Pavel Machek <pavel@....cz>,
Benjamin Herrenschmidt <benh@...nel.crashing.org>,
Thomas Gleixner <tglx@...utronix.de>,
Yauheni Kaliuta <yauheni.kaliuta@...hat.com>,
"Rafael J . Wysocki" <rjw@...ysocki.net>,
Viresh Kumar <viresh.kumar@...aro.org>,
Rik van Riel <riel@...riel.com>
Subject: [PATCH 5/9] sched/vtime: Bring all-in-one kcpustat accessor for vtime fields
Many callsites want to fetch the values of the system, user, user_nice,
guest or guest_nice kcpustat fields all at once, or at least a pair of
them. In that case, calling kcpustat_field() for each requested field
brings unnecessary overhead when we could fetch all of them in one go.
So provide kcpustat_cputime(), which fetches all the vtime-sensitive
fields under the same RCU and seqcount block.
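For illustration, a caller that currently issues five kcpustat_field()
calls per CPU could instead snapshot everything in one pass. This is a
minimal sketch, not code from this patch; the accumulation loop and
variable names are illustrative:

	u64 user = 0, nice = 0, sys = 0, guest = 0, guest_nice = 0;
	u64 u, n, s, g, gn;
	int cpu;

	for_each_possible_cpu(cpu) {
		/*
		 * All five vtime-sensitive fields are snapshot under
		 * a single RCU + seqcount read section.
		 */
		kcpustat_cputime(&kcpustat_cpu(cpu), cpu,
				 &u, &n, &s, &g, &gn);
		user += u;
		nice += n;
		sys += s;
		guest += g;
		guest_nice += gn;
	}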
Signed-off-by: Frederic Weisbecker <frederic@...nel.org>
Cc: Yauheni Kaliuta <yauheni.kaliuta@...hat.com>
Cc: Thomas Gleixner <tglx@...utronix.de>
Cc: Rik van Riel <riel@...riel.com>
Cc: Peter Zijlstra <peterz@...radead.org>
Cc: Wanpeng Li <wanpengli@...cent.com>
Cc: Ingo Molnar <mingo@...nel.org>
---
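Note (not part of the changelog): the new accessor relies on the same
seqcount read-retry pattern that kcpustat_field() already uses. A
minimal generic sketch of that pattern, with illustrative variable
names:

	unsigned int seq;
	u64 stime;

	do {
		seq = read_seqcount_begin(&vtime->seqcount);
		/* Speculative read; retried if a writer raced us */
		stime = vtime->stime;
	} while (read_seqcount_retry(&vtime->seqcount, seq));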
include/linux/kernel_stat.h | 23 ++++++
kernel/sched/cputime.c | 138 ++++++++++++++++++++++++++++++------
2 files changed, 141 insertions(+), 20 deletions(-)
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 1b9b97f6946e..c76daad2d8e2 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -78,15 +78,38 @@ static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu)
return kstat_cpu(cpu).irqs_sum;
}
+
+static inline void kcpustat_cputime_raw(u64 *cpustat, u64 *user, u64 *nice,
+ u64 *system, u64 *guest, u64 *guest_nice)
+{
+ *user = cpustat[CPUTIME_USER];
+ *nice = cpustat[CPUTIME_NICE];
+ *system = cpustat[CPUTIME_SYSTEM];
+ *guest = cpustat[CPUTIME_GUEST];
+ *guest_nice = cpustat[CPUTIME_GUEST_NICE];
+}
+
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
extern u64 kcpustat_field(struct kernel_cpustat *kcpustat,
enum cpu_usage_stat usage, int cpu);
+extern void kcpustat_cputime(struct kernel_cpustat *kcpustat, int cpu,
+ u64 *user, u64 *nice, u64 *system,
+ u64 *guest, u64 *guest_nice);
#else
static inline u64 kcpustat_field(struct kernel_cpustat *kcpustat,
enum cpu_usage_stat usage, int cpu)
{
return kcpustat->cpustat[usage];
}
+
+static inline void kcpustat_cputime(struct kernel_cpustat *kcpustat, int cpu,
+ u64 *user, u64 *nice, u64 *system,
+ u64 *guest, u64 *guest_nice)
+{
+ kcpustat_cputime_raw(kcpustat->cpustat, user, nice,
+ system, guest, guest_nice);
+}
+
#endif
extern void account_user_time(struct task_struct *, u64);
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index bf4b61f71194..0006dfccbeb7 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -1042,6 +1042,30 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
} while (read_seqcount_retry(&vtime->seqcount, seq));
}
+static int vtime_state_check(struct vtime *vtime, int cpu)
+{
+ /*
+ * We raced against context switch, fetch the
+ * kcpustat task again.
+ */
+ if (vtime->cpu != cpu && vtime->cpu != -1)
+ return -EAGAIN;
+
+ /*
+ * Two possible things here:
+ * 1) We are seeing the scheduling out task (prev) or any past one.
+ * 2) We are seeing the scheduling in task (next) but it hasn't
+ * passed through vtime_task_switch() yet, so the pending
+ * cputime of the prev task may not be flushed yet.
+ *
+ * Case 1) is ok but 2) is not. So wait for a safe VTIME state.
+ */
+ if (vtime->state == VTIME_INACTIVE)
+ return -EAGAIN;
+
+ return 0;
+}
+
static u64 kcpustat_user_vtime(struct vtime *vtime)
{
if (vtime->state == VTIME_USER)
@@ -1062,26 +1086,9 @@ static int kcpustat_field_vtime(u64 *cpustat,
do {
seq = read_seqcount_begin(&vtime->seqcount);
- /*
- * We raced against context switch, fetch the
- * kcpustat task again.
- */
- if (vtime->cpu != cpu && vtime->cpu != -1)
- return -EAGAIN;
-
- /*
- * Two possible things here:
- * 1) We are seeing the scheduling out task (prev) or any past one.
- * 2) We are seeing the scheduling in task (next) but it hasn't
- * passed though vtime_task_switch() yet so the pending
- * cputime of the prev task may not be flushed yet.
- *
- * Case 1) is ok but 2) is not. So wait for a safe VTIME state.
- */
- if (vtime->state == VTIME_INACTIVE)
- return -EAGAIN;
-
- err = 0;
+ err = vtime_state_check(vtime, cpu);
+ if (err < 0)
+ return err;
*val = cpustat[usage];
@@ -1149,4 +1156,95 @@ u64 kcpustat_field(struct kernel_cpustat *kcpustat,
}
}
EXPORT_SYMBOL_GPL(kcpustat_field);
+
+static int kcpustat_cputime_vtime(u64 *cpustat, struct vtime *vtime,
+ int cpu, u64 *user, u64 *nice,
+ u64 *system, u64 *guest, u64 *guest_nice)
+{
+ unsigned int seq;
+ u64 delta;
+ int err;
+
+ do {
+ seq = read_seqcount_begin(&vtime->seqcount);
+
+ err = vtime_state_check(vtime, cpu);
+ if (err < 0)
+ return err;
+
+ kcpustat_cputime_raw(cpustat, user, nice,
+ system, guest, guest_nice);
+
+ /* Task is sleeping, dead or idle, nothing to add */
+ if (vtime->state < VTIME_SYS)
+ continue;
+
+ delta = vtime_delta(vtime);
+
+ /*
+ * Task runs either in user (including guest) or kernel space,
+ * add pending nohz time to the right place.
+ */
+ if (vtime->state == VTIME_SYS) {
+ *system += vtime->stime + delta;
+ } else if (vtime->state == VTIME_USER) {
+ if (vtime->nice)
+ *nice += vtime->utime + delta;
+ else
+ *user += vtime->utime + delta;
+ } else {
+ WARN_ON_ONCE(vtime->state != VTIME_GUEST);
+ if (vtime->nice) {
+ *guest_nice += vtime->gtime + delta;
+ *nice += vtime->gtime + delta;
+ } else {
+ *guest += vtime->gtime + delta;
+ *user += vtime->gtime + delta;
+ }
+ }
+ } while (read_seqcount_retry(&vtime->seqcount, seq));
+
+ return err;
+}
+
+void kcpustat_cputime(struct kernel_cpustat *kcpustat, int cpu,
+ u64 *user, u64 *nice, u64 *system,
+ u64 *guest, u64 *guest_nice)
+{
+ u64 *cpustat = kcpustat->cpustat;
+ struct rq *rq;
+ int err;
+
+ if (!vtime_accounting_enabled_cpu(cpu)) {
+ kcpustat_cputime_raw(cpustat, user, nice,
+ system, guest, guest_nice);
+ return;
+ }
+
+ rq = cpu_rq(cpu);
+
+ for (;;) {
+ struct task_struct *curr;
+
+ rcu_read_lock();
+ curr = rcu_dereference(rq->curr);
+ if (WARN_ON_ONCE(!curr)) {
+ rcu_read_unlock();
+ kcpustat_cputime_raw(cpustat, user, nice,
+ system, guest, guest_nice);
+ return;
+ }
+
+ err = kcpustat_cputime_vtime(cpustat, &curr->vtime, cpu, user,
+ nice, system, guest, guest_nice);
+ rcu_read_unlock();
+
+ if (!err)
+ return;
+
+ cpu_relax();
+ }
+}
+EXPORT_SYMBOL_GPL(kcpustat_cputime);
+
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
--
2.23.0