lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1542163569-20047-21-git-send-email-frederic@kernel.org>
Date:   Wed, 14 Nov 2018 03:46:04 +0100
From:   Frederic Weisbecker <frederic@...nel.org>
To:     LKML <linux-kernel@...r.kernel.org>
Cc:     Frederic Weisbecker <frederic@...nel.org>,
        Peter Zijlstra <peterz@...radead.org>,
        Wanpeng Li <wanpengli@...cent.com>,
        Thomas Gleixner <tglx@...utronix.de>,
        Yauheni Kaliuta <yauheni.kaliuta@...hat.com>,
        Ingo Molnar <mingo@...nel.org>, Rik van Riel <riel@...hat.com>
Subject: [PATCH 20/25] sched/kcpustat: Introduce vtime-aware kcpustat accessor

Kcpustat is not correctly supported on nohz_full CPUs. The tick doesn't
fire and the cputime therefore doesn't move forward. The issue has shown
up after the vanishing of the remaining 1Hz which has made the issue
visible.

We are solving that with tracking the task running on a CPU through RCU
and reading its vtime delta that we add to the raw kcpustat values.

We make sure that we fetch a coherent raw-kcpustat/vtime-delta couple
sequence while checking that the CPU referred by the target vtime is the
correct one, under the locked vtime seqcount.

Reported-by: Yauheni Kaliuta <yauheni.kaliuta@...hat.com>
Signed-off-by: Frederic Weisbecker <frederic@...nel.org>
Cc: Yauheni Kaliuta <yauheni.kaliuta@...hat.com>
Cc: Thomas Gleixner <tglx@...utronix.de>
Cc: Rik van Riel <riel@...hat.com>
Cc: Peter Zijlstra <peterz@...radead.org>
Cc: Wanpeng Li <wanpengli@...cent.com>
Cc: Ingo Molnar <mingo@...nel.org>
---
 include/linux/kernel_stat.h | 25 +++++++++++++
 kernel/sched/cputime.c      | 90 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 115 insertions(+)

diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 049d973..2d4d301 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -79,6 +79,31 @@ static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu)
 	return kstat_cpu(cpu).irqs_sum;
 }
 
+
+static inline void kcpustat_cputime_raw(struct kernel_cpustat *kcpustat,
+					u64 *user, u64 *nice, u64 *system,
+					u64 *guest, u64 *guest_nice)
+{
+	*user = kcpustat->cpustat[CPUTIME_USER];
+	*nice = kcpustat->cpustat[CPUTIME_NICE];
+	*system = kcpustat->cpustat[CPUTIME_SYSTEM];
+	*guest = kcpustat->cpustat[CPUTIME_GUEST];
+	*guest_nice = kcpustat->cpustat[CPUTIME_GUEST_NICE];
+}
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+extern void kcpustat_cputime(struct kernel_cpustat *kcpustat, int cpu,
+			     u64 *user, u64 *nice, u64 *system,
+			     u64 *guest, u64 *guest_nice);
+#else
+static inline void kcpustat_cputime(struct kernel_cpustat *kcpustat, int cpu,
+				    u64 *user, u64 *nice, u64 *system,
+				    u64 *guest, u64 *guest_nice)
+{
+	kcpustat_cputime_raw(kcpustat, user, nice, system, guest, guest_nice);
+}
+#endif
+
 extern void account_user_time(struct task_struct *, u64);
 extern void account_guest_time(struct task_struct *, u64);
 extern void account_system_time(struct task_struct *, int, u64);
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 2b35132..3afde9f 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -1024,4 +1024,94 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
 			*utime += vtime->utime + delta;
 	} while (read_seqcount_retry(&vtime->seqcount, seq));
 }
+
+static int kcpustat_vtime(struct kernel_cpustat *kcpustat, struct vtime *vtime,
+			  int cpu, u64 *user, u64 *nice,
+			  u64 *system, u64 *guest, u64 *guest_nice)
+{
+	unsigned int seq;
+	u64 delta;
+	int err;
+
+	do {
+		seq = read_seqcount_begin(&vtime->seqcount);
+
+		/*
+		 * We raced against context switch, fetch the
+		 * kcpustat task again.
+		 */
+		if (vtime->cpu != cpu && vtime->cpu != -1) {
+			err = -EAGAIN;
+			continue;
+		}
+
+		err = 0;
+
+		kcpustat_cputime_raw(kcpustat, user, nice,
+				     system, guest, guest_nice);
+
+		/* Task is sleeping, dead or idle, nothing to add */
+		if (vtime->state < VTIME_SYS)
+			continue;
+
+		delta = vtime_delta(vtime);
+
+		/*
+		 * Task runs either in user (including guest) or kernel space,
+		 * add pending nohz time to the right place.
+		 */
+		if (vtime->state == VTIME_SYS) {
+			*system += vtime->stime + delta;
+		} else if (vtime->state == VTIME_USER) {
+			if (vtime->nice)
+				*nice += vtime->utime + delta;
+			else
+				*user += vtime->utime + delta;
+		} else {
+			WARN_ON_ONCE(vtime->state != VTIME_GUEST);
+			if (vtime->nice) {
+				*guest_nice += vtime->gtime + delta;
+				*nice += vtime->gtime + delta;
+			} else {
+				*guest += vtime->gtime + delta;
+				*user += vtime->gtime + delta;
+			}
+		}
+	} while (read_seqcount_retry(&vtime->seqcount, seq));
+
+	return err;
+}
+
+void kcpustat_cputime(struct kernel_cpustat *kcpustat, int cpu,
+		      u64 *user, u64 *nice, u64 *system,
+		      u64 *guest, u64 *guest_nice)
+{
+	struct task_struct *curr;
+	struct vtime *vtime;
+	int err;
+
+	if (!vtime_accounting_enabled()) {
+		kcpustat_cputime_raw(kcpustat, user, nice,
+				     system, guest, guest_nice);
+		return;
+	}
+
+	rcu_read_lock();
+
+	do {
+		curr = rcu_dereference(kcpustat->curr);
+		if (!curr) {
+			kcpustat_cputime_raw(kcpustat, user, nice,
+					     system, guest, guest_nice);
+			break;
+		}
+
+		vtime = &curr->vtime;
+		err = kcpustat_vtime(kcpustat, vtime, cpu, user,
+				     nice, system, guest, guest_nice);
+	} while (err == -EAGAIN);
+
+	rcu_read_unlock();
+}
+
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
-- 
2.7.4

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ