lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Message-ID: <20240718235831.19150-1-myd.xia@gmail.com>
Date: Fri, 19 Jul 2024 07:58:31 +0800
From: Xiang Lin <myd.xia@...il.com>
To: mingo@...hat.com,
	peterz@...radead.org,
	juri.lelli@...hat.com,
	vincent.guittot@...aro.org,
	dietmar.eggemann@....com,
	rostedt@...dmis.org,
	bsegall@...gle.com,
	mgorman@...e.de,
	vschneid@...hat.com
Cc: linux-kernel@...r.kernel.org,
	Xiang Lin <myd.xia@...il.com>
Subject: [PATCH] irqtime: exclude steal time on paravirt

On paravirt, guest irq time may include some steal time, so we
need to subtract the steal time delta when accounting irqtime.
When accounting user or system time, we exclude both irq time
and steal time; if the irq time already includes some steal
time, the accounted user or system time ends up smaller than
the actual value.

Use the steps below to reproduce the problem:
    1. run qemu, and pin vcpu to a physical cpu(e.g, cpu15)
       taskset 0x8000 qemu-system-x86_64 -nographic -enable-kvm \
            -kernel bzImage -append "console=ttyS0,115200 nokaslr" \
            -initrd rootfs.cpio.gz -nic user,hostfwd=tcp::2222-:22
    2. run below cmd on guest console to increase some irq loads
       (guest) top -d 1 -b
    3. ssh to guest, and monitor cpu usage
       (guest) top -d 1 -b | grep ^%Cpu
    4. do some stress on host, and pin it to the same physical cpu as vcpu
       (host) chrt -f 2 stress-ng --cpu 1 --cpu-load 90 --taskset 15

  Before the patch, the irq usage fluctuates hugely:
   %Cpu(s):0.0 us,0.0 sy,0.0 ni,5.6 id,0.0 wa, 16.7 hi,  0.0 si, 77.8 st
   %Cpu(s):0.0 us,0.0 sy,0.0 ni,8.1 id,0.0 wa,  2.0 hi,  0.0 si, 89.9 st
   %Cpu(s):0.0 us,0.7 sy,0.0 ni,6.2 id,0.0 wa, 15.9 hi,  0.0 si, 77.2 st
   %Cpu(s):0.0 us,0.0 sy,0.0 ni,8.7 id,0.0 wa,  2.9 hi,  0.0 si, 88.5 st
   %Cpu(s):0.6 us,0.0 sy,0.0 ni,5.0 id,0.0 wa, 16.7 hi,  0.0 si, 77.8 st
   %Cpu(s):0.0 us,0.8 sy,0.0 ni,9.4 id,0.0 wa,  0.0 hi,  0.0 si, 89.8 st
   %Cpu(s):0.0 us,0.0 sy,0.0 ni,6.9 id,0.0 wa, 15.9 hi,  0.0 si, 77.2 st
   ....

  After the patch, the irq usage is steady:
   %Cpu(s):0.0 us,0.0 sy,0.0 ni,7.2 id,0.0 wa,  2.4 hi,  0.0 si, 90.4 st
   %Cpu(s):0.0 us,0.8 sy,0.0 ni,7.9 id,0.0 wa,  1.6 hi,  0.0 si, 89.7 st
   %Cpu(s):0.8 us,0.0 sy,0.0 ni,7.2 id,0.0 wa,  2.4 hi,  0.0 si, 89.6 st
   %Cpu(s):0.0 us,0.8 sy,0.0 ni,8.0 id,0.0 wa,  1.6 hi,  0.0 si, 89.6 st
   %Cpu(s):0.0 us,0.0 sy,0.0 ni,7.2 id,0.0 wa,  2.4 hi,  0.0 si, 90.4 st
   %Cpu(s):0.0 us,0.8 sy,0.0 ni,7.9 id,0.0 wa,  1.6 hi,  0.0 si, 89.7 st
   %Cpu(s):0.0 us,0.0 sy,0.0 ni,7.2 id,0.0 wa,  2.4 hi,  0.0 si, 90.4 st
   ....

Signed-off-by: Xiang Lin <myd.xia@...il.com>
---
 kernel/sched/cputime.c | 20 +++++++++++++++++++-
 kernel/sched/sched.h   |  3 +++
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index a5e00293ae43..bc00296f8f9b 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -46,6 +46,23 @@ static void irqtime_account_delta(struct irqtime *irqtime, u64 delta,
 	u64_stats_update_end(&irqtime->sync);
 }
 
+static u64 steal_irqtime_account(bool irq_entry)
+{
+#ifdef CONFIG_PARAVIRT
+	if (static_key_false(&paravirt_steal_enabled)) {
+		struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
+		u64 delta, steal;
+
+		steal = paravirt_steal_clock(smp_processor_id());
+		delta = steal - irqtime->steal_start_time;
+		irqtime->steal_start_time += delta;
+
+		return irq_entry ? 0 : delta;
+	}
+#endif
+	return 0;
+}
+
 /*
  * Called after incrementing preempt_count on {soft,}irq_enter
  * and before decrementing preempt_count on {soft,}irq_exit.
@@ -54,7 +71,7 @@ void irqtime_account_irq(struct task_struct *curr, unsigned int offset)
 {
 	struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
 	unsigned int pc;
-	s64 delta;
+	u64 delta;
 	int cpu;
 
 	if (!sched_clock_irqtime)
@@ -64,6 +81,7 @@ void irqtime_account_irq(struct task_struct *curr, unsigned int offset)
 	delta = sched_clock_cpu(cpu) - irqtime->irq_start_time;
 	irqtime->irq_start_time += delta;
 	pc = irq_count() - offset;
+	delta -= min(delta, steal_irqtime_account(!pc));
 
 	/*
 	 * We do not account for softirq time from ksoftirqd here.
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 4c36cc680361..b5389bc8062f 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2981,6 +2981,9 @@ struct irqtime {
 	u64			total;
 	u64			tick_delta;
 	u64			irq_start_time;
+#ifdef CONFIG_PARAVIRT
+	u64			steal_start_time;
+#endif
 	struct u64_stats_sync	sync;
 };
 
-- 
2.45.2


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ