Message-Id: <1466648751-7958-6-git-send-email-riel@redhat.com>
Date:	Wed, 22 Jun 2016 22:25:51 -0400
From:	riel@...hat.com
To:	linux-kernel@...r.kernel.org
Cc:	peterz@...radead.org, mingo@...nel.org, pbonzini@...hat.com,
	fweisbec@...hat.com, wanpeng.li@...mail.com, efault@....de,
	tglx@...utronix.de, rkrcmar@...hat.com
Subject: [PATCH 5/5] irqtime: drop local_irq_save/restore from irqtime_account_irq

From: Rik van Riel <riel@...hat.com>

Drop local_irq_save/restore from irqtime_account_irq.
Instead, have the softirq and hardirq code track their time
spent independently, with the softirq code subtracting any
hardirq time that ran during the softirq run.

The softirq code can be interrupted by hardirq code at
any point in time, but it can check whether it got a
consistent snapshot of the timekeeping variables it wants,
and loop around in the unlikely case that it did not.
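
As a rough illustration of that retry (not part of the patch): below is a
simplified, user-space-style sketch of the softirq exit path. Plain
variables and a stand-in now_ns() replace the per-cpu data and
sched_clock_cpu(); the real code uses READ_ONCE, per-cpu accessors and
the seqcounts added in this patch.

#include <time.h>

/* Illustrative stand-ins for the per-CPU state used by the patch. */
typedef unsigned long long u64;
typedef long long s64;

static volatile u64 cpu_hardirq_time;  /* total time spent in hardirq       */
static u64 cpu_softirq_time;           /* total time spent in softirq       */
static u64 softirq_start_time;         /* when this softirq run began       */
static u64 prev_hardirq_time;          /* cpu_hardirq_time at softirq entry */

/* Stand-in for sched_clock_cpu(cpu). */
static u64 now_ns(void)
{
        struct timespec ts;
        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (u64)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

/* Softirq exit: wall time of the run minus hardirq time that ran inside it. */
s64 softirq_exit_delta(void)
{
        u64 hardirq_snap;
        s64 delta;

        do {
                u64 now = now_ns();

                /* Snapshot hardirq time; a hardirq may bump it at any moment. */
                hardirq_snap = cpu_hardirq_time;

                delta = (s64)(now - softirq_start_time)
                      - (s64)(hardirq_snap - prev_hardirq_time);

                /* Retry if a hardirq updated cpu_hardirq_time meanwhile. */
        } while (hardirq_snap != cpu_hardirq_time);

        return delta;
}

/* Fold the computed delta into the softirq counter, as the patch does. */
void account_softirq_exit(void)
{
        cpu_softirq_time += softirq_exit_delta();
}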

Signed-off-by: Rik van Riel <riel@...hat.com>
---
 kernel/sched/cputime.c | 72 +++++++++++++++++++++++++++++++++++++++++---------
 kernel/sched/sched.h   | 38 +++++++++++++++++++++-----
 2 files changed, 90 insertions(+), 20 deletions(-)

diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index fc4122afc022..e4c7b2d17141 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -26,7 +26,9 @@
 DEFINE_PER_CPU(u64, cpu_hardirq_time);
 DEFINE_PER_CPU(u64, cpu_softirq_time);
 
-static DEFINE_PER_CPU(u64, irq_start_time);
+static DEFINE_PER_CPU(u64, hardirq_start_time);
+static DEFINE_PER_CPU(u64, softirq_start_time);
+static DEFINE_PER_CPU(u64, prev_hardirq_time);
 static int sched_clock_irqtime;
 
 void enable_sched_clock_irqtime(void)
@@ -41,6 +43,7 @@ void disable_sched_clock_irqtime(void)
 
 #ifndef CONFIG_64BIT
 DEFINE_PER_CPU(seqcount_t, irq_time_seq);
+DEFINE_PER_CPU(seqcount_t, softirq_time_seq);
 #endif /* CONFIG_64BIT */
 
 /*
@@ -53,36 +56,79 @@ DEFINE_PER_CPU(seqcount_t, irq_time_seq);
  * softirq -> hardirq, hardirq -> softirq
  *
  * When exiting hardirq or softirq time, account the elapsed time.
+ *
+ * When exiting softirq time, subtract the amount of hardirq time that
+ * interrupted this softirq run, to avoid double accounting of that time.
  */
 void irqtime_account_irq(struct task_struct *curr, int irqtype)
 {
-	unsigned long flags;
+	u64 prev_softirq_start;
+	bool leaving_softirq;
+	u64 prev_hardirq;
+	u64 hardirq_time;
 	s64 delta;
 	int cpu;
 
 	if (!sched_clock_irqtime)
 		return;
 
-	local_irq_save(flags);
-
 	cpu = smp_processor_id();
-	delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time);
-	__this_cpu_add(irq_start_time, delta);
 
-	irq_time_write_begin();
+	/*
+	 * Hardirq time accounting is pretty straightforward. If not in
+	 * hardirq context yet (entering hardirq), set the start time.
+	 * If already in hardirq context (leaving), account the elapsed time.
+	 */
+	if (irqtype == HARDIRQ_OFFSET) {
+		bool leaving_hardirq = hardirq_count();
+		delta = sched_clock_cpu(cpu) - __this_cpu_read(hardirq_start_time);
+		__this_cpu_add(hardirq_start_time, delta);
+		if (leaving_hardirq) {
+			hardirq_time_write_begin();
+			__this_cpu_add(cpu_hardirq_time, delta);
+			hardirq_time_write_end();
+		}
+		return;
+	}
+
+	/*
+	 * Softirq context may get interrupted by hardirq context, on the
+	 * same CPU. At softirq entry time the amount of time this CPU spent
+	 * in hardirq context is stored. At softirq exit time, the time spent
+	 * in hardirq context during the softirq is subtracted.
+	 */
+	prev_softirq_start = __this_cpu_read(softirq_start_time);
+	prev_hardirq = __this_cpu_read(prev_hardirq_time);
+	leaving_softirq = in_serving_softirq();
+
+	do {
+		u64 now = sched_clock_cpu(cpu);
+
+		hardirq_time = READ_ONCE(per_cpu(cpu_hardirq_time, cpu));
+		__this_cpu_write(softirq_start_time, now);
+		__this_cpu_write(prev_hardirq_time, hardirq_time);
+
+		if (leaving_softirq) {
+			/*
+			 * Subtract hardirq time that happened during this
+			 * softirq.
+			 */
+			s64 hi_delta = hardirq_time - prev_hardirq;
+			delta = now - prev_softirq_start - hi_delta;
+		}
+		/* Loop around if interrupted by a hardirq. */
+	} while (hardirq_time != READ_ONCE(per_cpu(cpu_hardirq_time, cpu)));
+
 	/*
 	 * We do not account for softirq time from ksoftirqd here.
 	 * We want to continue accounting softirq time to ksoftirqd thread
 	 * in that case, so as not to confuse scheduler with a special task
 	 * that do not consume any time, but still wants to run.
 	 */
-	if (hardirq_count())
-		__this_cpu_add(cpu_hardirq_time, delta);
-	else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
+	softirq_time_write_begin();
+	if (leaving_softirq && curr != this_cpu_ksoftirqd())
 		__this_cpu_add(cpu_softirq_time, delta);
-
-	irq_time_write_end();
-	local_irq_restore(flags);
+	softirq_time_write_end();
 }
 EXPORT_SYMBOL_GPL(irqtime_account_irq);
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ec2e8d23527e..cad4df9835f7 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1752,38 +1752,62 @@ DECLARE_PER_CPU(u64, cpu_softirq_time);
 
 #ifndef CONFIG_64BIT
 DECLARE_PER_CPU(seqcount_t, irq_time_seq);
+DECLARE_PER_CPU(seqcount_t, softirq_time_seq);
 
-static inline void irq_time_write_begin(void)
+static inline void hardirq_time_write_begin(void)
 {
 	__this_cpu_inc(irq_time_seq.sequence);
 	smp_wmb();
 }
 
-static inline void irq_time_write_end(void)
+static inline void hardirq_time_write_end(void)
 {
 	smp_wmb();
 	__this_cpu_inc(irq_time_seq.sequence);
 }
 
+static inline void softirq_time_write_begin(void)
+{
+	__this_cpu_inc(softirq_time_seq.sequence);
+	smp_wmb();
+}
+
+static inline void softirq_time_write_end(void)
+{
+	smp_wmb();
+	__this_cpu_inc(softirq_time_seq.sequence);
+}
+
 static inline u64 irq_time_read(int cpu)
 {
 	u64 irq_time;
-	unsigned seq;
+	unsigned hi_seq;
+	unsigned si_seq;
 
 	do {
-		seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu));
+		hi_seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu));
+		si_seq = read_seqcount_begin(&per_cpu(softirq_time_seq, cpu));
 		irq_time = per_cpu(cpu_softirq_time, cpu) +
 			   per_cpu(cpu_hardirq_time, cpu);
-	} while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq));
+	} while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), hi_seq) ||
+		 read_seqcount_retry(&per_cpu(softirq_time_seq, cpu), si_seq));
 
 	return irq_time;
 }
 #else /* CONFIG_64BIT */
-static inline void irq_time_write_begin(void)
+static inline void hardirq_time_write_begin(void)
+{
+}
+
+static inline void hardirq_time_write_end(void)
+{
+}
+
+static inline void softirq_time_write_begin(void)
 {
 }
 
-static inline void irq_time_write_end(void)
+static inline void softirq_time_write_end(void)
 {
 }
 
-- 
2.5.5
