lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20200807193018.060388629@infradead.org>
Date:   Fri, 07 Aug 2020 21:23:38 +0200
From:   Peter Zijlstra <peterz@...radead.org>
To:     tglx@...utronix.de, mingo@...nel.org, will@...nel.org
Cc:     x86@...nel.org, linux-kernel@...r.kernel.org, elver@...gle.com,
        paulmck@...nel.org, rostedt@...dmis.org, rjw@...ysocki.net,
        peterz@...radead.org
Subject: [RFC][PATCH 2/3] locking,entry: #PF vs TRACE_IRQFLAGS

Much of the complexity in irqentry_{enter,exit}() is due to #PF being
the sole exception that can schedule from kernel context.

One additional wrinkle with #PF is that it is non-maskable; it can
happen _anywhere_. Due to this, and the wonders of tracing, we can get
the 'normal' NMI nesting vs TRACE_IRQFLAGS:

	local_irq_disable()
	  raw_local_irq_disable();
	  trace_hardirqs_off();

	local_irq_enable();
	  trace_hardirqs_on();
	  <#PF>
	    trace_hardirqs_off()
	    ...
	    if (!regs_irqs_disabled(regs))
	      trace_hardirqs_on();
	  </#PF>
	  // WHOOPS -- lockdep thinks IRQs are disabled again!
	  raw_local_irq_enable();

Rework irqentry_{enter,exit}() to save the software (TRACE_IRQFLAGS)
state on entry and restore it on exit.

Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
---
 include/linux/entry-common.h |    1 
 kernel/entry/common.c        |   52 ++++++++++++++++++++-----------------------
 2 files changed, 26 insertions(+), 27 deletions(-)

--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -310,6 +310,7 @@ void irqentry_exit_to_user_mode(struct p
 #ifndef irqentry_state
 typedef struct irqentry_state {
 	bool	exit_rcu;
+	bool	irqs_enabled;
 } irqentry_state_t;
 #endif
 
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -260,6 +260,7 @@ noinstr irqentry_state_t irqentry_enter(
 {
 	irqentry_state_t ret = {
 		.exit_rcu = false,
+		.irqs_enabled = lockdep_hardirqs_enabled(),
 	};
 
 	if (user_mode(regs)) {
@@ -340,35 +341,32 @@ noinstr void irqentry_exit(struct pt_reg
 	/* Check whether this returns to user mode */
 	if (user_mode(regs)) {
 		irqentry_exit_to_user_mode(regs);
-	} else if (!regs_irqs_disabled(regs)) {
-		/*
-		 * If RCU was not watching on entry this needs to be done
-		 * carefully and needs the same ordering of lockdep/tracing
-		 * and RCU as the return to user mode path.
-		 */
-		if (state.exit_rcu) {
-			instrumentation_begin();
-			/* Tell the tracer that IRET will enable interrupts */
-			trace_hardirqs_on_prepare();
-			lockdep_hardirqs_on_prepare(CALLER_ADDR0);
-			instrumentation_end();
-			rcu_irq_exit();
-			lockdep_hardirqs_on(CALLER_ADDR0);
-			return;
-		}
+		return;
+	}
 
-		instrumentation_begin();
+	instrumentation_begin();
+	/*
+	 * When returning to interrupts enabled, and RCU was watching see if we
+	 * need preemption.
+	 */
+	if (!regs_irqs_disabled(regs) && !state.exit_rcu) {
 		if (IS_ENABLED(CONFIG_PREEMPTION))
 			irqentry_exit_cond_resched();
-		/* Covers both tracing and lockdep */
-		trace_hardirqs_on();
-		instrumentation_end();
-	} else {
-		/*
-		 * IRQ flags state is correct already. Just tell RCU if it
-		 * was not watching on entry.
-		 */
-		if (state.exit_rcu)
-			rcu_irq_exit();
 	}
+
+	/*
+	 * Return the TRACE_IRQFLAGS state to what we found on entry.
+	 * Observe the correct order vs RCU.
+	 */
+	if (state.irqs_enabled) {
+		trace_hardirqs_on_prepare();
+		lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+	}
+	instrumentation_end();
+
+	if (state.exit_rcu)
+		rcu_irq_exit();
+
+	if (state.irqs_enabled)
+		lockdep_hardirqs_on(CALLER_ADDR0);
 }


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ