lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Wed,  8 Nov 2017 13:35:13 -0500
From:   Mathieu Desnoyers <mathieu.desnoyers@...icios.com>
To:     "Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>
Cc:     linux-kernel@...r.kernel.org, linux-api@...r.kernel.org,
        Peter Zijlstra <peterz@...radead.org>,
        Andy Lutomirski <luto@...nel.org>,
        Boqun Feng <boqun.feng@...il.com>,
        Andrew Hunter <ahh@...gle.com>,
        Maged Michael <maged.michael@...il.com>, gromer@...gle.com,
        Avi Kivity <avi@...lladb.com>,
        Benjamin Herrenschmidt <benh@...nel.crashing.org>,
        Paul Mackerras <paulus@...ba.org>,
        Michael Ellerman <mpe@...erman.id.au>,
        Dave Watson <davejwatson@...com>,
        Thomas Gleixner <tglx@...utronix.de>,
        Ingo Molnar <mingo@...hat.com>,
        "H . Peter Anvin" <hpa@...or.com>,
        Andrea Parri <parri.andrea@...il.com>, x86@...nel.org,
        Mathieu Desnoyers <mathieu.desnoyers@...icios.com>
Subject: [RFC PATCH for 4.15 5/6] membarrier: x86: Provide core serializing command

There are two places where core serialization is needed by membarrier:

1) When returning from the membarrier IPI,
2) After scheduler updates curr to a thread with a different mm, before
   going back to user-space, since the curr->mm is used by membarrier to
   check whether it needs to send an IPI to that CPU.

x86-32 uses only iret both as return from interrupt, and to go back to
user-space. The iret instruction is core serializing.

x86-64 uses iret as return from interrupt, which takes care of the IPI.
However, it can return to user-space through either sysretl (compat
code), sysretq, or iret. Given that sysret{l,q} is not core serializing,
we rely instead on write_cr3() performed by switch_mm() to provide core
serialization after changing the current mm, and deal with the special
case of kthread -> uthread (temporarily keeping current mm into
active_mm) by adding a sync_core() in that specific case.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@...icios.com>
CC: Peter Zijlstra <peterz@...radead.org>
CC: Andy Lutomirski <luto@...nel.org>
CC: Paul E. McKenney <paulmck@...ux.vnet.ibm.com>
CC: Boqun Feng <boqun.feng@...il.com>
CC: Andrew Hunter <ahh@...gle.com>
CC: Maged Michael <maged.michael@...il.com>
CC: gromer@...gle.com
CC: Avi Kivity <avi@...lladb.com>
CC: Benjamin Herrenschmidt <benh@...nel.crashing.org>
CC: Paul Mackerras <paulus@...ba.org>
CC: Michael Ellerman <mpe@...erman.id.au>
CC: Dave Watson <davejwatson@...com>
CC: Thomas Gleixner <tglx@...utronix.de>
CC: Ingo Molnar <mingo@...hat.com>
CC: "H. Peter Anvin" <hpa@...or.com>
CC: Andrea Parri <parri.andrea@...il.com>
CC: x86@...nel.org
---
 MAINTAINERS                           |  2 ++
 arch/powerpc/include/asm/membarrier.h |  8 ++++++-
 arch/powerpc/kernel/membarrier.c      |  3 ++-
 arch/x86/Kconfig                      |  2 ++
 arch/x86/entry/entry_32.S             |  5 +++++
 arch/x86/entry/entry_64.S             |  8 +++++++
 arch/x86/include/asm/membarrier.h     | 36 ++++++++++++++++++++++++++++++++
 arch/x86/kernel/Makefile              |  1 +
 arch/x86/kernel/membarrier.c          | 39 +++++++++++++++++++++++++++++++++++
 arch/x86/mm/tlb.c                     |  7 ++++---
 include/linux/sched/mm.h              |  9 +++++++-
 kernel/sched/core.c                   |  6 +++++-
 12 files changed, 119 insertions(+), 7 deletions(-)
 create mode 100644 arch/x86/include/asm/membarrier.h
 create mode 100644 arch/x86/kernel/membarrier.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 34687a0ec28c..ff564e5195fb 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8831,6 +8831,8 @@ F:	kernel/sched/membarrier.c
 F:	include/uapi/linux/membarrier.h
 F:	arch/powerpc/kernel/membarrier.c
 F:	arch/powerpc/include/asm/membarrier.h
+F:	arch/x86/kernel/membarrier.c
+F:	arch/x86/include/asm/membarrier.h
 
 MEMORY MANAGEMENT
 L:	linux-mm@...ck.org
diff --git a/arch/powerpc/include/asm/membarrier.h b/arch/powerpc/include/asm/membarrier.h
index 0951646253d9..018cf278dc93 100644
--- a/arch/powerpc/include/asm/membarrier.h
+++ b/arch/powerpc/include/asm/membarrier.h
@@ -21,6 +21,12 @@ static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
 	 */
 	smp_mb();
 }
-void membarrier_arch_register_private_expedited(struct task_struct *t);
+
+static inline void membarrier_arch_mm_sync_core(void)
+{
+}
+
+void membarrier_arch_register_private_expedited(struct task_struct *t,
+		int flags);
 
 #endif /* _ASM_POWERPC_MEMBARRIER_H */
diff --git a/arch/powerpc/kernel/membarrier.c b/arch/powerpc/kernel/membarrier.c
index 4795ad59b833..0026d740e5a3 100644
--- a/arch/powerpc/kernel/membarrier.c
+++ b/arch/powerpc/kernel/membarrier.c
@@ -21,7 +21,8 @@
 #include <linux/rcupdate.h>
 #include <linux/atomic.h>
 
-void membarrier_arch_register_private_expedited(struct task_struct *p)
+void membarrier_arch_register_private_expedited(struct task_struct *p,
+		int flags)
 {
 	struct mm_struct *mm = p->mm;
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2fdb23313dd5..6ac32fe768a8 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -54,6 +54,8 @@ config X86
 	select ARCH_HAS_FORTIFY_SOURCE
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_KCOV			if X86_64
+	select ARCH_HAS_MEMBARRIER_HOOKS
+	select ARCH_HAS_MEMBARRIER_SYNC_CORE
 	select ARCH_HAS_PMEM_API		if X86_64
 	# Causing hangs/crashes, see the commit that added this change for details.
 	select ARCH_HAS_REFCOUNT		if BROKEN
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 4838037f97f6..04e5daba8456 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -553,6 +553,11 @@ restore_all:
 .Lrestore_nocheck:
 	RESTORE_REGS 4				# skip orig_eax/error_code
 .Lirq_return:
+	/*
+	 * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on iret core serialization
+	 * when returning from IPI handler and when returning from
+	 * scheduler to user-space.
+	 */
 	INTERRUPT_RETURN
 
 .section .fixup, "ax"
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index bcfc5668dcb2..4859f04e1695 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -642,6 +642,10 @@ GLOBAL(restore_regs_and_iret)
 restore_c_regs_and_iret:
 	RESTORE_C_REGS
 	REMOVE_PT_GPREGS_FROM_STACK 8
+	/*
+	 * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on iret core serialization
+	 * when returning from IPI handler.
+	 */
 	INTERRUPT_RETURN
 
 ENTRY(native_iret)
@@ -1122,6 +1126,10 @@ paranoid_exit_restore:
 	RESTORE_EXTRA_REGS
 	RESTORE_C_REGS
 	REMOVE_PT_GPREGS_FROM_STACK 8
+	/*
+	 * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on iret core serialization
+	 * when returning from IPI handler.
+	 */
 	INTERRUPT_RETURN
 END(paranoid_exit)
 
diff --git a/arch/x86/include/asm/membarrier.h b/arch/x86/include/asm/membarrier.h
new file mode 100644
index 000000000000..d22aac77047c
--- /dev/null
+++ b/arch/x86/include/asm/membarrier.h
@@ -0,0 +1,36 @@
+#ifndef _ASM_X86_MEMBARRIER_H
+#define _ASM_X86_MEMBARRIER_H
+
+#include <asm/processor.h>
+
+static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
+		struct mm_struct *next, struct task_struct *tsk)
+{
+}
+
+#ifdef CONFIG_X86_32
+static inline void membarrier_arch_mm_sync_core(struct mm_struct *mm)
+{
+}
+static inline
+void membarrier_arch_register_private_expedited(struct task_struct *t,
+		int flags);
+#else
+/*
+ * x86-64 implements return to user-space through sysret, which is not a
+ * core-serializing instruction. Therefore, we need an explicit core
+ * serializing instruction after going from kernel thread back to
+ * user-space thread (active_mm moved back to current mm).
+ */
+static inline void membarrier_arch_mm_sync_core(struct mm_struct *mm)
+{
+	if (likely(!(atomic_read(&mm->membarrier_state) &
+			MEMBARRIER_STATE_SYNC_CORE)))
+		return;
+	sync_core();
+}
+void membarrier_arch_register_private_expedited(struct task_struct *t,
+		int flags);
+#endif
+
+#endif /* _ASM_X86_MEMBARRIER_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5f70044340ff..13d6738b26c5 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -111,6 +111,7 @@ obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_$(BITS).o
 obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
 obj-$(CONFIG_PARAVIRT_CLOCK)	+= pvclock.o
 obj-$(CONFIG_X86_PMEM_LEGACY_DEVICE) += pmem.o
+obj-$(CONFIG_X86_64)		+= membarrier.o
 
 obj-$(CONFIG_EISA)		+= eisa.o
 obj-$(CONFIG_PCSPKR_PLATFORM)	+= pcspeaker.o
diff --git a/arch/x86/kernel/membarrier.c b/arch/x86/kernel/membarrier.c
new file mode 100644
index 000000000000..978698d7da3d
--- /dev/null
+++ b/arch/x86/kernel/membarrier.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2010-2017 Mathieu Desnoyers <mathieu.desnoyers@...icios.com>
+ *
+ * membarrier system call - x86 architecture code
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/sched/mm.h>
+#include <linux/sched/signal.h>
+#include <linux/thread_info.h>
+#include <linux/spinlock.h>
+#include <linux/rcupdate.h>
+#include <linux/atomic.h>
+
+void membarrier_arch_register_private_expedited(struct task_struct *p,
+		int flags)
+{
+	struct mm_struct *mm = p->mm;
+
+	if (!(flags & MEMBARRIER_FLAG_SYNC_CORE))
+		return;
+	atomic_or(MEMBARRIER_STATE_SYNC_CORE, &mm->membarrier_state);
+	if (atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1)
+		return;
+	/*
+	 * Ensure all future scheduler executions will observe the new
+	 * thread flag state for this process.
+	 */
+	synchronize_sched();
+}
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 5abf9bfcca1f..3b13d6735fa5 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -147,9 +147,10 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	this_cpu_write(cpu_tlbstate.is_lazy, false);
 
 	/*
-	 * The membarrier system call requires a full memory barrier
-	 * before returning to user-space, after storing to rq->curr.
-	 * Writing to CR3 provides that full memory barrier.
+	 * The membarrier system call requires a full memory barrier and
+	 * core serialization before returning to user-space, after
+	 * storing to rq->curr. Writing to CR3 provides that full
+	 * memory barrier and core serializing instruction.
 	 */
 	if (real_prev == next) {
 		VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index a888da398517..5561b92b597a 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -222,6 +222,7 @@ enum {
 	MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY		= (1U << 0),
 	MEMBARRIER_STATE_SWITCH_MM				= (1U << 1),
 	MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY	= (1U << 2),
+	MEMBARRIER_STATE_SYNC_CORE				= (1U << 3),
 };
 
 enum {
@@ -232,7 +233,10 @@ enum {
 #include <asm/membarrier.h>
 #else
 static inline void membarrier_arch_register_private_expedited(
-		struct task_struct *p)
+		struct task_struct *p, int flags)
+{
+}
+static inline void membarrier_arch_mm_sync_core(struct mm_struct *mm)
 {
 }
 #endif
@@ -251,6 +255,9 @@ static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
 static inline void membarrier_execve(struct task_struct *t)
 {
 }
+static inline void membarrier_arch_mm_sync_core(struct mm_struct *mm)
+{
+}
 #endif
 
 #endif /* _LINUX_SCHED_MM_H */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8a176892b4f0..b5194cfc2199 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2653,9 +2653,13 @@ static struct rq *finish_task_switch(struct task_struct *prev)
 	 * thread, mmdrop()'s implicit full barrier is required by the
 	 * membarrier system call, because the current active_mm can
 	 * become the current mm without going through switch_mm().
+	 * membarrier also requires a core serializing instruction
+	 * before going back to user-space after storing to rq->curr.
 	 */
-	if (mm)
+	if (mm) {
 		mmdrop(mm);
+		membarrier_arch_mm_sync_core(mm);
+	}
 	if (unlikely(prev_state == TASK_DEAD)) {
 		if (prev->sched_class->task_dead)
 			prev->sched_class->task_dead(prev);
-- 
2.11.0

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ