Message-ID: <20251020150133.GK3245006@noisy.programming.kicks-ass.net>
Date: Mon, 20 Oct 2025 17:01:33 +0200
From: Peter Zijlstra <peterz@...radead.org>
To: "Kaplan, David" <David.Kaplan@....com>
Cc: Thomas Gleixner <tglx@...utronix.de>, Borislav Petkov <bp@...en8.de>,
Josh Poimboeuf <jpoimboe@...nel.org>,
Pawan Gupta <pawan.kumar.gupta@...ux.intel.com>,
Ingo Molnar <mingo@...hat.com>,
Dave Hansen <dave.hansen@...ux.intel.com>,
"x86@...nel.org" <x86@...nel.org>,
"H . Peter Anvin" <hpa@...or.com>, Alexander Graf <graf@...zon.com>,
Boris Ostrovsky <boris.ostrovsky@...cle.com>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Subject: Re: [RFC PATCH 40/56] x86/alternative: Use sync_core_nmi_safe()

On Mon, Oct 20, 2025 at 02:49:56PM +0000, Kaplan, David wrote:
> Coming back to this, are you thinking we should just create something
> like 'text_poke_sync_core()' inside alternative.c and that can use:
> 1. SERIALIZE (if available)
> 2. MOV-CR2 (if re-patching)
> 3. Else, IRET
>
> And maybe someday we put MFENCE into there too for AMD parts.
>
> Right now, of course this is the only logic that would care about an
> NMI-safe sync_core(). So maybe this makes sense vs creating a generic
> version that nobody else is using?
I was thinking something fairly straightforward like the below. Yes,
there are a few more sync_core() callers out there, git tells me:
arch/x86/kernel/alternative.c: sync_core();
arch/x86/kernel/alternative.c:noinstr void sync_core(void)
arch/x86/kernel/alternative.c: sync_core();
arch/x86/kernel/cpu/mce/core.c: sync_core();
arch/x86/kernel/cpu/mce/core.c: sync_core();
arch/x86/kernel/static_call.c: sync_core();
drivers/misc/sgi-gru/grufault.c: sync_core();
drivers/misc/sgi-gru/grufault.c: sync_core(); /* make sure we are have current data */
drivers/misc/sgi-gru/gruhandles.c: sync_core();
drivers/misc/sgi-gru/gruhandles.c: sync_core();
drivers/misc/sgi-gru/grukservices.c: sync_core();
But none of that seems like it cares about an extra few cycles, and why
complicate matters with another sync_core variant and all that.

diff --git a/arch/x86/include/asm/sync_core.h b/arch/x86/include/asm/sync_core.h
index 96bda43538ee..ef4508a03800 100644
--- a/arch/x86/include/asm/sync_core.h
+++ b/arch/x86/include/asm/sync_core.h
@@ -7,86 +7,7 @@
#include <asm/cpufeature.h>
#include <asm/special_insns.h>
-#ifdef CONFIG_X86_32
-static __always_inline void iret_to_self(void)
-{
- asm volatile (
- "pushfl\n\t"
- "pushl %%cs\n\t"
- "pushl $1f\n\t"
- "iret\n\t"
- "1:"
- : ASM_CALL_CONSTRAINT : : "memory");
-}
-#else
-static __always_inline void iret_to_self(void)
-{
- unsigned int tmp;
-
- asm volatile (
- "mov %%ss, %0\n\t"
- "pushq %q0\n\t"
- "pushq %%rsp\n\t"
- "addq $8, (%%rsp)\n\t"
- "pushfq\n\t"
- "mov %%cs, %0\n\t"
- "pushq %q0\n\t"
- "pushq $1f\n\t"
- "iretq\n\t"
- "1:"
- : "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory");
-}
-#endif /* CONFIG_X86_32 */
-
-/*
- * This function forces the icache and prefetched instruction stream to
- * catch up with reality in two very specific cases:
- *
- * a) Text was modified using one virtual address and is about to be executed
- * from the same physical page at a different virtual address.
- *
- * b) Text was modified on a different CPU, may subsequently be
- * executed on this CPU, and you want to make sure the new version
- * gets executed. This generally means you're calling this in an IPI.
- *
- * If you're calling this for a different reason, you're probably doing
- * it wrong.
- *
- * Like all of Linux's memory ordering operations, this is a
- * compiler barrier as well.
- */
-static __always_inline void sync_core(void)
-{
- /*
- * The SERIALIZE instruction is the most straightforward way to
- * do this, but it is not universally available.
- */
- if (static_cpu_has(X86_FEATURE_SERIALIZE)) {
- serialize();
- return;
- }
-
- /*
- * For all other processors, there are quite a few ways to do this.
- * IRET-to-self is nice because it works on every CPU, at any CPL
- * (so it's compatible with paravirtualization), and it never exits
- * to a hypervisor. The only downsides are that it's a bit slow
- * (it seems to be a bit more than 2x slower than the fastest
- * options) and that it unmasks NMIs. The "push %cs" is needed,
- * because in paravirtual environments __KERNEL_CS may not be a
- * valid CS value when we do IRET directly.
- *
- * In case NMI unmasking or performance ever becomes a problem,
- * the next best option appears to be MOV-to-CR2 and an
- * unconditional jump. That sequence also works on all CPUs,
- * but it will fault at CPL3 (i.e. Xen PV).
- *
- * CPUID is the conventional way, but it's nasty: it doesn't
- * exist on some 486-like CPUs, and it usually exits to a
- * hypervisor.
- */
- iret_to_self();
-}
+extern void sync_core(void);
/*
* Ensure that a core serializing instruction is issued before returning
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index e377b06e70e3..2a5daae3626b 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -2687,6 +2687,87 @@ void *text_poke_set(void *addr, int c, size_t len)
return addr;
}
+#ifdef CONFIG_X86_32
+static __always_inline void iret_to_self(void)
+{
+ asm volatile (
+ "pushfl\n\t"
+ "pushl %%cs\n\t"
+ "pushl $1f\n\t"
+ "iret\n\t"
+ "1:"
+ : ASM_CALL_CONSTRAINT : : "memory");
+}
+#else
+static __always_inline void iret_to_self(void)
+{
+ unsigned int tmp;
+
+ asm volatile (
+ "mov %%ss, %0\n\t"
+ "pushq %q0\n\t"
+ "pushq %%rsp\n\t"
+ "addq $8, (%%rsp)\n\t"
+ "pushfq\n\t"
+ "mov %%cs, %0\n\t"
+ "pushq %q0\n\t"
+ "pushq $1f\n\t"
+ "iretq\n\t"
+ "1:"
+ : "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory");
+}
+#endif /* CONFIG_X86_32 */
+
+/*
+ * This function forces the icache and prefetched instruction stream to
+ * catch up with reality in two very specific cases:
+ *
+ * a) Text was modified using one virtual address and is about to be executed
+ * from the same physical page at a different virtual address.
+ *
+ * b) Text was modified on a different CPU, may subsequently be
+ * executed on this CPU, and you want to make sure the new version
+ * gets executed. This generally means you're calling this in an IPI.
+ *
+ * If you're calling this for a different reason, you're probably doing
+ * it wrong.
+ *
+ * Like all of Linux's memory ordering operations, this is a
+ * compiler barrier as well.
+ */
+noinstr void sync_core(void)
+{
+ /*
+ * The SERIALIZE instruction is the most straightforward way to
+ * do this, but it is not universally available.
+ */
+ if (static_cpu_has(X86_FEATURE_SERIALIZE)) {
+ serialize();
+ return;
+ }
+
+ /*
+ * For all other processors, there are quite a few ways to do this.
+ * IRET-to-self is nice because it works on every CPU, at any CPL
+ * (so it's compatible with paravirtualization), and it never exits
+ * to a hypervisor. The only downsides are that it's a bit slow
+ * (it seems to be a bit more than 2x slower than the fastest
+ * options) and that it unmasks NMIs. The "push %cs" is needed,
+ * because in paravirtual environments __KERNEL_CS may not be a
+ * valid CS value when we do IRET directly.
+ *
+ * In case NMI unmasking or performance ever becomes a problem,
+ * the next best option appears to be MOV-to-CR2 and an
+ * unconditional jump. That sequence also works on all CPUs,
+ * but it will fault at CPL3 (i.e. Xen PV).
+ *
+ * CPUID is the conventional way, but it's nasty: it doesn't
+ * exist on some 486-like CPUs, and it usually exits to a
+ * hypervisor.
+ */
+ iret_to_self();
+}
+
static void do_sync_core(void *info)
{
sync_core();
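
For context, do_sync_core() above is the IPI callback used when patched
text must be resynced on every CPU; a minimal usage sketch, assuming the
existing on_each_cpu() helper (the wrapper name below is illustrative
only):

static void sync_all_cpus_after_patch(void)
{
	/* Run sync_core() on every online CPU and wait for completion. */
	on_each_cpu(do_sync_core, NULL, 1);
}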