[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20171125032907.2241-3-namit@vmware.com>
Date: Fri, 24 Nov 2017 19:29:07 -0800
From: Nadav Amit <namit@...are.com>
To: <linux-kernel@...r.kernel.org>, <linux-edac@...r.kernel.org>
CC: <nadav.amit@...il.com>, Nadav Amit <namit@...are.com>,
Andy Lutomirski <luto@...nel.org>,
Thomas Gleixner <tglx@...utronix.de>,
Ingo Molnar <mingo@...hat.com>,
"H. Peter Anvin" <hpa@...or.com>, <x86@...nel.org>,
"Tony Luck" <tony.luck@...el.com>, Borislav Petkov <bp@...en8.de>,
Paolo Bonzini <pbonzini@...hat.com>,
Radim Krčmář <rkrcmar@...hat.com>
Subject: [PATCH v2 2/2] x86: disable IRQs before changing CR4
CR4 changes need to be performed while IRQs are disabled in order to
update the CR4 shadow and the actual register atomically. Actually, they
are needed regardless of CR4 shadowing, since CR4 are performed in a
read-modify-write manner.
Currently, however, this is not the case, as can be experienced by
adding warnings when CR4 updates are performed and interrupts are
enabled. It also appears that CR4 changes with enabled interrupts can be
triggered by the user (PR_SET_TSC).
If CR4 updates are done while interrupts are enabled, an interrupt can
be delivered between the CR4 read and the corresponding write of the
modified value to CR4. If the interrupt handler changes CR4, the write
would ignore the modified value.
It is not clear there are currently interrupt handlers that modify CR4
and do not restore immediately the original value, but there is an
interrupt handler that changes CR4, when global PTEs are invalidated.
Moreover, a recent patch considered doing change CR4 inside the
interrupt handler, emphasizing that the current scheme is error-prone.
Prevent the issue by: adding a debug warning if CR4 is updated with
enabled interrupts; changing CR4 manipulation function names to reflect
the fact they need disabled IRQs and fix the callers.
Note that in some cases, e.g., kvm_cpu_vmxon(), it appears that saving
and restoring the IRQs could have been spared. Yet, since these calls do
not appear to be on the hot path, save and restore the IRQs to be on the
safe side.
Cc: Andy Lutomirski <luto@...nel.org>
Cc: Thomas Gleixner <tglx@...utronix.de>
Cc: Ingo Molnar <mingo@...hat.com>
Cc: "H. Peter Anvin" <hpa@...or.com>
Cc: x86@...nel.org
Cc: Tony Luck <tony.luck@...el.com>
Cc: Borislav Petkov <bp@...en8.de>
Cc: Paolo Bonzini <pbonzini@...hat.com>
Cc: "Radim Krčmář" <rkrcmar@...hat.com>
Signed-off-by: Nadav Amit <namit@...are.com>
---
arch/x86/include/asm/mmu_context.h | 4 ++--
arch/x86/include/asm/tlbflush.h | 16 +++++++++++----
arch/x86/include/asm/virtext.h | 2 +-
arch/x86/kernel/cpu/common.c | 38 ++++++++++++++++++++++++++----------
arch/x86/kernel/cpu/mcheck/mce.c | 5 ++++-
arch/x86/kernel/cpu/mcheck/p5.c | 6 +++++-
arch/x86/kernel/cpu/mcheck/winchip.c | 5 ++++-
arch/x86/kernel/fpu/init.c | 2 +-
arch/x86/kernel/fpu/xstate.c | 4 ++--
arch/x86/kernel/process.c | 20 ++++++++++++++-----
arch/x86/kernel/reboot.c | 2 +-
arch/x86/kvm/vmx.c | 13 ++++++++++--
arch/x86/mm/init.c | 6 +++++-
13 files changed, 91 insertions(+), 32 deletions(-)
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 6699fc441644..637cbda77eda 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -30,9 +30,9 @@ static inline void load_mm_cr4(struct mm_struct *mm)
{
if (static_key_false(&rdpmc_always_available) ||
atomic_read(&mm->context.perf_rdpmc_allowed))
- cr4_set_bits(X86_CR4_PCE);
+ cr4_set_bits_irqs_off(X86_CR4_PCE);
else
- cr4_clear_bits(X86_CR4_PCE);
+ cr4_clear_bits_irqs_off(X86_CR4_PCE);
}
#else
static inline void load_mm_cr4(struct mm_struct *mm) {}
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index e736f7f0ba92..6b94dcf8c500 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -175,12 +175,15 @@ static inline void cr4_init_shadow(void)
static inline void __cr4_set(unsigned long cr4)
{
+#ifdef CONFIG_LOCKDEP
+ WARN_ON(!irqs_disabled());
+#endif
this_cpu_write(cpu_tlbstate.cr4, cr4);
__write_cr4(cr4);
}
/* Set in this cpu's CR4. */
-static inline void cr4_set_bits(unsigned long mask)
+static inline void cr4_set_bits_irqs_off(unsigned long mask)
{
unsigned long cr4;
@@ -190,7 +193,7 @@ static inline void cr4_set_bits(unsigned long mask)
}
/* Clear in this cpu's CR4. */
-static inline void cr4_clear_bits(unsigned long mask)
+static inline void cr4_clear_bits_irqs_off(unsigned long mask)
{
unsigned long cr4;
@@ -199,7 +202,7 @@ static inline void cr4_clear_bits(unsigned long mask)
__cr4_set(cr4 & ~mask);
}
-static inline void cr4_toggle_bits(unsigned long mask)
+static inline void cr4_toggle_bits_irqs_off(unsigned long mask)
{
unsigned long cr4;
@@ -224,10 +227,15 @@ extern u32 *trampoline_cr4_features;
static inline void cr4_set_bits_and_update_boot(unsigned long mask)
{
+ unsigned long flags;
+
mmu_cr4_features |= mask;
if (trampoline_cr4_features)
*trampoline_cr4_features = mmu_cr4_features;
- cr4_set_bits(mask);
+
+ local_irq_save(flags);
+ cr4_set_bits_irqs_off(mask);
+ local_irq_restore(flags);
}
extern void initialize_tlbstate_and_flush(void);
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index 0116b2ee9e64..b403ca417b7d 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -41,7 +41,7 @@ static inline int cpu_has_vmx(void)
static inline void cpu_vmxoff(void)
{
asm volatile (ASM_VMX_VMXOFF : : : "cc");
- cr4_clear_bits(X86_CR4_VMXE);
+ cr4_clear_bits_irqs_off(X86_CR4_VMXE);
}
static inline int cpu_vmx_enabled(void)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c9176bae7fd8..f31890787d01 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -302,8 +302,14 @@ __setup("nosmep", setup_disable_smep);
static __always_inline void setup_smep(struct cpuinfo_x86 *c)
{
- if (cpu_has(c, X86_FEATURE_SMEP))
- cr4_set_bits(X86_CR4_SMEP);
+ unsigned long flags;
+
+ if (!cpu_has(c, X86_FEATURE_SMEP))
+ return;
+
+ local_irq_save(flags);
+ cr4_set_bits_irqs_off(X86_CR4_SMEP);
+ local_irq_restore(flags);
}
static __init int setup_disable_smap(char *arg)
@@ -320,13 +326,16 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
/* This should have been cleared long ago */
BUG_ON(eflags & X86_EFLAGS_AC);
- if (cpu_has(c, X86_FEATURE_SMAP)) {
+ if (!cpu_has(c, X86_FEATURE_SMAP))
+ return;
+
+ local_irq_save(eflags);
#ifdef CONFIG_X86_SMAP
- cr4_set_bits(X86_CR4_SMAP);
+ cr4_set_bits_irqs_off(X86_CR4_SMAP);
#else
- cr4_clear_bits(X86_CR4_SMAP);
+ cr4_clear_bits_irqs_off(X86_CR4_SMAP);
#endif
- }
+ local_irq_restore(eflags);
}
/*
@@ -336,6 +345,8 @@ static bool pku_disabled;
static __always_inline void setup_pku(struct cpuinfo_x86 *c)
{
+ unsigned long flags;
+
/* check the boot processor, plus compile options for PKU: */
if (!cpu_feature_enabled(X86_FEATURE_PKU))
return;
@@ -345,7 +356,10 @@ static __always_inline void setup_pku(struct cpuinfo_x86 *c)
if (pku_disabled)
return;
- cr4_set_bits(X86_CR4_PKE);
+ local_irq_save(flags);
+ cr4_set_bits_irqs_off(X86_CR4_PKE);
+ local_irq_restore(flags);
+
/*
* Seting X86_CR4_PKE will cause the X86_FEATURE_OSPKE
* cpuid bit to be set. We need to ensure that we
@@ -1147,7 +1161,10 @@ static void identify_cpu(struct cpuinfo_x86 *c)
/* Disable the PN if appropriate */
squash_the_stupid_serial_number(c);
- /* Set up SMEP/SMAP */
+ /*
+ * Set up SMEP/SMAP. Disable interrupts to prevent triggering a warning
+ * as CR4 changes must be done with disabled interrupts.
+ */
setup_smep(c);
setup_smap(c);
@@ -1520,7 +1537,7 @@ void cpu_init(void)
pr_debug("Initializing CPU#%d\n", cpu);
- cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
+ cr4_clear_bits_irqs_off(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
/*
* Initialize the per-CPU GDT with the boot GDT,
@@ -1613,7 +1630,8 @@ void cpu_init(void)
if (cpu_feature_enabled(X86_FEATURE_VME) ||
boot_cpu_has(X86_FEATURE_TSC) ||
boot_cpu_has(X86_FEATURE_DE))
- cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
+ cr4_clear_bits_irqs_off(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|
+ X86_CR4_DE);
load_current_idt();
switch_to_new_gdt(cpu);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 3b413065c613..1aac196eb145 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1508,6 +1508,7 @@ static void __mcheck_cpu_init_generic(void)
{
enum mcp_flags m_fl = 0;
mce_banks_t all_banks;
+ unsigned long flags;
u64 cap;
if (!mca_cfg.bootlog)
@@ -1519,7 +1520,9 @@ static void __mcheck_cpu_init_generic(void)
bitmap_fill(all_banks, MAX_NR_BANKS);
machine_check_poll(MCP_UC | m_fl, &all_banks);
- cr4_set_bits(X86_CR4_MCE);
+ local_irq_save(flags);
+ cr4_set_bits_irqs_off(X86_CR4_MCE);
+ local_irq_restore(flags);
rdmsrl(MSR_IA32_MCG_CAP, cap);
if (cap & MCG_CTL_P)
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index 5cddf831720f..e4bb9573cfe5 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -43,6 +43,7 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code)
/* Set up machine check reporting for processors with Intel style MCE: */
void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
{
+ unsigned long flags;
u32 l, h;
/* Default P5 to off as its often misconnected: */
@@ -63,7 +64,10 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
pr_info("Intel old style machine check architecture supported.\n");
/* Enable MCE: */
- cr4_set_bits(X86_CR4_MCE);
+ local_irq_save(flags);
+ cr4_set_bits_irqs_off(X86_CR4_MCE);
+ local_irq_restore(flags);
+
pr_info("Intel old style machine check reporting enabled on CPU#%d.\n",
smp_processor_id());
}
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
index 3b45b270a865..72213d75c865 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -27,6 +27,7 @@ static void winchip_machine_check(struct pt_regs *regs, long error_code)
/* Set up machine check reporting on the Winchip C6 series */
void winchip_mcheck_init(struct cpuinfo_x86 *c)
{
+ unsigned long flags;
u32 lo, hi;
machine_check_vector = winchip_machine_check;
@@ -38,7 +39,9 @@ void winchip_mcheck_init(struct cpuinfo_x86 *c)
lo &= ~(1<<4); /* Enable MCE */
wrmsr(MSR_IDT_FCR1, lo, hi);
- cr4_set_bits(X86_CR4_MCE);
+ local_irq_save(flags);
+ cr4_set_bits_irqs_off(X86_CR4_MCE);
+ local_irq_restore(flags);
pr_info("Winchip machine check reporting enabled on CPU#0.\n");
}
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 7affb7e3d9a5..db57b217e123 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -23,7 +23,7 @@ static void fpu__init_cpu_generic(void)
if (boot_cpu_has(X86_FEATURE_XMM))
cr4_mask |= X86_CR4_OSXMMEXCPT;
if (cr4_mask)
- cr4_set_bits(cr4_mask);
+ cr4_set_bits_irqs_off(cr4_mask);
cr0 = read_cr0();
cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index f1d5476c9022..9d3c7a1a4ce5 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -237,7 +237,7 @@ void fpu__init_cpu_xstate(void)
xfeatures_mask &= ~XFEATURE_MASK_SUPERVISOR;
- cr4_set_bits(X86_CR4_OSXSAVE);
+ cr4_set_bits_irqs_off(X86_CR4_OSXSAVE);
xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);
}
@@ -713,7 +713,7 @@ static int init_xstate_size(void)
static void fpu__init_disable_system_xstate(void)
{
xfeatures_mask = 0;
- cr4_clear_bits(X86_CR4_OSXSAVE);
+ cr4_clear_bits_irqs_off(X86_CR4_OSXSAVE);
fpu__xstate_clear_all_cpu_caps();
}
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c67685337c5a..412265fe14df 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -128,25 +128,35 @@ void flush_thread(void)
void disable_TSC(void)
{
+ unsigned long flags;
+
preempt_disable();
- if (!test_and_set_thread_flag(TIF_NOTSC))
+ if (!test_and_set_thread_flag(TIF_NOTSC)) {
/*
* Must flip the CPU state synchronously with
* TIF_NOTSC in the current running context.
*/
- cr4_set_bits(X86_CR4_TSD);
+ local_irq_save(flags);
+ cr4_set_bits_irqs_off(X86_CR4_TSD);
+ local_irq_restore(flags);
+ }
preempt_enable();
}
static void enable_TSC(void)
{
+ unsigned long flags;
+
preempt_disable();
- if (test_and_clear_thread_flag(TIF_NOTSC))
+ if (test_and_clear_thread_flag(TIF_NOTSC)) {
/*
* Must flip the CPU state synchronously with
* TIF_NOTSC in the current running context.
*/
- cr4_clear_bits(X86_CR4_TSD);
+ local_irq_save(flags);
+ cr4_clear_bits_irqs_off(X86_CR4_TSD);
+ local_irq_restore(flags);
+ }
preempt_enable();
}
@@ -293,7 +303,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
}
if ((tifp ^ tifn) & _TIF_NOTSC)
- cr4_toggle_bits(X86_CR4_TSD);
+ cr4_toggle_bits_irqs_off(X86_CR4_TSD);
if ((tifp ^ tifn) & _TIF_NOCPUID)
set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 2126b9d27c34..86ad70c02607 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -109,7 +109,7 @@ void __noreturn machine_real_restart(unsigned int type)
/* Exiting long mode will fail if CR4.PCIDE is set. */
if (static_cpu_has(X86_FEATURE_PCID))
- cr4_clear_bits(X86_CR4_PCIDE);
+ cr4_clear_bits_irqs_off(X86_CR4_PCIDE);
#endif
/* Jump to the identity-mapped low memory code */
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a6f4f095f8f4..a0b387dfbf5c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3500,7 +3500,12 @@ static __init int vmx_disabled_by_bios(void)
static void kvm_cpu_vmxon(u64 addr)
{
- cr4_set_bits(X86_CR4_VMXE);
+ unsigned long flags;
+
+ local_irq_save(flags);
+ cr4_set_bits_irqs_off(X86_CR4_VMXE);
+ local_irq_restore(flags);
+
intel_pt_handle_vmx(1);
asm volatile (ASM_VMX_VMXON_RAX
@@ -3565,10 +3570,14 @@ static void vmclear_local_loaded_vmcss(void)
*/
static void kvm_cpu_vmxoff(void)
{
+ unsigned long flags;
+
asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
intel_pt_handle_vmx(0);
- cr4_clear_bits(X86_CR4_VMXE);
+ local_irq_save(flags);
+ cr4_clear_bits_irqs_off(X86_CR4_VMXE);
+ local_irq_restore(flags);
}
static void hardware_disable(void)
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index af5c1ed21d43..ddd248acab8e 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -199,6 +199,8 @@ static void setup_pcid(void)
#ifdef CONFIG_X86_64
if (boot_cpu_has(X86_FEATURE_PCID)) {
if (boot_cpu_has(X86_FEATURE_PGE)) {
+ unsigned long flags;
+
/*
* This can't be cr4_set_bits_and_update_boot() --
* the trampoline code can't handle CR4.PCIDE and
@@ -210,7 +212,9 @@ static void setup_pcid(void)
* Instead, we brute-force it and set CR4.PCIDE
* manually in start_secondary().
*/
- cr4_set_bits(X86_CR4_PCIDE);
+ local_irq_save(flags);
+ cr4_set_bits_irqs_off(X86_CR4_PCIDE);
+ local_irq_restore(flags);
} else {
/*
* flush_tlb_all(), as currently implemented, won't
--
2.14.1
Powered by blists - more mailing lists