lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Date:   Wed, 15 Nov 2017 19:44:19 -0800
From:   Andy Lutomirski <luto@...nel.org>
To:     Nadav Amit <nadav.amit@...il.com>
Cc:     "H. Peter Anvin" <hpa@...or.com>,
        Dave Hansen <dave.hansen@...ux.intel.com>,
        Thomas Gleixner <tglx@...utronix.de>,
        Borislav Petkov <bp@...en8.de>,
        "the arch/x86 maintainers" <x86@...nel.org>,
        Andy Lutomirski <luto@...nel.org>,
        LKML <linux-kernel@...r.kernel.org>
Subject: Re: x86: CR4 update when IRQs are enabled

I think Nadav is right here.  IMO the right fix is to rename the
functions cr4_set_bits_irqs_off() etc, add a warning (if lockdep is
on) and fix the callers.

On Wed, Nov 15, 2017 at 4:34 PM, Nadav Amit <nadav.amit@...il.com> wrote:
> hpa@...or.com wrote:
>
>> On November 15, 2017 3:31:50 PM PST, Nadav Amit <nadav.amit@...il.com> wrote:
>>> Ping?
>>>
>>> Nadav Amit <nadav.amit@...il.com> wrote:
>>>
>>>> CC’ing more people, and adding a patch to clarify...
>>>>
>>>> Nadav Amit <nadav.amit@...il.com> wrote:
>>>>
>>>>> I am puzzled by the comment in tlb_state.cr4 , which says:
>>>>>
>>>>>      /*
>>>>>       * Access to this CR4 shadow and to H/W CR4 is protected by
>>>>>       * disabling interrupts when modifying either one.
>>>>>       */
>>>>>
>>>>> This does not seem to be true and adding a warning to CR4 writes
>>> when
>>>>> !irqs_disabled() immediately fires in identify_cpu() and
>>>>> __mcheck_cpu_init_generic(). While these two are called during boot,
>>> I think
>>>>> there other CR4 changes with enabled IRQs, for example, PR_SET_TSC.
>>>>>
>>>>> So my question(s): Is the comment correct? Is the current behavior
>>> correct?
>>>> So here is what I have in mind. I am not sure whether
>>> CONFIG_DEBUG_PREEMPT is
>>>> the right #ifdef. Let me know what you think.
>>>>
>>>> -- >8 --
>>>>
>>>> Subject: [PATCH] x86: disable IRQs before changing CR4
>>>>
>>>> CR4 changes need to be performed while IRQs are disabled in order to
>>>> update the CR4 shadow and the actual register atomically.
>>>>
>>>> Signed-off-by: Nadav Amit <namit@...are.com>
>>>> ---
>>>> arch/x86/include/asm/tlbflush.h      | 18 ++++++++++++------
>>>> arch/x86/kernel/cpu/common.c         | 13 ++++++++++++-
>>>> arch/x86/kernel/cpu/mcheck/mce.c     |  3 +++
>>>> arch/x86/kernel/cpu/mcheck/p5.c      |  4 ++++
>>>> arch/x86/kernel/cpu/mcheck/winchip.c |  3 +++
>>>> arch/x86/kernel/process.c            | 14 ++++++++++++--
>>>> 6 files changed, 46 insertions(+), 9 deletions(-)
>>>>
>>>> diff --git a/arch/x86/include/asm/tlbflush.h
>>> b/arch/x86/include/asm/tlbflush.h
>>>> index 50ea3482e1d1..bc70dd1cc7c6 100644
>>>> --- a/arch/x86/include/asm/tlbflush.h
>>>> +++ b/arch/x86/include/asm/tlbflush.h
>>>> @@ -89,6 +89,15 @@ static inline void cr4_init_shadow(void)
>>>>     this_cpu_write(cpu_tlbstate.cr4, __read_cr4());
>>>> }
>>>>
>>>> +static inline void update_cr4(unsigned long cr4)
>>>> +{
>>>> +#ifdef CONFIG_DEBUG_PREEMPT
>>>> +   WARN_ON_ONCE(!irqs_disabled());
>>>> +#endif
>>>> +   this_cpu_write(cpu_tlbstate.cr4, cr4);
>>>> +   __write_cr4(cr4);
>>>> +}
>>>> +
>>>> /* Set in this cpu's CR4. */
>>>> static inline void cr4_set_bits(unsigned long mask)
>>>> {
>>>> @@ -97,8 +106,7 @@ static inline void cr4_set_bits(unsigned long
>>> mask)
>>>> cr4 = this_cpu_read(cpu_tlbstate.cr4);
>>>>     if ((cr4 | mask) != cr4) {
>>>>             cr4 |= mask;
>>>> -           this_cpu_write(cpu_tlbstate.cr4, cr4);
>>>> -           __write_cr4(cr4);
>>>> +           update_cr4(cr4);
>>>>     }
>>>> }
>>>>
>>>> @@ -110,8 +118,7 @@ static inline void cr4_clear_bits(unsigned long
>>> mask)
>>>> cr4 = this_cpu_read(cpu_tlbstate.cr4);
>>>>     if ((cr4 & ~mask) != cr4) {
>>>>             cr4 &= ~mask;
>>>> -           this_cpu_write(cpu_tlbstate.cr4, cr4);
>>>> -           __write_cr4(cr4);
>>>> +           update_cr4(cr4);
>>>>     }
>>>> }
>>>>
>>>> @@ -121,8 +128,7 @@ static inline void cr4_toggle_bits(unsigned long
>>> mask)
>>>> cr4 = this_cpu_read(cpu_tlbstate.cr4);
>>>>     cr4 ^= mask;
>>>> -   this_cpu_write(cpu_tlbstate.cr4, cr4);
>>>> -   __write_cr4(cr4);
>>>> +   update_cr4(cr4);
>>>> }
>>>>
>>>> /* Read the CR4 shadow. */
>>>> diff --git a/arch/x86/kernel/cpu/common.c
>>> b/arch/x86/kernel/cpu/common.c
>>>> index c8b39870f33e..82e6b41fd5e9 100644
>>>> --- a/arch/x86/kernel/cpu/common.c
>>>> +++ b/arch/x86/kernel/cpu/common.c
>>>> @@ -318,6 +318,8 @@ static bool pku_disabled;
>>>>
>>>> static __always_inline void setup_pku(struct cpuinfo_x86 *c)
>>>> {
>>>> +   unsigned long flags;
>>>> +
>>>>     /* check the boot processor, plus compile options for PKU: */
>>>>     if (!cpu_feature_enabled(X86_FEATURE_PKU))
>>>>             return;
>>>> @@ -327,7 +329,10 @@ static __always_inline void setup_pku(struct
>>> cpuinfo_x86 *c)
>>>> if (pku_disabled)
>>>>             return;
>>>>
>>>> +   local_irq_save(flags);
>>>>     cr4_set_bits(X86_CR4_PKE);
>>>> +   local_irq_restore(flags);
>>>> +
>>>>     /*
>>>>      * Seting X86_CR4_PKE will cause the X86_FEATURE_OSPKE
>>>>      * cpuid bit to be set.  We need to ensure that we
>>>> @@ -1069,6 +1074,7 @@ static void validate_apic_and_package_id(struct
>>> cpuinfo_x86 *c)
>>>> */
>>>> static void identify_cpu(struct cpuinfo_x86 *c)
>>>> {
>>>> +   unsigned long flags;
>>>>     int i;
>>>>
>>>>     c->loops_per_jiffy = loops_per_jiffy;
>>>> @@ -1121,9 +1127,14 @@ static void identify_cpu(struct cpuinfo_x86
>>> *c)
>>>> /* Disable the PN if appropriate */
>>>>     squash_the_stupid_serial_number(c);
>>>>
>>>> -   /* Set up SMEP/SMAP */
>>>> +   /*
>>>> +    * Set up SMEP/SMAP. Disable interrupts to prevent triggering a
>>> warning
>>>> +    * as CR4 changes must be done with disabled interrupts.
>>>> +    */
>>>> +   local_irq_save(flags);
>>>>     setup_smep(c);
>>>>     setup_smap(c);
>>>> +   local_irq_restore(flags);
>>>>
>>>>     /*
>>>>      * The vendor-specific functions might have changed features.
>>>> diff --git a/arch/x86/kernel/cpu/mcheck/mce.c
>>> b/arch/x86/kernel/cpu/mcheck/mce.c
>>>> index 3b413065c613..497c07e33c25 100644
>>>> --- a/arch/x86/kernel/cpu/mcheck/mce.c
>>>> +++ b/arch/x86/kernel/cpu/mcheck/mce.c
>>>> @@ -1508,6 +1508,7 @@ static void __mcheck_cpu_init_generic(void)
>>>> {
>>>>     enum mcp_flags m_fl = 0;
>>>>     mce_banks_t all_banks;
>>>> +   unsigned long flags;
>>>>     u64 cap;
>>>>
>>>>     if (!mca_cfg.bootlog)
>>>> @@ -1519,7 +1520,9 @@ static void __mcheck_cpu_init_generic(void)
>>>>     bitmap_fill(all_banks, MAX_NR_BANKS);
>>>>     machine_check_poll(MCP_UC | m_fl, &all_banks);
>>>>
>>>> +   local_irq_save(flags);
>>>>     cr4_set_bits(X86_CR4_MCE);
>>>> +   local_irq_restore(flags);
>>>>
>>>>     rdmsrl(MSR_IA32_MCG_CAP, cap);
>>>>     if (cap & MCG_CTL_P)
>>>> diff --git a/arch/x86/kernel/cpu/mcheck/p5.c
>>> b/arch/x86/kernel/cpu/mcheck/p5.c
>>>> index 2a0717bf8033..d5d4963415e9 100644
>>>> --- a/arch/x86/kernel/cpu/mcheck/p5.c
>>>> +++ b/arch/x86/kernel/cpu/mcheck/p5.c
>>>> @@ -42,6 +42,7 @@ static void pentium_machine_check(struct pt_regs
>>> *regs, long error_code)
>>>> /* Set up machine check reporting for processors with Intel style
>>> MCE: */
>>>> void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
>>>> {
>>>> +   unsigned long flags;
>>>>     u32 l, h;
>>>>
>>>>     /* Default P5 to off as its often misconnected: */
>>>> @@ -62,7 +63,10 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
>>>>     pr_info("Intel old style machine check architecture supported.\n");
>>>>
>>>>     /* Enable MCE: */
>>>> +   local_irq_save(flags);
>>>>     cr4_set_bits(X86_CR4_MCE);
>>>> +   local_irq_restore(flags);
>>>> +
>>>>     pr_info("Intel old style machine check reporting enabled on
>>> CPU#%d.\n",
>>>> smp_processor_id());
>>>> }
>>>> diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c
>>> b/arch/x86/kernel/cpu/mcheck/winchip.c
>>>> index c6a722e1d011..6dd985e3849d 100644
>>>> --- a/arch/x86/kernel/cpu/mcheck/winchip.c
>>>> +++ b/arch/x86/kernel/cpu/mcheck/winchip.c
>>>> @@ -26,6 +26,7 @@ static void winchip_machine_check(struct pt_regs
>>> *regs, long error_code)
>>>> /* Set up machine check reporting on the Winchip C6 series */
>>>> void winchip_mcheck_init(struct cpuinfo_x86 *c)
>>>> {
>>>> +   unsigned long flags;
>>>>     u32 lo, hi;
>>>>
>>>>     machine_check_vector = winchip_machine_check;
>>>> @@ -37,7 +38,9 @@ void winchip_mcheck_init(struct cpuinfo_x86 *c)
>>>>     lo &= ~(1<<4);  /* Enable MCE */
>>>>     wrmsr(MSR_IDT_FCR1, lo, hi);
>>>>
>>>> +   local_irq_save(flags);
>>>>     cr4_set_bits(X86_CR4_MCE);
>>>> +   local_irq_restore(flags);
>>>>
>>>>     pr_info("Winchip machine check reporting enabled on CPU#0.\n");
>>>> }
>>>> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
>>>> index 3ca198080ea9..09e44e784745 100644
>>>> --- a/arch/x86/kernel/process.c
>>>> +++ b/arch/x86/kernel/process.c
>>>> @@ -127,25 +127,35 @@ void flush_thread(void)
>>>>
>>>> void disable_TSC(void)
>>>> {
>>>> +   unsigned long flags;
>>>> +
>>>>     preempt_disable();
>>>> -   if (!test_and_set_thread_flag(TIF_NOTSC))
>>>> +   if (!test_and_set_thread_flag(TIF_NOTSC)) {
>>>>             /*
>>>>              * Must flip the CPU state synchronously with
>>>>              * TIF_NOTSC in the current running context.
>>>>              */
>>>> +           local_irq_save(flags);
>>>>             cr4_set_bits(X86_CR4_TSD);
>>>> +           local_irq_restore(flags);
>>>> +   }
>>>>     preempt_enable();
>>>> }
>>>>
>>>> static void enable_TSC(void)
>>>> {
>>>> +   unsigned long flags;
>>>> +
>>>>     preempt_disable();
>>>> -   if (test_and_clear_thread_flag(TIF_NOTSC))
>>>> +   if (test_and_clear_thread_flag(TIF_NOTSC)) {
>>>>             /*
>>>>              * Must flip the CPU state synchronously with
>>>>              * TIF_NOTSC in the current running context.
>>>>              */
>>>> +           local_irq_save(flags);
>>>>             cr4_clear_bits(X86_CR4_TSD);
>>>> +           local_irq_restore(flags);
>>>> +   }
>>>>     preempt_enable();
>>>> }
>>
>> This is wrong on at least two levels colon first of all, this should not be wrapped around the abstracted operations but put inside them if relevant. Second, I suspect that this is not at all a requirement but rather that as long as the hardware register is written second, I think we should always be safe.
>
> Thanks for your reply.
>
> Can you please explain your suspicion? Here is a simple execution that may
> fail (assume cr4 is initially zero):
>
>   CPU0
>   ====
>   cr4_set_bits(1)
>     => cpu_tlbstate.cr4 = 1
>          => interrupt
>             cr4_set_bits(2)
>             cpu_tlbstate.cr4 = 3
>             __write_cr4(3)
>     => __write_cr4(1)           [ and should have been 3 ]
>
> Am I missing anything?
>
> Now, it may be a theoretical issue right now, since I did not find any
> change of CR4 bits that happens in an interrupt handler.
>
> Anyhow, if you want me to submit a fix, please let me know what other levels
> of wrongness you had in mind.
>
> Regards,
> Nadav
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ