[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CALCETrVZU9w2d+q8qzX6i+SyE1+Wp0XZCUqzsk0F-SD3Wq5noQ@mail.gmail.com>
Date:   Sat, 13 Jan 2018 12:52:01 -0800
From:   Andy Lutomirski <luto@...nel.org>
To:     Thomas Gleixner <tglx@...utronix.de>
Cc:     Andy Lutomirski <luto@...nel.org>, Willy Tarreau <w@....eu>,
        Peter Zijlstra <peterz@...radead.org>,
        Borislav Petkov <bp@...en8.de>,
        Laura Abbott <labbott@...hat.com>, X86 ML <x86@...nel.org>,
        Linux Kernel Mailing List <linux-kernel@...r.kernel.org>,
        stable <stable@...r.kernel.org>
Subject: Re: Yet another KPTI regression with 4.14.x series in a VM
On Sat, Jan 13, 2018 at 12:45 PM, Thomas Gleixner <tglx@...utronix.de> wrote:
> On Sat, 13 Jan 2018, Andy Lutomirski wrote:
>> Trying to inventory this stuff scattered all over the place:
>>
>> #define PTI_PGTABLE_SWITCH_BIT    PAGE_SHIFT
>> #define PTI_SWITCH_PGTABLES_MASK    (1<<PAGE_SHIFT)
>> # define X86_CR3_PTI_SWITCH_BIT    11
>> #define PTI_SWITCH_MASK
>> (PTI_SWITCH_PGTABLES_MASK|(1<<X86_CR3_PTI_SWITCH_BIT))
>>
>> Blech.  I wouldn't be terribly surprised if I missed a few as well.  How about:
>>
>> PTI_USER_PGTABLE_BIT = PAGE_SHIFT
>> PTI_USER_PGTABLE_MASK = 1 << PTI_USER_PGTABLE_BIT
>> PTI_USER_PCID_BIT = 11
>> PTI_USER_PCID_MASK = 1 << PTI_USER_PCID_BIT
>> PTI_USER_PGTABLE_AND_PCID_MASK = PTI_USER_PCID_MASK | PTI_USER_PGTABLE_MASK
>>
>> This naming would make the apparently buggy code look fishy, as it
>> should.  I will give this a shot some time soon if no one beats me to
>> it.
>
> Well, the thing we tripped over is that we trusted the SDM that bit 11 is
> ignored. Seems its not and the AMD APM says that reserved bit should be
> cleared. Next time I surely stare into both....
>
> So something like the below should make it clear. I've not done the
> alternatives thing yet...
>
Looks generally sane to me.
>
> 8<-------------------
> --- a/arch/x86/entry/calling.h
> +++ b/arch/x86/entry/calling.h
> @@ -198,8 +198,11 @@ For 32-bit we have the following convent
>   * PAGE_TABLE_ISOLATION PGDs are 8k.  Flip bit 12 to switch between the two
>   * halves:
>   */
> -#define PTI_SWITCH_PGTABLES_MASK       (1<<PAGE_SHIFT)
> -#define PTI_SWITCH_MASK                (PTI_SWITCH_PGTABLES_MASK|(1<<X86_CR3_PTI_SWITCH_BIT))
> +#define PTI_USER_PGTABLE_BIT           PAGE_SHIFT
> +#define PTI_USER_PGTABLE_MASK          (1 << PTI_USER_PGTABLE_BIT)
> +#define PTI_USER_PCID_BIT              X86_CR3_PTI_PCID_USER_BIT
> +#define PTI_USER_PCID_MASK             (1 << PTI_USER_PCID_BIT)
> +#define PTI_USER_PGTABLE_AND_PCID_MASK  (PTI_USER_PCID_MASK | PTI_USER_PGTABLE_MASK)
>
>  .macro SET_NOFLUSH_BIT reg:req
>         bts     $X86_CR3_PCID_NOFLUSH_BIT, \reg
> @@ -208,7 +211,7 @@ For 32-bit we have the following convent
>  .macro ADJUST_KERNEL_CR3 reg:req
>         ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID
>         /* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */
> -       andq    $(~PTI_SWITCH_MASK), \reg
> +       andq    $(~PTI_USER_PGTABLE_AND_PCID_MASK), \reg
>  .endm
>
>  .macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
> @@ -239,15 +242,18 @@ For 32-bit we have the following convent
>         /* Flush needed, clear the bit */
>         btr     \scratch_reg, THIS_CPU_user_pcid_flush_mask
>         movq    \scratch_reg2, \scratch_reg
> -       jmp     .Lwrcr3_\@
> +       jmp     .Lwrcr3_pcid_\@
>
>  .Lnoflush_\@:
>         movq    \scratch_reg2, \scratch_reg
>         SET_NOFLUSH_BIT \scratch_reg
>
> +.Lwcr3_pcid_\@:
> +       orq     $(PTI_USER_PCID_MASK), \scratch_reg
> +
>  .Lwrcr3_\@:
>         /* Flip the PGD and ASID to the user version */
> -       orq     $(PTI_SWITCH_MASK), \scratch_reg
> +       orq     $(PTI_USER_PGTABLE_MASK), \scratch_reg
>         mov     \scratch_reg, %cr3
>  .Lend_\@:
>  .endm
> @@ -272,7 +278,7 @@ For 32-bit we have the following convent
>          *
>          * That indicates a kernel CR3 value, not a user CR3.
>          */
> -       testq   $(PTI_SWITCH_MASK), \scratch_reg
> +       testq   $(PTI_USER_PGTABLE_MASK), \scratch_reg
>         jz      .Ldone_\@
>
>         ADJUST_KERNEL_CR3 \scratch_reg
> @@ -290,7 +296,7 @@ For 32-bit we have the following convent
>          * KERNEL pages can always resume with NOFLUSH as we do
>          * explicit flushes.
>          */
> -       bt      $X86_CR3_PTI_SWITCH_BIT, \save_reg
> +       bt      $PTI_USER_PGTABLE_BIT, \save_reg
>         jnc     .Lnoflush_\@
>
>         /*
> --- a/arch/x86/include/asm/processor-flags.h
> +++ b/arch/x86/include/asm/processor-flags.h
> @@ -40,7 +40,7 @@
>  #define CR3_NOFLUSH    BIT_ULL(63)
>
>  #ifdef CONFIG_PAGE_TABLE_ISOLATION
> -# define X86_CR3_PTI_SWITCH_BIT        11
> +# define X86_CR3_PTI_PCID_USER_BIT     11
>  #endif
>
>  #else
> --- a/arch/x86/include/asm/tlbflush.h
> +++ b/arch/x86/include/asm/tlbflush.h
> @@ -81,13 +81,13 @@ static inline u16 kern_pcid(u16 asid)
>          * Make sure that the dynamic ASID space does not confict with the
>          * bit we are using to switch between user and kernel ASIDs.
>          */
> -       BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_SWITCH_BIT));
> +       BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_PCID_USER_BIT));
>
>         /*
>          * The ASID being passed in here should have respected the
>          * MAX_ASID_AVAILABLE and thus never have the switch bit set.
>          */
> -       VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_SWITCH_BIT));
> +       VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_PCID_USER_BIT));
>  #endif
>         /*
>          * The dynamically-assigned ASIDs that get passed in are small
> @@ -112,7 +112,7 @@ static inline u16 user_pcid(u16 asid)
>  {
>         u16 ret = kern_pcid(asid);
>  #ifdef CONFIG_PAGE_TABLE_ISOLATION
> -       ret |= 1 << X86_CR3_PTI_SWITCH_BIT;
> +       ret |= 1 << X86_CR3_PTI_PCID_USER_BIT;
>  #endif
>         return ret;
>  }
>
>
>
Powered by blists - more mailing lists
 
