Date:   Tue, 31 Mar 2020 23:56:33 +0000
From:   "Singh, Balbir" <sblbir@...zon.com>
To:     "tglx@...utronix.de" <tglx@...utronix.de>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
CC:     "keescook@...omium.org" <keescook@...omium.org>,
        "tony.luck@...el.com" <tony.luck@...el.com>,
        "benh@...nel.crashing.org" <benh@...nel.crashing.org>,
        "x86@...nel.org" <x86@...nel.org>,
        "dave.hansen@...el.com" <dave.hansen@...el.com>
Subject: Re: [RFC PATCH v2 3/4] arch/x86: Optionally flush L1D on context switch

On Tue, 2020-03-31 at 20:34 +0200, Thomas Gleixner wrote:
> 
> Balbir Singh <sblbir@...zon.com> writes:
> 
> > This patch implements a mechanism to selectively flush the L1D cache.
> 
> git grep 'This patch' Documentation/process/
> 

I'll get more imperative, thanks!

> > diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
> > index 6f66d841262d..1d535059b358 100644
> > --- a/arch/x86/include/asm/tlbflush.h
> > +++ b/arch/x86/include/asm/tlbflush.h
> > @@ -219,6 +219,12 @@ struct tlb_state {
> >        */
> >       unsigned long cr4;
> > 
> > +     /*
> > +      * Flush the L1D cache on switch_mm_irqs_off() for a
> > +      * task getting off the CPU, if it opted in to do so
> > +      */
> > +     bool last_user_mm_l1d_flush;
> 
> ...
> 
> > +/*
> > + * Flush the L1D cache for this CPU. We want to do this at switch mm time,
> > + * this is a pessimistic security measure and an opt-in for those tasks
> > + * that host sensitive information and there are concerns about spills
> > + * from fill buffers.
> 
> Color me confused, but how is L1D flush mitigating fill buffer spills
> (MFBDS)? The MDS family is mitigated by MD_CLEAR, i.e. VERW.

I should reword that sentence to say "snoop from L1D".

> 
> > + */
> > +static void l1d_flush(struct mm_struct *next, struct task_struct *tsk)
> > +{
> > +     struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
> > +
> > +     /*
> > +      * If we are not really switching mm's, we can just return
> > +      */
> > +     if (real_prev == next)
> > +             return;
> 
> Instead of having the same check here, please stick the call into the
> corresponding path in switch_mm_irqs_off(), i.e. where we already have
> the cond_ibpb() invocation.
> 

Sure, will do

> > +     /*
> > +      * Do we need flushing for by the previous task
> 
>   for by? Perhaps:

I'll fix that comment up

> 
>   Did the previous task request L1D flush when it scheduled in?
> 
> > +      */
> > +     if (this_cpu_read(cpu_tlbstate.last_user_mm_l1d_flush) != 0) {
> 
> This is a bool, so != 0 is pointless.
> 
> > +             if (!flush_l1d_cache_hw())
> > +                     flush_l1d_cache_sw(l1d_flush_pages);
> > +             this_cpu_write(cpu_tlbstate.last_user_mm_l1d_flush, 0);
> 
> s/0/false/
> 

Will do
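
For readers following along, a rough sketch of what the hw/sw pair above
could look like, modeled on KVM's existing vmx_l1d_flush() for L1TF; the
helper names, L1D_CACHE_ORDER, and the 64KB buffer size are assumptions
borrowed from that code, not necessarily what this series ends up with:

	/* Sketch: prefer the architectural MSR flush when the CPU has it. */
	static bool flush_l1d_cache_hw(void)
	{
		if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) {
			wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
			return true;
		}
		return false;
	}

	/* Fallback: displace the L1D by reading a buffer twice its size. */
	static void flush_l1d_cache_sw(void *l1d_flush_pages)
	{
		int size = PAGE_SIZE << L1D_CACHE_ORDER;	/* assumed 64KB */
		int i;

		/*
		 * Reads in cache-line strides evict the previous contents.
		 * (KVM's asm version first touches each page to populate
		 * the TLB; that pass is elided here.)
		 */
		for (i = 0; i < size; i += L1_CACHE_BYTES)
			READ_ONCE(*(char *)(l1d_flush_pages + i));
	}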

> > +             /* Make sure we clear the values before we set it again */
> > +             barrier();
> > +     }
> > +
> > +     if (tsk == NULL)
> > +             return;
> > +
> > +     /* We don't need stringent checks as we opt-in/opt-out */
> > +     if (test_ti_thread_flag(&tsk->thread_info, TIF_L1D_FLUSH))
> > +             this_cpu_write(cpu_tlbstate.last_user_mm_l1d_flush, 1);
> 
> s/1/true/
> 
> That aside, looking at the gazillion of conditionals here. That's 4 in
> the worst case. So how about extending cond_ibpb() like the below?
> 
> Thanks,
> 
>         tglx
> 

Makes sense, it mostly looks good! Let me refactor the comments and code.

Balbir
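
A note for readers skimming the patch below: mm_mangle_tif_spec() works
because an mm_struct pointer comes from the slab allocator and is at
least word aligned, so its low bits are always zero and can carry the
per-task speculation state. With TIF_SPEC_IB and TIF_SPEC_FLUSH_L1D on
adjacent bits (the BUILD_BUG_ON enforces this), a single shift drops
both flags into bits 0 and 1, exactly where LAST_USER_MM_IBPB and
LAST_USER_MM_FLUSH_L1D expect them. A hypothetical worked example, not
part of the patch:

	/* Suppose the incoming task has both flags set. */
	unsigned long next_tif = (1UL << TIF_SPEC_IB) |
				 (1UL << TIF_SPEC_FLUSH_L1D);
	unsigned long bits = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_SPEC_MASK;

	/* bits == 0x3 == LAST_USER_MM_IBPB | LAST_USER_MM_FLUSH_L1D */
	unsigned long tagged = (unsigned long)next->mm | bits;

	/* tagged differs from prev_mm when the mm or either flag changed. */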

> 8<---------------------
> 
> --- a/arch/x86/include/asm/thread_info.h
> +++ b/arch/x86/include/asm/thread_info.h
> @@ -84,7 +84,7 @@ struct thread_info {
>  #define TIF_SYSCALL_AUDIT      7       /* syscall auditing active */
>  #define TIF_SECCOMP            8       /* secure computing */
>  #define TIF_SPEC_IB            9       /* Indirect branch speculation mitigation */
> -#define TIF_SPEC_FORCE_UPDATE  10      /* Force speculation MSR update in context switch */
> +#define TIF_SPEC_FLUSH_L1D     10      /* L1D Flush in switch_mm() */
>  #define TIF_USER_RETURN_NOTIFY 11      /* notify kernel of userspace return */
>  #define TIF_UPROBE             12      /* breakpointed or singlestepping */
>  #define TIF_PATCH_PENDING      13      /* pending live patching update */
> @@ -96,6 +96,7 @@ struct thread_info {
>  #define TIF_MEMDIE             20      /* is terminating due to OOM killer */
>  #define TIF_POLLING_NRFLAG     21      /* idle is polling for TIF_NEED_RESCHED */
>  #define TIF_IO_BITMAP          22      /* uses I/O bitmap */
> +#define TIF_SPEC_FORCE_UPDATE  23      /* Force speculation MSR update in context switch */
>  #define TIF_FORCED_TF          24      /* true if TF in eflags artificially */
>  #define TIF_BLOCKSTEP          25      /* set when we want DEBUGCTLMSR_BTF */
>  #define TIF_LAZY_MMU_UPDATES   27      /* task is updating the mmu lazily */
> --- a/arch/x86/include/asm/tlbflush.h
> +++ b/arch/x86/include/asm/tlbflush.h
> @@ -172,7 +172,7 @@ struct tlb_state {
>         /* Last user mm for optimizing IBPB */
>         union {
>                 struct mm_struct        *last_user_mm;
> -               unsigned long           last_user_mm_ibpb;
> +               unsigned long           last_user_mm_spec;
>         };
> 
>         u16 loaded_mm_asid;
> --- a/arch/x86/mm/tlb.c
> +++ b/arch/x86/mm/tlb.c
> @@ -33,10 +33,13 @@
>   */
> 
>  /*
> - * Use bit 0 to mangle the TIF_SPEC_IB state into the mm pointer which is
> - * stored in cpu_tlb_state.last_user_mm_ibpb.
> + * Bits mangle the TIF_SPEC_* state into the mm pointer which is
> + * stored in cpu_tlb_state.last_user_mm_spec.
>   */
>  #define LAST_USER_MM_IBPB      0x1UL
> +#define LAST_USER_MM_FLUSH_L1D 0x2UL
> +
> +#define LAST_USER_MM_SPEC_MASK (LAST_USER_MM_IBPB | LAST_USER_MM_FLUSH_L1D)
> 
>  /*
>   * We get here when we do something requiring a TLB invalidation
> @@ -189,18 +192,22 @@ static void sync_current_stack_to_mm(str
>         }
>  }
> 
> -static inline unsigned long mm_mangle_tif_spec_ib(struct task_struct *next)
> +static inline unsigned long mm_mangle_tif_spec(struct task_struct *next)
>  {
>         unsigned long next_tif = task_thread_info(next)->flags;
> -       unsigned long ibpb = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_IBPB;
> +       unsigned long bits = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_SPEC_MASK;
> +
> +       BUILD_BUG_ON(TIF_SPEC_FLUSH_L1D != TIF_SPEC_IB + 1);
> 
> -       return (unsigned long)next->mm | ibpb;
> +       return (unsigned long)next->mm | bits;
>  }
> 
> -static void cond_ibpb(struct task_struct *next)
> +static void cond_mitigations(struct task_struct *next)
>  {
> -       if (!next || !next->mm)
> -               return;
> +       unsigned long prev_mm, next_mm;
> +
> +       prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_spec);
> +       next_mm = mm_mangle_tif_spec(next);
> 
>         /*
>          * Both, the conditional and the always IBPB mode use the mm
> @@ -212,8 +219,6 @@ static void cond_ibpb(struct task_struct
>          * exposed data is not really interesting.
>          */
>         if (static_branch_likely(&switch_mm_cond_ibpb)) {
> -               unsigned long prev_mm, next_mm;
> -
>                 /*
>                  * This is a bit more complex than the always mode because
>                  * it has to handle two cases:
> @@ -243,20 +248,14 @@ static void cond_ibpb(struct task_struct
>                  * Optimize this with reasonably small overhead for the
>                  * above cases. Mangle the TIF_SPEC_IB bit into the mm
>                  * pointer of the incoming task which is stored in
> -                * cpu_tlbstate.last_user_mm_ibpb for comparison.
> -                */
> -               next_mm = mm_mangle_tif_spec_ib(next);
> -               prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_ibpb);
> -
> -               /*
> +                * cpu_tlbstate.last_user_mm_spec for comparison.
> +                *
>                  * Issue IBPB only if the mm's are different and one or
>                  * both have the IBPB bit set.
>                  */
>                 if (next_mm != prev_mm &&
>                     (next_mm | prev_mm) & LAST_USER_MM_IBPB)
>                         indirect_branch_prediction_barrier();
> -
> -               this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, next_mm);
>         }
> 
>         if (static_branch_unlikely(&switch_mm_always_ibpb)) {
> @@ -265,11 +264,15 @@ static void cond_ibpb(struct task_struct
>                  * different context than the user space task which ran
>                  * last on this CPU.
>                  */
> -               if (this_cpu_read(cpu_tlbstate.last_user_mm) != next->mm) {
> +               if ((prev_mm & ~LAST_USER_MM_SPEC_MASK) !=
> +                   (unsigned long)next->mm)
>                         indirect_branch_prediction_barrier();
> -                       this_cpu_write(cpu_tlbstate.last_user_mm, next->mm);
> -               }
>         }
> +
> +       if (prev_mm & LAST_USER_MM_FLUSH_L1D)
> +               flush_l1d();
> +
> +       this_cpu_write(cpu_tlbstate.last_user_mm_spec, next_mm);
>  }
> 
>  void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
> @@ -371,11 +374,10 @@ void switch_mm_irqs_off(struct mm_struct
>                 need_flush = true;
>         } else {
>                 /*
> -                * Avoid user/user BTB poisoning by flushing the branch
> -                * predictor when switching between processes. This stops
> -                * one process from doing Spectre-v2 attacks on another.
> +                * Speculation vulnerability mitigations when switching
> +                * to a different user space process.
>                  */
> -               cond_ibpb(tsk);
> +               cond_mitigations(tsk);
> 
>                 if (IS_ENABLED(CONFIG_VMAP_STACK)) {
>                         /*
> @@ -501,7 +503,7 @@ void initialize_tlbstate_and_flush(void)
>         write_cr3(build_cr3(mm->pgd, 0));
> 
>         /* Reinitialize tlbstate. */
> -       this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, LAST_USER_MM_IBPB);
> +       this_cpu_write(cpu_tlbstate.last_user_mm_spec, LAST_USER_MM_SPEC_MASK);
>         this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
>         this_cpu_write(cpu_tlbstate.next_asid, 1);
>         this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
