linux-kernel - Re: [PATCH v3 4/5] arch/x86: Optionally flush L1D on context switch

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <bdf950167b86a01db93a9633b708c17a66ad73d0.camel@amazon.com>
Date:   Sat, 18 Apr 2020 01:30:03 +0000
From:   "Singh, Balbir" <sblbir@...zon.com>
To:     "tglx@...utronix.de" <tglx@...utronix.de>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
CC:     "keescook@...omium.org" <keescook@...omium.org>,
        "tony.luck@...el.com" <tony.luck@...el.com>,
        "benh@...nel.crashing.org" <benh@...nel.crashing.org>,
        "jpoimboe@...hat.com" <jpoimboe@...hat.com>,
        "x86@...nel.org" <x86@...nel.org>,
        "dave.hansen@...el.com" <dave.hansen@...el.com>
Subject: Re: [PATCH v3 4/5] arch/x86: Optionally flush L1D on context switch

On Fri, 2020-04-17 at 16:41 +0200, Thomas Gleixner wrote:
> 
> Balbir Singh <sblbir@...zon.com> writes:
> 
> > +static void *l1d_flush_pages;
> > +static DEFINE_MUTEX(l1d_flush_mutex);
> > +
> > +int enable_l1d_flush_for_task(struct task_struct *tsk)
> 
> static ?
> 
> > +{
> > +     struct page *page;
> > +     int ret = 0;
> > +
> > +     if (static_cpu_has(X86_FEATURE_FLUSH_L1D))
> > +             goto done;
> > +
> > +     page = READ_ONCE(l1d_flush_pages);
> > +     if (unlikely(!page)) {
> > +             mutex_lock(&l1d_flush_mutex);
> > +             if (!l1d_flush_pages) {
> > +                     l1d_flush_pages = alloc_l1d_flush_pages();
> > +                     if (!l1d_flush_pages) {
> > +                             mutex_unlock(&l1d_flush_mutex);
> > +                             return -ENOMEM;
> > +                     }
> > +             }
> > +             mutex_unlock(&l1d_flush_mutex);
> > +     }
> 
> So this is +/- the mutex the same code as KVM has. Why is this not moved
> into l1d_flush.c, i.e.
> 
> static void *l1d_flush_pages;
> static DEFINE_MUTEX(l1d_flush_mutex);
> 
> int l1d_flush_init(void)
> {
>         int ret;
> 
>         if (static_cpu_has(X86_FEATURE_FLUSH_L1D) || l1d_flush_pages)
>                 return 0;
> 
>         mutex_lock(&l1d_flush_mutex);
>         if (!l1d_flush_pages)
>                 l1d_flush_pages = l1d_flush_alloc_pages();
>         ret = l1d_flush_pages ? 0 : -ENOMEM;
>         mutex_unlock(&l1d_flush_mutex);
>         return ret;
> }
> EXPORT_SYMBOL_GPL(l1d_flush_init);
> 
> which removes the export of l1d_flush_alloc_pages() and gets rid of the
> cleanup counterpart. In a real world deployment unloading of VMX if used
> once is unlikely and with the task based one you end up with these pages
> 'leaked' anyway if used once.
> 

I don't want the patches to be enforce that one cannot unload the kvm module,
but I can refactor those bits a bit more


> Can we please make this simple and consistent?



> 
> > +done:
> > +     set_ti_thread_flag(&tsk->thread_info, TIF_SPEC_FLUSH_L1D);
> > +     return ret;
> > +}
> > +
> > +int disable_l1d_flush_for_task(struct task_struct *tsk)
> 
> static void?
> 
> > +{
> > +     clear_ti_thread_flag(&tsk->thread_info, TIF_SPEC_FLUSH_L1D);
> > +     return 0;
> > +}
> > +
> > +int arch_prctl_l1d_flush_get(struct task_struct *tsk)
> > +{
> > +     return test_ti_thread_flag(&tsk->thread_info, TIF_SPEC_FLUSH_L1D);
> > +}
> > +
> > +int arch_prctl_l1d_flush_set(struct task_struct *tsk, unsigned long
> > enable)
> > +{
> > +     if (enable)
> > +             return enable_l1d_flush_for_task(tsk);
> > +     return disable_l1d_flush_for_task(tsk);
> > +}
> 
> If any other architecture enables this, then it will have _ALL_ of this
> code duplicated. So we should rather have:

But that is being a bit prescriptive to arch's to implement their L1D flushing
using TIF flags, arch's should be free to use bits in struct_mm for their arch
if they feel so.

> 
>   - CONFIG_FLUSH_L1D which gets selected by features requesting
>     it, i.e. X86 + VMX
> 
>   - CONFIG_FLUSH_L1D_PRCTL which gets selected by architectures
>     supporting it, i.e. X86. This selects CONFIG_FLUSH_L1D and enables
>     the prctl logic.
> 
>   - arch/xxx/include/asm/l1d_flush.h which has for x86:
> 
>     #include <linux/l1d_flush.h>
>     #include <asm/thread_info.h>
> 
>     #define L1D_CACHE_ORDER 4
> 
>     static inline bool arch_has_l1d_flush_hw(void)
>     {
>         return static_cpu_has(X86_FEATURE_FLUSH_L1D);
>     }
> 
>     // This is to make the allocation function conditional or if an
>     // architecture knows upfront compile time optimized.
> 
>     static inline void arch_l1d_flush(void *pages, unsigned long options)
>     {
>         if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) {
>                 ...
>                 return;
>         }
> 
>         if (options & POPULATE_TLB)
>                 l1d_flush_populate_tlb(pages);
>         l1d_flush_sw(pages);
>     }
> 
>     // The option bits go into linux/l1d_flush.h and the asm header has
>     // exactly one file which includes it: lib/l1d_flush.c
>     //
>     // All other places (VMX, arch context switch code) include
>     // linux/l1d_flush.h which also contains the prototypes for
>     // l1d_flush_init() and l1d_flush().
> 
>   - Have l1d_flush_init() and the alloc function in lib/l1d_flush.c
> 
>   - The flush invocation in lib/l1d_flush.c:
> 
>     void l1d_flush(unsigned long options)
>     {
>         arch_l1d_flush(l1d_flush_pages, options);
>     }
>     EXPORT_SYMBOL_GPL(l1d_flush);
> 
>   - All architectures have to use TIF_SPEC_FLUSH_L1D if they want to
>     support the prctl.
> 

That is a concern (see above), should we enforce this?


>     So all of the above arch_prctl*() stuff becomes generic and sits in
>     lib/l1d_flush.c hidden behind CONFIG_FLUSH_L1D_PRCTL.
> 
>     The only architecture specific bits are in the actual context switch
>     code.
> 
> Hmm?
> 

Let me revisit the code and see if I can refactor it.

Balbir

> > diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
> > index 07b4f8131e36..42cb3038c81a 100644
> > --- a/include/uapi/linux/prctl.h
> > +++ b/include/uapi/linux/prctl.h
> > @@ -238,4 +238,8 @@ struct prctl_mm_map {
> >  #define PR_SET_IO_FLUSHER            57
> >  #define PR_GET_IO_FLUSHER            58
> > 
> > +/* Flush L1D on context switch (mm) */
> > +#define PR_SET_L1D_FLUSH             59
> > +#define PR_GET_L1D_FLUSH             60
> > +
> >  #endif /* _LINUX_PRCTL_H */
> > diff --git a/kernel/sys.c b/kernel/sys.c
> > index d325f3ab624a..578aa8b6d87e 100644
> > --- a/kernel/sys.c
> > +++ b/kernel/sys.c
> > @@ -2262,6 +2262,16 @@ int __weak arch_prctl_spec_ctrl_set(struct
> > task_struct *t, unsigned long which,
> >       return -EINVAL;
> >  }
> > 
> > +int __weak arch_prctl_l1d_flush_set(struct task_struct *tsk, unsigned
> > long enable)
> > +{
> > +     return -EINVAL;
> > +}
> > +
> > +int __weak arch_prctl_l1d_flush_get(struct task_struct *t)
> > +{
> > +     return -EINVAL;
> > +}
> > +
> >  #define PR_IO_FLUSHER (PF_MEMALLOC_NOIO | PF_LESS_THROTTLE)
> > 
> >  SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long,
> > arg3,
> > @@ -2514,6 +2524,16 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long,
> > arg2, unsigned long, arg3,
> > 
> >               error = (current->flags & PR_IO_FLUSHER) == PR_IO_FLUSHER;
> >               break;
> > +     case PR_SET_L1D_FLUSH:
> > +             if (arg3 || arg4 || arg5)
> > +                     return -EINVAL;
> > +             error = arch_prctl_l1d_flush_set(me, arg2);
> > +             break;
> > +     case PR_GET_L1D_FLUSH:
> > +             if (arg2 || arg3 || arg4 || arg5)
> > +                     return -EINVAL;
> > +             error = arch_prctl_l1d_flush_get(me);
> > +             break;
> >       default:
> >               error = -EINVAL;
> >               break;
> 
> The prctl itself looks fine to me.
> 
> Thanks,
> 
>         tglx
> 

Thanks,
Balbir