[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <91ff4c04-ec26-418e-a685-f910505eec5a@www.fastmail.com>
Date: Sat, 02 Jul 2022 16:55:40 -0700
From: "Andy Lutomirski" <luto@...nel.org>
To: "Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>
Cc: "Dave Hansen" <dave.hansen@...ux.intel.com>,
"Peter Zijlstra (Intel)" <peterz@...radead.org>,
"the arch/x86 maintainers" <x86@...nel.org>,
"kcc@...gle.com" <kcc@...gle.com>,
"ryabinin.a.a@...il.com" <ryabinin.a.a@...il.com>,
"andreyknvl@...il.com" <andreyknvl@...il.com>,
"glider@...gle.com" <glider@...gle.com>,
"dvyukov@...gle.com" <dvyukov@...gle.com>,
"H.J. Lu" <hjl.tools@...il.com>, "Andi Kleen" <ak@...ux.intel.com>,
"Rick P Edgecombe" <rick.p.edgecombe@...el.com>,
linux-mm@...ck.org,
"Linux Kernel Mailing List" <linux-kernel@...r.kernel.org>
Subject: Re: [PATCHv3 6/8] x86/mm: Provide ARCH_GET_UNTAG_MASK and
ARCH_ENABLE_TAGGED_ADDR
On Fri, Jul 1, 2022, at 8:38 AM, Kirill A. Shutemov wrote:
> On Wed, Jun 29, 2022 at 07:29:13PM -0700, Andy Lutomirski wrote:
>>
>>
>> On Tue, Jun 28, 2022, at 5:53 PM, Kirill A. Shutemov wrote:
>> > On Tue, Jun 28, 2022 at 04:42:40PM -0700, Andy Lutomirski wrote:
>> >> On 6/10/22 07:35, Kirill A. Shutemov wrote:
>> >>
>> >> > + /* Update CR3 to get LAM active */
>> >> > + switch_mm(current->mm, current->mm, current);
>> >>
>> >> Can you at least justify this oddity? When changing an LDT, we use a
>> >> dedicated mechanism. Is there a significant benefit to abusing switch_mm
>> >> for this?
>> >
>> > I'm not sure I follow. LAM mode is set in CR3. switch_mm() has to handle
>> > it anyway to context switch. Why do you consider it abuse?
>> >
>> >>
>> >> Also, why can't we enable LAM on a multithreaded process? We can change an
>> >> LDT, and the code isn't even particularly complicated.
>> >
>> > I reworked this in v4[1] and it allows multithreaded processes. Have you
>> > got that version?
>> >
> >> > Intel had an issue with its mail server, but I assumed it didn't affect
> >> > my patchset since I see it in the archive.
>> >
>>
>> I didn’t notice it. Not quite sure what the issue was. Could just be
>> incompetence on my part.
>>
>> I think that’s the right idea, except that I think you shouldn’t use
>> switch_mm for this. Just update the LAM bits directly. Once you read
>> mm_cpumask, you should be guaranteed (see next paragraph) that, for each
>> CPU that isn’t in the set, if it switches to the new mm, it will notice
>> the new LAM.
>>
>> I say “should be” because I think smp_wmb() is insufficient. You’re
>> ordering a write with a subsequent read, which needs smp_mb().
>
> I think it is better to use smp_mb() here to make the ordering explicit.
>
> Does the fixup below look okay?
>
> diff --git a/arch/x86/include/asm/tlbflush.h
> b/arch/x86/include/asm/tlbflush.h
> index 2d70d75e207f..8da54e7b6f98 100644
> --- a/arch/x86/include/asm/tlbflush.h
> +++ b/arch/x86/include/asm/tlbflush.h
> @@ -367,4 +367,30 @@ static inline void
> __native_tlb_flush_global(unsigned long cr4)
> native_write_cr4(cr4 ^ X86_CR4_PGE);
> native_write_cr4(cr4);
> }
> +
> +#ifdef CONFIG_X86_64
> +static inline u64 tlbstate_lam_cr3_mask(void)
> +{
> + u64 lam = this_cpu_read(cpu_tlbstate.lam);
> +
> + return lam << X86_CR3_LAM_U57_BIT;
> +}
> +
> +static inline void set_tlbstate_lam_cr3_mask(u64 mask)
> +{
> + this_cpu_write(cpu_tlbstate.lam, mask >> X86_CR3_LAM_U57_BIT);
> +}
> +
> +#else
> +
> +static inline u64 tlbstate_lam_cr3_mask(void)
> +{
> + return 0;
> +}
> +
> +static inline void set_tlbstate_lam_cr3_mask(u64 mask)
> +{
> +}
> +#endif
> +
> #endif /* _ASM_X86_TLBFLUSH_H */
> diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
> index 427ebef3f64b..cd2b03fe94c4 100644
> --- a/arch/x86/kernel/process_64.c
> +++ b/arch/x86/kernel/process_64.c
> @@ -745,15 +745,16 @@ static long prctl_map_vdso(const struct
> vdso_image *image, unsigned long addr)
> static void enable_lam_func(void *mm)
> {
> struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
> + unsigned long lam_mask;
>
> if (loaded_mm != mm)
> return;
>
> - /* Counterpart of smp_wmb() in prctl_enable_tagged_addr() */
> - smp_rmb();
> + lam_mask = READ_ONCE(loaded_mm->context.lam_cr3_mask);
>
> /* Update CR3 to get LAM active on the CPU */
> - switch_mm(loaded_mm, loaded_mm, current);
> + write_cr3(__read_cr3() | lam_mask);
Perhaps this should also mask off the old LAM bits in CR3 before OR-ing in the new mask? As written, any LAM bits that were already set stay set.
> + set_tlbstate_lam_cr3_mask(lam_mask);
> }
>
> static bool lam_u48_allowed(void)
> @@ -805,7 +806,7 @@ static int prctl_enable_tagged_addr(struct
> mm_struct *mm, unsigned long nr_bits)
> }
>
> /* Make lam_cr3_mask and untag_mask visible on other CPUs */
> - smp_wmb();
> + smp_mb();
>
> on_each_cpu_mask(mm_cpumask(mm), enable_lam_func, mm, true);
> out:
> diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
> index c5c4f76329c2..d9a2acdae90f 100644
> --- a/arch/x86/mm/tlb.c
> +++ b/arch/x86/mm/tlb.c
> @@ -486,31 +486,6 @@ void cr4_update_pce(void *ignored)
> static inline void cr4_update_pce_mm(struct mm_struct *mm) { }
> #endif
>
> -#ifdef CONFIG_X86_64
> -static inline u64 tlbstate_lam_cr3_mask(void)
> -{
> - u64 lam = this_cpu_read(cpu_tlbstate.lam);
> -
> - return lam << X86_CR3_LAM_U57_BIT;
> -}
> -
> -static inline void set_tlbstate_lam_cr3_mask(u64 mask)
> -{
> - this_cpu_write(cpu_tlbstate.lam, mask >> X86_CR3_LAM_U57_BIT);
> -}
> -
> -#else
> -
> -static inline u64 tlbstate_lam_cr3_mask(void)
> -{
> - return 0;
> -}
> -
> -static inline void set_tlbstate_lam_cr3_mask(u64 mask)
> -{
> -}
> -#endif
> -
> void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
> struct task_struct *tsk)
> {
> @@ -581,7 +556,7 @@ void switch_mm_irqs_off(struct mm_struct *prev,
> struct mm_struct *next,
> * provides that full memory barrier and core serializing
> * instruction.
> */
> - if (real_prev == next && prev_lam == new_lam) {
> + if (real_prev == next) {
> VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
> next->context.ctx_id);
>
> --
> Kirill A. Shutemov
Powered by blists - more mailing lists