lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Tue, 23 Jul 2019 20:52:21 +0200
From:   Jan Kiszka <jan.kiszka@...mens.com>
To:     Thomas Gleixner <tglx@...utronix.de>,
        Borislav Petkov <bp@...en8.de>,
        "H. Peter Anvin" <hpa@...or.com>, x86@...nel.org
Cc:     Peter Zijlstra <peterz@...radead.org>,
        Ingo Molnar <mingo@...hat.com>,
        Arnaldo Carvalho de Melo <acme@...nel.org>,
        Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
        Jiri Olsa <jolsa@...hat.com>,
        Namhyung Kim <namhyung@...nel.org>,
        Dave Hansen <dave.hansen@...ux.intel.com>,
        Andy Lutomirski <luto@...nel.org>, linux-kernel@...r.kernel.org
Subject: Re: [PATCH] x86: Optimize load_mm_cr4 to load_mm_cr4_irqsoff

On 18.06.19 09:32, Jan Kiszka wrote:
> From: Jan Kiszka <jan.kiszka@...mens.com>
> 
> We only call load_mm_cr4 with interrupts disabled:
>  - switch_mm_irqs_off
>  - refresh_pce (on_each_cpu callback)
> 
> Thus, we can avoid disabling interrupts again in cr4_set/clear_bits.
> 
> Instead, provide cr4_set/clear_bits_irqsoffs and call those helpers from
> load_mm_cr4_irqsoff. The renaming in combination with the checks
> in __cr4_set shall ensure that any changes in the boundary conditions of
> the invocations will be detected.
> 
> Signed-off-by: Jan Kiszka <jan.kiszka@...mens.com>
> ---
> 
> Found while porting Xenomai with its virtualized interrupt
> infrastructure to a newer kernel.
> 
>  arch/x86/events/core.c             |  2 +-
>  arch/x86/include/asm/mmu_context.h |  8 ++++----
>  arch/x86/include/asm/tlbflush.h    | 30 +++++++++++++++++++++++-------
>  arch/x86/mm/tlb.c                  |  2 +-
>  4 files changed, 29 insertions(+), 13 deletions(-)
> 
> diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
> index f315425d8468..78a3fba28c62 100644
> --- a/arch/x86/events/core.c
> +++ b/arch/x86/events/core.c
> @@ -2161,7 +2161,7 @@ static int x86_pmu_event_init(struct perf_event *event)
>  
>  static void refresh_pce(void *ignored)
>  {
> -	load_mm_cr4(this_cpu_read(cpu_tlbstate.loaded_mm));
> +	load_mm_cr4_irqsoff(this_cpu_read(cpu_tlbstate.loaded_mm));
>  }
>  
>  static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
> diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
> index 9024236693d2..16ae821483c8 100644
> --- a/arch/x86/include/asm/mmu_context.h
> +++ b/arch/x86/include/asm/mmu_context.h
> @@ -28,16 +28,16 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
>  
>  DECLARE_STATIC_KEY_FALSE(rdpmc_always_available_key);
>  
> -static inline void load_mm_cr4(struct mm_struct *mm)
> +static inline void load_mm_cr4_irqsoff(struct mm_struct *mm)
>  {
>  	if (static_branch_unlikely(&rdpmc_always_available_key) ||
>  	    atomic_read(&mm->context.perf_rdpmc_allowed))
> -		cr4_set_bits(X86_CR4_PCE);
> +		cr4_set_bits_irqsoff(X86_CR4_PCE);
>  	else
> -		cr4_clear_bits(X86_CR4_PCE);
> +		cr4_clear_bits_irqsoff(X86_CR4_PCE);
>  }
>  #else
> -static inline void load_mm_cr4(struct mm_struct *mm) {}
> +static inline void load_mm_cr4_irqsoff(struct mm_struct *mm) {}
>  #endif
>  
>  #ifdef CONFIG_MODIFY_LDT_SYSCALL
> diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
> index dee375831962..6f66d841262d 100644
> --- a/arch/x86/include/asm/tlbflush.h
> +++ b/arch/x86/include/asm/tlbflush.h
> @@ -290,26 +290,42 @@ static inline void __cr4_set(unsigned long cr4)
>  }
>  
>  /* Set in this cpu's CR4. */
> -static inline void cr4_set_bits(unsigned long mask)
> +static inline void cr4_set_bits_irqsoff(unsigned long mask)
>  {
> -	unsigned long cr4, flags;
> +	unsigned long cr4;
>  
> -	local_irq_save(flags);
>  	cr4 = this_cpu_read(cpu_tlbstate.cr4);
>  	if ((cr4 | mask) != cr4)
>  		__cr4_set(cr4 | mask);
> -	local_irq_restore(flags);
>  }
>  
>  /* Clear in this cpu's CR4. */
> -static inline void cr4_clear_bits(unsigned long mask)
> +static inline void cr4_clear_bits_irqsoff(unsigned long mask)
>  {
> -	unsigned long cr4, flags;
> +	unsigned long cr4;
>  
> -	local_irq_save(flags);
>  	cr4 = this_cpu_read(cpu_tlbstate.cr4);
>  	if ((cr4 & ~mask) != cr4)
>  		__cr4_set(cr4 & ~mask);
> +}
> +
> +/* Set in this cpu's CR4. */
> +static inline void cr4_set_bits(unsigned long mask)
> +{
> +	unsigned long flags;
> +
> +	local_irq_save(flags);
> +	cr4_set_bits_irqsoff(mask);
> +	local_irq_restore(flags);
> +}
> +
> +/* Clear in this cpu's CR4. */
> +static inline void cr4_clear_bits(unsigned long mask)
> +{
> +	unsigned long flags;
> +
> +	local_irq_save(flags);
> +	cr4_clear_bits_irqsoff(mask);
>  	local_irq_restore(flags);
>  }
>  
> diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
> index 91f6db92554c..8fc1eaa55b6e 100644
> --- a/arch/x86/mm/tlb.c
> +++ b/arch/x86/mm/tlb.c
> @@ -440,7 +440,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
>  	this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
>  
>  	if (next != real_prev) {
> -		load_mm_cr4(next);
> +		load_mm_cr4_irqsoff(next);
>  		switch_ldt(real_prev, next);
>  	}
>  }
> 

Ping. I think the only remark of Dave was answered.

Jan

-- 
Siemens AG, Corporate Technology, CT RDA IOT SES-DE
Corporate Competence Center Embedded Linux

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ