lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Wed, 23 Jan 2019 10:44:31 +0000
From:   Julien Thierry <julien.thierry@....com>
To:     Catalin Marinas <catalin.marinas@....com>
Cc:     linux-arm-kernel@...ts.infradead.org, linux-kernel@...r.kernel.org,
        daniel.thompson@...aro.org, joel@...lfernandes.org,
        marc.zyngier@....com, christoffer.dall@....com,
        james.morse@....com, will.deacon@....com, mark.rutland@....com,
        Ard Biesheuvel <ard.biesheuvel@...aro.org>,
        Oleg Nesterov <oleg@...hat.com>
Subject: Re: [PATCH v9 12/26] arm64: irqflags: Use ICC_PMR_EL1 for interrupt
 masking



On 22/01/2019 15:21, Catalin Marinas wrote:
> On Mon, Jan 21, 2019 at 03:33:31PM +0000, Julien Thierry wrote:
>> diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
>> index 24692ed..7e82a92 100644
>> --- a/arch/arm64/include/asm/irqflags.h
>> +++ b/arch/arm64/include/asm/irqflags.h
>> @@ -18,7 +18,9 @@
>>  
>>  #ifdef __KERNEL__
>>  
>> +#include <asm/alternative.h>
>>  #include <asm/ptrace.h>
>> +#include <asm/sysreg.h>
>>  
>>  /*
>>   * Aarch64 has flags for masking: Debug, Asynchronous (serror), Interrupts and
>> @@ -36,33 +38,31 @@
>>  /*
>>   * CPU interrupt mask handling.
>>   */
>> -static inline unsigned long arch_local_irq_save(void)
>> -{
>> -	unsigned long flags;
>> -	asm volatile(
>> -		"mrs	%0, daif		// arch_local_irq_save\n"
>> -		"msr	daifset, #2"
>> -		: "=r" (flags)
>> -		:
>> -		: "memory");
>> -	return flags;
>> -}
>> -
>>  static inline void arch_local_irq_enable(void)
>>  {
>> -	asm volatile(
>> -		"msr	daifclr, #2		// arch_local_irq_enable"
>> -		:
>> +	unsigned long unmasked = GIC_PRIO_IRQON;
>> +
>> +	asm volatile(ALTERNATIVE(
>> +		"msr	daifclr, #2		// arch_local_irq_enable\n"
>> +		"nop",
>> +		"msr_s  " __stringify(SYS_ICC_PMR_EL1) ",%0\n"
>> +		"dsb	sy",
>> +		ARM64_HAS_IRQ_PRIO_MASKING)
>>  		:
>> +		: "r" (unmasked)
>>  		: "memory");
>>  }
>>  
>>  static inline void arch_local_irq_disable(void)
>>  {
>> -	asm volatile(
>> -		"msr	daifset, #2		// arch_local_irq_disable"
>> -		:
>> +	unsigned long masked = GIC_PRIO_IRQOFF;
>> +
>> +	asm volatile(ALTERNATIVE(
>> +		"msr	daifset, #2		// arch_local_irq_disable",
>> +		"msr_s  " __stringify(SYS_ICC_PMR_EL1) ", %0",
>> +		ARM64_HAS_IRQ_PRIO_MASKING)
>>  		:
>> +		: "r" (masked)
>>  		: "memory");
>>  }
> 
> Nitpick: you could drop the masked/unmasked variables here (it's up to you,
> it wouldn't make any difference to the generated asm).
> 

Good point, I'll do that.

>> @@ -71,12 +71,44 @@ static inline void arch_local_irq_disable(void)
>>   */
>>  static inline unsigned long arch_local_save_flags(void)
>>  {
>> +	unsigned long daif_bits;
>>  	unsigned long flags;
>> -	asm volatile(
>> -		"mrs	%0, daif		// arch_local_save_flags"
>> -		: "=r" (flags)
>> -		:
>> +
>> +	daif_bits = read_sysreg(daif);
>> +
>> +	/*
>> +	 * The asm is logically equivalent to:
>> +	 *
>> +	 * if (system_uses_irq_prio_masking())
>> +	 *	flags = (daif_bits & PSR_I_BIT) ?
>> +	 *			GIC_PRIO_IRQOFF :
>> +	 *			read_sysreg_s(SYS_ICC_PMR_EL1);
>> +	 * else
>> +	 *	flags = daif_bits;
>> +	 */
>> +	asm volatile(ALTERNATIVE(
>> +			"mov	%0, %1\n"
>> +			"nop\n"
>> +			"nop",
>> +			"mrs_s	%0, " __stringify(SYS_ICC_PMR_EL1) "\n"
>> +			"ands	%1, %1, " __stringify(PSR_I_BIT) "\n"
>> +			"csel	%0, %0, %2, eq",
>> +			ARM64_HAS_IRQ_PRIO_MASKING)
>> +		: "=&r" (flags), "+r" (daif_bits)
>> +		: "r" (GIC_PRIO_IRQOFF)
>>  		: "memory");
>> +
>> +	return flags;
>> +}
> 
> BTW, how's the code generated from the C version? It will have a branch
> but may not be too bad. Either way is fine by me.
> 

It's a bit hard to discuss the code generated from the C version, as
it can end up within several layers of inlining, so the instructions for
that section are a bit more scattered.

However, the compiler seems to do a better job with it (maybe the asm
volatile prevents some optimizations regarding register allocation or
instruction ordering), and the C version seems to perform slightly better
(although it could be within the noise) despite the branch.

So, I'll just switch to the C version.

> Reviewed-by: Catalin Marinas <catalin.marinas@....com>
> 

-- 
Julien Thierry

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ