[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <169a83b0-437a-419e-cac4-84338cf95d94@c-s.fr>
Date: Tue, 29 Oct 2019 17:12:52 +0100
From: Christophe Leroy <christophe.leroy@....fr>
To: Santosh Sivaraj <santosh@...six.org>,
Benjamin Herrenschmidt <benh@...nel.crashing.org>,
Paul Mackerras <paulus@...ba.org>,
Michael Ellerman <mpe@...erman.id.au>
Cc: linuxppc-dev@...ts.ozlabs.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2 4/8] powerpc/vdso32: inline __get_datapage()
Hi Santosh,
Le 26/08/2019 à 07:44, Santosh Sivaraj a écrit :
> Hi Christophe,
>
> Christophe Leroy <christophe.leroy@....fr> writes:
>
>> __get_datapage() is only a few instructions to retrieve the
>> address of the page where the kernel stores data to the VDSO.
>>
>> By inlining this function into its users, a bl/blr pair and
>> a mflr/mtlr pair are avoided, plus a few reg moves.
>>
>> The improvement is noticeable (about 55 nsec/call on an 8xx)
>>
>> vdsotest before the patch:
>> gettimeofday: vdso: 731 nsec/call
>> clock-gettime-realtime-coarse: vdso: 668 nsec/call
>> clock-gettime-monotonic-coarse: vdso: 745 nsec/call
>>
>> vdsotest after the patch:
>> gettimeofday: vdso: 677 nsec/call
>> clock-gettime-realtime-coarse: vdso: 613 nsec/call
>> clock-gettime-monotonic-coarse: vdso: 690 nsec/call
>>
>> Signed-off-by: Christophe Leroy <christophe.leroy@....fr>
>> ---
>> arch/powerpc/kernel/vdso32/cacheflush.S | 10 +++++-----
>> arch/powerpc/kernel/vdso32/datapage.S | 29 ++++-------------------------
>> arch/powerpc/kernel/vdso32/datapage.h | 11 +++++++++++
>> arch/powerpc/kernel/vdso32/gettimeofday.S | 13 ++++++-------
>> 4 files changed, 26 insertions(+), 37 deletions(-)
>> create mode 100644 arch/powerpc/kernel/vdso32/datapage.h
>
> The datapage.h file should ideally be moved under include/asm, then we can use
> the same for powerpc64 too.
In the end, I added the get_datapage macro to the existing asm/vdso_datapage.h
Christophe
>
> Santosh
>
>>
>> diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S
>> index 7f882e7b9f43..e9453837e4ee 100644
>> --- a/arch/powerpc/kernel/vdso32/cacheflush.S
>> +++ b/arch/powerpc/kernel/vdso32/cacheflush.S
>> @@ -10,6 +10,8 @@
>> #include <asm/vdso.h>
>> #include <asm/asm-offsets.h>
>>
>> +#include "datapage.h"
>> +
>> .text
>>
>> /*
>> @@ -24,14 +26,12 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
>> .cfi_startproc
>> mflr r12
>> .cfi_register lr,r12
>> - mr r11,r3
>> - bl __get_datapage@local
>> + get_datapage r10, r0
>> mtlr r12
>> - mr r10,r3
>>
>> lwz r7,CFG_DCACHE_BLOCKSZ(r10)
>> addi r5,r7,-1
>> - andc r6,r11,r5 /* round low to line bdy */
>> + andc r6,r3,r5 /* round low to line bdy */
>> subf r8,r6,r4 /* compute length */
>> add r8,r8,r5 /* ensure we get enough */
>> lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10)
>> @@ -48,7 +48,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
>>
>> lwz r7,CFG_ICACHE_BLOCKSZ(r10)
>> addi r5,r7,-1
>> - andc r6,r11,r5 /* round low to line bdy */
>> + andc r6,r3,r5 /* round low to line bdy */
>> subf r8,r6,r4 /* compute length */
>> add r8,r8,r5
>> lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10)
>> diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S
>> index 6984125b9fc0..d480d2d4a3fe 100644
>> --- a/arch/powerpc/kernel/vdso32/datapage.S
>> +++ b/arch/powerpc/kernel/vdso32/datapage.S
>> @@ -11,34 +11,13 @@
>> #include <asm/unistd.h>
>> #include <asm/vdso.h>
>>
>> +#include "datapage.h"
>> +
>> .text
>> .global __kernel_datapage_offset;
>> __kernel_datapage_offset:
>> .long 0
>>
>> -V_FUNCTION_BEGIN(__get_datapage)
>> - .cfi_startproc
>> - /* We don't want that exposed or overridable as we want other objects
>> - * to be able to bl directly to here
>> - */
>> - .protected __get_datapage
>> - .hidden __get_datapage
>> -
>> - mflr r0
>> - .cfi_register lr,r0
>> -
>> - bcl 20,31,data_page_branch
>> -data_page_branch:
>> - mflr r3
>> - mtlr r0
>> - addi r3, r3, __kernel_datapage_offset-data_page_branch
>> - lwz r0,0(r3)
>> - .cfi_restore lr
>> - add r3,r0,r3
>> - blr
>> - .cfi_endproc
>> -V_FUNCTION_END(__get_datapage)
>> -
>> /*
>> * void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
>> *
>> @@ -53,7 +32,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
>> mflr r12
>> .cfi_register lr,r12
>> mr r4,r3
>> - bl __get_datapage@local
>> + get_datapage r3, r0
>> mtlr r12
>> addi r3,r3,CFG_SYSCALL_MAP32
>> cmpli cr0,r4,0
>> @@ -74,7 +53,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq)
>> .cfi_startproc
>> mflr r12
>> .cfi_register lr,r12
>> - bl __get_datapage@local
>> + get_datapage r3, r0
>> lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3)
>> lwz r3,CFG_TB_TICKS_PER_SEC(r3)
>> mtlr r12
>> diff --git a/arch/powerpc/kernel/vdso32/datapage.h b/arch/powerpc/kernel/vdso32/datapage.h
>> new file mode 100644
>> index 000000000000..74f4f57c2da8
>> --- /dev/null
>> +++ b/arch/powerpc/kernel/vdso32/datapage.h
>> @@ -0,0 +1,11 @@
>> +/* SPDX-License-Identifier: GPL-2.0-or-later */
>> +
>> +.macro get_datapage ptr, tmp /* \ptr = datapage address; clobbers \tmp and LR */
>> + bcl 20,31,.+4 /* branch-and-link to the next insn: LR = address of the mflr below */
>> + mflr \ptr /* \ptr = run-time address of this mflr */
>> + addi \ptr, \ptr, __kernel_datapage_offset - (.-4) /* (.-4) is the mflr, so \ptr = &__kernel_datapage_offset */
>> + lwz \tmp, 0(\ptr) /* \tmp = word stored at __kernel_datapage_offset */
>> + add \ptr, \tmp, \ptr /* \ptr = &__kernel_datapage_offset + stored offset */
>> +.endm /* callers must save/restore LR themselves (they use r12 for this) */
>> +
>> +
>> diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
>> index 355b537d327a..3e55cba19f44 100644
>> --- a/arch/powerpc/kernel/vdso32/gettimeofday.S
>> +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
>> @@ -12,6 +12,8 @@
>> #include <asm/asm-offsets.h>
>> #include <asm/unistd.h>
>>
>> +#include "datapage.h"
>> +
>> /* Offset for the low 32-bit part of a field of long type */
>> #ifdef CONFIG_PPC64
>> #define LOPART 4
>> @@ -35,8 +37,7 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
>>
>> mr r10,r3 /* r10 saves tv */
>> mr r11,r4 /* r11 saves tz */
>> - bl __get_datapage@local /* get data page */
>> - mr r9, r3 /* datapage ptr in r9 */
>> + get_datapage r9, r0
>> cmplwi r10,0 /* check if tv is NULL */
>> beq 3f
>> lis r7,1000000@ha /* load up USEC_PER_SEC */
>> @@ -82,8 +83,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
>> mflr r12 /* r12 saves lr */
>> .cfi_register lr,r12
>> mr r11,r4 /* r11 saves tp */
>> - bl __get_datapage@local /* get data page */
>> - mr r9,r3 /* datapage ptr in r9 */
>> + get_datapage r9, r0
>> lis r7,NSEC_PER_SEC@h /* want nanoseconds */
>> ori r7,r7,NSEC_PER_SEC@l
>> beq cr5, .Lcoarse_clocks
>> @@ -208,7 +208,7 @@ V_FUNCTION_BEGIN(__kernel_clock_getres)
>>
>> mflr r12
>> .cfi_register lr,r12
>> - bl __get_datapage@local /* get data page */
>> + get_datapage r3, r0
>> lwz r5, CLOCK_HRTIMER_RES(r3)
>> mtlr r12
>> li r3,0
>> @@ -242,8 +242,7 @@ V_FUNCTION_BEGIN(__kernel_time)
>> .cfi_register lr,r12
>>
>> mr r11,r3 /* r11 holds t */
>> - bl __get_datapage@local
>> - mr r9, r3 /* datapage ptr in r9 */
>> + get_datapage r9, r0
>>
>> lwz r3,STAMP_XTIME+TSPEC_TV_SEC(r9)
>>
>> --
>> 2.13.3
Powered by blists - more mailing lists