lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Sat, 11 Jan 2020 09:06:49 +0000
From:   Christophe Leroy <christophe.leroy@....fr>
To:     Thomas Gleixner <tglx@...utronix.de>, Arnd Bergmann <arnd@...db.de>
Cc:     Benjamin Herrenschmidt <benh@...nel.crashing.org>,
        Paul Mackerras <paulus@...ba.org>,
        Michael Ellerman <mpe@...erman.id.au>,
        Vincenzo Frascino <vincenzo.frascino@....com>,
        Andy Lutomirski <luto@...nel.org>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
        linuxppc-dev <linuxppc-dev@...ts.ozlabs.org>,
        Linux ARM <linux-arm-kernel@...ts.infradead.org>,
        "open list:BROADCOM NVRAM DRIVER" <linux-mips@...r.kernel.org>,
        the arch/x86 maintainers <x86@...nel.org>
Subject: Re: [RFC PATCH v2 05/10] lib: vdso: inline do_hres()



On 01/10/2020 09:07 PM, Thomas Gleixner wrote:
> Arnd Bergmann <arnd@...db.de> writes:
>> On Mon, Dec 23, 2019 at 3:31 PM Christophe Leroy
>> <christophe.leroy@....fr> wrote:
>>>
>>> do_hres() is called from several places, so GCC doesn't inline
>>> it at first.
>>>
>>> do_hres() takes a struct __kernel_timespec * parameter for
>>> passing the result. In the 32 bits case, this parameter corresponds
>>> to a local var in the caller. In order to provide a pointer
>>> to this structure, the caller has to put it in its stack and
>>> do_hres() has to write the result in the stack. This is suboptimal,
>>> especially on RISC processor like powerpc.
>>>
>>> By making GCC inline the function, the struct __kernel_timespec
>>> remains a local var using registers, avoiding the need to write and
>>> read stack.
>>>
>>> The improvement is significant on powerpc.
>>>
>>> Signed-off-by: Christophe Leroy <christophe.leroy@....fr>
>>
>> Good idea, I can see how this ends up being an improvement
>> for most of the callers.
>>
>> Acked-by: Arnd Bergmann <arnd@...db.de>
> 
>    https://lore.kernel.org/r/20191112012724.250792-3-dima@arista.com
> 
> On the way to be applied.
> 

Oh nice, I get even better result with the way it is done by Dmitry 
compared to my own first patch.

On an mpc8xx at 132Mhz (32bits powerpc), before the patch I have
gettimeofday:    vdso: 1256 nsec/call
clock-gettime-monotonic-raw:    vdso: 1449 nsec/call
clock-gettime-monotonic-coarse:    vdso: 768 nsec/call
clock-gettime-monotonic:    vdso: 1390 nsec/call

With the patch I have:
gettimeofday:    vdso: 947 nsec/call
clock-gettime-monotonic-raw:    vdso: 1156 nsec/call
clock-gettime-monotonic-coarse:    vdso: 638 nsec/call
clock-gettime-monotonic:    vdso: 1094 nsec/call

So that's a 20-25% improvement.

I modified it slightly as follows:

diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c
index 9e474d54814f..b793f211bca8 100644
--- a/lib/vdso/gettimeofday.c
+++ b/lib/vdso/gettimeofday.c
@@ -37,8 +37,8 @@ u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, 
u32 mult)
  }
  #endif

-static int do_hres(const struct vdso_data *vd, clockid_t clk,
-		   struct __kernel_timespec *ts)
+static __always_inline int do_hres(const struct vdso_data *vd, 
clockid_t clk,
+				   struct __kernel_timespec *ts)
  {
  	const struct vdso_timestamp *vdso_ts = &vd->basetime[clk];
  	u64 cycles, last, sec, ns;
@@ -67,8 +67,8 @@ static int do_hres(const struct vdso_data *vd, 
clockid_t clk,
  	return 0;
  }

-static void do_coarse(const struct vdso_data *vd, clockid_t clk,
-		      struct __kernel_timespec *ts)
+static __always_inline int do_coarse(const struct vdso_data *vd, 
clockid_t clk,
+				     struct __kernel_timespec *ts)
  {
  	const struct vdso_timestamp *vdso_ts = &vd->basetime[clk];
  	u32 seq;
@@ -78,6 +78,8 @@ static void do_coarse(const struct vdso_data *vd, 
clockid_t clk,
  		ts->tv_sec = vdso_ts->sec;
  		ts->tv_nsec = vdso_ts->nsec;
  	} while (unlikely(vdso_read_retry(vd, seq)));
+
+	return 0;
  }

  static __maybe_unused int
@@ -95,15 +97,16 @@ __cvdso_clock_gettime_common(const struct vdso_data 
*vd, clockid_t clock,
  	 * clocks are handled in the VDSO directly.
  	 */
  	msk = 1U << clock;
-	if (likely(msk & VDSO_HRES)) {
-		return do_hres(&vd[CS_HRES_COARSE], clock, ts);
-	} else if (msk & VDSO_COARSE) {
-		do_coarse(&vd[CS_HRES_COARSE], clock, ts);
-		return 0;
-	} else if (msk & VDSO_RAW) {
-		return do_hres(&vd[CS_RAW], clock, ts);
-	}
-	return -1;
+	if (likely(msk & VDSO_HRES))
+		vd += CS_HRES_COARSE;
+	else if (msk & VDSO_COARSE)
+		return do_coarse(&vd[CS_HRES_COARSE], clock, ts);
+	else if (msk & VDSO_RAW)
+		vd += CS_RAW;
+	else
+		return -1;
+
+	return do_hres(vd, clock, ts);
  }

  static __maybe_unused int

---

Christophe

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ