lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20150623175012.GD3644@twins.programming.kicks-ass.net>
Date:	Tue, 23 Jun 2015 19:50:12 +0200
From:	Peter Zijlstra <peterz@...radead.org>
To:	Daniel Wagner <daniel.wagner@...-carit.de>
Cc:	oleg@...hat.com, paulmck@...ux.vnet.ibm.com, tj@...nel.org,
	mingo@...hat.com, linux-kernel@...r.kernel.org, der.herr@...r.at,
	dave@...olabs.net, riel@...hat.com, viro@...IV.linux.org.uk,
	torvalds@...ux-foundation.org, jlayton@...chiereds.net
Subject: Re: [RFC][PATCH 00/13] percpu rwsem -v2

On Tue, Jun 23, 2015 at 04:56:39PM +0200, Daniel Wagner wrote:
> flock02
>                              mean   variance      sigma        max        min
>                     tip-1    11.8994     0.5874     0.7664    13.2022     8.6324
>                     tip-2    11.7394     0.5252     0.7247    13.2540     9.7513
>                     tip-3    11.8155     0.5288     0.7272    13.2700     9.9480
>        tip+percpu-rswem-1    15.3601     0.8981     0.9477    16.8116    12.6910
>        tip+percpu-rswem-2    15.2558     0.8442     0.9188    17.0199    12.9586
>        tip+percpu-rswem-3    15.5297     0.6386     0.7991    17.4392    12.7992

I did indeed manage to get flock02 down to a usable level and found:

    3.20 :        ffffffff811ecbdf:       incl   %gs:0x7ee1de72(%rip)        # aa58 <__preempt_count>
    0.27 :        ffffffff811ecbe6:       mov    0xa98553(%rip),%rax        # ffffffff81c85140 <file_rwsem>
   10.78 :        ffffffff811ecbed:       incl   %gs:(%rax)
    0.19 :        ffffffff811ecbf0:       mov    0xa9855a(%rip),%edx        # ffffffff81c85150 <file_rwsem+0x10>
    0.00 :        ffffffff811ecbf6:       test   %edx,%edx
    0.00 :        ffffffff811ecbf8:       jne    ffffffff811ecdd1 <flock_lock_file+0x261>
    3.47 :        ffffffff811ecbfe:       decl   %gs:0x7ee1de53(%rip)        # aa58 <__preempt_count>
    0.00 :        ffffffff811ecc05:       je     ffffffff811eccec <flock_lock_file+0x17c>

Which is percpu_down_read(). Now aside from the fact that I run a
PREEMPT=y kernel, it looks like the sem->refcount increment stalls
because of the dependent load.

Manually hoisting the load very slightly improves things:

    0.24 :        ffffffff811ecbdf:       mov    0xa9855a(%rip),%rax        # ffffffff81c85140 <file_rwsem>
    5.88 :        ffffffff811ecbe6:       incl   %gs:0x7ee1de6b(%rip)        # aa58 <__preempt_count>
    7.94 :        ffffffff811ecbed:       incl   %gs:(%rax)
    0.30 :        ffffffff811ecbf0:       mov    0xa9855a(%rip),%edx        # ffffffff81c85150 <file_rwsem+0x10>
    0.00 :        ffffffff811ecbf6:       test   %edx,%edx
    0.00 :        ffffffff811ecbf8:       jne    ffffffff811ecdd1 <flock_lock_file+0x261>
    3.70 :        ffffffff811ecbfe:       decl   %gs:0x7ee1de53(%rip)        # aa58 <__preempt_count>
    0.00 :        ffffffff811ecc05:       je     ffffffff811eccec <flock_lock_file+0x17c>

But it's not much :/

Using DEFINE_STATIC_PERCPU_RWSEM(file_rwsem) would allow GCC to omit the
sem->refcount load entirely, but it's not smart enough to see that it can
(tested 4.9 and 5.1).

---
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -35,6 +35,8 @@ extern void __percpu_up_read(struct perc
 
 static inline void _percpu_down_read(struct percpu_rw_semaphore *sem)
 {
+	unsigned int __percpu *refcount = sem->refcount;
+
 	might_sleep();
 
 	preempt_disable();
@@ -47,7 +49,7 @@ static inline void _percpu_down_read(str
 	 * writer will see anything we did within this RCU-sched read-side
 	 * critical section.
 	 */
-	__this_cpu_inc(*sem->refcount);
+	__this_cpu_inc(*refcount);
 	if (unlikely(!rcu_sync_is_idle(&sem->rss)))
 		__percpu_down_read(sem); /* Unconditional memory barrier. */
 	preempt_enable();
@@ -81,6 +83,8 @@ static inline bool percpu_down_read_tryl
 
 static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
 {
+	unsigned int __percpu *refcount = sem->refcount;
+
 	/*
 	 * The barrier() in preempt_disable() prevents the compiler from
 	 * bleeding the critical section out.
@@ -90,7 +94,7 @@ static inline void percpu_up_read(struct
 	 * Same as in percpu_down_read().
 	 */
 	if (likely(rcu_sync_is_idle(&sem->rss)))
-		__this_cpu_dec(*sem->refcount);
+		__this_cpu_dec(*refcount);
 	else
 		__percpu_up_read(sem); /* Unconditional memory barrier. */
 	preempt_enable();
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ