Message-ID: <20161203021839.GB30078@linux-80c1.suse>
Date:   Fri, 2 Dec 2016 18:18:39 -0800
From:   Davidlohr Bueso <dave@...olabs.net>
To:     mingo@...nel.org, peterz@...radead.org, oleg@...hat.com
Cc:     john.stultz@...aro.org, dimitrysh@...gle.com,
        linux-kernel@...r.kernel.org, Davidlohr Bueso <dbueso@...e.de>
Subject: [PATCH v2 2/3] locking/percpu-rwsem: Rework writer block/wake to not
 use wait-queues

The use of any kind of wait queue is overkill for pcpu-rwsems. While
one option would be to use the lighter simple waitqueue (swait)
flavor, even that is more than pcpu-rwsems need. For one, we do not
care about any sort of queuing: the only (rare) time writers (and
readers, for that matter) are queued is when trying to acquire the
regular, contended rw_sem. There cannot be any further queuing, as
writers are serialized by the rw_sem in the first place.

This patch therefore implements custom wait/wake with an RCU-aware
writer task pointer. The only time this pointer is non-NULL is while
a writer is determining whether it is going to block; it is reset as
soon as we know that the percpu_down_write() call has succeeded. All
of this is obviously done while holding the regular rw_sem. As such,
we can avoid the wait-queue handling and locking overhead (although
the waitqueue spinlock is currently only taken via its uncontended
fastpath, so the impact would not be very big).
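
For reference, a minimal sketch of the handshake described above; the
foo_* names are made up purely for illustration and are not part of
this patch (the real code is in kernel/locking/percpu-rwsem.c below).
The writer publishes its task_struct, re-checks the wait condition
under TASK_UNINTERRUPTIBLE, and readers prod it through the
RCU-protected pointer:

#include <linux/atomic.h>
#include <linux/sched.h>
#include <linux/rcupdate.h>

struct foo {
	struct task_struct *waiter;	/* blocked writer, NULL otherwise */
	atomic_t readers;		/* active readers */
};

/* Writer side: block until all active readers are gone. */
static void foo_wait_for_readers(struct foo *f)
{
	WRITE_ONCE(f->waiter, current);
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (!atomic_read(&f->readers))
			break;
		schedule();
	}
	rcu_assign_pointer(f->waiter, NULL);
	__set_current_state(TASK_RUNNING);
}

/* Reader side: drop our count, then wake a blocked writer, if any. */
static void foo_reader_done(struct foo *f)
{
	struct task_struct *waiter;

	/* the real __percpu_up_read() issues smp_mb() before its decrement */
	atomic_dec(&f->readers);

	rcu_read_lock();
	waiter = rcu_dereference(f->waiter);
	if (waiter)
		wake_up_process(waiter);
	rcu_read_unlock();
}

Because only one writer can own the rw_sem at a time, a single task
pointer (rather than a queue) is enough to remember whom to wake.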

Signed-off-by: Davidlohr Bueso <dbueso@...e.de>
---
 include/linux/percpu-rwsem.h  |  5 ++---
 kernel/locking/percpu-rwsem.c | 26 +++++++++++++++++++++-----
 2 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index 5b2e6159b744..9942b7e8bde8 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -4,7 +4,6 @@
 #include <linux/atomic.h>
 #include <linux/rwsem.h>
 #include <linux/percpu.h>
-#include <linux/wait.h>
 #include <linux/rcu_sync.h>
 #include <linux/lockdep.h>
 
@@ -12,7 +11,7 @@ struct percpu_rw_semaphore {
 	struct rcu_sync		rss;
 	unsigned int __percpu	*read_count;
 	struct rw_semaphore	rw_sem;
-	wait_queue_head_t	writer;
+	struct task_struct      *writer; /* blocked writer */
 	int			readers_block;
 };
 
@@ -22,7 +21,7 @@ static struct percpu_rw_semaphore name = {				\
 	.rss = __RCU_SYNC_INITIALIZER(name.rss, RCU_SCHED_SYNC),	\
 	.read_count = &__percpu_rwsem_rc_##name,			\
 	.rw_sem = __RWSEM_INITIALIZER(name.rw_sem),			\
-	.writer = __WAIT_QUEUE_HEAD_INITIALIZER(name.writer),		\
+	.writer = NULL,							\
 }
 
 extern int __percpu_down_read(struct percpu_rw_semaphore *, int);
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index ce182599cf2e..7856a77396d3 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -1,7 +1,6 @@
 #include <linux/atomic.h>
 #include <linux/rwsem.h>
 #include <linux/percpu.h>
-#include <linux/wait.h>
 #include <linux/lockdep.h>
 #include <linux/percpu-rwsem.h>
 #include <linux/rcupdate.h>
@@ -18,7 +17,7 @@ int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
 	/* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
 	rcu_sync_init(&sem->rss, RCU_SCHED_SYNC);
 	__init_rwsem(&sem->rw_sem, name, rwsem_key);
-	init_waitqueue_head(&sem->writer);
+	sem->writer = NULL;
 	sem->readers_block = 0;
 	return 0;
 }
@@ -94,6 +93,8 @@ EXPORT_SYMBOL_GPL(__percpu_down_read);
 
 void __percpu_up_read(struct percpu_rw_semaphore *sem)
 {
+	struct task_struct *writer;
+
 	smp_mb(); /* B matches C */
 	/*
 	 * In other words, if they see our decrement (presumably to aggregate
@@ -102,8 +103,13 @@ void __percpu_up_read(struct percpu_rw_semaphore *sem)
 	 */
 	__this_cpu_dec(*sem->read_count);
 
+	rcu_read_lock();
+	writer = rcu_dereference(sem->writer);
+
 	/* Prod writer to recheck readers_active */
-	wake_up(&sem->writer);
+	if (writer)
+		wake_up_process(writer);
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(__percpu_up_read);
 
@@ -159,8 +165,18 @@ void percpu_down_write(struct percpu_rw_semaphore *sem)
 	 * will wait for them.
 	 */
 
-	/* Wait for all now active readers to complete. */
-	wait_event(sem->writer, readers_active_check(sem));
+	WRITE_ONCE(sem->writer, current);
+	for (;;) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+
+		if (readers_active_check(sem))
+			break;
+
+		schedule();
+	}
+
+	rcu_assign_pointer(sem->writer, NULL);
+	__set_current_state(TASK_RUNNING);
 }
 EXPORT_SYMBOL_GPL(percpu_down_write);
 
-- 
2.6.6
