linux-kernel - [RFC PATCH 1/6] kernel: implement queue spinlock API

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1358896415-28569-2-git-send-email-walken@google.com>
Date:	Tue, 22 Jan 2013 15:13:30 -0800
From:	Michel Lespinasse <walken@...gle.com>
To:	Rik van Riel <riel@...hat.com>, Ingo Molnar <mingo@...hat.com>,
	"Paul E. McKenney" <paulmck@...ibm.com>,
	David Howells <dhowells@...hat.com>,
	Thomas Gleixner <tglx@...utronix.de>,
	Eric Dumazet <edumazet@...gle.com>,
	"Eric W. Biederman" <ebiederm@...ssion.com>,
	Manfred Spraul <manfred@...orfullife.com>
Cc:	linux-kernel@...r.kernel.org
Subject: [RFC PATCH 1/6] kernel: implement queue spinlock API

Introduce queue spinlocks, to be used in situations where it is desired
to have good throughput even under the occasional high-contention situation.

This initial implementation is based on the classic MCS spinlock,
because I think this represents the nicest API we can hope for in a
fast queue spinlock algorithm. The MCS spinlock has known limitations
in that it performs very well under high contention, but is not as
good as the ticket spinlock under low contention. I will address these
limitations in a later patch, which will propose an alternative,
higher performance implementation using (mostly) the same API.

Sample use case acquiring mystruct->lock:

  struct q_spinlock_node node;

  q_spin_lock(&mystruct->lock, &node);
  ...
  q_spin_unlock(&mystruct->lock, &node);

The q_spinlock_node must be used by a single lock holder / waiter and it must
stay allocated for the entire duration when the lock is held (or waited on).

Signed-off-by: Michel Lespinasse <walken@...gle.com>

---
 arch/x86/include/asm/queue_spinlock.h |   20 ++++++++
 include/asm-generic/queue_spinlock.h  |    7 +++
 include/linux/queue_spinlock.h        |   61 ++++++++++++++++++++++++
 kernel/Makefile                       |    2 +-
 kernel/queue_spinlock.c               |   82 +++++++++++++++++++++++++++++++++
 5 files changed, 171 insertions(+), 1 deletions(-)
 create mode 100644 arch/x86/include/asm/queue_spinlock.h
 create mode 100644 include/asm-generic/queue_spinlock.h
 create mode 100644 include/linux/queue_spinlock.h
 create mode 100644 kernel/queue_spinlock.c

diff --git a/arch/x86/include/asm/queue_spinlock.h b/arch/x86/include/asm/queue_spinlock.h
new file mode 100644
index 000000000000..655e01c1c2e8
--- /dev/null
+++ b/arch/x86/include/asm/queue_spinlock.h
@@ -0,0 +1,20 @@
+#ifndef _ASM_QUEUE_SPINLOCK_H
+#define _ASM_QUEUE_SPINLOCK_H
+
+#if defined(CONFIG_X86_PPRO_FENCE) || defined(CONFIG_X86_OOSTORE)
+
+#define q_spin_lock_mb() mb()
+#define q_spin_unlock_mb() mb()
+
+#else
+
+/*
+ * x86 memory ordering only needs compiler barriers to implement
+ * acquire load / release store semantics
+ */
+#define q_spin_lock_mb() barrier()
+#define q_spin_unlock_mb() barrier()
+
+#endif
+
+#endif /* _ASM_QUEUE_SPINLOCK_H */
diff --git a/include/asm-generic/queue_spinlock.h b/include/asm-generic/queue_spinlock.h
new file mode 100644
index 000000000000..01fb9fbfb8dc
--- /dev/null
+++ b/include/asm-generic/queue_spinlock.h
@@ -0,0 +1,7 @@
+#ifndef _ASM_GENERIC_QUEUE_SPINLOCK_H
+#define _ASM_GENERIC_QUEUE_SPINLOCK_H
+
+#define q_spin_lock_mb() mb()
+#define q_spin_unlock_mb() mb()
+
+#endif /* _ASM_GENERIC_QUEUE_SPINLOCK_H */
diff --git a/include/linux/queue_spinlock.h b/include/linux/queue_spinlock.h
new file mode 100644
index 000000000000..84b2470d232b
--- /dev/null
+++ b/include/linux/queue_spinlock.h
@@ -0,0 +1,61 @@
+#ifndef __LINUX_QUEUE_SPINLOCK_H
+#define __LINUX_QUEUE_SPINLOCK_H
+
+#include <linux/bug.h>
+
+struct q_spinlock_node {
+	struct q_spinlock_node *next;
+	bool wait;
+};
+
+struct q_spinlock {
+	struct q_spinlock_node *node;
+};
+
+#define __Q_SPIN_LOCK_UNLOCKED(name) { NULL }
+
+#ifdef CONFIG_SMP
+
+void q_spin_lock(struct q_spinlock *lock, struct q_spinlock_node *node);
+void q_spin_lock_bh(struct q_spinlock *lock, struct q_spinlock_node *node);
+void q_spin_unlock(struct q_spinlock *lock, struct q_spinlock_node *node);
+void q_spin_unlock_bh(struct q_spinlock *lock, struct q_spinlock_node *node);
+
+static inline void q_spin_lock_init(struct q_spinlock *lock)
+{
+	lock->node = NULL;
+}
+
+static inline void assert_q_spin_locked(struct q_spinlock *lock)
+{
+	BUG_ON(!lock->node);
+}
+
+static inline void assert_q_spin_unlocked(struct q_spinlock *lock)
+{
+	BUG_ON(lock->node);
+}
+
+#else /* !CONFIG_SMP */
+
+static inline void
+q_spin_lock(struct q_spinlock *lock, struct q_spinlock_node *node) {}
+
+static inline void
+q_spin_lock_bh(struct q_spinlock *lock, struct q_spinlock_node *node) {}
+
+static inline void
+q_spin_unlock(struct q_spinlock *lock, struct q_spinlock_node *node) {}
+
+static inline void
+q_spin_unlock_bh(struct q_spinlock *lock, struct q_spinlock_node *node) {}
+
+static inline void q_spin_lock_init(struct q_spinlock *lock) {}
+
+static inline void assert_q_spin_locked(struct q_spinlock *lock) {}
+
+static inline void assert_q_spin_unlocked(struct q_spinlock *lock) {}
+
+#endif /* CONFIG_SMP */
+
+#endif /* __LINUX_QUEUE_SPINLOCK_H */
diff --git a/kernel/Makefile b/kernel/Makefile
index 86e3285ae7e5..ec91cfef4d4e 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -49,7 +49,7 @@ obj-$(CONFIG_SMP) += smp.o
 ifneq ($(CONFIG_SMP),y)
 obj-y += up.o
 endif
-obj-$(CONFIG_SMP) += spinlock.o
+obj-$(CONFIG_SMP) += spinlock.o queue_spinlock.o
 obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
 obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
 obj-$(CONFIG_UID16) += uid16.o
diff --git a/kernel/queue_spinlock.c b/kernel/queue_spinlock.c
new file mode 100644
index 000000000000..20304f0bc852
--- /dev/null
+++ b/kernel/queue_spinlock.c
@@ -0,0 +1,82 @@
+#include <linux/queue_spinlock.h>
+#include <linux/export.h>
+#include <linux/preempt.h>
+#include <linux/bottom_half.h>
+#include <asm/barrier.h>
+#include <asm/processor.h>	/* for cpu_relax() */
+#include <asm/queue_spinlock.h>
+
+static inline void
+__q_spin_lock(struct q_spinlock *lock, struct q_spinlock_node *node)
+{
+	struct q_spinlock_node *prev;
+
+	node->next = NULL;
+	prev = xchg(&lock->node, node);
+	if (likely(!prev))
+		/*
+		 * Acquired the lock.
+		 * xchg() already includes a full memory barrier.
+		 */
+		return;
+
+	node->wait = true;
+	smp_wmb();
+	ACCESS_ONCE(prev->next) = node;
+	while (ACCESS_ONCE(node->wait))
+		cpu_relax();
+	q_spin_lock_mb();	/* guarantee acquire load semantics */
+}
+
+static inline void
+__q_spin_unlock(struct q_spinlock *lock, struct q_spinlock_node *node)
+{
+	struct q_spinlock_node *next;
+	if (likely(!(next = ACCESS_ONCE(node->next)))) {
+		/*
+		 * Try to release lock.
+		 * cmpxchg() already includes a full memory barrier.
+		 */
+		if (likely(cmpxchg(&lock->node, node, NULL) == node))
+			return;
+
+		/*
+		 * We raced with __q_spin_lock().
+		 * Wait for next thread to be known.
+		 */
+		while (!(next = ACCESS_ONCE(node->next)))
+			cpu_relax();
+	}
+	q_spin_unlock_mb();	/* guarantee release store semantics */
+	ACCESS_ONCE(next->wait) = false;
+}
+
+void q_spin_lock(struct q_spinlock *lock, struct q_spinlock_node *node)
+{
+	preempt_disable();
+	__q_spin_lock(lock, node);
+}
+EXPORT_SYMBOL(q_spin_lock);
+
+void q_spin_lock_bh(struct q_spinlock *lock, struct q_spinlock_node *node)
+{
+	local_bh_disable();
+	preempt_disable();
+	__q_spin_lock(lock, node);
+}
+EXPORT_SYMBOL(q_spin_lock_bh);
+
+void q_spin_unlock(struct q_spinlock *lock, struct q_spinlock_node *node)
+{
+	__q_spin_unlock(lock, node);
+	preempt_enable();
+}
+EXPORT_SYMBOL(q_spin_unlock);
+
+void q_spin_unlock_bh(struct q_spinlock *lock, struct q_spinlock_node *node)
+{
+	__q_spin_unlock(lock, node);
+	preempt_enable_no_resched();
+	local_bh_enable_ip((unsigned long)__builtin_return_address(0));
+}
+EXPORT_SYMBOL(q_spin_unlock_bh);
-- 
1.7.7.3
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/