lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1358896415-28569-2-git-send-email-walken@google.com>
Date:	Tue, 22 Jan 2013 15:13:30 -0800
From:	Michel Lespinasse <walken@...gle.com>
To:	Rik van Riel <riel@...hat.com>, Ingo Molnar <mingo@...hat.com>,
	"Paul E. McKenney" <paulmck@...ibm.com>,
	David Howells <dhowells@...hat.com>,
	Thomas Gleixner <tglx@...utronix.de>,
	Eric Dumazet <edumazet@...gle.com>,
	"Eric W. Biederman" <ebiederm@...ssion.com>,
	Manfred Spraul <manfred@...orfullife.com>
Cc:	linux-kernel@...r.kernel.org
Subject: [RFC PATCH 1/6] kernel: implement queue spinlock API

Introduce queue spinlocks, to be used in situations where it is desired
to have good throughput even under the occasional high-contention situation.

This initial implementation is based on the classic MCS spinlock,
because I think this represents the nicest API we can hope for in a
fast queue spinlock algorithm. The MCS spinlock has known limitations
in that it performs very well under high contention, but is not as
good as the ticket spinlock under low contention. I will address these
limitations in a later patch, which will propose an alternative,
higher performance implementation using (mostly) the same API.

Sample use case acquiring mystruct->lock:

  struct q_spinlock_node node;

  q_spin_lock(&mystruct->lock, &node);
  ...
  q_spin_unlock(&mystruct->lock, &node);

The q_spinlock_node must be used by a single lock holder / waiter and it must
stay allocated for the entire duration when the lock is held (or waited on).

Signed-off-by: Michel Lespinasse <walken@...gle.com>

---
 arch/x86/include/asm/queue_spinlock.h |   20 ++++++++
 include/asm-generic/queue_spinlock.h  |    7 +++
 include/linux/queue_spinlock.h        |   61 ++++++++++++++++++++++++
 kernel/Makefile                       |    2 +-
 kernel/queue_spinlock.c               |   82 +++++++++++++++++++++++++++++++++
 5 files changed, 171 insertions(+), 1 deletions(-)
 create mode 100644 arch/x86/include/asm/queue_spinlock.h
 create mode 100644 include/asm-generic/queue_spinlock.h
 create mode 100644 include/linux/queue_spinlock.h
 create mode 100644 kernel/queue_spinlock.c

diff --git a/arch/x86/include/asm/queue_spinlock.h b/arch/x86/include/asm/queue_spinlock.h
new file mode 100644
index 000000000000..655e01c1c2e8
--- /dev/null
+++ b/arch/x86/include/asm/queue_spinlock.h
@@ -0,0 +1,20 @@
+#ifndef _ASM_QUEUE_SPINLOCK_H
+#define _ASM_QUEUE_SPINLOCK_H
+
+#if defined(CONFIG_X86_PPRO_FENCE) || defined(CONFIG_X86_OOSTORE)
+
+/*
+ * These configs permit weaker-than-TSO ordering, so real fences are
+ * needed for acquire/release semantics.
+ */
+#define q_spin_lock_mb() mb()
+#define q_spin_unlock_mb() mb()
+
+#else
+
+/*
+ * x86 memory ordering only needs compiler barriers to implement
+ * acquire load / release store semantics
+ */
+#define q_spin_lock_mb() barrier()
+#define q_spin_unlock_mb() barrier()
+
+#endif
+
+#endif /* _ASM_QUEUE_SPINLOCK_H */
diff --git a/include/asm-generic/queue_spinlock.h b/include/asm-generic/queue_spinlock.h
new file mode 100644
index 000000000000..01fb9fbfb8dc
--- /dev/null
+++ b/include/asm-generic/queue_spinlock.h
@@ -0,0 +1,7 @@
+#ifndef _ASM_GENERIC_QUEUE_SPINLOCK_H
+#define _ASM_GENERIC_QUEUE_SPINLOCK_H
+
+/* Generic fallback: use full barriers to guarantee acquire/release. */
+#define q_spin_lock_mb() mb()
+#define q_spin_unlock_mb() mb()
+
+#endif /* _ASM_GENERIC_QUEUE_SPINLOCK_H */
diff --git a/include/linux/queue_spinlock.h b/include/linux/queue_spinlock.h
new file mode 100644
index 000000000000..84b2470d232b
--- /dev/null
+++ b/include/linux/queue_spinlock.h
@@ -0,0 +1,61 @@
+#ifndef __LINUX_QUEUE_SPINLOCK_H
+#define __LINUX_QUEUE_SPINLOCK_H
+
+#include <linux/bug.h>
+
+/*
+ * Per-waiter queue node.  Must be used by a single lock holder/waiter
+ * and stay allocated for the whole time the lock is held or waited on.
+ */
+struct q_spinlock_node {
+	struct q_spinlock_node *next;	/* successor in queue, NULL if we are tail */
+	bool wait;	/* set by enqueuer, cleared by previous holder on handoff */
+};
+
+/* Lock word: points to the current queue tail, NULL when unlocked. */
+struct q_spinlock {
+	struct q_spinlock_node *node;
+};
+
+/* Static initializer for an unlocked queue spinlock. */
+#define __Q_SPIN_LOCK_UNLOCKED(name) { NULL }
+
+#ifdef CONFIG_SMP
+
+void q_spin_lock(struct q_spinlock *lock, struct q_spinlock_node *node);
+void q_spin_lock_bh(struct q_spinlock *lock, struct q_spinlock_node *node);
+void q_spin_unlock(struct q_spinlock *lock, struct q_spinlock_node *node);
+void q_spin_unlock_bh(struct q_spinlock *lock, struct q_spinlock_node *node);
+
+/* Initialize @lock to the unlocked state (empty queue). */
+static inline void q_spin_lock_init(struct q_spinlock *lock)
+{
+	lock->node = NULL;
+}
+
+/* Debug check: a non-NULL tail means at least one holder/waiter. */
+static inline void assert_q_spin_locked(struct q_spinlock *lock)
+{
+	BUG_ON(!lock->node);
+}
+
+/* Debug check: a NULL tail means nobody holds or waits on the lock. */
+static inline void assert_q_spin_unlocked(struct q_spinlock *lock)
+{
+	BUG_ON(lock->node);
+}
+
+#else /* !CONFIG_SMP */
+
+/*
+ * UP builds: with a single CPU there is no cross-CPU contention, so all
+ * operations compile away to nothing.  Note these stubs do not disable
+ * preemption or bottom halves, unlike the SMP versions.
+ */
+static inline void
+q_spin_lock(struct q_spinlock *lock, struct q_spinlock_node *node) {}
+
+static inline void
+q_spin_lock_bh(struct q_spinlock *lock, struct q_spinlock_node *node) {}
+
+static inline void
+q_spin_unlock(struct q_spinlock *lock, struct q_spinlock_node *node) {}
+
+static inline void
+q_spin_unlock_bh(struct q_spinlock *lock, struct q_spinlock_node *node) {}
+
+static inline void q_spin_lock_init(struct q_spinlock *lock) {}
+
+static inline void assert_q_spin_locked(struct q_spinlock *lock) {}
+
+static inline void assert_q_spin_unlocked(struct q_spinlock *lock) {}
+
+#endif /* CONFIG_SMP */
+
+#endif /* __LINUX_QUEUE_SPINLOCK_H */
diff --git a/kernel/Makefile b/kernel/Makefile
index 86e3285ae7e5..ec91cfef4d4e 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -49,7 +49,7 @@ obj-$(CONFIG_SMP) += smp.o
 ifneq ($(CONFIG_SMP),y)
 obj-y += up.o
 endif
-obj-$(CONFIG_SMP) += spinlock.o
+obj-$(CONFIG_SMP) += spinlock.o queue_spinlock.o
 obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
 obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
 obj-$(CONFIG_UID16) += uid16.o
diff --git a/kernel/queue_spinlock.c b/kernel/queue_spinlock.c
new file mode 100644
index 000000000000..20304f0bc852
--- /dev/null
+++ b/kernel/queue_spinlock.c
@@ -0,0 +1,82 @@
+#include <linux/queue_spinlock.h>
+#include <linux/export.h>
+#include <linux/preempt.h>
+#include <linux/bottom_half.h>
+#include <asm/barrier.h>
+#include <asm/processor.h>	/* for cpu_relax() */
+#include <asm/queue_spinlock.h>
+
+/*
+ * MCS-style acquire: atomically make @node the new queue tail.  If there
+ * was no previous tail we own the lock immediately; otherwise link
+ * behind the old tail and spin on our own node's wait flag until the
+ * predecessor hands the lock over.
+ */
+static inline void
+__q_spin_lock(struct q_spinlock *lock, struct q_spinlock_node *node)
+{
+	struct q_spinlock_node *prev;
+
+	node->next = NULL;
+	prev = xchg(&lock->node, node);
+	if (likely(!prev))
+		/*
+		 * Acquired the lock.
+		 * xchg() already includes a full memory barrier.
+		 */
+		return;
+
+	node->wait = true;
+	/* order wait = true before linking, so the handoff store cannot be lost */
+	smp_wmb();
+	ACCESS_ONCE(prev->next) = node;
+	while (ACCESS_ONCE(node->wait))
+		cpu_relax();
+	q_spin_lock_mb();	/* guarantee acquire load semantics */
+}
+
+/*
+ * MCS-style release: if no successor is visible, try to swing the tail
+ * back to NULL.  A failed cmpxchg means a new waiter is concurrently
+ * enqueueing between its xchg and its prev->next store, so spin until
+ * it links itself, then pass ownership by clearing its wait flag.
+ */
+static inline void
+__q_spin_unlock(struct q_spinlock *lock, struct q_spinlock_node *node)
+{
+	struct q_spinlock_node *next;
+	if (likely(!(next = ACCESS_ONCE(node->next)))) {
+		/*
+		 * Try to release lock.
+		 * cmpxchg() already includes a full memory barrier.
+		 */
+		if (likely(cmpxchg(&lock->node, node, NULL) == node))
+			return;
+
+		/*
+		 * We raced with __q_spin_lock().
+		 * Wait for next thread to be known.
+		 */
+		while (!(next = ACCESS_ONCE(node->next)))
+			cpu_relax();
+	}
+	q_spin_unlock_mb();	/* guarantee release store semantics */
+	ACCESS_ONCE(next->wait) = false;
+}
+
+/* Acquire @lock, spinning via @node; disables preemption while held. */
+void q_spin_lock(struct q_spinlock *lock, struct q_spinlock_node *node)
+{
+	preempt_disable();
+	__q_spin_lock(lock, node);
+}
+EXPORT_SYMBOL(q_spin_lock);
+
+/* As q_spin_lock(), but also disables bottom halves while held. */
+void q_spin_lock_bh(struct q_spinlock *lock, struct q_spinlock_node *node)
+{
+	local_bh_disable();
+	preempt_disable();
+	__q_spin_lock(lock, node);
+}
+EXPORT_SYMBOL(q_spin_lock_bh);
+
+/* Release @lock (taken with the same @node) and re-enable preemption. */
+void q_spin_unlock(struct q_spinlock *lock, struct q_spinlock_node *node)
+{
+	__q_spin_unlock(lock, node);
+	preempt_enable();
+}
+EXPORT_SYMBOL(q_spin_unlock);
+
+/*
+ * Release @lock and re-enable bottom halves.  Preemption is re-enabled
+ * without an immediate reschedule check; local_bh_enable_ip() provides
+ * the rescheduling point, mirroring spin_unlock_bh().
+ */
+void q_spin_unlock_bh(struct q_spinlock *lock, struct q_spinlock_node *node)
+{
+	__q_spin_unlock(lock, node);
+	preempt_enable_no_resched();
+	local_bh_enable_ip((unsigned long)__builtin_return_address(0));
+}
+EXPORT_SYMBOL(q_spin_unlock_bh);
-- 
1.7.7.3
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ