[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <544E7DC9.5020903@hp.com>
Date: Mon, 27 Oct 2014 13:15:53 -0400
From: Waiman Long <waiman.long@...com>
To: Peter Zijlstra <peterz@...radead.org>
CC: Thomas Gleixner <tglx@...utronix.de>,
Ingo Molnar <mingo@...hat.com>,
"H. Peter Anvin" <hpa@...or.com>, linux-arch@...r.kernel.org,
x86@...nel.org, linux-kernel@...r.kernel.org,
virtualization@...ts.linux-foundation.org,
xen-devel@...ts.xenproject.org, kvm@...r.kernel.org,
Paolo Bonzini <paolo.bonzini@...il.com>,
Konrad Rzeszutek Wilk <konrad.wilk@...cle.com>,
Boris Ostrovsky <boris.ostrovsky@...cle.com>,
"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>,
Rik van Riel <riel@...hat.com>,
Linus Torvalds <torvalds@...ux-foundation.org>,
Raghavendra K T <raghavendra.kt@...ux.vnet.ibm.com>,
David Vrabel <david.vrabel@...rix.com>,
Oleg Nesterov <oleg@...hat.com>,
Scott J Norton <scott.norton@...com>,
Douglas Hatch <doug.hatch@...com>
Subject: Re: [PATCH v12 09/11] pvqspinlock, x86: Add para-virtualization support
On 10/24/2014 06:04 PM, Peter Zijlstra wrote:
> On Fri, Oct 24, 2014 at 04:53:27PM -0400, Waiman Long wrote:
>> The additional register pressure may just cause a few more register moves
>> which should be negligible in the overall performance . The additional
>> icache pressure, however, may have some impact on performance. I was trying
>> to balance the performance of the pv and non-pv versions so that we won't
>> penalize the pv code too much for a bit more performance in the non-pv code.
>> Doing it your way will add a lot of function call and register
>> saving/restoring to the pv code.
> If people care about performance they should not be using virt crap :-)
>
> I only really care about bare metal.
Yes, I am aware of that. However, the whole point of doing PV spinlock
is to improve performance in a virtual guest.
Anyway, I had done some measurements. In my test system, the
queue_spin_lock_slowpath() function has a text size of about 400 bytes
without PV, but 1120 bytes with PV. I made some changes to create
separate versions of PV and non-PV slowpath functions as shown by the
diff below. The text size is now about 430 bytes for the non-PV version
and 925 bytes for the PV version. The overall object size increases by a
bit more than 200 bytes, but the icache footprint should be reduced no
matter which version is used.
-Longman
----------------------------------------
diff --git a/arch/x86/include/asm/pvqspinlock.h
b/arch/x86/include/asm/pvqspinlo
index d424252..241bf30 100644
--- a/arch/x86/include/asm/pvqspinlock.h
+++ b/arch/x86/include/asm/pvqspinlock.h
@@ -79,9 +79,6 @@ static inline void pv_init_node(struct mcs_spinlock *node)
BUILD_BUG_ON(sizeof(struct pv_qnode) > 5*sizeof(struct
mcs_spinlock));
- if (!pv_enabled())
- return;
-
pn->cpustate = PV_CPU_ACTIVE;
pn->mayhalt = false;
pn->mycpu = smp_processor_id();
@@ -132,9 +129,6 @@ static inline bool pv_link_and_wait_node(u32 old,
struct mcs
struct pv_qnode *ppn, *pn = (struct pv_qnode *)node;
unsigned int count;
- if (!pv_enabled())
- return false;
-
if (!(old & _Q_TAIL_MASK)) {
node->locked = true; /* At queue head now */
goto ret;
@@ -236,9 +230,6 @@ pv_wait_head(struct qspinlock *lock, struct
mcs_spinlock *no
{
struct pv_qnode *pn = (struct pv_qnode *)node;
- if (!pv_enabled())
- return smp_load_acquire(&lock->val.counter);
-
for (;;) {
unsigned int count;
s8 oldstate;
@@ -328,8 +319,6 @@ static inline void pv_wait_check(struct qspinlock *lock,
struct pv_qnode *pnxt = (struct pv_qnode *)next;
struct pv_qnode *pcur = (struct pv_qnode *)node;
- if (!pv_enabled())
- return;
/*
* Clear the locked and head values of lock holder
*/
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 1662dbd..05aea57 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -16,6 +16,7 @@
* Authors: Waiman Long <waiman.long@...com>
* Peter Zijlstra <pzijlstr@...hat.com>
*/
+#ifndef _GEN_PV_LOCK_SLOWPATH
#include <linux/smp.h>
#include <linux/bug.h>
#include <linux/cpumask.h>
@@ -271,19 +272,37 @@ void queue_spin_unlock_slowpath(struct qspinlock
*lock)
}
EXPORT_SYMBOL(queue_spin_unlock_slowpath);
-#else
+static void pv_queue_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+
+#else /* CONFIG_PARAVIRT_SPINLOCKS */
+
+static inline void pv_queue_spin_lock_slowpath(struct qspinlock *lock,
u32 val)
+ { }
-static inline void pv_init_node(struct mcs_spinlock *node) { }
-static inline void pv_wait_check(struct qspinlock *lock,
- struct mcs_spinlock *node,
- struct mcs_spinlock *next) { }
-static inline bool pv_link_and_wait_node(u32 old, struct mcs_spinlock
*node)
+#endif /* CONFIG_PARAVIRT_SPINLOCKS */
+
+/*
+ * Dummy PV functions for bare-metal slowpath code
+ */
+static inline void nopv_init_node(struct mcs_spinlock *node) { }
+static inline void nopv_wait_check(struct qspinlock *lock,
+ struct mcs_spinlock *node,
+ struct mcs_spinlock *next) { }
+static inline bool nopv_link_and_wait_node(u32 old, struct mcs_spinlock
*node)
{ return false; }
-static inline int pv_wait_head(struct qspinlock *lock,
+static inline int nopv_wait_head(struct qspinlock *lock,
struct mcs_spinlock *node)
{ return smp_load_acquire(&lock->val.counter); }
+static inline bool return_true(void) { return true; }
+static inline bool return_false(void) { return false; }
-#endif /* CONFIG_PARAVIRT_SPINLOCKS */
+#define pv_init_node nopv_init_node
+#define pv_wait_check nopv_wait_check
+#define pv_link_and_wait_node nopv_link_and_wait_node
+#define pv_wait_head nopv_wait_head
+#define in_pv_code return_false
+
+#endif /* _GEN_PV_LOCK_SLOWPATH */
/**
* queue_spin_lock_slowpath - acquire the queue spinlock
@@ -306,7 +325,11 @@ static inline int pv_wait_head(struct qspinlock *lock,
* contended : (*,x,y) +--> (*,0,0) ---> (*,0,1) -' :
* queue : ^--' :
*/
+#ifdef _GEN_PV_LOCK_SLOWPATH
+static void pv_queue_spin_lock_slowpath(struct qspinlock *lock, u32 val)
+#else
void queue_spin_lock_slowpath(struct qspinlock *lock, u32 val)
+#endif
{
struct mcs_spinlock *prev, *next, *node;
u32 new, old, tail;
@@ -314,7 +337,12 @@ void queue_spin_lock_slowpath(struct qspinlock
*lock, u32 v
BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
- if (pv_enabled())
+ if (pv_enabled()) {
+ pv_queue_spin_lock_slowpath(lock, val);
+ return;
+ }
+
+ if (in_pv_code())
goto queue;
if (virt_queue_spin_lock(lock))
@@ -474,3 +502,23 @@ release:
this_cpu_dec(mcs_nodes[0].count);
}
EXPORT_SYMBOL(queue_spin_lock_slowpath);
+
+#if !defined(_GEN_PV_LOCK_SLOWPATH) && defined(CONFIG_PARAVIRT_SPINLOCKS)
+/*
+ * Generate the PV version of the queue_spin_lock_slowpath function
+ */
+#undef pv_init_node
+#undef pv_wait_check
+#undef pv_link_and_wait_node
+#undef pv_wait_head
+#undef EXPORT_SYMBOL
+#undef in_pv_code
+
+#define _GEN_PV_LOCK_SLOWPATH
+#define EXPORT_SYMBOL(x)
+#define in_pv_code return_true
+#define pv_enabled return_false
+
+#include "qspinlock.c"
+
+#endif
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists