[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1700eef8-415c-59d3-d6a1-61314f8be1af@linux.vnet.ibm.com>
Date: Tue, 6 Dec 2016 09:16:20 +0800
From: Pan Xinhui <xinhui@...ux.vnet.ibm.com>
To: Boqun Feng <boqun.feng@...il.com>,
Pan Xinhui <xinhui.pan@...ux.vnet.ibm.com>
Cc: linux-kernel@...r.kernel.org, linuxppc-dev@...ts.ozlabs.org,
benh@...nel.crashing.org, paulus@...ba.org, mpe@...erman.id.au,
peterz@...radead.org, mingo@...hat.com, paulmck@...ux.vnet.ibm.com,
Waiman Long <longman@...hat.com>,
virtualization@...ts.linux-foundation.org
Subject: Re: [PATCH v8 1/6] powerpc/qspinlock: powerpc support qspinlock
Correcting Waiman's address.
在 2016/12/6 08:47, Boqun Feng 写道:
> On Mon, Dec 05, 2016 at 10:19:21AM -0500, Pan Xinhui wrote:
>> This patch add basic code to enable qspinlock on powerpc. qspinlock is
>> one kind of fairlock implementation. And seen some performance improvement
>> under some scenarios.
>>
>> queued_spin_unlock() release the lock by just one write of NULL to the
>> ::locked field which sits at different places in the two endianness
>> system.
>>
>> We override some arch_spin_XXX as powerpc has io_sync stuff which makes
>> sure the io operations are protected by the lock correctly.
>>
>> There is another special case, see commit
>> 2c610022711 ("locking/qspinlock: Fix spin_unlock_wait() some more")
>>
>> Signed-off-by: Pan Xinhui <xinhui.pan@...ux.vnet.ibm.com>
>> ---
>> arch/powerpc/include/asm/qspinlock.h | 66 +++++++++++++++++++++++++++++++
>> arch/powerpc/include/asm/spinlock.h | 31 +++++++++------
>> arch/powerpc/include/asm/spinlock_types.h | 4 ++
>> arch/powerpc/lib/locks.c | 59 +++++++++++++++++++++++++++
>> 4 files changed, 147 insertions(+), 13 deletions(-)
>> create mode 100644 arch/powerpc/include/asm/qspinlock.h
>>
>> diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h
>> new file mode 100644
>> index 0000000..4c89256
>> --- /dev/null
>> +++ b/arch/powerpc/include/asm/qspinlock.h
>> @@ -0,0 +1,66 @@
>> +#ifndef _ASM_POWERPC_QSPINLOCK_H
>> +#define _ASM_POWERPC_QSPINLOCK_H
>> +
>> +#include <asm-generic/qspinlock_types.h>
>> +
>> +#define SPIN_THRESHOLD (1 << 15)
>> +#define queued_spin_unlock queued_spin_unlock
>> +#define queued_spin_is_locked queued_spin_is_locked
>> +#define queued_spin_unlock_wait queued_spin_unlock_wait
>> +
>> +extern void queued_spin_unlock_wait(struct qspinlock *lock);
>> +
>> +static inline u8 *__qspinlock_lock_byte(struct qspinlock *lock)
>> +{
>> + return (u8 *)lock + 3 * IS_BUILTIN(CONFIG_CPU_BIG_ENDIAN);
>> +}
>> +
>> +static inline void queued_spin_unlock(struct qspinlock *lock)
>> +{
>> + /* release semantics is required */
>> + smp_store_release(__qspinlock_lock_byte(lock), 0);
>> +}
>> +
>> +static inline int queued_spin_is_locked(struct qspinlock *lock)
>> +{
>> + smp_mb();
>> + return atomic_read(&lock->val);
>> +}
>> +
>> +#include <asm-generic/qspinlock.h>
>> +
>> +/* we need override it as ppc has io_sync stuff */
>> +#undef arch_spin_trylock
>> +#undef arch_spin_lock
>> +#undef arch_spin_lock_flags
>> +#undef arch_spin_unlock
>> +#define arch_spin_trylock arch_spin_trylock
>> +#define arch_spin_lock arch_spin_lock
>> +#define arch_spin_lock_flags arch_spin_lock_flags
>> +#define arch_spin_unlock arch_spin_unlock
>> +
>> +static inline int arch_spin_trylock(arch_spinlock_t *lock)
>> +{
>> + CLEAR_IO_SYNC;
>> + return queued_spin_trylock(lock);
>> +}
>> +
>> +static inline void arch_spin_lock(arch_spinlock_t *lock)
>> +{
>> + CLEAR_IO_SYNC;
>> + queued_spin_lock(lock);
>> +}
>> +
>> +static inline
>> +void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
>> +{
>> + CLEAR_IO_SYNC;
>> + queued_spin_lock(lock);
>> +}
>> +
>> +static inline void arch_spin_unlock(arch_spinlock_t *lock)
>> +{
>> + SYNC_IO;
>> + queued_spin_unlock(lock);
>> +}
>> +#endif /* _ASM_POWERPC_QSPINLOCK_H */
>> diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
>> index 8c1b913..954099e 100644
>> --- a/arch/powerpc/include/asm/spinlock.h
>> +++ b/arch/powerpc/include/asm/spinlock.h
>> @@ -60,6 +60,23 @@ static inline bool vcpu_is_preempted(int cpu)
>> }
>> #endif
>>
>> +#if defined(CONFIG_PPC_SPLPAR)
>> +/* We only yield to the hypervisor if we are in shared processor mode */
>> +#define SHARED_PROCESSOR (lppaca_shared_proc(local_paca->lppaca_ptr))
>> +extern void __spin_yield(arch_spinlock_t *lock);
>> +extern void __rw_yield(arch_rwlock_t *lock);
>> +#else /* SPLPAR */
>> +#define __spin_yield(x) barrier()
>> +#define __rw_yield(x) barrier()
>> +#define SHARED_PROCESSOR 0
>> +#endif
>> +
>> +#ifdef CONFIG_QUEUED_SPINLOCKS
>> +#include <asm/qspinlock.h>
>> +#else
>> +
>> +#define arch_spin_relax(lock) __spin_yield(lock)
>> +
>> static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
>> {
>> return lock.slock == 0;
>> @@ -114,18 +131,6 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
>> * held. Conveniently, we have a word in the paca that holds this
>> * value.
>> */
>> -
>> -#if defined(CONFIG_PPC_SPLPAR)
>> -/* We only yield to the hypervisor if we are in shared processor mode */
>> -#define SHARED_PROCESSOR (lppaca_shared_proc(local_paca->lppaca_ptr))
>> -extern void __spin_yield(arch_spinlock_t *lock);
>> -extern void __rw_yield(arch_rwlock_t *lock);
>> -#else /* SPLPAR */
>> -#define __spin_yield(x) barrier()
>> -#define __rw_yield(x) barrier()
>> -#define SHARED_PROCESSOR 0
>> -#endif
>> -
>> static inline void arch_spin_lock(arch_spinlock_t *lock)
>> {
>> CLEAR_IO_SYNC;
>> @@ -203,6 +208,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
>> smp_mb();
>> }
>>
>> +#endif /* !CONFIG_QUEUED_SPINLOCKS */
>> /*
>> * Read-write spinlocks, allowing multiple readers
>> * but only one writer.
>> @@ -338,7 +344,6 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
>> #define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
>> #define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
>>
>> -#define arch_spin_relax(lock) __spin_yield(lock)
>> #define arch_read_relax(lock) __rw_yield(lock)
>> #define arch_write_relax(lock) __rw_yield(lock)
>>
>> diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h
>> index 2351adc..bd7144e 100644
>> --- a/arch/powerpc/include/asm/spinlock_types.h
>> +++ b/arch/powerpc/include/asm/spinlock_types.h
>> @@ -5,11 +5,15 @@
>> # error "please don't include this file directly"
>> #endif
>>
>> +#ifdef CONFIG_QUEUED_SPINLOCKS
>> +#include <asm-generic/qspinlock_types.h>
>> +#else
>> typedef struct {
>> volatile unsigned int slock;
>> } arch_spinlock_t;
>>
>> #define __ARCH_SPIN_LOCK_UNLOCKED { 0 }
>> +#endif
>>
>> typedef struct {
>> volatile signed int lock;
>> diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
>> index b7b1237..6574626 100644
>> --- a/arch/powerpc/lib/locks.c
>> +++ b/arch/powerpc/lib/locks.c
>> @@ -23,6 +23,7 @@
>> #include <asm/hvcall.h>
>> #include <asm/smp.h>
>>
>> +#ifndef CONFIG_QUEUED_SPINLOCKS
>> void __spin_yield(arch_spinlock_t *lock)
>> {
>> unsigned int lock_value, holder_cpu, yield_count;
>> @@ -42,6 +43,7 @@ void __spin_yield(arch_spinlock_t *lock)
>> get_hard_smp_processor_id(holder_cpu), yield_count);
>> }
>> EXPORT_SYMBOL_GPL(__spin_yield);
>> +#endif
>>
>> /*
>> * Waiting for a read lock or a write lock on a rwlock...
>> @@ -68,3 +70,60 @@ void __rw_yield(arch_rwlock_t *rw)
>> get_hard_smp_processor_id(holder_cpu), yield_count);
>> }
>> #endif
>> +
>> +#ifdef CONFIG_QUEUED_SPINLOCKS
>> +/*
>> + * This forbid we load an old value in another LL/SC. Because the SC here force
>> + * another LL/SC repeat. So we guarantee all loads in another LL and SC will
>> + * read correct value.
>> + */
>> +static inline u32 atomic_read_sync(atomic_t *v)
>> +{
>> + u32 val;
>> +
>> + __asm__ __volatile__(
>> +"1: " PPC_LWARX(%0, 0, %2, 0) "\n"
>> +" stwcx. %0, 0, %2\n"
>> +" bne- 1b\n"
>> + : "=&r" (val), "+m" (*v)
>> + : "r" (v)
>> + : "cr0", "xer");
>> +
>> + return val;
>> +}
>> +
>> +void queued_spin_unlock_wait(struct qspinlock *lock)
>> +{
>> +
>> + u32 val;
>> +
>> + smp_mb();
>> +
>> + /*
>> + * copied from generic queue_spin_unlock_wait with little modification
>> + */
>> + for (;;) {
>> + /* need _sync, as we might race with another LL/SC in lock()*/
>> + val = atomic_read_sync(&lock->val);
>> +
>> + if (!val) /* not locked, we're done */
>> + goto done;
>> +
>> + if (val & _Q_LOCKED_MASK) /* locked, go wait for unlock */
>> + break;
>> +
>> + /* not locked, but pending, wait until we observe the lock */
>> + cpu_relax();
>> + }
>> +
>> + /*
>> + * any unlock is good. And need not _sync, as ->val is set by the SC in
>> + * unlock(), any loads in lock() must see the correct value.
>> + */
>
> I don't think the comment here about _sync is correct. First, not all
> unlock() has a SC part, and for unlock_wait() case there is nothing to
Yes, not all unlock() paths have an SC, but here we are only dealing with the SC part of unlock().
> do with whether lock() see the correct value or not. The reason with
Yep, but loads can be reordered, so the unlock_wait(SC) vs. lock(SC) interaction prevents
any loads between lock(LL) and lock(SC) from reading values that are too new.
> _sync is not needed here is:
>
> /*
> * _sync() is not needed here, because once we got here, we must already
> * read the ->val as LOCKED via a _sync(). Combining the smp_mb()
> * before, we guarantee that all the memory accesses before
> * unlock_wait() must be observed by the next lock critical section.
> */
>
Good, that is much clearer. Thanks.
xinhui
> Regards,
> Boqun
>
>> + while (atomic_read(&lock->val) & _Q_LOCKED_MASK)
>> + cpu_relax();
>> +done:
>> + smp_mb();
>> +}
>> +EXPORT_SYMBOL(queued_spin_unlock_wait);
>> +#endif
>> --
>> 2.4.11
>>
Powered by blists - more mailing lists