Message-ID: <20151106145005.GU17308@twins.programming.kicks-ass.net>
Date:	Fri, 6 Nov 2015 15:50:05 +0100
From:	Peter Zijlstra <peterz@...radead.org>
To:	Waiman Long <Waiman.Long@....com>
Cc:	Ingo Molnar <mingo@...hat.com>,
	Thomas Gleixner <tglx@...utronix.de>,
	"H. Peter Anvin" <hpa@...or.com>, x86@...nel.org,
	linux-kernel@...r.kernel.org,
	Scott J Norton <scott.norton@....com>,
	Douglas Hatch <doug.hatch@....com>,
	Davidlohr Bueso <dave@...olabs.net>
Subject: Re: [PATCH tip/locking/core v9 5/6] locking/pvqspinlock: Allow 1 lock stealing attempt

On Fri, Oct 30, 2015 at 07:26:36PM -0400, Waiman Long wrote:

> @@ -431,35 +432,44 @@ queue:
>  	 * sequentiality; this is because the set_locked() function below
>  	 * does not imply a full barrier.
>  	 *
> +	 * The PV pv_wait_head_lock function, if active, will acquire the lock
> +	 * and return a non-zero value. So we have to skip the
> +	 * smp_load_acquire() call. As the next PV queue head hasn't been
> +	 * designated yet, there is no way for the locked value to become
> +	 * _Q_SLOW_VAL. So both the redundant set_locked() and the
> +	 * atomic_cmpxchg_relaxed() calls will be safe. The cost of the
> +	 * redundant set_locked() call below should be negligible, too.
> +	 *
> +	 * If PV isn't active, 0 will be returned instead.
>  	 */
> -	pv_wait_head(lock, node);
> -	while ((val = smp_load_acquire(&lock->val.counter)) & _Q_LOCKED_PENDING_MASK)
> -		cpu_relax();
> +	val = pv_wait_head_lock(lock, node);
> +	if (!val) {
> +		while ((val = smp_load_acquire(&lock->val.counter))
> +				& _Q_LOCKED_PENDING_MASK)
> +			cpu_relax();
> +		/*
> +		 * Claim the lock now:
> +		 *
> +		 * 0,0 -> 0,1
> +		 */
> +		set_locked(lock);
> +		val |= _Q_LOCKED_VAL;
> +	}
>  
>  	/*
>  	 * If the next pointer is defined, we are not tail anymore.
> -	 * In this case, claim the spinlock & release the MCS lock.
>  	 */
> -	if (next) {
> -		set_locked(lock);
> +	if (next)
>  		goto mcs_unlock;
> -	}
>  
>  	/*
> -	 * claim the lock:
> -	 *
> -	 * n,0,0 -> 0,0,1 : lock, uncontended
> -	 * *,0,0 -> *,0,1 : lock, contended
> -	 *
>  	 * If the queue head is the only one in the queue (lock value == tail),
> -	 * clear the tail code and grab the lock. Otherwise, we only need
> -	 * to grab the lock.
> +	 * we have to clear the tail code.
>  	 */
>  	for (;;) {
> -		if (val != tail) {
> -			set_locked(lock);
> +		if ((val & _Q_TAIL_MASK) != tail)
>  			break;
> -		}
> +
>  		/*
>  		 * The smp_load_acquire() call above has provided the necessary
>  		 * acquire semantics required for locking. At most two

*urgh*, last time we had:

+	if (pv_wait_head_or_steal())
+		goto stolen;
	while ((val = smp_load_acquire(&lock->val.counter)) & _Q_LOCKED_PENDING_MASK)
		cpu_relax();

	...

+stolen:
	while (!(next = READ_ONCE(node->next)))
		cpu_relax();

	...

Now you completely overhaul the native code... what happened?

> -static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
> +static u32 pv_wait_head_lock(struct qspinlock *lock, struct mcs_spinlock *node)
>  {
>  	struct pv_node *pn = (struct pv_node *)node;
>  	struct __qspinlock *l = (void *)lock;
> @@ -276,11 +330,24 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
>  		lp = (struct qspinlock **)1;
>  
>  	for (;; waitcnt++) {
> +		/*
> +		 * Set the pending bit in the active lock spinning loop to
> +		 * disable lock stealing. However, the pending bit check in
> +		 * pv_queued_spin_trylock_unfair() and the setting/clearing
> +		 * of pending bit here aren't memory barriers. So a cmpxchg()
> +		 * is used to acquire the lock to be sure.
> +		 */
> +		set_pending(lock);

OK, so we mark ourselves 'pending' such that a new lock() cannot steal the
lock and is forced to queue behind us.
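The pending-bit check in pv_queued_spin_trylock_unfair() that the comment
refers to presumably amounts to something like the sketch below; the helper
name here is made up and the exact checks may differ from the patch:

	/*
	 * Sketch of the lock-stealing gate (not the patch's actual code):
	 * a stealer only attempts the cmpxchg while both the locked and the
	 * pending bytes are clear, so once the queue head sets pending, new
	 * lockers are forced into the queue.
	 */
	static inline bool pv_try_steal_lock(struct qspinlock *lock)
	{
		struct __qspinlock *l = (void *)lock;

		if (READ_ONCE(l->locked) || READ_ONCE(l->pending))
			return false;

		return cmpxchg(&l->locked, 0, _Q_LOCKED_VAL) == 0;
	}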

>  		for (loop = SPIN_THRESHOLD; loop; loop--) {
> -			if (!READ_ONCE(l->locked))
> -				return;
> +			if (!READ_ONCE(l->locked) &&
> +			   (cmpxchg(&l->locked, 0, _Q_LOCKED_VAL) == 0)) {
> +				clear_pending(lock);
> +				goto gotlock;

Wouldn't cmpxchg(&l->locked_pending, _Q_PENDING_VAL, _Q_LOCKED_VAL) make
sense here, to avoid the clear_pending() call?
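That is, something along these lines in the spin loop above (a sketch,
assuming the usual little-endian layout where locked_pending covers both the
locked and pending bytes):

	for (loop = SPIN_THRESHOLD; loop; loop--) {
		/*
		 * Pending is already set by us; when the owner drops the
		 * locked byte, flip pending -> locked in one cmpxchg, so no
		 * separate clear_pending() is needed on success.
		 */
		if (!READ_ONCE(l->locked) &&
		    cmpxchg(&l->locked_pending, _Q_PENDING_VAL,
			    _Q_LOCKED_VAL) == _Q_PENDING_VAL)
			goto gotlock;
		cpu_relax();
	}
	clear_pending(lock);	/* timed out; re-enable stealing */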

> +			}
>  			cpu_relax();
>  		}
> +		clear_pending(lock);
> +


