linux-kernel - Re: [PATCH v3 1/2] qspinlock: Introducing a 4-byte queue spinlock implementation

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1391108430.3138.86.camel@schen9-DESK>
Date:	Thu, 30 Jan 2014 11:00:30 -0800
From:	Tim Chen <tim.c.chen@...ux.intel.com>
To:	Waiman Long <Waiman.Long@...com>
Cc:	Thomas Gleixner <tglx@...utronix.de>,
	Ingo Molnar <mingo@...hat.com>,
	"H. Peter Anvin" <hpa@...or.com>, Arnd Bergmann <arnd@...db.de>,
	linux-arch@...r.kernel.org, x86@...nel.org,
	linux-kernel@...r.kernel.org,
	Peter Zijlstra <peterz@...radead.org>,
	Steven Rostedt <rostedt@...dmis.org>,
	Andrew Morton <akpm@...ux-foundation.org>,
	Michel Lespinasse <walken@...gle.com>,
	Andi Kleen <andi@...stfloor.org>,
	Rik van Riel <riel@...hat.com>,
	"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>,
	Linus Torvalds <torvalds@...ux-foundation.org>,
	Raghavendra K T <raghavendra.kt@...ux.vnet.ibm.com>,
	George Spelvin <linux@...izon.com>,
	Daniel J Blueman <daniel@...ascale.com>,
	Alexander Fyodorov <halcy@...dex.ru>,
	Aswin Chandramouleeswaran <aswin@...com>,
	Scott J Norton <scott.norton@...com>,
	Thavatchai Makphaibulchoke <thavatchai.makpahibulchoke@...com>
Subject: Re: [PATCH v3 1/2] qspinlock: Introducing a 4-byte queue spinlock
 implementation

On Tue, 2014-01-28 at 13:19 -0500, Waiman Long wrote:

> +/**
> + * queue_spin_lock_slowpath - acquire the queue spinlock
> + * @lock: Pointer to queue spinlock structure
> + */
> +void queue_spin_lock_slowpath(struct qspinlock *lock)
> +{
> +	unsigned int cpu_nr, qn_idx;
> +	struct qnode *node, *next = NULL;
> +	u32 prev_qcode, my_qcode;
> +
> +	/*
> +	 * Get the queue node
> +	 */
> +	cpu_nr = smp_processor_id();
> +	node   = this_cpu_ptr(&qnodes[0]);
> +	qn_idx = 0;
> +
> +	if (unlikely(node->used)) {
> +		/*
> +		 * This node has been used, try to find an empty queue
> +		 * node entry.
> +		 */
> +		for (qn_idx = 1; qn_idx < MAX_QNODES; qn_idx++)
> +			if (!node[qn_idx].used)
> +				break;
> +		if (unlikely(qn_idx == MAX_QNODES)) {
> +			/*
> +			 * This shouldn't happen, print a warning message
> +			 * & busy spinning on the lock.
> +			 */
> +			printk_sched(
> +			  "qspinlock: queue node table exhausted at cpu %d!\n",
> +			  cpu_nr);
> +			while (!unfair_trylock(lock))
> +				arch_mutex_cpu_relax();
> +			return;
> +		}
> +		/* Adjust node pointer */
> +		node += qn_idx;
> +	}
> +
> +	/*
> +	 * Set up the new cpu code to be exchanged
> +	 */
> +	my_qcode = SET_QCODE(cpu_nr, qn_idx);
> +

If we get interrupted here before we have a chance to set the used flag,
the interrupt handler could pick up the same qnode if it tries to
acquire queued spin lock.  Then we could overwrite the qcode we have set
here.

Perhaps an exchange operation for the used flag to prevent this race
condition?

Tim

> +	/*
> +	 * The lock may be available at this point, try again before waiting
> +	 * in a queue.
> +	 */
> +	if (queue_spin_trylock(lock))
> +		return;
> +
> +	/*
> +	 * Initialize the queue node
> +	 */
> +	init_node(node, lock, cpu_nr);
> +
> +	/*
> +	 * Exchange current copy of the queue node code
> +	 */
> +	prev_qcode = xchg(&lock->qlcode, my_qcode);
> +	/*
> +	 * It is possible that we may accidentally steal the lock. If this is
> +	 * the case, we need to either release it if not the head of the queue
> +	 * or get the lock and be done with it.
> +	 */
> +	if (unlikely(!(prev_qcode & _QSPINLOCK_LOCKED))) {
> +		if (prev_qcode == 0) {
> +			/*
> +			 * Got the lock since it is at the head of the queue
> +			 * Now try to atomically clear the queue code.
> +			 */
> +			if (cmpxchg(&lock->qlcode, my_qcode, _QSPINLOCK_LOCKED)
> +				== my_qcode)
> +				goto release_node;
> +			/*
> +			 * The cmpxchg fails only if one or more processes
> +			 * are added to the queue. In this case, we need to
> +			 * notify the next one to be the head of the queue.
> +			 */
> +			goto notify_next;
> +		}
> +		/*
> +		 * Accidentally steal the lock, release the lock and
> +		 * let the queue head get it.
> +		 */
> +		queue_spin_unlock(lock);
> +	} else
> +		prev_qcode &= ~_QSPINLOCK_LOCKED;	/* Clear the lock bit */
> +	my_qcode &= ~_QSPINLOCK_LOCKED;
> +
> +	if (prev_qcode) {
> +		/*
> +		 * Not at the queue head, get the address of the previous node
> +		 * and set up the "next" fields of the that node.
> +		 */
> +		struct qnode *prev = xlate_qcode(prev_qcode);
> +
> +		assert_prevnode(prev, lock, cpu_nr);
> +		ACCESS_ONCE(prev->next) = node;
> +		/*
> +		 * Wait until the waiting flag is off
> +		 */
> +		while (smp_load_acquire(&node->wait))
> +			arch_mutex_cpu_relax();
> +	}
> +
> +	/*
> +	 * At the head of the wait queue now
> +	 */
> +	while (true) {
> +		u32 qcode;
> +
> +		if (unlikely(!next)) {
> +			/*
> +			 * Try to get the next node address & clean up
> +			 * current node data structure now if the next node
> +			 * address had been set.
> +			 */
> +			next = ACCESS_ONCE(node->next);
> +			if (next) {
> +				assert_nextnode(next, lock, cpu_nr);
> +				cleanup_node(node, cpu_nr);
> +				node = NULL;
> +			}
> +		}
> +		qcode = ACCESS_ONCE(lock->qlcode);
> +		if (qcode & _QSPINLOCK_LOCKED)
> +			;	/* Lock not available yet */
> +		else if (qcode != my_qcode) {
> +			/*
> +			 * Just get the lock with other spinners waiting
> +			 * in the queue.
> +			 */
> +			if (unfair_trylock(lock))
> +				/* May still need to notify the next node */
> +				goto notify_next;
> +		} else {
> +			/*
> +			 * Get the lock & clear the queue code simultaneously
> +			 */
> +			if (cmpxchg(&lock->qlcode, my_qcode,
> +				    _QSPINLOCK_LOCKED) == my_qcode)
> +				/* No need to notify next one */
> +				goto release_node;
> +		}
> +		arch_mutex_cpu_relax();
> +	}
> +
> +notify_next:
> +	/*
> +	 * If the next pointer is not set, we need to wait and notify the
> +	 * next one in line to do busy spinning.
> +	 */
> +	if (unlikely(!next)) {
> +		/*
> +		 * Wait until the next one in queue set up the next field
> +		 */
> +		while (!(next = ACCESS_ONCE(node->next)))
> +			arch_mutex_cpu_relax();
> +		assert_nextnode(next, lock, cpu_nr);
> +	}
> +	/*
> +	 * The next one in queue is now at the head
> +	 */
> +	smp_store_release(&next->wait, false);
> +
> +release_node:
> +	if (node)
> +		cleanup_node(node, cpu_nr);
> +}
> +EXPORT_SYMBOL(queue_spin_lock_slowpath);


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/