Message-ID: <52D6A4FB.7060305@hp.com>
Date:	Wed, 15 Jan 2014 10:10:51 -0500
From:	Waiman Long <waiman.long@...com>
To:	Jason Low <jason.low2@...com>
CC:	mingo@...hat.com, peterz@...radead.org, paulmck@...ux.vnet.ibm.com,
	torvalds@...ux-foundation.org, tglx@...utronix.de,
	linux-kernel@...r.kernel.org, riel@...hat.com,
	akpm@...ux-foundation.org, davidlohr@...com, hpa@...or.com,
	aswin@...com, scott.norton@...com
Subject: Re: [RFC 2/3] mutex: Modify the way optimistic spinners are queued

On 01/14/2014 07:33 PM, Jason Low wrote:
> This patch is needed for patch 3, but should also be beneficial in general.
>
> The mutex->spin_mlock was introduced in order to ensure that only 1 thread
> loops on the lock->owner field at a time to reduce cache line contention. When
> lock->owner is NULL and the lock->count is still not 1, the spinner(s) will
> continually release and obtain the lock->spin_mlock. This can generate
> quite a bit of overhead/contention, and also might just delay the spinner
> from getting the lock.
>
> This patch modifies the way optimistic spinners are queued by queuing before
> entering the optimistic spinning loop, as opposed to acquiring the spin_mlock
> before every call to mutex_spin_on_owner(). So in situations where the spinner
> requires extra spins before obtaining the lock, there will only be 1 spinner
> trying to get the lock and it will avoid the overhead from unnecessarily
> unlocking and locking the spin_mlock.
>
> Signed-off-by: Jason Low <jason.low2@...com>
> ---
>   kernel/locking/mutex.c |   13 ++++++++-----
>   1 files changed, 8 insertions(+), 5 deletions(-)
>
> diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
> index 85c6be1..b500cc7 100644
> --- a/kernel/locking/mutex.c
> +++ b/kernel/locking/mutex.c
> @@ -419,6 +419,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>   	struct mutex_waiter waiter;
>   	unsigned long flags;
>   	int ret;
> +	struct mspin_node node;
>
>   	preempt_disable();
>   	mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
> @@ -449,9 +450,9 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>   	if (!mutex_can_spin_on_owner(lock))
>   		goto slowpath;
>
> +	mspin_lock(MLOCK(lock), &node);
>   	for (;;) {
>   		struct task_struct *owner;
> -		struct mspin_node  node;
>
> 		if (use_ww_ctx && ww_ctx->acquired > 0) {
>   			struct ww_mutex *ww;
> @@ -465,15 +466,16 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>   			 * As such, when deadlock detection needs to be
>   			 * performed the optimistic spinning cannot be done.
>   			 */
> -			if (ACCESS_ONCE(ww->ctx))
> +			if (ACCESS_ONCE(ww->ctx)) {
> +				mspin_unlock(MLOCK(lock), &node);
>   				goto slowpath;
> +			}
>   		}
>
>   		/*
>   		 * If there's an owner, wait for it to either
>   		 * release the lock or go to sleep.
>   		 */
> -		mspin_lock(MLOCK(lock), &node);
> 		owner = ACCESS_ONCE(lock->owner);
> 		if (owner && !mutex_spin_on_owner(lock, owner)) {
> 			mspin_unlock(MLOCK(lock), &node);
> @@ -495,7 +497,6 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>   			preempt_enable();
>   			return 0;
>   		}
> -		mspin_unlock(MLOCK(lock),&node);
>
>   		/*
>   		 * When there's no owner, we might have preempted between the
> @@ -503,8 +504,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>   		 * we're an RT task that will live-lock because we won't let
>   		 * the owner complete.
>   		 */
> -		if (!owner && (need_resched() || rt_task(task)))
> +		if (!owner && (need_resched() || rt_task(task))) {
> +			mspin_unlock(MLOCK(lock), &node);
>   			goto slowpath;
> +		}
>
>   		/*
>   		 * The cpu_relax() call is a compiler barrier which forces

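For context on the spin_mlock discussed above: it is an MCS-style queued
lock, in which each waiter spins on a flag in its own mspin_node rather than
on a shared word, so only the lock hand-off touches a remote cache line.
Below is a rough, self-contained user-space sketch of that idea using C11
atomics; it reuses the mspin_node/mspin_lock/mspin_unlock names for
readability, but it is not the kernel's implementation, which uses per-CPU
nodes and arch-specific primitives.

/*
 * Illustrative MCS-style queued lock, loosely modeled on the kernel's
 * mspin_lock()/mspin_unlock().  User-space sketch with C11 atomics,
 * not the actual kernel code.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct mspin_node {
	_Atomic(struct mspin_node *) next;	/* successor in the wait queue */
	atomic_bool		     locked;	/* set by the predecessor on hand-off */
};

/* One queue tail per mutex; the kernel derives this via MLOCK(lock). */
typedef _Atomic(struct mspin_node *) mspin_queue_t;

static void mspin_lock(mspin_queue_t *queue, struct mspin_node *node)
{
	struct mspin_node *prev;

	atomic_store(&node->next, NULL);
	atomic_store(&node->locked, false);

	/* Add ourselves to the tail of the queue. */
	prev = atomic_exchange(queue, node);
	if (!prev)
		return;				/* queue was empty: lock acquired */

	/* Link behind the predecessor, then spin on our own node only. */
	atomic_store(&prev->next, node);
	while (!atomic_load(&node->locked))
		;				/* local spinning, no shared-line bouncing */
}

static void mspin_unlock(mspin_queue_t *queue, struct mspin_node *node)
{
	struct mspin_node *next = atomic_load(&node->next);

	if (!next) {
		/* No visible successor: try to reset the queue to empty. */
		struct mspin_node *expected = node;
		if (atomic_compare_exchange_strong(queue, &expected, NULL))
			return;
		/* A successor is enqueueing; wait for it to link itself in. */
		while (!(next = atomic_load(&node->next)))
			;
	}
	/* Hand the lock to the next waiter. */
	atomic_store(&next->locked, true);
}

Because each waiter spins only on its own node, taking and dropping the queue
lock on every loop iteration means re-entering and leaving this queue over
and over; that is the overhead the quoted commit message describes and the
patch removes by taking the queue lock once, before the loop.
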
Maybe you can consider restructuring the code as follows to reduce the
number of mspin_unlock() call sites:
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 4dd6e4c..0a78a0c 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -416,6 +416,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned
         struct mutex_waiter waiter;
         unsigned long flags;
         int ret;
+       struct mspin_node  node;

         preempt_disable();
         mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
@@ -446,9 +447,9 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned
         if (!mutex_can_spin_on_owner(lock))
                 goto slowpath;

+       mspin_lock(MLOCK(lock), &node);
         for (;;) {
                 struct task_struct *owner;
-               struct mspin_node  node;

                 if (use_ww_ctx && ww_ctx->acquired > 0) {
                         struct ww_mutex *ww;
@@ -463,19 +464,16 @@ __mutex_lock_common(struct mutex *lock, long state, unsign
                          * performed the optimistic spinning cannot be done.
                          */
                         if (ACCESS_ONCE(ww->ctx))
-                               goto slowpath;
+                               break;
                 }

                 /*
                  * If there's an owner, wait for it to either
                  * release the lock or go to sleep.
                  */
-               mspin_lock(MLOCK(lock), &node);
                 owner = ACCESS_ONCE(lock->owner);
-               if (owner && !mutex_spin_on_owner(lock, owner)) {
-                       mspin_unlock(MLOCK(lock), &node);
-                       goto slowpath;
-               }
+               if (owner && !mutex_spin_on_owner(lock, owner))
+                       break;

                 if ((atomic_read(&lock->count) == 1) &&
                     (atomic_cmpxchg(&lock->count, 1, 0) == 1)) {
@@ -492,7 +490,6 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned
                         preempt_enable();
                         return 0;
                 }
-               mspin_unlock(MLOCK(lock), &node);

                 /*
                 * When there's no owner, we might have preempted between the
@@ -501,7 +498,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned
                  * the owner complete.
                  */
                 if (!owner && (need_resched() || rt_task(task)))
-                       goto slowpath;
+                       break;

                 /*
                  * The cpu_relax() call is a compiler barrier which forces
@@ -511,6 +508,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned
                  */
                 arch_mutex_cpu_relax();
         }
+       mspin_unlock(MLOCK(lock), &node);
  slowpath:
  #endif
         spin_lock_mutex(&lock->wait_lock, flags);


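With that change applied on top of the quoted patch, the spinning section
would read roughly as follows. This is a simplified reconstruction assembled
from the hunks above, not the verbatim kernel code: the ww_mutex bookkeeping
and the body of the success path are elided and marked with comments, and the
mspin_unlock() on the success path is the one already present in the existing
code. Every bail-out now funnels through a single mspin_unlock() after the
loop:

	mspin_lock(MLOCK(lock), &node);
	for (;;) {
		struct task_struct *owner;

		if (use_ww_ctx && ww_ctx->acquired > 0) {
			/* ... ww_mutex setup elided ... */
			if (ACCESS_ONCE(ww->ctx))
				break;		/* deadlock detection needed */
		}

		owner = ACCESS_ONCE(lock->owner);
		if (owner && !mutex_spin_on_owner(lock, owner))
			break;			/* owner off CPU or we need to reschedule */

		if ((atomic_read(&lock->count) == 1) &&
		    (atomic_cmpxchg(&lock->count, 1, 0) == 1)) {
			/* ... lock_acquired(), ww fastpath, mutex_set_owner() elided ... */
			mspin_unlock(MLOCK(lock), &node);
			preempt_enable();
			return 0;
		}

		if (!owner && (need_resched() || rt_task(task)))
			break;			/* avoid live-locking the new owner */

		arch_mutex_cpu_relax();
	}
	mspin_unlock(MLOCK(lock), &node);	/* one unlock covers every break above */
 slowpath:
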
