lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20100527174816.GA10931@csn.ul.ie>
Date:	Thu, 27 May 2010 18:48:16 +0100
From:	Mel Gorman <mel@....ul.ie>
To:	Rik van Riel <riel@...hat.com>
Cc:	akpm@...ux-foundation.org, linux-kernel@...r.kernel.org,
	linux-mm@...ck.org, Andrea Arcangeli <aarcange@...hat.com>,
	Minchan Kim <minchan.kim@...il.com>,
	KAMEZAWA Hiroyuki <kamezawa.hiroyu@...fujitsu.com>,
	Lee Schermerhorn <Lee.Schermerhorn@...com>
Subject: Re: [PATCH 4/5] always lock the root (oldest) anon_vma

On Wed, May 26, 2010 at 03:40:44PM -0400, Rik van Riel wrote:
> Subject: always lock the root (oldest) anon_vma
> 
> Always (and only) lock the root (oldest) anon_vma whenever we do something in an
> anon_vma.  The recently introduced anon_vma scalability is due to the rmap code
> scanning only the VMAs that need to be scanned.  Many common operations still
> took the anon_vma lock on the root anon_vma, so always taking that lock is not
> expected to introduce any scalability issues.
> 
> However, always taking the same lock does mean we only need to take one lock,
> which means rmap_walk on pages from any anon_vma in the vma is excluded from
> occurring during an munmap, expand_stack or other operation that needs to
> exclude rmap_walk and similar functions.
> 
> Also add the proper locking to vma_adjust.
> 
> Signed-off-by: Rik van Riel <riel@...hat.com>
> ---
> v3:
>  - fix locking inversion in vma_adjust, spotted by Andrea
>  - mm_take_all locks needs to use the bitflag in the root anon_vma,
>    since that is the one that gets locked (Andrea Arcangeli)
> v2:
>  - conditionally take the anon_vma lock in vma_adjust, like introduced
>    in 252c5f94d944487e9f50ece7942b0fbf659c5c31  (with a proper comment)
> 
>  include/linux/rmap.h |    8 ++++----
>  mm/ksm.c             |    2 +-
>  mm/migrate.c         |    2 +-
>  mm/mmap.c            |   30 ++++++++++++++++++++++--------
>  4 files changed, 28 insertions(+), 14 deletions(-)
> 
> Index: linux-2.6.34/include/linux/rmap.h
> ===================================================================
> --- linux-2.6.34.orig/include/linux/rmap.h
> +++ linux-2.6.34/include/linux/rmap.h
> @@ -104,24 +104,24 @@ static inline void vma_lock_anon_vma(str
>  {
>  	struct anon_vma *anon_vma = vma->anon_vma;
>  	if (anon_vma)
> -		spin_lock(&anon_vma->lock);
> +		spin_lock(&anon_vma->root->lock);
>  }
>  
>  static inline void vma_unlock_anon_vma(struct vm_area_struct *vma)
>  {
>  	struct anon_vma *anon_vma = vma->anon_vma;
>  	if (anon_vma)
> -		spin_unlock(&anon_vma->lock);
> +		spin_unlock(&anon_vma->root->lock);
>  }
>  
>  static inline void anon_vma_lock(struct anon_vma *anon_vma)
>  {
> -	spin_lock(&anon_vma->lock);
> +	spin_lock(&anon_vma->root->lock);
>  }
>  
>  static inline void anon_vma_unlock(struct anon_vma *anon_vma)
>  {
> -	spin_unlock(&anon_vma->lock);
> +	spin_unlock(&anon_vma->root->lock);
>  }
>  
>  /*
> Index: linux-2.6.34/mm/ksm.c
> ===================================================================
> --- linux-2.6.34.orig/mm/ksm.c
> +++ linux-2.6.34/mm/ksm.c
> @@ -325,7 +325,7 @@ static void drop_anon_vma(struct rmap_it
>  {
>  	struct anon_vma *anon_vma = rmap_item->anon_vma;
>  
> -	if (atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->lock)) {
> +	if (atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->root->lock)) {
>  		int empty = list_empty(&anon_vma->head);
>  		anon_vma_unlock(anon_vma);
>  		if (empty)
> Index: linux-2.6.34/mm/mmap.c
> ===================================================================
> --- linux-2.6.34.orig/mm/mmap.c
> +++ linux-2.6.34/mm/mmap.c
> @@ -506,6 +506,7 @@ int vma_adjust(struct vm_area_struct *vm
>  	struct vm_area_struct *importer = NULL;
>  	struct address_space *mapping = NULL;
>  	struct prio_tree_root *root = NULL;
> +	struct anon_vma *anon_vma = NULL;
>  	struct file *file = vma->vm_file;
>  	long adjust_next = 0;
>  	int remove_next = 0;
> @@ -578,6 +579,17 @@ again:			remove_next = 1 + (end > next->
>  		}
>  	}
>  
> +	/*
> +	 * When changing only vma->vm_end, we don't really need anon_vma
> +	 * lock. This is a fairly rare case by itself, but the anon_vma
> +	 * lock may be shared between many sibling processes.  Skipping
> +	 * the lock for brk adjustments makes a difference sometimes.
> +	 */

"sometimes" is a bit. We know when it makes a difference - for
brk-intensive workloads e.g. aim7. It's only worth mentioning because
git blame will no longer point to Lee's analysis in commit
252c5f94d944487e9f50ece7942b0fbf659c5c31.

Whether you update it or not;

Acked-by: Mel Gorman <mel@....ul.ie>

> +	if (vma->anon_vma && (insert || importer || start != vma->vm_start)) {
> +		anon_vma = vma->anon_vma;
> +		anon_vma_lock(anon_vma);
> +	}
> +
>  	if (root) {
>  		flush_dcache_mmap_lock(mapping);
>  		vma_prio_tree_remove(vma, root);
> @@ -617,6 +629,8 @@ again:			remove_next = 1 + (end > next->
>  		__insert_vm_struct(mm, insert);
>  	}
>  
> +	if (anon_vma)
> +		anon_vma_unlock(anon_vma);
>  	if (mapping)
>  		spin_unlock(&mapping->i_mmap_lock);
>  
> @@ -2466,23 +2480,23 @@ static DEFINE_MUTEX(mm_all_locks_mutex);
>  
>  static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
>  {
> -	if (!test_bit(0, (unsigned long *) &anon_vma->head.next)) {
> +	if (!test_bit(0, (unsigned long *) &anon_vma->root->head.next)) {
>  		/*
>  		 * The LSB of head.next can't change from under us
>  		 * because we hold the mm_all_locks_mutex.
>  		 */
> -		spin_lock_nest_lock(&anon_vma->lock, &mm->mmap_sem);
> +		spin_lock_nest_lock(&anon_vma->root->lock, &mm->mmap_sem);
>  		/*
>  		 * We can safely modify head.next after taking the
> -		 * anon_vma->lock. If some other vma in this mm shares
> +		 * anon_vma->root->lock. If some other vma in this mm shares
>  		 * the same anon_vma we won't take it again.
>  		 *
>  		 * No need of atomic instructions here, head.next
>  		 * can't change from under us thanks to the
> -		 * anon_vma->lock.
> +		 * anon_vma->root->lock.
>  		 */
>  		if (__test_and_set_bit(0, (unsigned long *)
> -				       &anon_vma->head.next))
> +				       &anon_vma->root->head.next))
>  			BUG();
>  	}
>  }
> @@ -2573,7 +2587,7 @@ out_unlock:
>  
>  static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
>  {
> -	if (test_bit(0, (unsigned long *) &anon_vma->head.next)) {
> +	if (test_bit(0, (unsigned long *) &anon_vma->root->head.next)) {
>  		/*
>  		 * The LSB of head.next can't change to 0 from under
>  		 * us because we hold the mm_all_locks_mutex.
> @@ -2584,10 +2598,10 @@ static void vm_unlock_anon_vma(struct an
>  		 *
>  		 * No need of atomic instructions here, head.next
>  		 * can't change from under us until we release the
> -		 * anon_vma->lock.
> +		 * anon_vma->root->lock.
>  		 */
>  		if (!__test_and_clear_bit(0, (unsigned long *)
> -					  &anon_vma->head.next))
> +					  &anon_vma->root->head.next))
>  			BUG();
>  		anon_vma_unlock(anon_vma);
>  	}
> Index: linux-2.6.34/mm/migrate.c
> ===================================================================
> --- linux-2.6.34.orig/mm/migrate.c
> +++ linux-2.6.34/mm/migrate.c
> @@ -682,7 +682,7 @@ skip_unmap:
>  rcu_unlock:
>  
>  	/* Drop an anon_vma reference if we took one */
> -	if (anon_vma && atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->lock)) {
> +	if (anon_vma && atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->root->lock)) {
>  		int empty = list_empty(&anon_vma->head);
>  		anon_vma_unlock(anon_vma);
>  		if (empty)
> 

-- 
Mel Gorman
Part-time Phd Student                          Linux Technology Center
University of Limerick                         IBM Dublin Software Lab
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ