[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20130930110047.GY3081@twins.programming.kicks-ass.net>
Date: Mon, 30 Sep 2013 13:00:47 +0200
From: Peter Zijlstra <peterz@...radead.org>
To: Ingo Molnar <mingo@...nel.org>
Cc: Linus Torvalds <torvalds@...ux-foundation.org>,
Waiman Long <Waiman.Long@...com>, Ingo Molnar <mingo@...e.hu>,
Andrew Morton <akpm@...ux-foundation.org>,
Linux Kernel Mailing List <linux-kernel@...r.kernel.org>,
Rik van Riel <riel@...hat.com>,
Peter Hurley <peter@...leysoftware.com>,
Davidlohr Bueso <davidlohr.bueso@...com>,
Alex Shi <alex.shi@...el.com>,
Tim Chen <tim.c.chen@...ux.intel.com>,
Andrea Arcangeli <aarcange@...hat.com>,
Matthew R Wilcox <matthew.r.wilcox@...el.com>,
Dave Hansen <dave.hansen@...el.com>,
Michel Lespinasse <walken@...gle.com>,
Andi Kleen <andi@...stfloor.org>,
"Chandramouleeswaran, Aswin" <aswin@...com>,
"Norton, Scott J" <scott.norton@...com>
Subject: Re: [PATCH, v2] anon_vmas: Convert the rwsem to an rwlock_t
On Sat, Sep 28, 2013 at 09:52:07PM +0200, Ingo Molnar wrote:
> Index: tip/mm/rmap.c
> ===================================================================
> --- tip.orig/mm/rmap.c
> +++ tip/mm/rmap.c
> @@ -98,12 +98,12 @@ static inline void anon_vma_free(struct
> * page_lock_anon_vma_read() VS put_anon_vma()
> * down_read_trylock() atomic_dec_and_test()
> * LOCK MB
> - * atomic_read() rwsem_is_locked()
> + * atomic_read() rwlock_is_locked()
> *
> * LOCK should suffice since the actual taking of the lock must
> * happen _before_ what follows.
> */
> - if (rwsem_is_locked(&anon_vma->root->rwsem)) {
> + if (!write_can_lock(&anon_vma->root->rwlock)) {
> anon_vma_lock_write(anon_vma);
> anon_vma_unlock_write(anon_vma);
> }
> @@ -457,14 +457,14 @@ struct anon_vma *page_lock_anon_vma_read
>
> anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
> root_anon_vma = ACCESS_ONCE(anon_vma->root);
> - if (down_read_trylock(&root_anon_vma->rwsem)) {
> + if (read_trylock(&root_anon_vma->rwlock)) {
> /*
> * If the page is still mapped, then this anon_vma is still
> * its anon_vma, and holding the mutex ensures that it will
> * not go away, see anon_vma_free().
> */
> if (!page_mapped(page)) {
> - up_read(&root_anon_vma->rwsem);
> + read_unlock(&root_anon_vma->rwlock);
> anon_vma = NULL;
> }
> goto out;
> @@ -1293,7 +1293,7 @@ out_mlock:
> /*
> * We need mmap_sem locking, Otherwise VM_LOCKED check makes
> * unstable result and race. Plus, We can't wait here because
> - * we now hold anon_vma->rwsem or mapping->i_mmap_mutex.
> + * we now hold anon_vma->rwlock or mapping->i_mmap_mutex.
> * if trylock failed, the page remain in evictable lru and later
> * vmscan could retry to move the page to unevictable lru if the
> * page is actually mlocked.
You can remove all that -- all that trickery was only needed because the
lock could sleep;
---
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -85,29 +85,6 @@ static inline struct anon_vma *anon_vma_
static inline void anon_vma_free(struct anon_vma *anon_vma)
{
VM_BUG_ON(atomic_read(&anon_vma->refcount));
-
- /*
- * Synchronize against page_lock_anon_vma_read() such that
- * we can safely hold the lock without the anon_vma getting
- * freed.
- *
- * Relies on the full mb implied by the atomic_dec_and_test() from
- * put_anon_vma() against the acquire barrier implied by
- * down_read_trylock() from page_lock_anon_vma_read(). This orders:
- *
- * page_lock_anon_vma_read() VS put_anon_vma()
- * down_read_trylock() atomic_dec_and_test()
- * LOCK MB
- * atomic_read() rwlock_is_locked()
- *
- * LOCK should suffice since the actual taking of the lock must
- * happen _before_ what follows.
- */
- if (!write_can_lock(&anon_vma->root->rwlock)) {
- anon_vma_lock_write(anon_vma);
- anon_vma_unlock_write(anon_vma);
- }
-
kmem_cache_free(anon_vma_cachep, anon_vma);
}
@@ -437,15 +414,10 @@ struct anon_vma *page_get_anon_vma(struc
/*
* Similar to page_get_anon_vma() except it locks the anon_vma.
- *
- * Its a little more complex as it tries to keep the fast path to a single
- * atomic op -- the trylock. If we fail the trylock, we fall back to getting a
- * reference like with page_get_anon_vma() and then block on the mutex.
*/
struct anon_vma *page_lock_anon_vma_read(struct page *page)
{
struct anon_vma *anon_vma = NULL;
- struct anon_vma *root_anon_vma;
unsigned long anon_mapping;
rcu_read_lock();
@@ -456,51 +428,22 @@ struct anon_vma *page_lock_anon_vma_read
goto out;
anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
- root_anon_vma = ACCESS_ONCE(anon_vma->root);
- if (read_trylock(&root_anon_vma->rwlock)) {
- /*
- * If the page is still mapped, then this anon_vma is still
- * its anon_vma, and holding the mutex ensures that it will
- * not go away, see anon_vma_free().
- */
- if (!page_mapped(page)) {
- read_unlock(&root_anon_vma->rwlock);
- anon_vma = NULL;
- }
- goto out;
- }
-
- /* trylock failed, we got to sleep */
- if (!atomic_inc_not_zero(&anon_vma->refcount)) {
- anon_vma = NULL;
- goto out;
- }
-
- if (!page_mapped(page)) {
- put_anon_vma(anon_vma);
- anon_vma = NULL;
- goto out;
- }
-
- /* we pinned the anon_vma, its safe to sleep */
- rcu_read_unlock();
anon_vma_lock_read(anon_vma);
- if (atomic_dec_and_test(&anon_vma->refcount)) {
- /*
- * Oops, we held the last refcount, release the lock
- * and bail -- can't simply use put_anon_vma() because
- * we'll deadlock on the anon_vma_lock_write() recursion.
- */
+ /*
+ * If this page is still mapped, then its anon_vma cannot have been
+ * freed. But if it has been unmapped, we have no security against the
+ * anon_vma structure being freed and reused (for another anon_vma:
+ * SLAB_DESTROY_BY_RCU guarantees that - so the atomic_inc_not_zero()
+ * above cannot corrupt).
+ */
+ if (!page_mapped(page)) {
anon_vma_unlock_read(anon_vma);
- __put_anon_vma(anon_vma);
anon_vma = NULL;
}
-
- return anon_vma;
-
out:
rcu_read_unlock();
+
return anon_vma;
}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists