lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20190416134522.17540-20-ldufour@linux.ibm.com>
Date:   Tue, 16 Apr 2019 15:45:10 +0200
From:   Laurent Dufour <ldufour@...ux.ibm.com>
To:     akpm@...ux-foundation.org, mhocko@...nel.org, peterz@...radead.org,
        kirill@...temov.name, ak@...ux.intel.com, dave@...olabs.net,
        jack@...e.cz, Matthew Wilcox <willy@...radead.org>,
        aneesh.kumar@...ux.ibm.com, benh@...nel.crashing.org,
        mpe@...erman.id.au, paulus@...ba.org,
        Thomas Gleixner <tglx@...utronix.de>,
        Ingo Molnar <mingo@...hat.com>, hpa@...or.com,
        Will Deacon <will.deacon@....com>,
        Sergey Senozhatsky <sergey.senozhatsky@...il.com>,
        sergey.senozhatsky.work@...il.com,
        Andrea Arcangeli <aarcange@...hat.com>,
        Alexei Starovoitov <alexei.starovoitov@...il.com>,
        kemi.wang@...el.com, Daniel Jordan <daniel.m.jordan@...cle.com>,
        David Rientjes <rientjes@...gle.com>,
        Jerome Glisse <jglisse@...hat.com>,
        Ganesh Mahendran <opensource.ganesh@...il.com>,
        Minchan Kim <minchan@...nel.org>,
        Punit Agrawal <punitagrawal@...il.com>,
        vinayak menon <vinayakm.list@...il.com>,
        Yang Shi <yang.shi@...ux.alibaba.com>,
        zhong jiang <zhongjiang@...wei.com>,
        Haiyan Song <haiyanx.song@...el.com>,
        Balbir Singh <bsingharora@...il.com>, sj38.park@...il.com,
        Michel Lespinasse <walken@...gle.com>,
        Mike Rapoport <rppt@...ux.ibm.com>
Cc:     linux-kernel@...r.kernel.org, linux-mm@...ck.org,
        haren@...ux.vnet.ibm.com, npiggin@...il.com,
        paulmck@...ux.vnet.ibm.com, Tim Chen <tim.c.chen@...ux.intel.com>,
        linuxppc-dev@...ts.ozlabs.org, x86@...nel.org
Subject: [PATCH v12 19/31] mm: protect the RB tree with a sequence lock

Introducing a per mm_struct seqlock, mm_seq field, to protect the changes
made in the MM RB tree. This allows to walk the RB tree without grabbing
the mmap_sem, and on the walk is done to double check that sequence counter
was stable during the walk.

The mm seqlock is held while inserting and removing entries into the MM RB
tree.  Later in this series, it will be check when looking for a VMA
without holding the mmap_sem.

This is based on the initial work from Peter Zijlstra:
https://lore.kernel.org/linux-mm/20100104182813.479668508@chello.nl/

Signed-off-by: Laurent Dufour <ldufour@...ux.ibm.com>
---
 include/linux/mm_types.h |  3 +++
 kernel/fork.c            |  3 +++
 mm/init-mm.c             |  3 +++
 mm/mmap.c                | 48 +++++++++++++++++++++++++++++++---------
 4 files changed, 46 insertions(+), 11 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index e78f72eb2576..24b3f8ce9e42 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -358,6 +358,9 @@ struct mm_struct {
 	struct {
 		struct vm_area_struct *mmap;		/* list of VMAs */
 		struct rb_root mm_rb;
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+		seqlock_t mm_seq;
+#endif
 		u64 vmacache_seqnum;                   /* per-thread vmacache */
 #ifdef CONFIG_MMU
 		unsigned long (*get_unmapped_area) (struct file *filp,
diff --git a/kernel/fork.c b/kernel/fork.c
index 2992d2c95256..3a1739197ebc 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1008,6 +1008,9 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	mm->mmap = NULL;
 	mm->mm_rb = RB_ROOT;
 	mm->vmacache_seqnum = 0;
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+	seqlock_init(&mm->mm_seq);
+#endif
 	atomic_set(&mm->mm_users, 1);
 	atomic_set(&mm->mm_count, 1);
 	init_rwsem(&mm->mmap_sem);
diff --git a/mm/init-mm.c b/mm/init-mm.c
index a787a319211e..69346b883a4e 100644
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -27,6 +27,9 @@
  */
 struct mm_struct init_mm = {
 	.mm_rb		= RB_ROOT,
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+	.mm_seq		= __SEQLOCK_UNLOCKED(init_mm.mm_seq),
+#endif
 	.pgd		= swapper_pg_dir,
 	.mm_users	= ATOMIC_INIT(2),
 	.mm_count	= ATOMIC_INIT(1),
diff --git a/mm/mmap.c b/mm/mmap.c
index 13460b38b0fb..f7f6027a7dff 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -170,6 +170,24 @@ void unlink_file_vma(struct vm_area_struct *vma)
 	}
 }
 
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+static inline void mm_write_seqlock(struct mm_struct *mm)
+{
+	write_seqlock(&mm->mm_seq);
+}
+static inline void mm_write_sequnlock(struct mm_struct *mm)
+{
+	write_sequnlock(&mm->mm_seq);
+}
+#else
+static inline void mm_write_seqlock(struct mm_struct *mm)
+{
+}
+static inline void mm_write_sequnlock(struct mm_struct *mm)
+{
+}
+#endif /* CONFIG_SPECULATIVE_PAGE_FAULT */
+
 /*
  * Close a vm structure and free it, returning the next.
  */
@@ -445,26 +463,32 @@ static void vma_gap_update(struct vm_area_struct *vma)
 }
 
 static inline void vma_rb_insert(struct vm_area_struct *vma,
-				 struct rb_root *root)
+				 struct mm_struct *mm)
 {
+	struct rb_root *root = &mm->mm_rb;
+
 	/* All rb_subtree_gap values must be consistent prior to insertion */
 	validate_mm_rb(root, NULL);
 
 	rb_insert_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
 }
 
-static void __vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root)
+static void __vma_rb_erase(struct vm_area_struct *vma, struct mm_struct *mm)
 {
+	struct rb_root *root = &mm->mm_rb;
+
 	/*
 	 * Note rb_erase_augmented is a fairly large inline function,
 	 * so make sure we instantiate it only once with our desired
 	 * augmented rbtree callbacks.
 	 */
+	mm_write_seqlock(mm);
 	rb_erase_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
+	mm_write_sequnlock(mm);	/* wmb */
 }
 
 static __always_inline void vma_rb_erase_ignore(struct vm_area_struct *vma,
-						struct rb_root *root,
+						struct mm_struct *mm,
 						struct vm_area_struct *ignore)
 {
 	/*
@@ -472,21 +496,21 @@ static __always_inline void vma_rb_erase_ignore(struct vm_area_struct *vma,
 	 * with the possible exception of the "next" vma being erased if
 	 * next->vm_start was reduced.
 	 */
-	validate_mm_rb(root, ignore);
+	validate_mm_rb(&mm->mm_rb, ignore);
 
-	__vma_rb_erase(vma, root);
+	__vma_rb_erase(vma, mm);
 }
 
 static __always_inline void vma_rb_erase(struct vm_area_struct *vma,
-					 struct rb_root *root)
+					 struct mm_struct *mm)
 {
 	/*
 	 * All rb_subtree_gap values must be consistent prior to erase,
 	 * with the possible exception of the vma being erased.
 	 */
-	validate_mm_rb(root, vma);
+	validate_mm_rb(&mm->mm_rb, vma);
 
-	__vma_rb_erase(vma, root);
+	__vma_rb_erase(vma, mm);
 }
 
 /*
@@ -601,10 +625,12 @@ void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * immediately update the gap to the correct value. Finally we
 	 * rebalance the rbtree after all augmented values have been set.
 	 */
+	mm_write_seqlock(mm);
 	rb_link_node(&vma->vm_rb, rb_parent, rb_link);
 	vma->rb_subtree_gap = 0;
 	vma_gap_update(vma);
-	vma_rb_insert(vma, &mm->mm_rb);
+	vma_rb_insert(vma, mm);
+	mm_write_sequnlock(mm);
 }
 
 static void __vma_link_file(struct vm_area_struct *vma)
@@ -680,7 +706,7 @@ static __always_inline void __vma_unlink_common(struct mm_struct *mm,
 {
 	struct vm_area_struct *next;
 
-	vma_rb_erase_ignore(vma, &mm->mm_rb, ignore);
+	vma_rb_erase_ignore(vma, mm, ignore);
 	next = vma->vm_next;
 	if (has_prev)
 		prev->vm_next = next;
@@ -2674,7 +2700,7 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
 	insertion_point = (prev ? &prev->vm_next : &mm->mmap);
 	vma->vm_prev = NULL;
 	do {
-		vma_rb_erase(vma, &mm->mm_rb);
+		vma_rb_erase(vma, mm);
 		mm->map_count--;
 		tail_vma = vma;
 		vma = vma->vm_next;
-- 
2.21.0

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ