lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Tue,  6 Apr 2021 18:44:48 -0700
From:   Michel Lespinasse <michel@...pinasse.org>
To:     Linux-MM <linux-mm@...ck.org>
Cc:     Laurent Dufour <ldufour@...ux.ibm.com>,
        Peter Zijlstra <peterz@...radead.org>,
        Michal Hocko <mhocko@...e.com>,
        Matthew Wilcox <willy@...radead.org>,
        Rik van Riel <riel@...riel.com>,
        Paul McKenney <paulmck@...nel.org>,
        Andrew Morton <akpm@...ux-foundation.org>,
        Suren Baghdasaryan <surenb@...gle.com>,
        Joel Fernandes <joelaf@...gle.com>,
        Rom Lemarchand <romlem@...gle.com>,
        Linux-Kernel <linux-kernel@...r.kernel.org>,
        Michel Lespinasse <michel@...pinasse.org>
Subject: [RFC PATCH 23/37] mm: rcu safe vma->vm_file freeing

Defer freeing of vma->vm_file when freeing vmas.
This is to allow speculative page faults in the mapped file case.

Signed-off-by: Michel Lespinasse <michel@...pinasse.org>
---
 fs/exec.c     |  1 +
 kernel/fork.c | 17 +++++++++++++++--
 mm/mmap.c     | 11 +++++++----
 mm/nommu.c    |  6 ++----
 4 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 18594f11c31f..c9da73eb0f53 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -286,6 +286,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
 	mmap_write_unlock(mm);
 err_free:
 	bprm->vma = NULL;
+	VM_BUG_ON(vma->vm_file);
 	vm_area_free(vma);
 	return err;
 }
diff --git a/kernel/fork.c b/kernel/fork.c
index b6078e546114..2f20a5c5fed8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -369,19 +369,31 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
 	return new;
 }
 
+static inline void ____vm_area_free(struct vm_area_struct *vma)
+{
+	if (vma->vm_file)
+		fput(vma->vm_file);
+	kmem_cache_free(vm_area_cachep, vma);
+}
+
 #ifdef CONFIG_SPECULATIVE_PAGE_FAULT
 static void __vm_area_free(struct rcu_head *head)
 {
 	struct vm_area_struct *vma = container_of(head, struct vm_area_struct,
 						  vm_rcu);
-	kmem_cache_free(vm_area_cachep, vma);
+	____vm_area_free(vma);
 }
 
+#endif
+
 void vm_area_free(struct vm_area_struct *vma)
 {
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
 	call_rcu(&vma->vm_rcu, __vm_area_free);
+#else
+	____vm_area_free(vma);
+#endif
 }
-#endif	/* CONFIG_SPECULATIVE_PAGE_FAULT */
 
 static void account_kernel_stack(struct task_struct *tsk, int account)
 {
@@ -621,6 +633,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 fail_nomem_anon_vma_fork:
 	mpol_put(vma_policy(tmp));
 fail_nomem_policy:
+	tmp->vm_file = NULL;	/* prevents fput within vm_area_free() */
 	vm_area_free(tmp);
 fail_nomem:
 	retval = -ENOMEM;
diff --git a/mm/mmap.c b/mm/mmap.c
index 3f287599a7a3..cc2323e243bb 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -178,9 +178,8 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
 	might_sleep();
 	if (vma->vm_ops && vma->vm_ops->close)
 		vma->vm_ops->close(vma);
-	if (vma->vm_file)
-		fput(vma->vm_file);
 	mpol_put(vma_policy(vma));
+	/* fput(vma->vm_file) happens in vm_area_free after an RCU delay. */
 	vm_area_free(vma);
 	return next;
 }
@@ -949,7 +948,8 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	if (remove_next) {
 		if (file) {
 			uprobe_munmap(next, next->vm_start, next->vm_end);
-			fput(file);
+			/* fput(file) happens whthin vm_area_free(next) */
+			VM_BUG_ON(file != next->vm_file);
 		}
 		if (next->anon_vma)
 			anon_vma_merge(vma, next);
@@ -1828,7 +1828,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 				 * fput the vma->vm_file here or we would add an extra fput for file
 				 * and cause general protection fault ultimately.
 				 */
-				fput(vma->vm_file);
+				/* fput happens within vm_area_free */
 				vm_area_free(vma);
 				vma = merge;
 				/* Update vm_flags to pick up the change. */
@@ -1907,6 +1907,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	if (vm_flags & VM_DENYWRITE)
 		allow_write_access(file);
 free_vma:
+	VM_BUG_ON(vma->vm_file);
 	vm_area_free(vma);
 unacct_error:
 	if (charged)
@@ -2779,6 +2780,7 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
  out_free_mpol:
 	mpol_put(vma_policy(new));
  out_free_vma:
+	new->vm_file = NULL;	/* prevents fput within vm_area_free() */
 	vm_area_free(new);
 	return err;
 }
@@ -3343,6 +3345,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 out_free_mempol:
 	mpol_put(vma_policy(new_vma));
 out_free_vma:
+	new_vma->vm_file = NULL;	/* Prevent fput within vm_area_free */
 	vm_area_free(new_vma);
 out:
 	return NULL;
diff --git a/mm/nommu.c b/mm/nommu.c
index 5c9ab799c0e6..06a0dc0b913b 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -664,9 +664,8 @@ static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
 {
 	if (vma->vm_ops && vma->vm_ops->close)
 		vma->vm_ops->close(vma);
-	if (vma->vm_file)
-		fput(vma->vm_file);
 	put_nommu_region(vma->vm_region);
+	/* fput(vma->vm_file) happens within vm_area_free() */
 	vm_area_free(vma);
 }
 
@@ -1267,8 +1266,7 @@ unsigned long do_mmap(struct file *file,
 	if (region->vm_file)
 		fput(region->vm_file);
 	kmem_cache_free(vm_region_jar, region);
-	if (vma->vm_file)
-		fput(vma->vm_file);
+	/* fput(vma->vm_file) happens within vm_area_free() */
 	vm_area_free(vma);
 	return ret;
 
-- 
2.20.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ