lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <77bde7d5-6506-4769-8758-ca88bb244bbb@lucifer.local>
Date: Thu, 3 Oct 2024 12:17:34 +0100
From: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
To: Bert Karwatzki <spasswolf@....de>
Cc: "Liam R . Howlett" <Liam.Howlett@...cle.com>,
        Andrew Morton <akpm@...ux-foundation.org>, linux-mm@...ck.org,
        linux-kernel@...r.kernel.org
Subject: Re: [PATCH v8 14/21] mm/mmap: Avoid zeroing vma tree in mmap_region()

On Thu, Oct 03, 2024 at 12:51:41PM +0200, Bert Karwatzki wrote:
> Here's the log procduced by this kernel:
>
> d530839eacd1 (HEAD -> maple_tree_debug) maybe fix 2
> db8adf7c0a23 hack: mm: see if we can get some more information 2
> 7e3bb072761a mm: correct error handling in mmap_region()
> 77df9e4bb222 (tag: next-20241001, origin/master, origin/HEAD, master) Add linux-next specific files for 20241001
>
> The "maybe fix 2" did not work, either.
>
[snip]

Thanks but oh man, that's disappointing. This is a really, really strange
problem.

Taking a step back, let's see if preallocation is somehow a factor here.

Please unwind all previous patches and apply this to the 1st oct next tree
- this contains fix attempts and further info.

Thanks! Lorenzo

----8<----
>From 558ac3118b2861408a17993e4379f40046767fb3 Mon Sep 17 00:00:00 2001
From: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
Date: Wed, 2 Oct 2024 09:19:28 +0100
Subject: [PATCH] hack: set of fix, info stuff v3

Add some dreadful printk() hacks so we can try to get some more information
on what's going on.

Also various attempts at fixes
---
 mm/internal.h | 15 +++++++++
 mm/mmap.c     | 91 +++++++++++++++++++++++++++++++++++++++++++++++++--
 mm/vma.c      | 66 ++++++++++++++++++++++++++++++++-----
 3 files changed, 162 insertions(+), 10 deletions(-)

diff --git a/mm/internal.h b/mm/internal.h
index 93083bbeeefa..cd9414b4651d 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1443,4 +1443,19 @@ static inline void accept_page(struct page *page)
 }
 #endif /* CONFIG_UNACCEPTED_MEMORY */

+static inline bool check_interesting(unsigned long start, unsigned long end)
+{
+	const unsigned long interesting_start = 0x1740000;
+	/* Include off-by-one on purpose.*/
+	const unsigned long interesting_end = 0x68000000 + 1;
+
+	/*  interesting_start            interesting_end
+	 *          |--------------------------|
+	 *           ============================> end
+	 *        <=============================   start
+	 */
+	return end > interesting_start && /* after or overlaps... */
+		start < interesting_end;  /* ...overlaps. */
+}
+
 #endif	/* __MM_INTERNAL_H */
diff --git a/mm/mmap.c b/mm/mmap.c
index dd4b35a25aeb..cef827f4e3cb 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1341,6 +1341,18 @@ struct vm_area_struct *expand_stack(struct mm_struct *mm, unsigned long addr)
 	return vma;
 }

+static void ljs_dump(struct mm_struct *mm,
+		     unsigned long addr, unsigned long len,
+		     vm_flags_t vm_flags, bool is_unmap)
+{
+	if (!check_interesting(addr, addr + len))
+		return;
+
+	pr_err("LJS: %s mm=%p [0x%lx, 0x%lx) [vm_flags=%lu]\n",
+	       is_unmap ? "munmap" : "mmap", mm, addr, addr + len,
+		vm_flags);
+}
+
 /* do_munmap() - Wrapper function for non-maple tree aware do_munmap() calls.
  * @mm: The mm_struct
  * @start: The start address to munmap
@@ -1354,6 +1366,8 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
 {
 	VMA_ITERATOR(vmi, mm, start);

+	ljs_dump(mm, start, len, 0, true);
+
 	return do_vmi_munmap(&vmi, mm, start, len, uf, false);
 }

@@ -1375,11 +1389,16 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	VMA_ITERATOR(vmi, mm, addr);
 	VMG_STATE(vmg, mm, &vmi, addr, end, vm_flags, pgoff);

+	ljs_dump(mm, addr, len, vm_flags, false);
+
 	vmg.file = file;
 	/* Find the first overlapping VMA */
 	vma = vma_find(&vmi, end);
 	init_vma_munmap(&vms, &vmi, vma, addr, end, uf, /* unlock = */ false);
 	if (vma) {
+		if (check_interesting(addr, addr + len))
+			pr_err("LJS: mm=%p About to do unmaps for vms=[%lx, %lx), addr=%lx, end=%lx\n", mm, addr, addr + len, vma_iter_addr(&vmi), vma_iter_end(&vmi));
+
 		mt_init_flags(&mt_detach, vmi.mas.tree->ma_flags & MT_FLAGS_LOCK_MASK);
 		mt_on_stack(mt_detach);
 		mas_init(&mas_detach, &mt_detach, /* addr = */ 0);
@@ -1388,8 +1407,16 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 		if (error)
 			goto gather_failed;

+		vma_iter_config(&vmi, addr, end);
+
 		vmg.next = vms.next;
 		vmg.prev = vms.prev;
+
+		if (check_interesting(addr, addr + len))
+			pr_err("LJS: prev=[%lx, %lx), next=[%lx, %lx)\n",
+			       vmg.prev ? vmg.prev->vm_start : 0, vmg.prev ? vmg.prev->vm_end : 0,
+			       vmg.next ? vmg.next->vm_start : 0, vmg.next ? vmg.next->vm_end : 0);
+
 		vma = NULL;
 	} else {
 		vmg.next = vma_iter_next_rewind(&vmi, &vmg.prev);
@@ -1413,9 +1440,32 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 		vmg.flags = vm_flags;
 	}

+	if (check_interesting(addr, addr + len)) {
+		char *special = vm_flags & VM_SPECIAL ? "special" : "";
+		char *has_file = file ? "file-backed" : "";
+
+		pr_err("LJS: Interesting [%lx, %lx) flags=%lu, special=[%s] file=[%s] addr=%lx end=%lx\n",
+		       addr, addr+len, vm_flags, special, has_file,
+		       vma_iter_addr(&vmi), vma_iter_end(&vmi));
+	}
+
 	vma = vma_merge_new_range(&vmg);
-	if (vma)
+	if (vma) {
+		if (check_interesting(addr, addr + len)) {
+			pr_err("LJS: Merged to [%lx, %lx), addr=%lx, end=%lx\n",
+			       vma->vm_start, vma->vm_end, vma_iter_addr(&vmi),
+			       vma_iter_end(&vmi));
+
+			mt_validate(&mm->mm_mt);
+
+			pr_err("LJS: Post-validate.\n");
+		}
+
 		goto expanded;
+	} else if (check_interesting(addr, addr + len)) {
+		pr_err("LJS: Failed to merge [%lx, %lx), reset...\n",
+		       addr, addr + len);
+	}
 	/*
 	 * Determine the object being mapped and call the appropriate
 	 * specific mapper. the address has already been validated, but
@@ -1441,6 +1491,12 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 		if (error)
 			goto unmap_and_free_vma;

+		if (check_interesting(addr, addr + len)) {
+			pr_err("LJS: call_mmap() on [%lx, %lx) old_flags=%lu new_flags=%lu new range=[%lx, %lx) [CHANGED=%s]\n",
+			       addr, end, vm_flags, vma->vm_flags, vma->vm_start, vma->vm_end,
+			       vm_flags != vma->vm_flags ? "YES" : "NO");
+		}
+
 		if (vma_is_shared_maywrite(vma)) {
 			error = mapping_map_writable(file->f_mapping);
 			if (error)
@@ -1467,6 +1523,11 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 			/* If this fails, state is reset ready for a reattempt. */
 			merge = vma_merge_new_range(&vmg);

+			if (check_interesting(addr, addr + len))
+				pr_err("LJS: flags changed for [%lx, %lx) from %lu to %lu %s\n",
+				       vma->vm_start, vma->vm_end, vm_flags, vma->vm_flags,
+				       merge ? "merged" : "");
+
 			if (merge) {
 				/*
 				 * ->mmap() can change vma->vm_file and fput
@@ -1510,10 +1571,26 @@ unsigned long mmap_region(struct file *file, unsigned long addr,

 	/* Lock the VMA since it is modified after insertion into VMA tree */
 	vma_start_write(vma);
-	vma_iter_store(&vmi, vma);
+
+	if (check_interesting(addr, addr + len))
+		pr_err("LJS: mm=%p: iter store addr=%lx, end=%lx, vma=[%lx, %lx)\n",
+		       mm, vma_iter_addr(&vmi), vma_iter_end(&vmi), vma->vm_start, vma->vm_end);
+
+	error = vma_iter_store_gfp(&vmi, vma, GFP_KERNEL);
+	if (error)
+		goto close_and_free_vma;
+
 	mm->map_count++;
 	vma_link_file(vma);

+	if (check_interesting(addr, addr + len)) {
+		pr_err("LJS: mm=%p: About to validate [%lx, %lx) addr=%lx, end=%lx\n",
+		       mm, vma->vm_start,vma->vm_end, vma_iter_addr(&vmi), vma_iter_end(&vmi));
+
+		mt_validate(&mm->mm_mt);
+		pr_err("LJS: Post-validate.\n");
+	}
+
 	/*
 	 * vma_merge_new_range() calls khugepaged_enter_vma() too, the below
 	 * call covers the non-merge case.
@@ -1530,6 +1607,14 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	perf_event_mmap(vma);

 	/* Unmap any existing mapping in the area */
+
+	if (check_interesting(addr, addr + len)) {
+		pr_err("LJS: mm=%p: About to validate [%lx, %lx) addr=%lx, end=%lx\n",
+		       mm, vma->vm_start,vma->vm_end, vma_iter_addr(&vmi), vma_iter_end(&vmi));
+		mt_validate(&mm->mm_mt);
+		pr_err("LJS: Post-validate.\n");
+	}
+
 	vms_complete_munmap_vmas(&vms, &mas_detach);

 	vm_stat_account(mm, vm_flags, pglen);
@@ -1594,6 +1679,8 @@ static int __vm_munmap(unsigned long start, size_t len, bool unlock)
 	LIST_HEAD(uf);
 	VMA_ITERATOR(vmi, mm, start);

+	ljs_dump(mm, start, start + len, 0, true);
+
 	if (mmap_write_lock_killable(mm))
 		return -EINTR;

diff --git a/mm/vma.c b/mm/vma.c
index 4737afcb064c..06e7a0a18aed 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -223,7 +223,7 @@ static void vma_complete(struct vma_prepare *vp, struct vma_iterator *vmi,
 		 * us to insert it before dropping the locks
 		 * (it may either follow vma or precede it).
 		 */
-		vma_iter_store(vmi, vp->insert);
+		WARN_ON(vma_iter_store_gfp(vmi, vp->insert, GFP_KERNEL));
 		mm->map_count++;
 	}

@@ -598,22 +598,34 @@ static int commit_merge(struct vma_merge_struct *vmg,
 				adjust->vm_end);
 	}

-	if (vma_iter_prealloc(vmg->vmi, vmg->vma))
-		return -ENOMEM;
+//	if (vma_iter_prealloc(vmg->vmi, vmg->vma))
+//		return -ENOMEM;

 	vma_prepare(&vp);
 	vma_adjust_trans_huge(vmg->vma, vmg->start, vmg->end, adj_start);
 	vma_set_range(vmg->vma, vmg->start, vmg->end, vmg->pgoff);

-	if (expanded)
-		vma_iter_store(vmg->vmi, vmg->vma);
+	if (expanded) {
+		if (check_interesting(vmg->start, vmg->end)) {
+			pr_err("LJS: mm=%p expanding to [%lx, %lx) addr=%lx end=%lx\n",
+			       vmg->mm, vmg->start, vmg->end,
+			       vma_iter_addr(vmg->vmi), vma_iter_end(vmg->vmi));
+		}
+
+		//vma_iter_store(vmg->vmi, vmg->vma);
+		if (vma_iter_store_gfp(vmg->vmi, vmg->vma, GFP_KERNEL))
+			return -ENOMEM;
+	}

 	if (adj_start) {
 		adjust->vm_start += adj_start;
 		adjust->vm_pgoff += PHYS_PFN(adj_start);
 		if (adj_start < 0) {
 			WARN_ON(expanded);
-			vma_iter_store(vmg->vmi, adjust);
+			//vma_iter_store(vmg->vmi, adjust);
+
+			if (vma_iter_store_gfp(vmg->vmi, adjust, GFP_KERNEL))
+				return -ENOMEM;
 		}
 	}

@@ -956,6 +968,12 @@ struct vm_area_struct *vma_merge_new_range(struct vma_merge_struct *vmg)
 		vma_prev(vmg->vmi); /* Equivalent to going to the previous range */
 	}

+	if (check_interesting(start, end)) {
+		pr_err("LJS: mm=%p About to merge [%lx, %lx) to range [%lx, %lx), addr=%lx end=%lx\n",
+		       vmg->mm, start, end, vmg->start, vmg->end,
+		       vma_iter_addr(vmg->vmi), vma_iter_end(vmg->vmi));
+	}
+
 	/*
 	 * Now try to expand adjacent VMA(s). This takes care of removing the
 	 * following VMA if we have VMAs on both sides.
@@ -1108,8 +1126,13 @@ void vms_clean_up_area(struct vma_munmap_struct *vms,
 	vms_clear_ptes(vms, mas_detach, true);
 	mas_set(mas_detach, 0);
 	mas_for_each(mas_detach, vma, ULONG_MAX)
-		if (vma->vm_ops && vma->vm_ops->close)
+		if (vma->vm_ops && vma->vm_ops->close) {
+			if (check_interesting(vma->vm_start, vma->vm_end))
+				pr_err("LJS: mm=%p Closing [%lx, %lx)\n",
+				       vma->vm_mm, vma->vm_start, vma->vm_end);
+
 			vma->vm_ops->close(vma);
+		}
 	vms->closed_vm_ops = true;
 }

@@ -1179,6 +1202,10 @@ int vms_gather_munmap_vmas(struct vma_munmap_struct *vms,
 	struct vm_area_struct *next = NULL;
 	int error;

+	if (check_interesting(vms->vma->vm_start, vms->vma->vm_end))
+		pr_err("LJS2 vms->start=%lx, vms->vma->vm_start=%lx\n",
+		       vms->start, vms->vma->vm_start);
+
 	/*
 	 * If we need to split any vma, do it now to save pain later.
 	 * Does it split the first one?
@@ -1202,6 +1229,11 @@ int vms_gather_munmap_vmas(struct vma_munmap_struct *vms,
 			goto start_split_failed;
 		}

+		if (check_interesting(vms->vma->vm_start, vms->vma->vm_end))
+			pr_err("LJS: mm=%p vms=[%lx, %lx) split START of [%lx, %lx)\n",
+			       vms->vma->vm_mm, vms->start, vms->end,
+			       vms->vma->vm_start, vms->vma->vm_end);
+
 		error = __split_vma(vms->vmi, vms->vma, vms->start, 1);
 		if (error)
 			goto start_split_failed;
@@ -1217,12 +1249,24 @@ int vms_gather_munmap_vmas(struct vma_munmap_struct *vms,
 	for_each_vma_range(*(vms->vmi), next, vms->end) {
 		long nrpages;

+		if (check_interesting(vms->vma->vm_start, vms->vma->vm_end))
+			pr_err("LJS: mm=%p vms=[%lx, %lx) UNMAP [%lx, %lx) file=[%s]\n",
+			       vms->vma->vm_mm, vms->start, vms->end,
+			       next->vm_start, next->vm_end,
+			       vms->vma->vm_file ? "file-backed" : "");
+
 		if (!can_modify_vma(next)) {
 			error = -EPERM;
 			goto modify_vma_failed;
 		}
 		/* Does it split the end? */
 		if (next->vm_end > vms->end) {
+
+			if (check_interesting(next->vm_start, next->vm_end))
+				pr_err("LJS: mm=%p vms=[%lx, %lx) split END of [%lx, %lx)\n",
+				       next->vm_mm, vms->start, vms->end,
+				       next->vm_start, next->vm_end);
+
 			error = __split_vma(vms->vmi, next, vms->end, 0);
 			if (error)
 				goto end_split_failed;
@@ -1295,9 +1339,15 @@ int vms_gather_munmap_vmas(struct vma_munmap_struct *vms,
 	}
 #endif

-	while (vma_iter_addr(vms->vmi) > vms->start)
+	while (vma_iter_addr(vms->vmi) > vms->start) {
 		vma_iter_prev_range(vms->vmi);

+		if (check_interesting(vms->vma->vm_start, vms->vma->vm_end))
+			pr_err("LJS3: addr=%lx, end=%lx, vms->start=%lx\n",
+			       vma_iter_addr(vms->vmi),
+			       vma_iter_end(vms->vmi), vms->start);
+	}
+
 	vms->clear_ptes = true;
 	return 0;

--
2.46.2

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ