lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <af510726-4245-46c1-81bb-e32379c2c127@lucifer.local>
Date: Tue, 15 Oct 2024 20:56:28 +0100
From: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
To: Oliver Sang <oliver.sang@...el.com>
Cc: oe-lkp@...ts.linux.dev, lkp@...el.com, linux-kernel@...r.kernel.org,
        Andrew Morton <akpm@...ux-foundation.org>,
        Mark Brown <broonie@...nel.org>,
        "Liam R. Howlett" <Liam.Howlett@...cle.com>,
        Vlastimil Babka <vbabka@...e.cz>, Bert Karwatzki <spasswolf@....de>,
        Jeff Xu <jeffxu@...omium.org>, Jiri Olsa <olsajiri@...il.com>,
        Kees Cook <kees@...nel.org>, Lorenzo Stoakes <lstoakes@...il.com>,
        Matthew Wilcox <willy@...radead.org>,
        "Paul E. McKenney" <paulmck@...nel.org>,
        Paul Moore <paul@...l-moore.com>,
        Sidhartha Kumar <sidhartha.kumar@...cle.com>,
        Suren Baghdasaryan <surenb@...gle.com>, linux-mm@...ck.org,
        ying.huang@...el.com, feng.tang@...el.com, fengwei.yin@...el.com
Subject: Re: [linus:master] [mm]  cacded5e42:  aim9.brk_test.ops_per_sec
 -5.0% regression

On Fri, Oct 11, 2024 at 08:26:37AM +0100, Lorenzo Stoakes wrote:
[snip]

> Thanks for testing this suffices to rule this one out... I will try to get a
> functional and reliable performance environment locally so I can properly
> address this and then we can try something else.
>
> Thanks!
> Lorenzo
>

OK Oliver, could you try the below patch? I have got aim9.brk up and
running locally and for me this seems to address the issue.

This is against Andrew's tree [0] in the mm-unstable branch. It should
hopefully apply cleanly to -next also.

Very much appreciated, thanks!

[0]:https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git/

----8<----
>From cee7f4196247de0da2b7632838fd36aee8b77e13 Mon Sep 17 00:00:00 2001
From: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
Date: Tue, 15 Oct 2024 20:16:32 +0100
Subject: [PATCH] mm: add expand-only VMA merge mode and optimise
 do_brk_flags()

We know in advance that do_brk_flags() wants only to perform a VMA
expansion (if the prior VMA is compatible), and that we assume no mergeable
VMA follows it.

These are the semantics of this function prior to the recent rewrite of the
VMA merging logic, however we are now doing more work than necessary -
positioning the VMA iterator at the prior VMA and performing tasks that are
not required.

Add a new field to the vmg struct to permit merge flags and add a new merge
flag VMG_FLAG_JUST_EXPAND which implies this behaviour, and have
do_brk_flags() use this.

This fixes a reported performance regression in a brk() benchmarking suite.
---
 mm/mmap.c |  3 ++-
 mm/vma.c  | 23 +++++++++++++++--------
 mm/vma.h  | 16 ++++++++++++++++
 3 files changed, 33 insertions(+), 9 deletions(-)

diff --git a/mm/mmap.c b/mm/mmap.c
index 02f7b45c3076..b99ba4cac9fe 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1741,7 +1741,8 @@ static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
 		VMG_STATE(vmg, mm, vmi, addr, addr + len, flags, PHYS_PFN(addr));

 		vmg.prev = vma;
-		vma_iter_next_range(vmi);
+		/* vmi is positioned at prev, which this mode expects. */
+		vmg.merge_flags = VMG_FLAG_JUST_EXPAND;

 		if (vma_merge_new_range(&vmg))
 			goto out;
diff --git a/mm/vma.c b/mm/vma.c
index 749c4881fd60..69ce9e07ab11 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -562,6 +562,7 @@ struct vm_area_struct *vma_merge_new_range(struct vma_merge_struct *vmg)
 	pgoff_t pgoff = vmg->pgoff;
 	pgoff_t pglen = PHYS_PFN(end - start);
 	bool can_merge_left, can_merge_right;
+	bool just_expand = vmg->merge_flags & VMG_FLAG_JUST_EXPAND;

 	mmap_assert_write_locked(vmg->mm);
 	VM_WARN_ON(vmg->vma);
@@ -575,7 +576,7 @@ struct vm_area_struct *vma_merge_new_range(struct vma_merge_struct *vmg)
 		return NULL;

 	can_merge_left = can_vma_merge_left(vmg);
-	can_merge_right = can_vma_merge_right(vmg, can_merge_left);
+	can_merge_right = !just_expand && can_vma_merge_right(vmg, can_merge_left);

 	/* If we can merge with the next VMA, adjust vmg accordingly. */
 	if (can_merge_right) {
@@ -590,7 +591,11 @@ struct vm_area_struct *vma_merge_new_range(struct vma_merge_struct *vmg)
 		vmg->vma = prev;
 		vmg->pgoff = prev->vm_pgoff;

-		vma_prev(vmg->vmi); /* Equivalent to going to the previous range */
+		/* In expand-only case we are already positioned here. */
+		if (!just_expand) {
+			/* Equivalent to going to the previous range. */
+			vma_prev(vmg->vmi);
+		}
 	}

 	/*
@@ -604,12 +609,14 @@ struct vm_area_struct *vma_merge_new_range(struct vma_merge_struct *vmg)
 	}

 	/* If expansion failed, reset state. Allows us to retry merge later. */
-	vmg->vma = NULL;
-	vmg->start = start;
-	vmg->end = end;
-	vmg->pgoff = pgoff;
-	if (vmg->vma == prev)
-		vma_iter_set(vmg->vmi, start);
+	if (!just_expand) {
+		vmg->vma = NULL;
+		vmg->start = start;
+		vmg->end = end;
+		vmg->pgoff = pgoff;
+		if (vmg->vma == prev)
+			vma_iter_set(vmg->vmi, start);
+	}

 	return NULL;
 }
diff --git a/mm/vma.h b/mm/vma.h
index 82354fe5edd0..8f8548958e41 100644
--- a/mm/vma.h
+++ b/mm/vma.h
@@ -59,6 +59,19 @@ enum vma_merge_state {
 	VMA_MERGE_SUCCESS,
 };

+typedef unsigned long vma_merge_flags_t;
+
+ /*
+  * If we can expand, simply do so. We know there is nothing to merge to the
+  * right.
+  *
+  * Does not reset state upon failure to merge.
+  *
+  * IMPORTANT: The VMA iterator is assumed to be positioned at the previous VMA,
+  *            rather than at the gap.
+  */
+#define VMG_FLAG_JUST_EXPAND (1 << 0)
+
 /* Represents a VMA merge operation. */
 struct vma_merge_struct {
 	struct mm_struct *mm;
@@ -75,6 +88,7 @@ struct vma_merge_struct {
 	struct mempolicy *policy;
 	struct vm_userfaultfd_ctx uffd_ctx;
 	struct anon_vma_name *anon_name;
+	vma_merge_flags_t merge_flags;
 	enum vma_merge_state state;
 };

@@ -99,6 +113,7 @@ static inline pgoff_t vma_pgoff_offset(struct vm_area_struct *vma,
 		.flags = flags_,					\
 		.pgoff = pgoff_,					\
 		.state = VMA_MERGE_START,				\
+		.merge_flags = 0,					\
 	}

 #define VMG_VMA_STATE(name, vmi_, prev_, vma_, start_, end_)	\
@@ -118,6 +133,7 @@ static inline pgoff_t vma_pgoff_offset(struct vm_area_struct *vma,
 		.uffd_ctx = vma_->vm_userfaultfd_ctx,		\
 		.anon_name = anon_vma_name(vma_),		\
 		.state = VMA_MERGE_START,			\
+		.merge_flags = 0,				\
 	}

 #ifdef CONFIG_DEBUG_VM_MAPLE_TREE
--
2.46.2

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ