lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <bbbbd8b8b1d0bb2cd2f63dece62ed8d773cdd4d1.1752586090.git.lorenzo.stoakes@oracle.com>
Date: Tue, 15 Jul 2025 14:37:39 +0100
From: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
To: Andrew Morton <akpm@...ux-foundation.org>
Cc: "Liam R . Howlett" <Liam.Howlett@...cle.com>,
        David Hildenbrand <david@...hat.com>, Vlastimil Babka <vbabka@...e.cz>,
        Jann Horn <jannh@...gle.com>, Pedro Falcato <pfalcato@...e.de>,
        linux-mm@...ck.org, linux-kernel@...r.kernel.org,
        Jeff Xu <jeffxu@...omium.org>
Subject: [PATCH v2 2/5] mm/mseal: update madvise() logic

The madvise() logic is inexplicably performed in mm/mseal.c - this ought to
be located in mm/madvise.c.

Additionally can_modify_vma_madv() is inconsistently named and, in
combination with is_ro_anon(), is very confusing logic.

Put a static function in mm/madvise.c instead - can_madvise_modify() - that
spells out exactly what's happening. Also explicitly check for an anon VMA.

Also add commentary to explain what's going on.

Essentially - we disallow discarding of data in mseal()'d mappings in
instances where the user couldn't otherwise write to that data.

Shared mappings are always backed, so no discard will actually truly
discard the data. Read-only anonymous and MAP_PRIVATE file-backed mappings
are the ones we are interested in.

We make a change to the logic here to correct a mistake - we must disallow
discard of read-only MAP_PRIVATE file-backed mappings, which previously we
were not.

The justification for this change is to account for the case where:

1. A MAP_PRIVATE R/W file-backed mapping is established.
2. The mapping is written to, which backs it with anonymous memory.
3. The mapping is mprotect()'d read-only.
4. The mapping is mseal()'d.

If we were to now allow discard of this data, it would mean mseal() would
not prevent the unrecoverable discarding of data and it was thus violate
the semantics of sealed VMAs.

Finally, update the mseal tests, which were asserting previously that a
read-only MAP_PRIVATE file-backed mapping could be discarded.

Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett@...cle.com>
Reviewed-by: Pedro Falcato <pfalcato@...e.de>
---
 mm/madvise.c                            | 63 ++++++++++++++++++++++++-
 mm/mseal.c                              | 49 -------------------
 mm/vma.h                                |  7 ---
 tools/testing/selftests/mm/mseal_test.c |  3 +-
 4 files changed, 63 insertions(+), 59 deletions(-)

diff --git a/mm/madvise.c b/mm/madvise.c
index 9de9b7c797c6..89205693b67a 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -19,6 +19,7 @@
 #include <linux/sched.h>
 #include <linux/sched/mm.h>
 #include <linux/mm_inline.h>
+#include <linux/mmu_context.h>
 #include <linux/string.h>
 #include <linux/uio.h>
 #include <linux/ksm.h>
@@ -1256,6 +1257,66 @@ static long madvise_guard_remove(struct madvise_behavior *madv_behavior)
 			       &guard_remove_walk_ops, NULL);
 }
 
+#ifdef CONFIG_64BIT
+/* Does the madvise operation result in discarding of mapped data? */
+static bool is_discard(int behavior)
+{
+	switch (behavior) {
+	case MADV_FREE:
+	case MADV_DONTNEED:
+	case MADV_DONTNEED_LOCKED:
+	case MADV_REMOVE:
+	case MADV_DONTFORK:
+	case MADV_WIPEONFORK:
+	case MADV_GUARD_INSTALL:
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * We are restricted from madvise()'ing mseal()'d VMAs only in very particular
+ * circumstances - discarding of data from read-only anonymous SEALED mappings.
+ *
+ * This is because users cannot trivally discard data from these VMAs, and may
+ * only do so via an appropriate madvise() call.
+ */
+static bool can_madvise_modify(struct madvise_behavior *madv_behavior)
+{
+	struct vm_area_struct *vma = madv_behavior->vma;
+
+	/* If the VMA isn't sealed we're good. */
+	if (can_modify_vma(vma))
+		return true;
+
+	/* For a sealed VMA, we only care about discard operations. */
+	if (!is_discard(madv_behavior->behavior))
+		return true;
+
+	/*
+	 * But shared mappings are fine, as dirty pages will be written to a
+	 * backing store regardless of discard.
+	 */
+	if (vma->vm_flags & VM_SHARED)
+		return true;
+
+	/* If the user could write to the mapping anyway, then this is fine. */
+	if ((vma->vm_flags & VM_WRITE) &&
+	    arch_vma_access_permitted(vma, /* write= */ true,
+			/* execute= */ false, /* foreign= */ false))
+		return true;
+
+	/* Otherwise, we are not permitted to perform this operation. */
+	return false;
+}
+#else
+static bool can_madvise_modify(struct madvise_behavior *madv_behavior)
+{
+	return true;
+}
+#endif
+
 /*
  * Apply an madvise behavior to a region of a vma.  madvise_update_vma
  * will handle splitting a vm area into separate areas, each area with its own
@@ -1269,7 +1330,7 @@ static int madvise_vma_behavior(struct madvise_behavior *madv_behavior)
 	struct madvise_behavior_range *range = &madv_behavior->range;
 	int error;
 
-	if (unlikely(!can_modify_vma_madv(madv_behavior->vma, behavior)))
+	if (unlikely(!can_madvise_modify(madv_behavior)))
 		return -EPERM;
 
 	switch (behavior) {
diff --git a/mm/mseal.c b/mm/mseal.c
index c27197ac04e8..1308e88ab184 100644
--- a/mm/mseal.c
+++ b/mm/mseal.c
@@ -11,7 +11,6 @@
 #include <linux/mman.h>
 #include <linux/mm.h>
 #include <linux/mm_inline.h>
-#include <linux/mmu_context.h>
 #include <linux/syscalls.h>
 #include <linux/sched.h>
 #include "internal.h"
@@ -21,54 +20,6 @@ static inline void set_vma_sealed(struct vm_area_struct *vma)
 	vm_flags_set(vma, VM_SEALED);
 }
 
-static bool is_madv_discard(int behavior)
-{
-	switch (behavior) {
-	case MADV_FREE:
-	case MADV_DONTNEED:
-	case MADV_DONTNEED_LOCKED:
-	case MADV_REMOVE:
-	case MADV_DONTFORK:
-	case MADV_WIPEONFORK:
-	case MADV_GUARD_INSTALL:
-		return true;
-	}
-
-	return false;
-}
-
-static bool is_ro_anon(struct vm_area_struct *vma)
-{
-	/* check anonymous mapping. */
-	if (vma->vm_file || vma->vm_flags & VM_SHARED)
-		return false;
-
-	/*
-	 * check for non-writable:
-	 * PROT=RO or PKRU is not writeable.
-	 */
-	if (!(vma->vm_flags & VM_WRITE) ||
-		!arch_vma_access_permitted(vma, true, false, false))
-		return true;
-
-	return false;
-}
-
-/*
- * Check if a vma is allowed to be modified by madvise.
- */
-bool can_modify_vma_madv(struct vm_area_struct *vma, int behavior)
-{
-	if (!is_madv_discard(behavior))
-		return true;
-
-	if (unlikely(!can_modify_vma(vma) && is_ro_anon(vma)))
-		return false;
-
-	/* Allow by default. */
-	return true;
-}
-
 static int mseal_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma,
 		struct vm_area_struct **prev, unsigned long start,
 		unsigned long end, vm_flags_t newflags)
diff --git a/mm/vma.h b/mm/vma.h
index cf6e3a6371b6..6515045ba342 100644
--- a/mm/vma.h
+++ b/mm/vma.h
@@ -578,8 +578,6 @@ static inline bool can_modify_vma(struct vm_area_struct *vma)
 	return true;
 }
 
-bool can_modify_vma_madv(struct vm_area_struct *vma, int behavior);
-
 #else
 
 static inline bool can_modify_vma(struct vm_area_struct *vma)
@@ -587,11 +585,6 @@ static inline bool can_modify_vma(struct vm_area_struct *vma)
 	return true;
 }
 
-static inline bool can_modify_vma_madv(struct vm_area_struct *vma, int behavior)
-{
-	return true;
-}
-
 #endif
 
 #if defined(CONFIG_STACK_GROWSUP)
diff --git a/tools/testing/selftests/mm/mseal_test.c b/tools/testing/selftests/mm/mseal_test.c
index 005f29c86484..34c042da4de2 100644
--- a/tools/testing/selftests/mm/mseal_test.c
+++ b/tools/testing/selftests/mm/mseal_test.c
@@ -1712,7 +1712,6 @@ static void test_seal_discard_ro_anon_on_filebacked(bool seal)
 	unsigned long size = 4 * page_size;
 	int ret;
 	int fd;
-	unsigned long mapflags = MAP_PRIVATE;
 
 	fd = memfd_create("test", 0);
 	FAIL_TEST_IF_FALSE(fd > 0);
@@ -1720,7 +1719,7 @@ static void test_seal_discard_ro_anon_on_filebacked(bool seal)
 	ret = fallocate(fd, 0, 0, size);
 	FAIL_TEST_IF_FALSE(!ret);
 
-	ptr = mmap(NULL, size, PROT_READ, mapflags, fd, 0);
+	ptr = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0);
 	FAIL_TEST_IF_FALSE(ptr != MAP_FAILED);
 
 	if (seal) {
-- 
2.50.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ