Message-ID: <20241227072825.1288491-5-rppt@kernel.org>
Date: Fri, 27 Dec 2024 09:28:21 +0200
From: Mike Rapoport <rppt@...nel.org>
To: Andrew Morton <akpm@...ux-foundation.org>
Cc: Andy Lutomirski <luto@...nel.org>,
	Anton Ivanov <anton.ivanov@...bridgegreys.com>,
	Borislav Petkov <bp@...en8.de>,
	Brendan Higgins <brendan.higgins@...ux.dev>,
	Daniel Gomez <da.gomez@...sung.com>,
	Daniel Thompson <danielt@...nel.org>,
	Dave Hansen <dave.hansen@...ux.intel.com>,
	David Gow <davidgow@...gle.com>,
	Douglas Anderson <dianders@...omium.org>,
	Ingo Molnar <mingo@...hat.com>,
	Jason Wessel <jason.wessel@...driver.com>,
	Jiri Kosina <jikos@...nel.org>,
	Joe Lawrence <joe.lawrence@...hat.com>,
	Johannes Berg <johannes@...solutions.net>,
	Josh Poimboeuf <jpoimboe@...nel.org>,
	"Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>,
	Luis Chamberlain <mcgrof@...nel.org>,
	Mark Rutland <mark.rutland@....com>,
	Masami Hiramatsu <mhiramat@...nel.org>,
	Mike Rapoport <rppt@...nel.org>,
	Miroslav Benes <mbenes@...e.cz>,
	"H. Peter Anvin" <hpa@...or.com>,
	Peter Zijlstra <peterz@...radead.org>,
	Petr Mladek <pmladek@...e.com>,
	Petr Pavlu <petr.pavlu@...e.com>,
	Rae Moar <rmoar@...gle.com>,
	Richard Weinberger <richard@....at>,
	Sami Tolvanen <samitolvanen@...gle.com>,
	Shuah Khan <shuah@...nel.org>,
	Song Liu <song@...nel.org>,
	Steven Rostedt <rostedt@...dmis.org>,
	Thomas Gleixner <tglx@...utronix.de>,
	kgdb-bugreport@...ts.sourceforge.net,
	kunit-dev@...glegroups.com,
	linux-kernel@...r.kernel.org,
	linux-kselftest@...r.kernel.org,
	linux-mm@...ck.org,
	linux-modules@...r.kernel.org,
	linux-trace-kernel@...r.kernel.org,
	linux-um@...ts.infradead.org,
	live-patching@...r.kernel.org,
	x86@...nel.org
Subject: [PATCH 4/8] execmem: add API for temporary remapping as RW and restoring ROX afterwards

From: "Mike Rapoport (Microsoft)" <rppt@...nel.org>

Using a writable copy for ROX memory is cumbersome and error prone.

Add an API that allows temporarily remapping ranges in the ROX cache as
writable and then restoring their read-only-execute permissions.

This API will later be used in the modules code and will allow removing
the nasty games with a writable copy in alternatives patching on x86.

Restoring the ROX permissions relies on the architecture's ability to
reconstruct large pages in its set_memory_rox() method.

Signed-off-by: Mike Rapoport (Microsoft) <rppt@...nel.org>
---
 include/linux/execmem.h |  31 +++++++++++
 mm/execmem.c            | 118 +++++++++++++++++++++++++++++++++-------
 2 files changed, 130 insertions(+), 19 deletions(-)
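
For illustration, a minimal usage sketch of how a caller such as the module
loader could use the new API. load_text(), its image/size parameters and the
choice of EXECMEM_MODULE_TEXT are hypothetical, and the sketch assumes the
allocation is served from a ROX-cache-backed execmem range:

#include <linux/execmem.h>
#include <linux/string.h>

/* Hypothetical caller: copy code into ROX memory via a temporary RW window. */
static void *load_text(const void *image, size_t size)
{
	void *text;
	int err;

	/* may be carved out of a large-page-backed ROX cache area */
	text = execmem_alloc(EXECMEM_MODULE_TEXT, size);
	if (!text)
		return NULL;

	/* drop execute and allow writes for the copy ... */
	err = execmem_make_temp_rw(text, size);
	if (err)
		goto err_free;

	memcpy(text, image, size);

	/* ... then seal the range read-only-execute again */
	err = execmem_restore_rox(text, size);
	if (err)
		goto err_free;

	return text;

err_free:
	execmem_free(text);
	return NULL;
}

Note that rw_mappings in the patch below is tracked per cached area, so when
several allocations carved from the same large-page area are temporarily RW,
set_memory_rox() is applied to the whole area only once the last
execmem_restore_rox() call drops the counter to zero.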

diff --git a/include/linux/execmem.h b/include/linux/execmem.h
index 64130ae19690..65655a5d1be2 100644
--- a/include/linux/execmem.h
+++ b/include/linux/execmem.h
@@ -65,6 +65,37 @@ enum execmem_range_flags {
  * Architectures that use EXECMEM_ROX_CACHE must implement this.
  */
 void execmem_fill_trapping_insns(void *ptr, size_t size, bool writable);
+
+/**
+ * execmem_make_temp_rw - temporarily remap region with read-write
+ *			  permissions
+ * @ptr:	address of the region to remap
+ * @size:	size of the region to remap
+ *
+ * Remaps part of a cached large page in the ROX cache in the range
+ * [@ptr, @ptr + @size) as writable and non-executable. The caller must
+ * have exclusive ownership of this range and ensure nothing will try to
+ * execute code in this range.
+ *
+ * Return: 0 on success or negative error code on failure.
+ */
+int execmem_make_temp_rw(void *ptr, size_t size);
+
+/**
+ * execmem_restore_rox - restore read-only-execute permissions
+ * @ptr:	address of the region to remap
+ * @size:	size of the region to remap
+ *
+ * Restores read-only-execute permissions on a range [@ptr, @ptr + @size)
+ * after it was temporarily remapped as writable. Relies on architecture
+ * implementation of set_memory_rox() to restore mapping using large pages.
+ *
+ * Return: 0 on success or negative error code on failure.
+ */
+int execmem_restore_rox(void *ptr, size_t size);
+#else
+static inline int execmem_make_temp_rw(void *ptr, size_t size) { return 0; }
+static inline int execmem_restore_rox(void *ptr, size_t size) { return 0; }
 #endif
 
 /**
diff --git a/mm/execmem.c b/mm/execmem.c
index 317b6a8d35be..be6b234c032e 100644
--- a/mm/execmem.c
+++ b/mm/execmem.c
@@ -89,6 +89,12 @@ static void *execmem_vmalloc(struct execmem_range *range, size_t size,
 #endif /* CONFIG_MMU */
 
 #ifdef CONFIG_ARCH_HAS_EXECMEM_ROX
+struct execmem_area {
+	struct vm_struct *vm;
+	unsigned int rw_mappings;
+	size_t size;
+};
+
 struct execmem_cache {
 	struct mutex mutex;
 	struct maple_tree busy_areas;
@@ -135,7 +141,7 @@ static void execmem_cache_clean(struct work_struct *work)
 	struct maple_tree *free_areas = &execmem_cache.free_areas;
 	struct mutex *mutex = &execmem_cache.mutex;
 	MA_STATE(mas, free_areas, 0, ULONG_MAX);
-	void *area;
+	struct execmem_area *area;
 
 	mutex_lock(mutex);
 	mas_for_each(&mas, area, ULONG_MAX) {
@@ -143,11 +149,12 @@ static void execmem_cache_clean(struct work_struct *work)
 
 		if (IS_ALIGNED(size, PMD_SIZE) &&
 		    IS_ALIGNED(mas.index, PMD_SIZE)) {
-			struct vm_struct *vm = find_vm_area(area);
+			struct vm_struct *vm = area->vm;
 
 			execmem_set_direct_map_valid(vm, true);
 			mas_store_gfp(&mas, NULL, GFP_KERNEL);
-			vfree(area);
+			vfree(vm->addr);
+			kfree(area);
 		}
 	}
 	mutex_unlock(mutex);
@@ -155,30 +162,31 @@ static void execmem_cache_clean(struct work_struct *work)
 
 static DECLARE_WORK(execmem_cache_clean_work, execmem_cache_clean);
 
-static int execmem_cache_add(void *ptr, size_t size)
+static int execmem_cache_add(void *ptr, size_t size, struct execmem_area *area)
 {
 	struct maple_tree *free_areas = &execmem_cache.free_areas;
 	struct mutex *mutex = &execmem_cache.mutex;
 	unsigned long addr = (unsigned long)ptr;
 	MA_STATE(mas, free_areas, addr - 1, addr + 1);
+	struct execmem_area *lower_area = NULL;
+	struct execmem_area *upper_area = NULL;
 	unsigned long lower, upper;
-	void *area = NULL;
 	int err;
 
 	lower = addr;
 	upper = addr + size - 1;
 
 	mutex_lock(mutex);
-	area = mas_walk(&mas);
-	if (area && mas.last == addr - 1)
+	lower_area = mas_walk(&mas);
+	if (lower_area && lower_area == area && mas.last == addr - 1)
 		lower = mas.index;
 
-	area = mas_next(&mas, ULONG_MAX);
-	if (area && mas.index == addr + size)
+	upper_area = mas_next(&mas, ULONG_MAX);
+	if (upper_area && upper_area == area && mas.index == addr + size)
 		upper = mas.last;
 
 	mas_set_range(&mas, lower, upper);
-	err = mas_store_gfp(&mas, (void *)lower, GFP_KERNEL);
+	err = mas_store_gfp(&mas, area, GFP_KERNEL);
 	mutex_unlock(mutex);
 	if (err)
 		return err;
@@ -209,7 +217,8 @@ static void *__execmem_cache_alloc(struct execmem_range *range, size_t size)
 	MA_STATE(mas_busy, busy_areas, 0, ULONG_MAX);
 	struct mutex *mutex = &execmem_cache.mutex;
 	unsigned long addr, last, area_size = 0;
-	void *area, *ptr = NULL;
+	struct execmem_area *area;
+	void *ptr = NULL;
 	int err;
 
 	mutex_lock(mutex);
@@ -228,20 +237,18 @@ static void *__execmem_cache_alloc(struct execmem_range *range, size_t size)
 
 	/* insert allocated size to busy_areas at range [addr, addr + size) */
 	mas_set_range(&mas_busy, addr, addr + size - 1);
-	err = mas_store_gfp(&mas_busy, (void *)addr, GFP_KERNEL);
+	err = mas_store_gfp(&mas_busy, area, GFP_KERNEL);
 	if (err)
 		goto out_unlock;
 
 	mas_store_gfp(&mas_free, NULL, GFP_KERNEL);
 	if (area_size > size) {
-		void *ptr = (void *)(addr + size);
-
 		/*
 		 * re-insert remaining free size to free_areas at range
 		 * [addr + size, last]
 		 */
 		mas_set_range(&mas_free, addr + size, last);
-		err = mas_store_gfp(&mas_free, ptr, GFP_KERNEL);
+		err = mas_store_gfp(&mas_free, area, GFP_KERNEL);
 		if (err) {
 			mas_store_gfp(&mas_busy, NULL, GFP_KERNEL);
 			goto out_unlock;
@@ -257,16 +264,21 @@ static void *__execmem_cache_alloc(struct execmem_range *range, size_t size)
 static int execmem_cache_populate(struct execmem_range *range, size_t size)
 {
 	unsigned long vm_flags = VM_ALLOW_HUGE_VMAP;
+	struct execmem_area *area;
 	unsigned long start, end;
 	struct vm_struct *vm;
 	size_t alloc_size;
 	int err = -ENOMEM;
 	void *p;
 
+	area = kzalloc(sizeof(*area), GFP_KERNEL);
+	if (!area)
+		return err;
+
 	alloc_size = round_up(size, PMD_SIZE);
 	p = execmem_vmalloc(range, alloc_size, PAGE_KERNEL, vm_flags);
 	if (!p)
-		return err;
+		goto err_free_area;
 
 	vm = find_vm_area(p);
 	if (!vm)
@@ -289,7 +301,9 @@ static int execmem_cache_populate(struct execmem_range *range, size_t size)
 	if (err)
 		goto err_free_mem;
 
-	err = execmem_cache_add(p, alloc_size);
+	area->size = alloc_size;
+	area->vm = vm;
+	err = execmem_cache_add(p, alloc_size, area);
 	if (err)
 		goto err_free_mem;
 
@@ -297,6 +311,8 @@ static int execmem_cache_populate(struct execmem_range *range, size_t size)
 
 err_free_mem:
 	vfree(p);
+err_free_area:
+	kfree(area);
 	return err;
 }
 
@@ -305,6 +321,9 @@ static void *execmem_cache_alloc(struct execmem_range *range, size_t size)
 	void *p;
 	int err;
 
+	/* make sure everything in the cache is page aligned */
+	size = PAGE_ALIGN(size);
+
 	p = __execmem_cache_alloc(range, size);
 	if (p)
 		return p;
@@ -322,8 +341,8 @@ static bool execmem_cache_free(void *ptr)
 	struct mutex *mutex = &execmem_cache.mutex;
 	unsigned long addr = (unsigned long)ptr;
 	MA_STATE(mas, busy_areas, addr, addr);
+	struct execmem_area *area;
 	size_t size;
-	void *area;
 
 	mutex_lock(mutex);
 	area = mas_walk(&mas);
@@ -338,12 +357,73 @@ static bool execmem_cache_free(void *ptr)
 
 	execmem_fill_trapping_insns(ptr, size, /* writable = */ false);
 
-	execmem_cache_add(ptr, size);
+	execmem_cache_add(ptr, size, area);
 
 	schedule_work(&execmem_cache_clean_work);
 
 	return true;
 }
+
+int execmem_make_temp_rw(void *ptr, size_t size)
+{
+	struct maple_tree *busy_areas = &execmem_cache.busy_areas;
+	unsigned int nr = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	struct mutex *mutex = &execmem_cache.mutex;
+	unsigned long addr = (unsigned long)ptr;
+	MA_STATE(mas, busy_areas, addr, addr);
+	struct execmem_area *area;
+	int ret = -ENOMEM;
+
+	mutex_lock(mutex);
+	area = mas_walk(&mas);
+	if (!area)
+		goto out;
+
+	ret = set_memory_nx(addr, nr);
+	if (ret)
+		goto out;
+
+	/*
+	 * If a large page had to be split, that already happened when we
+	 * marked the pages NX, which guarantees that this call won't
+	 * fail.
+	 */
+	set_memory_rw(addr, nr);
+	area->rw_mappings++;
+
+out:
+	mutex_unlock(mutex);
+	return ret;
+}
+
+int execmem_restore_rox(void *ptr, size_t size)
+{
+	struct maple_tree *busy_areas = &execmem_cache.busy_areas;
+	struct mutex *mutex = &execmem_cache.mutex;
+	unsigned long addr = (unsigned long)ptr;
+	MA_STATE(mas, busy_areas, addr, addr);
+	struct execmem_area *area;
+	int err = 0;
+
+	size = PAGE_ALIGN(size);
+
+	mutex_lock(mutex);
+	mas_for_each(&mas, area, addr + size - 1) {
+		area->rw_mappings--;
+		if (!area->rw_mappings) {
+			unsigned int nr = area->size >> PAGE_SHIFT;
+
+			addr = (unsigned long)area->vm->addr;
+			err = set_memory_rox(addr, nr);
+			if (err)
+				break;
+		}
+	}
+	mutex_unlock(mutex);
+
+	return err;
+}
+
 #else /* CONFIG_ARCH_HAS_EXECMEM_ROX */
 static void *execmem_cache_alloc(struct execmem_range *range, size_t size)
 {
-- 
2.45.2

