lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20240725074155.20565-1-cuiyunhui@bytedance.com>
Date: Thu, 25 Jul 2024 15:41:55 +0800
From: Yunhui Cui <cuiyunhui@...edance.com>
To: punit.agrawal@...edance.com,
	paul.walmsley@...ive.com,
	palmer@...belt.com,
	aou@...s.berkeley.edu,
	alexghiti@...osinc.com,
	cuiyunhui@...edance.com,
	samitolvanen@...gle.com,
	gregkh@...uxfoundation.org,
	suagrfillet@...il.com,
	akpm@...ux-foundation.org,
	shikemeng@...weicloud.com,
	willy@...radead.org,
	charlie@...osinc.com,
	xiao.w.wang@...el.com,
	conor.dooley@...rochip.com,
	chenjiahao16@...wei.com,
	guoren@...nel.org,
	jszhang@...nel.org,
	ajones@...tanamicro.com,
	bhe@...hat.com,
	vishal.moola@...il.com,
	ndesaulniers@...gle.com,
	linux-riscv@...ts.infradead.org,
	linux-kernel@...r.kernel.org
Subject: [PATCH v2] riscv: mm: add paging_check() before paging_init()

When establishing a linear mapping, the virtual address is obtained
through __va(). If the physical address is too large, such as 1TB, then
the virtual address will overflow in the address space of sv39.
The log is as follows:
[    0.000000] Unable to handle kernel paging request at virtual address 000000d97fdf7ad8
[    0.000000] [000000d97fdf7ad8] pgd=000000407ff7e801, p4d=000000407ff7e801, pud=000000407ff7e801
[    0.000000] Unable to handle kernel paging request at virtual address 000000d97fdfaff0
[    0.000000] [000000d97fdfaff0] pgd=000000407ff7e801, p4d=000000407ff7e801, pud=000000407ff7e801
...
[    0.000000] Insufficient stack space to handle exception!
[    0.000000] Task stack:     [0xffffffff81400000..0xffffffff81404000]
[    0.000000] Overflow stack: [0xffffffff80c67370..0xffffffff80c68370]
[    0.000000] CPU: 0 PID: 0 Comm: swapper Tainted: G        W          6.6.3-00133-g60497fad461d-dirty #71
[    0.000000] epc : die_kernel_fault+0x158/0x1c8
[    0.000000]  ra : die_kernel_fault+0x12a/0x1c8
[    0.000000] epc : ffffffff808cde36 ra : ffffffff808cde08 sp : ffffffff813fff80
[    0.000000]  gp : ffffffff815a1678 tp : 0000000000000000 t0 : 0000003130386537
[    0.000000]  t1 : 0000000000000031 t2 : 6537666637303430 s0 : ffffffff813fffc0
[    0.000000]  s1 : ffffffff815b0b28 a0 : 0000000000000016 a1 : ffffffff81495298
[    0.000000]  a2 : 0000000000000010 a3 : ffffffff81495298 a4 : 00000000000001fe
[    0.000000]  a5 : 000000d97fdfa000 a6 : ffffffff814250d0 a7 : 0000000000000030
[    0.000000]  s2 : 000000d97fdfaff0 s3 : ffffffff81400040 s4 : 000000d97fdfaff0
[    0.000000]  s5 : ffffffff815a0ed0 s6 : 0000000000000000 s7 : 000000008f604390
[    0.000000]  s8 : 0000000000000000 s9 : ffffffffffffffff s10: 0000000000000000
[    0.000000]  s11: 0000000000000000 t3 : ffffffff815baa9b t4 : ffffffff815baa9b
[    0.000000]  t5 : ffffffff815baa88 t6 : ffffffff813ffda8
[    0.000000] status: 0000000200000100 badaddr: 000000d97fdfaff0 cause: 000000000000000d
[    0.000000] Kernel panic - not syncing: Kernel stack overflow
[    0.000000] CPU: 0 PID: 0 Comm: swapper Tainted: G        W          6.6.3-00133-g60497fad461d-dirty #71
[    0.000000] Call Trace:
[    0.000000] [<ffffffff800066bc>] dump_backtrace+0x28/0x30
[    0.000000] [<ffffffff808cdac8>] show_stack+0x38/0x44
[    0.000000] [<ffffffff808d9d40>] dump_stack_lvl+0x44/0x5c
[    0.000000] [<ffffffff808d9d70>] dump_stack+0x18/0x20
[    0.000000] [<ffffffff808cdfb6>] panic+0x110/0x2f2
[    0.000000] [<ffffffff80006532>] walk_stackframe+0x0/0x120
[    0.000000] [<ffffffff808cde08>] die_kernel_fault+0x12a/0x1c8
[    0.000000] ---[ end Kernel panic - not syncing: Kernel stack overflow ]---

In other words, the maximum value of the physical address needs to meet
Documentation/riscv/vm-layout.rst to ensure that there is no overflow.
For sv48/57, the actual virtual address space is huge, so this problem
is generally not triggered, but it is also checked in the code.

We give a warning for the overflowed physical address region and reverve it
so that the kernel can bringup successfully.

Signed-off-by: Yunhui Cui <cuiyunhui@...edance.com>
---
 arch/riscv/include/asm/page.h    |  9 +++++++++
 arch/riscv/include/asm/pgtable.h |  1 +
 arch/riscv/kernel/setup.c        |  1 +
 arch/riscv/mm/init.c             | 30 ++++++++++++++++++++++++++++++
 4 files changed, 41 insertions(+)

diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index 57e887bfa34c..f6c0f6e14ecb 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -38,6 +38,15 @@
  */
 #define PAGE_OFFSET_L4		_AC(0xffffaf8000000000, UL)
 #define PAGE_OFFSET_L3		_AC(0xffffffd800000000, UL)
+
+/*
+ * See vm-layout.rst, the size of L3 direct mapping of all physical
+ * memory 124GB, L4 is 64TB, L5 is 32PB.
+ */
+#define MAX_PFN_MEM_ADDR_L5	(0x80000000000000ULL)
+#define MAX_PFN_MEM_ADDR_L4	(0x400000000000ULL)
+#define MAX_PFN_MEM_ADDR_L3	(0x1F00000000ULL)
+
 #else
 #define PAGE_OFFSET		_AC(CONFIG_PAGE_OFFSET, UL)
 #endif /* CONFIG_64BIT */
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index c8e8867c42f6..e4835de5a743 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -915,6 +915,7 @@ extern uintptr_t _dtb_early_pa;
 #endif /* CONFIG_XIP_KERNEL */
 extern u64 satp_mode;
 
+void paging_check(void);
 void paging_init(void);
 void misc_mem_init(void);
 
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 2a79d4ed2660..366918578544 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -273,6 +273,7 @@ void __init setup_arch(char **cmdline_p)
 	parse_early_param();
 
 	efi_init();
+	paging_check();
 	paging_init();
 
 	/* Parse the ACPI tables for possible boot-time configuration */
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index b0cc28f7595f..aa25dcf8a0ff 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -1482,6 +1482,36 @@ static void __init reserve_crashkernel(void)
 	crashk_res.end = crash_base + crash_size - 1;
 }
 
+static void __init phymem_addr_overflow(void)
+{
+	phys_addr_t end = memblock_end_of_DRAM();
+
+	if (pgtable_l5_enabled) {
+		if (end > MAX_PFN_MEM_ADDR_L5) {
+			memblock_reserve(MAX_PFN_MEM_ADDR_L5, end - MAX_PFN_MEM_ADDR_L5);
+			WARN(true, "Phymem addr 0x%llx overflowed, reserve [0x%llx-0x%llx] directly.",
+			     end, MAX_PFN_MEM_ADDR_L5, end);
+		}
+	}
+	if (pgtable_l4_enabled) {
+		if (end > MAX_PFN_MEM_ADDR_L4) {
+			memblock_reserve(MAX_PFN_MEM_ADDR_L4, end - MAX_PFN_MEM_ADDR_L4);
+			WARN(true, "Phymem addr 0x%llx overflowed, reserve [0x%llx-0x%llx] directly.",
+			     end, MAX_PFN_MEM_ADDR_L4, end);
+		}
+	}
+	if (end > MAX_PFN_MEM_ADDR_L3) {
+		memblock_reserve(MAX_PFN_MEM_ADDR_L3, end - MAX_PFN_MEM_ADDR_L3);
+		WARN(true, "Phymem addr 0x%llx overflowed, reserve [0x%llx-0x%llx] directly.",
+		     end, MAX_PFN_MEM_ADDR_L3, end);
+	}
+}
+
+void __init paging_check(void)
+{
+	phymem_addr_overflow();
+}
+
 void __init paging_init(void)
 {
 	setup_bootmem();
-- 
2.39.2


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ