lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20260122150116.3409572-1-tomas.mudrunka@gmail.com>
Date: Thu, 22 Jan 2026 16:01:14 +0100
From: Tomas Mudrunka <tomas.mudrunka@...il.com>
To: Mike Rapoport <rppt@...nel.org>,
	Andrew Morton <akpm@...ux-foundation.org>
Cc: Tomas Mudrunka <tomas.mudrunka@...il.com>,
	linux-mm@...ck.org,
	linux-kernel@...r.kernel.org
Subject: [PATCH] MemTest: Test RAM address bus for stuck bits

Add a new test to early_memtest() that runs every time memtest is enabled.
For each bit in phys_addr_t, it tries to find a pair of addresses in free
memory that differ only in the tested bit. Each pair is then tested for
aliasing/mirroring. This should detect a loose connection or a short on
the RAM address bus. Such a problem would traditionally go unnoticed by
the normal early memtest. The result is printed and exported via
EarlyMemtestBad in /proc/meminfo. If no free address pair is found for a
given bit, this is printed and the bit is ignored.

Please note that this mostly targets systems with a fully parallel address
bus, such as DDR3 and DDR4. Systems with DDR5 and similar are unlikely to
boot at all with such a fault, since they send multiple address bits
serially over the address pins.

Speaking from a heavy-industry background: such a hardware failure can be
caused by vibrations and go undetected for quite a while, especially on
the higher bits. This test is therefore very useful for early-warning
self-tests and quality-control use cases.

Signed-off-by: Tomas Mudrunka <tomas.mudrunka@...il.com>
---
 mm/memtest.c | 118 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 118 insertions(+)

diff --git a/mm/memtest.c b/mm/memtest.c
index c2c609c39..9b63f7917 100644
--- a/mm/memtest.c
+++ b/mm/memtest.c
@@ -4,10 +4,15 @@
 #include <linux/init.h>
 #include <linux/memblock.h>
 #include <linux/seq_file.h>
+#include <linux/bitops.h>
 
 static bool early_memtest_done;
 static phys_addr_t early_memtest_bad_size;
 
+static u64 addr_bus_failed_bits;
+static u64 addr_bus_skipped_bits;
+static bool addr_bus_test_done;
+
 static u64 patterns[] __initdata = {
 	/* The first entry has to be 0 to leave memtest with zeroed memory */
 	0,
@@ -104,6 +109,114 @@ static int __init parse_memtest(char *arg)
 
 early_param("memtest", parse_memtest);
 
+static bool __init is_address_free(phys_addr_t addr)
+{
+	return memblock_is_memory(addr) && !memblock_is_reserved(addr);
+}
+
+static bool __init find_test_pair(unsigned int bit, phys_addr_t *addr1, phys_addr_t *addr2)
+{
+	u64 i;
+	phys_addr_t this_start, this_end;
+	phys_addr_t candidate;
+	const phys_addr_t step = PAGE_SIZE;
+
+	for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &this_start,
+				&this_end, NULL) {
+		candidate = this_start;
+		while (candidate < this_end) {
+			phys_addr_t test_addr2;
+
+			/* Calculate address differing only in the specified bit */
+			test_addr2 = candidate ^ BIT_ULL(bit);
+
+			/* Check if both addresses are free */
+			if (is_address_free(candidate) && is_address_free(test_addr2)) {
+				*addr1 = candidate;
+				*addr2 = test_addr2;
+				return true;
+			}
+
+			/* Step to next candidate */
+			candidate += step;
+		}
+	}
+
+	return false;
+}
+
+static void __init test_address_bit(unsigned int bit)
+{
+	phys_addr_t addr1, addr2;
+	u8 *vaddr1, *vaddr2;
+	u8 val1, val2;
+
+	if (!find_test_pair(bit, &addr1, &addr2)) {
+		addr_bus_skipped_bits |= BIT_ULL(bit);
+		return;
+	}
+
+	vaddr1 = (u8 *)__va(addr1);
+	vaddr2 = (u8 *)__va(addr2);
+
+	/* Write different patterns to both addresses (1 byte to avoid overlap) */
+	WRITE_ONCE(*vaddr1, 0xAA);
+	WRITE_ONCE(*vaddr2, 0x55);
+
+	/* Read back and verify we got what we wrote */
+	val1 = READ_ONCE(*vaddr1);
+	val2 = READ_ONCE(*vaddr2);
+
+	/* Check for mirroring: if either address doesn't read its expected value, bit is stuck */
+	if (val1 != 0xAA || val2 != 0x55)
+		addr_bus_failed_bits |= BIT_ULL(bit);
+
+	/* Restore memory to zero */
+	WRITE_ONCE(*vaddr1, 0);
+	WRITE_ONCE(*vaddr2, 0);
+}
+
+static void __init test_address_bus(phys_addr_t start, phys_addr_t end)
+{
+	unsigned int addr_bits = sizeof(phys_addr_t) * 8;
+	unsigned int bit;
+	unsigned long failed_count, skipped_count, ok_count;
+	char result_str[128];
+	int pos = 0;
+
+	addr_bus_failed_bits = 0;
+	addr_bus_skipped_bits = 0;
+
+	for (bit = 0; bit < addr_bits; bit++) {
+		test_address_bit(bit);
+
+		/* Build compact result string */
+		if (bit > 0 && (bit % 8) == 0)
+			result_str[pos++] = ' ';
+		if (addr_bus_failed_bits & BIT_ULL(bit))
+			result_str[pos++] = 'F';
+		else if (addr_bus_skipped_bits & BIT_ULL(bit))
+			result_str[pos++] = '_';
+		else
+			result_str[pos++] = 'O';
+	}
+	result_str[pos] = '\0';
+
+	failed_count = hweight64(addr_bus_failed_bits);
+	skipped_count = hweight64(addr_bus_skipped_bits);
+	ok_count = addr_bits - failed_count - skipped_count;
+
+	pr_info("Address bus test: %s\n", result_str);
+	pr_info("Address bus: %lu OK, %lu FAIL, %lu UNKNOWN\n",
+		ok_count, failed_count, skipped_count);
+	if (addr_bus_failed_bits)
+		pr_info("Address bus failed bits: 0x%016llx\n", addr_bus_failed_bits);
+	if (addr_bus_skipped_bits)
+		pr_info("Address bus skipped bits: 0x%016llx\n", addr_bus_skipped_bits);
+
+	addr_bus_test_done = true;
+}
+
 void __init early_memtest(phys_addr_t start, phys_addr_t end)
 {
 	unsigned int i;
@@ -117,6 +230,8 @@ void __init early_memtest(phys_addr_t start, phys_addr_t end)
 		idx = i % ARRAY_SIZE(patterns);
 		do_one_pass(patterns[idx], start, end);
 	}
+
+	test_address_bus(start, end);
 }
 
 void memtest_report_meminfo(struct seq_file *m)
@@ -132,6 +247,9 @@ void memtest_report_meminfo(struct seq_file *m)
 	early_memtest_bad_size_kb = early_memtest_bad_size >> 10;
 	if (early_memtest_bad_size && !early_memtest_bad_size_kb)
 		early_memtest_bad_size_kb = 1;
+	/* If address bus test found failures, ensure we don't report 0 */
+	if (addr_bus_test_done && addr_bus_failed_bits && !early_memtest_bad_size_kb)
+		early_memtest_bad_size_kb = 1;
 	/* When 0 is reported, it means there actually was a successful test */
 	seq_printf(m, "EarlyMemtestBad:   %5lu kB\n", early_memtest_bad_size_kb);
 }
-- 
2.52.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ