lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ae927280-fe30-4773-b40d-dbb90e98c74c@oracle.com>
Date: Tue, 2 Dec 2025 20:14:04 -0800
From: jane.chu@...cle.com
To: Jiaqi Yan <jiaqiyan@...gle.com>, nao.horiguchi@...il.com,
        linmiaohe@...wei.com, william.roche@...cle.com, harry.yoo@...cle.com
Cc: tony.luck@...el.com, wangkefeng.wang@...wei.com, willy@...radead.org,
        akpm@...ux-foundation.org, osalvador@...e.de, rientjes@...gle.com,
        duenwen@...gle.com, jthoughton@...gle.com, jgg@...dia.com,
        ankita@...dia.com, peterx@...hat.com, sidhartha.kumar@...cle.com,
        ziy@...dia.com, david@...hat.com, dave.hansen@...ux.intel.com,
        muchun.song@...ux.dev, linux-mm@...ck.org,
        linux-kernel@...r.kernel.org, linux-fsdevel@...r.kernel.org
Subject: Re: [PATCH v2 2/3] selftests/mm: test userspace MFR for HugeTLB
 hugepage


On 11/15/2025 5:32 PM, Jiaqi Yan wrote:
> Test the userspace memory failure recovery (MFR) policy for HugeTLB
> 1G or 2M hugepage case:
> 1. Create a memfd backed by HugeTLB with MFD_MF_KEEP_UE_MAPPED set.
> 2. Allocate and map 4 hugepages to the process.
> 3. Create sub-threads to MADV_HWPOISON inner addresses of one hugepage.
> 4. Check if the process gets correct SIGBUS for each poisoned raw page.
> 5. Check if all memory is still accessible and its content is still valid.
> 6. Check if the poisoned hugepage is dealt with after memfd released.
> 
> Signed-off-by: Jiaqi Yan <jiaqiyan@...gle.com>
> ---
>   tools/testing/selftests/mm/.gitignore    |   1 +
>   tools/testing/selftests/mm/Makefile      |   1 +
>   tools/testing/selftests/mm/hugetlb-mfr.c | 327 +++++++++++++++++++++++
>   3 files changed, 329 insertions(+)
>   create mode 100644 tools/testing/selftests/mm/hugetlb-mfr.c
> 

Test looks fine.
Reviewed-by: Jane Chu <jane.chu@...cle.com>


> diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore
> index c2a8586e51a1f..11664d20935db 100644
> --- a/tools/testing/selftests/mm/.gitignore
> +++ b/tools/testing/selftests/mm/.gitignore
> @@ -5,6 +5,7 @@ hugepage-mremap
>   hugepage-shm
>   hugepage-vmemmap
>   hugetlb-madvise
> +hugetlb-mfr
>   hugetlb-read-hwpoison
>   hugetlb-soft-offline
>   khugepaged
> diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile
> index eaf9312097f7b..de3bdcf7914cd 100644
> --- a/tools/testing/selftests/mm/Makefile
> +++ b/tools/testing/selftests/mm/Makefile
> @@ -63,6 +63,7 @@ TEST_GEN_FILES += hmm-tests
>   TEST_GEN_FILES += hugetlb-madvise
>   TEST_GEN_FILES += hugetlb-read-hwpoison
>   TEST_GEN_FILES += hugetlb-soft-offline
> +TEST_GEN_FILES += hugetlb-mfr
>   TEST_GEN_FILES += hugepage-mmap
>   TEST_GEN_FILES += hugepage-mremap
>   TEST_GEN_FILES += hugepage-shm
> diff --git a/tools/testing/selftests/mm/hugetlb-mfr.c b/tools/testing/selftests/mm/hugetlb-mfr.c
> new file mode 100644
> index 0000000000000..30939b2194188
> --- /dev/null
> +++ b/tools/testing/selftests/mm/hugetlb-mfr.c
> @@ -0,0 +1,327 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +/*
> + * Test the userspace memory failure recovery (MFR) policy for HugeTLB
> + * hugepage case:
> + * 1. Create a memfd backed by HugeTLB with the MFD_MF_KEEP_UE_MAPPED bit set.
> + * 2. Allocate and map 4 hugepages.
> + * 3. Create sub-threads to MADV_HWPOISON inner addresses of one hugepage.
> + * 4. Check if each sub-thread gets the correct SIGBUS for the poisoned raw page.
> + * 5. Check if all memory is still accessible and its content is still valid.
> + * 6. Check if the poisoned hugepage is dealt with after the memfd is released.
> + *
> + * Two ways to run the test:
> + *   ./hugetlb-mfr 2M
> + * or
> + *   ./hugetlb-mfr 1G
> + * assuming /sys/kernel/mm/hugepages/hugepages-${xxx}kB/nr_hugepages >= 4
> + */
> +
> +#define _GNU_SOURCE
> +#include <assert.h>
> +#include <errno.h>
> +#include <numaif.h>
> +#include <numa.h>
> +#include <pthread.h>
> +#include <signal.h>
> +#include <stdbool.h>
> +#include <stdlib.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <unistd.h>
> +
> +#include <linux/magic.h>
> +#include <linux/memfd.h>
> +#include <sys/mman.h>
> +#include <sys/prctl.h>
> +#include <sys/statfs.h>
> +#include <sys/types.h>
> +
> +#include "../kselftest.h"
> +#include "vm_util.h"
> +
> +#define EPREFIX			" !!! "
> +#define BYTE_LENTH_IN_1G	0x40000000UL
> +#define BYTE_LENTH_IN_2M	0x200000UL
> +#define HUGETLB_1GB_STR		"1G"
> +#define HUGETLB_2MB_STR		"2M"
> +#define HUGETLB_FILL		0xab
> +
> +static const unsigned long offsets_1g[] = {0x200000, 0x400000, 0x800000};
> +static const unsigned long offsets_2m[] = {0x020000, 0x040000, 0x080000};
> +
> +static void *sigbus_addr;
> +static int sigbus_addr_lsb;
> +static bool expecting_sigbus;
> +static bool got_sigbus;
> +static bool was_mceerr;
> +
> +/*
> + * Create a HugeTLB-backed memfd with MFD_MF_KEEP_UE_MAPPED set.
> + *
> + * @file_stat: filled in with the fstatfs() result of the new memfd.
> + * @hugepage_size: hugepage size in bytes; BYTE_LENTH_IN_2M selects
> + *                 MFD_HUGE_2MB, any other value selects MFD_HUGE_1GB.
> + *
> + * Returns the memfd. Any failure, including the file not being on hugetlbfs
> + * or reporting a different block size than @hugepage_size, exits the test.
> + */
> +static int create_hugetlbfs_file(struct statfs *file_stat,
> +				 unsigned long hugepage_size)
> +{
> +	int fd;
> +	int flags = MFD_HUGETLB | MFD_MF_KEEP_UE_MAPPED;
> +
> +	if (hugepage_size == BYTE_LENTH_IN_2M)
> +		flags |= MFD_HUGE_2MB;
> +	else
> +		flags |= MFD_HUGE_1GB;
> +
> +	fd = memfd_create("hugetlb_tmp", flags);
> +	if (fd < 0)
> +		ksft_exit_fail_perror("Failed to memfd_create");
> +
> +	memset(file_stat, 0, sizeof(*file_stat));
> +	/*
> +	 * The failure paths below exit the process, which releases fd. Calling
> +	 * close() first could overwrite the errno that is about to be reported.
> +	 */
> +	if (fstatfs(fd, file_stat))
> +		ksft_exit_fail_perror("Failed to fstatfs");
> +	if (file_stat->f_type != HUGETLBFS_MAGIC)
> +		ksft_exit_fail_msg("Not hugetlbfs file\n");
> +
> +	ksft_print_msg("Created hugetlb_tmp file\n");
> +	/* f_bsize is a signed type; convert explicitly for %#lx and the compare. */
> +	ksft_print_msg("hugepagesize=%#lx\n",
> +		       (unsigned long)file_stat->f_bsize);
> +	if ((unsigned long)file_stat->f_bsize != hugepage_size)
> +		ksft_exit_fail_msg("Hugepage size is not %#lx\n", hugepage_size);
> +
> +	return fd;
> +}
> +
> +/*
> + * SIGBUS handler. Records what was delivered so the test can verify it once
> + * the "do_hwpoison" thread has been joined:
> + *   got_sigbus      - a SIGBUS arrived,
> + *   was_mceerr      - si_code was BUS_MCEERR_AO or BUS_MCEERR_AR,
> + *   sigbus_addr     - the reported si_addr,
> + *   sigbus_addr_lsb - the reported si_addr_lsb.
> + * A SIGBUS that arrives while expecting_sigbus is false exits the test.
> + */
> +static void sigbus_handler(int signo, siginfo_t *info, void *context)
> +{
> +	if (!expecting_sigbus)
> +		ksft_exit_fail_msg("unexpected sigbus with addr=%p\n",
> +				   info->si_addr);
> +
> +	got_sigbus = true;
> +	was_mceerr = (info->si_code == BUS_MCEERR_AO ||
> +		      info->si_code == BUS_MCEERR_AR);
> +	sigbus_addr = info->si_addr;
> +	sigbus_addr_lsb = info->si_addr_lsb;
> +}
> +
> +/*
> + * Thread entry point: MADV_HWPOISON getpagesize() bytes at @hwpoison_addr.
> + * A failing madvise() exits the test. The thread result is always NULL.
> + */
> +static void *do_hwpoison(void *hwpoison_addr)
> +{
> +	int len = getpagesize();
> +	int rc;
> +
> +	ksft_print_msg("MADV_HWPOISON hwpoison_addr=%p, len=%d\n",
> +		       hwpoison_addr, len);
> +
> +	rc = madvise(hwpoison_addr, len, MADV_HWPOISON);
> +	if (rc < 0)
> +		ksft_exit_fail_perror("Failed to MADV_HWPOISON");
> +
> +	/* Returning from the start routine ends the thread with this value. */
> +	return NULL;
> +}
> +
> +/*
> + * Poison several raw pages at fixed offsets from @start_addr, one at a time,
> + * and verify the SIGBUS reported for each of them.
> + *
> + * @start_addr: start of the HugeTLB mapping.
> + * @hugepage_size: hugepage size in bytes; selects offsets_2m for
> + *                 BYTE_LENTH_IN_2M and offsets_1g otherwise.
> + *
> + * For every offset a thread running do_hwpoison() is created and joined, then
> + * the state recorded by sigbus_handler() is checked. Any mismatch exits the
> + * test right away, so a failure can never be followed by a "pass" result
> + * reported by the caller.
> + */
> +static void test_hwpoison_multiple_pages(unsigned char *start_addr,
> +					 unsigned long hugepage_size)
> +{
> +	pthread_t pthread;
> +	int ret;
> +	unsigned char *hwpoison_addr;
> +	const unsigned long *offsets;
> +	size_t offsets_count;
> +	size_t i;
> +
> +	if (hugepage_size == BYTE_LENTH_IN_2M) {
> +		offsets = offsets_2m;
> +		offsets_count = ARRAY_SIZE(offsets_2m);
> +	} else {
> +		offsets = offsets_1g;
> +		offsets_count = ARRAY_SIZE(offsets_1g);
> +	}
> +
> +	for (i = 0; i < offsets_count; ++i) {
> +		/* Reset the handler state so stale values cannot pass the checks. */
> +		sigbus_addr = (void *)0xBADBADBAD;
> +		sigbus_addr_lsb = 0;
> +		was_mceerr = false;
> +		got_sigbus = false;
> +		expecting_sigbus = true;
> +		hwpoison_addr = start_addr + offsets[i];
> +
> +		/* pthread functions return the error number; errno is not set. */
> +		ret = pthread_create(&pthread, NULL, &do_hwpoison, hwpoison_addr);
> +		if (ret)
> +			ksft_exit_fail_msg("Failed to create hwpoison thread: %s\n",
> +					   strerror(ret));
> +
> +		ksft_print_msg("Created thread to hwpoison and access hwpoison_addr=%p\n",
> +			       hwpoison_addr);
> +
> +		ret = pthread_join(pthread, NULL);
> +		if (ret)
> +			ksft_exit_fail_msg("Failed to join hwpoison thread: %s\n",
> +					   strerror(ret));
> +
> +		if (!got_sigbus)
> +			ksft_exit_fail_msg("Didn't get a SIGBUS\n");
> +		if (!was_mceerr)
> +			ksft_exit_fail_msg("Didn't get a BUS_MCEERR_A(R|O)\n");
> +		if (sigbus_addr != hwpoison_addr)
> +			ksft_exit_fail_msg("Incorrect address: got=%p, expected=%p\n",
> +					   sigbus_addr, hwpoison_addr);
> +		if (sigbus_addr_lsb != pshift())
> +			ksft_exit_fail_msg("Incorrect address LSB: got=%d, expected=%d\n",
> +					   sigbus_addr_lsb, pshift());
> +
> +		ksft_print_msg("Received expected and correct SIGBUS\n");
> +	}
> +}
> +
> +/*
> + * Read /sys/kernel/mm/hugepages/hugepages-<size>kB/nr_hugepages.
> + *
> + * @hugepage_size: hugepage size in kB, as used in the sysfs directory name.
> + * @nr_hugepages: set to the parsed value on success; untouched on failure.
> + *
> + * Returns 0 on success, -1 if the file cannot be opened, read or parsed.
> + */
> +static int read_nr_hugepages(unsigned long hugepage_size,
> +			     unsigned long *nr_hugepages)
> +{
> +	char buffer[256] = {0};
> +	char path[256] = {0};
> +	unsigned long value;
> +	char *end;
> +	FILE *file;
> +
> +	snprintf(path, sizeof(path),
> +		 "/sys/kernel/mm/hugepages/hugepages-%lukB/nr_hugepages",
> +		 hugepage_size);
> +
> +	/* Read the sysfs file directly instead of spawning a shell for "cat". */
> +	file = fopen(path, "r");
> +	if (file == NULL) {
> +		ksft_perror(EPREFIX "failed to open nr_hugepages");
> +		return -1;
> +	}
> +
> +	if (!fgets(buffer, sizeof(buffer), file)) {
> +		ksft_perror(EPREFIX "failed to read nr_hugepages");
> +		fclose(file);
> +		return -1;
> +	}
> +	fclose(file);
> +
> +	/* Reject empty or out-of-range input rather than silently reading 0. */
> +	errno = 0;
> +	value = strtoul(buffer, &end, 10);
> +	if (end == buffer || errno == ERANGE) {
> +		ksft_print_msg(EPREFIX "malformed nr_hugepages: %s\n", buffer);
> +		return -1;
> +	}
> +
> +	*nr_hugepages = value;
> +	return 0;
> +}
> +
> +/*
> + * Main thread that drives the test.
> + *
> + * @fd: HugeTLB memfd to test; truncated to 0 and closed by this function.
> + * @hugepage_size: hugepage size in bytes.
> + *
> + * Maps 4 hugepages, fills them with HUGETLB_FILL, poisons raw pages through
> + * test_hwpoison_multiple_pages(), then checks that the content is intact,
> + * that nr_hugepages is unchanged while the memfd is alive, and that it has
> + * dropped by one once the memfd has been released.
> + *
> + * Every failed check exits the test immediately, so the single "pass" result
> + * at the end is only reported when all checks succeeded.
> + */
> +static void test_main(int fd, unsigned long hugepage_size)
> +{
> +	unsigned char *map, *iter;
> +	struct sigaction new;
> +	const unsigned long hugepagesize_kb = hugepage_size / 1024;
> +	unsigned long nr_hugepages_before = 0;
> +	unsigned long nr_hugepages_after = 0;
> +	unsigned long nodemask = 1UL << 0;
> +	unsigned long len = hugepage_size * 4;
> +
> +	if (read_nr_hugepages(hugepagesize_kb, &nr_hugepages_before) != 0) {
> +		close(fd);
> +		ksft_exit_fail_msg("Failed to read nr_hugepages\n");
> +	}
> +	ksft_print_msg("NR hugepages before MADV_HWPOISON is %lu\n", nr_hugepages_before);
> +
> +	if (ftruncate(fd, len) < 0)
> +		ksft_exit_fail_perror("Failed to ftruncate");
> +
> +	ksft_print_msg("Allocated %#lx bytes to HugeTLB file\n", len);
> +
> +	map = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
> +	if (map == MAP_FAILED)
> +		ksft_exit_fail_perror("Failed to mmap");
> +
> +	ksft_print_msg("Created HugeTLB mapping: %p\n", map);
> +
> +	/* nodemask has only bit 0 set, i.e. the mapping is bound to node 0. */
> +	if (mbind(map, len, MPOL_BIND, &nodemask, sizeof(nodemask) * 8,
> +		  MPOL_MF_STRICT | MPOL_MF_MOVE) < 0)
> +		ksft_exit_fail_perror("Failed to bind to node");
> +
> +	memset(map, HUGETLB_FILL, len);
> +	ksft_print_msg("Memset every byte to 0xab\n");
> +
> +	/* Start from a fully defined sigaction: no stray flags, empty mask. */
> +	memset(&new, 0, sizeof(new));
> +	sigemptyset(&new.sa_mask);
> +	new.sa_sigaction = &sigbus_handler;
> +	new.sa_flags = SA_SIGINFO;
> +	if (sigaction(SIGBUS, &new, NULL) < 0)
> +		ksft_exit_fail_perror("Failed to setup SIGBUS handler");
> +
> +	ksft_print_msg("Setup SIGBUS handler successfully\n");
> +
> +	test_hwpoison_multiple_pages(map, hugepage_size);
> +
> +	/*
> +	 * Since MADV_HWPOISON doesn't corrupt the memory in hardware, and
> +	 * MFD_MF_KEEP_UE_MAPPED keeps the hugepage mapped, every byte should
> +	 * remain accessible and hold original data.
> +	 */
> +	expecting_sigbus = false;
> +	for (iter = map; iter < map + len; ++iter) {
> +		if (*iter != HUGETLB_FILL)
> +			ksft_exit_fail_msg("Memory content corrupted at addr=%p: got=%#x, expected=%#x\n",
> +					   iter, *iter, HUGETLB_FILL);
> +	}
> +	ksft_print_msg("Memory content all valid\n");
> +
> +	if (read_nr_hugepages(hugepagesize_kb, &nr_hugepages_after) != 0) {
> +		close(fd);
> +		ksft_exit_fail_msg("Failed to read nr_hugepages\n");
> +	}
> +
> +	/*
> +	 * After MADV_HWPOISON, hugepage should still be in HugeTLB pool.
> +	 */
> +	ksft_print_msg("NR hugepages after MADV_HWPOISON is %lu\n", nr_hugepages_after);
> +	if (nr_hugepages_before != nr_hugepages_after)
> +		ksft_exit_fail_msg("NR hugepages changed from %lu to %lu after MADV_HWPOISON\n",
> +				   nr_hugepages_before, nr_hugepages_after);
> +
> +	/* End of the lifetime of the created HugeTLB memfd. */
> +	if (ftruncate(fd, 0) < 0)
> +		ksft_exit_fail_perror("Failed to ftruncate to 0");
> +	munmap(map, len);
> +	close(fd);
> +
> +	/*
> +	 * After freed by userspace, MADV_HWPOISON-ed hugepage should be
> +	 * dissolved into raw pages and removed from HugeTLB pool.
> +	 * fd is already closed at this point, so it must not be closed again.
> +	 */
> +	if (read_nr_hugepages(hugepagesize_kb, &nr_hugepages_after) != 0)
> +		ksft_exit_fail_msg("Failed to read nr_hugepages\n");
> +	ksft_print_msg("NR hugepages after closure is %lu\n", nr_hugepages_after);
> +	if (nr_hugepages_before != nr_hugepages_after + 1)
> +		ksft_exit_fail_msg("NR hugepages is not reduced after memfd closure\n");
> +
> +	ksft_test_result_pass("All done\n");
> +}
> +
> +/*
> + * Translate the command-line size argument into a hugepage size in bytes.
> + *
> + * The comparison ignores case and only looks at the first characters of
> + * @argv, so e.g. "2m" is accepted as well as "2M".
> + *
> + * Returns BYTE_LENTH_IN_1G or BYTE_LENTH_IN_2M. An unrecognized argument
> + * exits the test.
> + */
> +static unsigned long parse_hugepage_size(const char *argv)
> +{
> +	if (strncasecmp(argv, HUGETLB_1GB_STR, strlen(HUGETLB_1GB_STR)) == 0)
> +		return BYTE_LENTH_IN_1G;
> +
> +	if (strncasecmp(argv, HUGETLB_2MB_STR, strlen(HUGETLB_2MB_STR)) == 0)
> +		return BYTE_LENTH_IN_2M;
> +
> +	/*
> +	 * Bail out explicitly: assert() compiles to nothing under NDEBUG, which
> +	 * would let control fall off the end of this non-void function.
> +	 */
> +	ksft_exit_fail_msg("Please provide valid hugepage_size: 1G or 2M\n");
> +}
> +
> +/*
> + * Entry point. Expects exactly one argument, the hugepage size ("1G" or
> + * "2M"), creates the HugeTLB memfd and runs the single test case.
> + */
> +int main(int argc, char **argv)
> +{
> +	int fd;
> +	struct statfs file_stat;
> +	unsigned long hugepage_size;
> +
> +	ksft_print_header();
> +
> +	/*
> +	 * Report a usage error through kselftest. Returning -EINVAL from main()
> +	 * would be truncated to the unrelated exit status 234.
> +	 */
> +	if (argc != 2)
> +		ksft_exit_fail_msg("Usage: %s <hugepage_size=1G|2M>\n", argv[0]);
> +
> +	ksft_set_plan(1);
> +
> +	hugepage_size = parse_hugepage_size(argv[1]);
> +	fd = create_hugetlbfs_file(&file_stat, hugepage_size);
> +	test_main(fd, hugepage_size);
> +
> +	ksft_finished();
> +}


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ