lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <81f9a104-6d7d-4552-851e-8690d4a1b723@lucifer.local>
Date: Thu, 10 Jul 2025 17:57:23 +0100
From: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
To: wang lian <lianux.mm@...il.com>
Cc: akpm@...ux-foundation.org, ziy@...dia.com, david@...hat.com, sj@...nel.org,
        linux-kernel@...r.kernel.org, linux-mm@...ck.org,
        linux-kselftest@...r.kernel.org, shuah@...nel.org, broonie@...nel.org,
        Liam.Howlett@...cle.com, brauner@...nel.org, gkwang@...x-info.com,
        jannh@...gle.com, p1ucky0923@...il.com, ryncsn@...il.com,
        vbabka@...e.cz, zijing.zhang@...ton.me
Subject: Re: [PATCH v4] selftests/mm: add process_madvise() tests

This series doesn't apply against mm-new currently, seems some conflict on
vm_util.c. So unable to test.

Am reviewing based on code, will have to double-check functionality against
respin.

On Thu, Jul 10, 2025 at 07:22:49PM +0800, wang lian wrote:
> Add tests for process_madvise(), focusing on verifying behavior under
> various conditions including valid usage and error cases.
>
> Signed-off-by: wang lian <lianux.mm@...il.com>
> Suggested-by: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
> Suggested-by: David Hildenbrand <david@...hat.com>
> Suggested-by: Zi Yan <ziy@...dia.com>
> Acked-by: SeongJae Park <sj@...nel.org>
> ---
> Changelog v4:
> - Refine resource cleanup logic in test teardown to be more robust.
> - Improve remote_collapse test to correctly handle different THP
>   (Transparent Huge Page) policies ('always', 'madvise', 'never'),
>   including handling race conditions with khugepaged.
> - Resolve build errors
>
> Changelog v3: https://lore.kernel.org/lkml/20250703044326.65061-1-lianux.mm@gmail.com/
> - Rebased onto the latest mm-stable branch to ensure clean application.
> - Refactor common signal handling logic into vm_util to reduce code duplication.
> - Improve test robustness and diagnostics based on community feedback.
> - Address minor code style and script corrections.
>
> Changelog v2: https://lore.kernel.org/lkml/20250630140957.4000-1-lianux.mm@gmail.com/
> - Drop MADV_DONTNEED tests based on feedback.
> - Focus solely on process_madvise() syscall.
> - Improve error handling and structure.
> - Add future-proof flag test.
> - Style and comment cleanups.
>
> -V1: https://lore.kernel.org/lkml/20250621133003.4733-1-lianux.mm@gmail.com/
>
>  tools/testing/selftests/mm/.gitignore      |   1 +
>  tools/testing/selftests/mm/Makefile        |   1 +
>  tools/testing/selftests/mm/guard-regions.c |  51 ---
>  tools/testing/selftests/mm/process_madv.c  | 447 +++++++++++++++++++++
>  tools/testing/selftests/mm/run_vmtests.sh  |   5 +
>  tools/testing/selftests/mm/vm_util.c       |  35 ++
>  tools/testing/selftests/mm/vm_util.h       |  22 +
>  7 files changed, 511 insertions(+), 51 deletions(-)
>  create mode 100644 tools/testing/selftests/mm/process_madv.c
>
> diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore
> index 824266982aa3..95bd9c6ead9e 100644
> --- a/tools/testing/selftests/mm/.gitignore
> +++ b/tools/testing/selftests/mm/.gitignore
> @@ -25,6 +25,7 @@ pfnmap
>  protection_keys
>  protection_keys_32
>  protection_keys_64
> +process_madv
>  madv_populate
>  uffd-stress
>  uffd-unit-tests
> diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile
> index ae6f994d3add..d13b3cef2a2b 100644
> --- a/tools/testing/selftests/mm/Makefile
> +++ b/tools/testing/selftests/mm/Makefile
> @@ -85,6 +85,7 @@ TEST_GEN_FILES += mseal_test
>  TEST_GEN_FILES += on-fault-limit
>  TEST_GEN_FILES += pagemap_ioctl
>  TEST_GEN_FILES += pfnmap
> +TEST_GEN_FILES += process_madv
>  TEST_GEN_FILES += thuge-gen
>  TEST_GEN_FILES += transhuge-stress
>  TEST_GEN_FILES += uffd-stress
> diff --git a/tools/testing/selftests/mm/guard-regions.c b/tools/testing/selftests/mm/guard-regions.c
> index 93af3d3760f9..4cf101b0fe5e 100644
> --- a/tools/testing/selftests/mm/guard-regions.c
> +++ b/tools/testing/selftests/mm/guard-regions.c
> @@ -9,8 +9,6 @@
>  #include <linux/limits.h>
>  #include <linux/userfaultfd.h>
>  #include <linux/fs.h>
> -#include <setjmp.h>
> -#include <signal.h>
>  #include <stdbool.h>
>  #include <stdio.h>
>  #include <stdlib.h>
> @@ -24,24 +22,6 @@
>
>  #include "../pidfd/pidfd.h"
>
> -/*
> - * Ignore the checkpatch warning, as per the C99 standard, section 7.14.1.1:
> - *
> - * "If the signal occurs other than as the result of calling the abort or raise
> - *  function, the behavior is undefined if the signal handler refers to any
> - *  object with static storage duration other than by assigning a value to an
> - *  object declared as volatile sig_atomic_t"
> - */
> -static volatile sig_atomic_t signal_jump_set;
> -static sigjmp_buf signal_jmp_buf;

Please keep these static.

> -
> -/*
> - * Ignore the checkpatch warning, we must read from x but don't want to do
> - * anything with it in order to trigger a read page fault. We therefore must use
> - * volatile to stop the compiler from optimising this away.
> - */
> -#define FORCE_READ(x) (*(volatile typeof(x) *)x)

This one is ok to put in a shared header.

> -
>  /*
>   * How is the test backing the mapping being tested?
>   */
> @@ -120,14 +100,6 @@ static int userfaultfd(int flags)
>  	return syscall(SYS_userfaultfd, flags);
>  }
>
> -static void handle_fatal(int c)
> -{
> -	if (!signal_jump_set)
> -		return;
> -
> -	siglongjmp(signal_jmp_buf, c);
> -}
> -
>  static ssize_t sys_process_madvise(int pidfd, const struct iovec *iovec,
>  				   size_t n, int advice, unsigned int flags)
>  {
> @@ -180,29 +152,6 @@ static bool try_read_write_buf(char *ptr)
>  	return try_read_buf(ptr) && try_write_buf(ptr);
>  }
>
> -static void setup_sighandler(void)
> -{
> -	struct sigaction act = {
> -		.sa_handler = &handle_fatal,
> -		.sa_flags = SA_NODEFER,
> -	};
> -
> -	sigemptyset(&act.sa_mask);
> -	if (sigaction(SIGSEGV, &act, NULL))
> -		ksft_exit_fail_perror("sigaction");
> -}
> -
> -static void teardown_sighandler(void)
> -{
> -	struct sigaction act = {
> -		.sa_handler = SIG_DFL,
> -		.sa_flags = SA_NODEFER,
> -	};
> -
> -	sigemptyset(&act.sa_mask);
> -	sigaction(SIGSEGV, &act, NULL);
> -}
> -

I see what you're doing here, but I really don't feel comfortable with
having different tests share these signal-y variables. This stuff is fiddly
as it is.

Also let's please not have 'setup' or 'teardown' functions in
vm_util.h/vm_util.c - the util still is meant to be there for abstractions,
not test implementation details.

Also note this signal setup stuff is basically customised to the usecase
here - so overall I don't think you should abstract any of this.

Yes it's somewhat duplicative, but these are tests, that's ok.

>  static int open_file(const char *prefix, char *path)
>  {
>  	int fd;
> diff --git a/tools/testing/selftests/mm/process_madv.c b/tools/testing/selftests/mm/process_madv.c
> new file mode 100644
> index 000000000000..7d7509486d46
> --- /dev/null
> +++ b/tools/testing/selftests/mm/process_madv.c
> @@ -0,0 +1,447 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +
> +#define _GNU_SOURCE
> +#include "../kselftest_harness.h"
> +#include <errno.h>
> +#include <setjmp.h>
> +#include <signal.h>
> +#include <stdbool.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <linux/mman.h>
> +#include <sys/syscall.h>
> +#include <unistd.h>
> +#include <sched.h>
> +#include <linux/pidfd.h>

So you've seen the discussion around this.

Basically John has provided an excellent abstraction layer for this kind of
thing in tools/include. This _should_ be automatically available, so all
you _should_ need to do is:

cp include/uapi/linux/pidfd.h tools/include/uapi/linux/pidfd.h

However, the pidfd tests already have a stub in so you can alternatively
use:

#include "../pidfd/pidfd.h"

As is done in guard-regions.c.

> +#include <linux/uio.h>
> +#include "vm_util.h"
> +
> +FIXTURE(process_madvise)
> +{
> +	int pidfd;
> +	int flag;
> +	pid_t child_pid;
> +};
> +
> +FIXTURE_SETUP(process_madvise)
> +{
> +	self->pidfd = PIDFD_SELF;
> +	self->flag = 0;
> +	self->child_pid = -1;
> +	setup_sighandler();
> +};
> +
> +FIXTURE_TEARDOWN_PARENT(process_madvise)
> +{
> +	teardown_sighandler();
> +	if (self->child_pid > 0) {
> +		kill(self->child_pid, SIGKILL);
> +		waitpid(self->child_pid, NULL, 0);
> +	}
> +}
> +
> +static ssize_t sys_process_madvise(int pidfd, const struct iovec *iovec,
> +				   size_t vlen, int advice, unsigned int flags)
> +{
> +	return syscall(__NR_process_madvise, pidfd, iovec, vlen, advice, flags);
> +}
> +
> +/*
> + * Enable our signal catcher and try to read the specified buffer. The
> + * return value indicates whether the read succeeds without a fatal
> + * signal.
> + */
> +static bool try_read_buf(char *ptr)
> +{
> +	bool failed;
> +
> +	/* Tell signal handler to jump back here on fatal signal. */
> +	signal_jump_set = true;
> +	/* If a fatal signal arose, we will jump back here and failed is set. */
> +	failed = sigsetjmp(signal_jmp_buf, 0) != 0;
> +
> +	if (!failed)
> +		FORCE_READ(ptr);
> +
> +	signal_jump_set = false;
> +	return !failed;
> +}

At no point do you ever assert this false nor would you in any sane
situation get a page fault on anything you're testing, so I suggest you
just drop this + all related checks.

> +
> +TEST_F(process_madvise, basic)
> +{
> +	const unsigned long pagesize = (unsigned long)sysconf(_SC_PAGESIZE);

You should just put pagesize in self + get on setup. You can always do
something like:

	const unsigned long pagesize = self->pagesize;

Here for brevity after that.

> +	const int madvise_pages = 4;
> +	char *map;
> +	ssize_t ret;
> +	struct iovec vec[madvise_pages];
> +
> +	/*
> +	 * Create a single large mapping. We will pick pages from this
> +	 * mapping to advise on. This ensures we test non-contiguous iovecs.
> +	 */
> +	map = mmap(NULL, pagesize * 10, PROT_READ | PROT_WRITE,
> +		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
> +	if (map == MAP_FAILED)
> +		ksft_exit_skip("mmap failed, not enough memory.\n");
> +
> +	/* Fill the entire region with a known pattern. */
> +	memset(map, 'A', pagesize * 10);
> +
> +	/*
> +	 * Setup the iovec to point to 4 non-contiguous pages
> +	 * within the mapping.
> +	 */
> +	vec[0].iov_base = &map[0 * pagesize];
> +	vec[0].iov_len = pagesize;
> +	vec[1].iov_base = &map[3 * pagesize];
> +	vec[1].iov_len = pagesize;
> +	vec[2].iov_base = &map[5 * pagesize];
> +	vec[2].iov_len = pagesize;
> +	vec[3].iov_base = &map[8 * pagesize];
> +	vec[3].iov_len = pagesize;
> +
> +	ret = sys_process_madvise(PIDFD_SELF, vec, madvise_pages, MADV_DONTNEED,
> +				  0);
> +	if (ret == -1 && errno == EPERM)
> +		ksft_exit_skip(
> +			"process_madvise() unsupported or permission denied, try running as root.\n");

I think you can use the SKIP() macro here.

> +	else if (errno == EINVAL)
> +		ksft_exit_skip(
> +			"process_madvise() unsupported or parameter invalid, please check arguments.\n");

Isn't this latter one indicative of a bug?

> +
> +	/* The call should succeed and report the total bytes processed. */
> +	ASSERT_EQ(ret, madvise_pages * pagesize);
> +
> +	/* Check that advised pages are now zero. */
> +	for (int i = 0; i < madvise_pages; i++) {
> +		char *advised_page = (char *)vec[i].iov_base;
> +
> +		/* Access should be successful (kernel provides a new page). */
> +		ASSERT_TRUE(try_read_buf(advised_page));

This is a useless check really. We know page faulting works :)

> +		/* Content must be 0, not 'A'. */
> +		ASSERT_EQ(*advised_page, 0);

This is not clear, you're checking first byte of each page the below would
be clearer:

	ASSERT_EQ(advised_page[0], '\0');

> +	}
> +
> +	/* Check that an un-advised page in between is still 'A'. */
> +	char *unadvised_page = &map[1 * pagesize];
> +
> +	ASSERT_TRUE(try_read_buf(unadvised_page));

I don't see the point in using this. We know page faulting works.

> +	for (int i = 0; i < pagesize; i++)
> +		ASSERT_EQ(unadvised_page[i], 'A');
> +
> +	/* Cleanup. */
> +	ASSERT_EQ(munmap(map, pagesize * 10), 0);
> +}
> +
> +static long get_smaps_anon_huge_pages(pid_t pid, void *addr)
> +{
> +	char smaps_path[64];
> +	char *line = NULL;
> +	unsigned long start, end;
> +	long anon_huge_kb;
> +	size_t len;
> +	FILE *f;
> +	bool in_vma;
> +
> +	in_vma = false;
> +	snprintf(smaps_path, sizeof(smaps_path), "/proc/%d/smaps", pid);
> +	f = fopen(smaps_path, "r");
> +	if (!f)
> +		return -1;
> +
> +	while (getline(&line, &len, f) != -1) {
> +		/* Check if the line describes a VMA range */
> +		if (sscanf(line, "%lx-%lx", &start, &end) == 2) {
> +			if ((unsigned long)addr >= start &&
> +			    (unsigned long)addr < end)
> +				in_vma = true;
> +			else
> +				in_vma = false;
> +			continue;
> +		}
> +
> +		/* If we are in the correct VMA, look for the AnonHugePages field */
> +		if (in_vma &&
> +		    sscanf(line, "AnonHugePages: %ld kB", &anon_huge_kb) == 1)
> +			break;
> +	}
> +
> +	free(line);
> +	fclose(f);
> +
> +	return (anon_huge_kb > 0) ? (anon_huge_kb * 1024) : 0;
> +}

This seems like a lot of effort to check something that's pretty unreliable...

> +
> +static bool is_thp_always(void)
> +{
> +	const char *path = "/sys/kernel/mm/transparent_hugepage/enabled";
> +	char buf[32];
> +	FILE *f = fopen(path, "r");
> +
> +	if (!f)
> +		return false;
> +
> +	if (fgets(buf, sizeof(buf), f))
> +		if (strstr(buf, "[always]")) {
> +			fclose(f);
> +			return true;
> +		}
> +
> +	fclose(f);
> +	return false;
> +}
> +
> +/**
> + * TEST_F(process_madvise, remote_collapse)

We don't need kernel doc style comments in tests :) please just use normal
comments.

> + *
> + * This test deterministically validates process_madvise() with MADV_COLLAPSE
> + * on a remote process, other advices are difficult to verify reliably.
> + *
> + * The test verifies that a memory region in a child process, initially
> + * backed by small pages, can be collapsed into a Transparent Huge Page by a
> + * request from the parent. The result is verified by parsing the child's
> + * /proc/<pid>/smaps file.
> + */

This is clever and you've put a lot of effort in, but this just seems
absolutely prone to flaking and you're essentially testing something that's
highly automated.

I think you're also going way outside of the realms of testing
process_madvise() and are getting into testing essentially MADV_COLLAPSE
here.

We have to try to keep the test specific to what it is you're testing -
which is process_madvise() itself.

So for me, and I realise you've put a ton of work into this and I'm really
sorry to say it, I think you should drop this specific test.

For me simply testing the remote MADV_DONTNEED is enough.

> +TEST_F(process_madvise, remote_collapse)
> +{
> +	const unsigned long pagesize = (unsigned long)sysconf(_SC_PAGESIZE);
> +	int pidfd;
> +	long huge_page_size;
> +	int pipe_info[2];
> +	ssize_t ret;
> +	struct iovec vec;
> +
> +	struct child_info {
> +		pid_t pid;
> +		void *map_addr;
> +	} info;
> +
> +	huge_page_size = default_huge_page_size();
> +	if (huge_page_size <= 0)
> +		ksft_exit_skip("Could not determine a valid huge page size.\n");
> +
> +	ASSERT_EQ(pipe(pipe_info), 0);
> +
> +	self->child_pid = fork();
> +	ASSERT_NE(self->child_pid, -1);
> +
> +	if (self->child_pid == 0) {
> +		char *map;
> +		size_t map_size = 2 * huge_page_size;
> +
> +		close(pipe_info[0]);
> +
> +		map = mmap(NULL, map_size, PROT_READ | PROT_WRITE,
> +			   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
> +		ASSERT_NE(map, MAP_FAILED);
> +
> +		/* Fault in as small pages */
> +		for (size_t i = 0; i < map_size; i += pagesize)
> +			map[i] = 'A';
> +
> +		/* Send info and pause */
> +		info.pid = getpid();
> +		info.map_addr = map;
> +		ret = write(pipe_info[1], &info, sizeof(info));
> +		ASSERT_EQ(ret, sizeof(info));
> +		close(pipe_info[1]);
> +
> +		pause();
> +		exit(0);
> +	}
> +
> +	close(pipe_info[1]);
> +
> +	/* Receive child info */
> +	ret = read(pipe_info[0], &info, sizeof(info));
> +	if (ret <= 0) {
> +		waitpid(self->child_pid, NULL, 0);
> +		ksft_exit_skip("Failed to read child info from pipe.\n");
> +	}
> +	ASSERT_EQ(ret, sizeof(info));
> +	close(pipe_info[0]);
> +	self->child_pid = info.pid;
> +
> +	pidfd = syscall(__NR_pidfd_open, self->child_pid, 0);
> +	ASSERT_GE(pidfd, 0);
> +
> +	vec.iov_base = info.map_addr;
> +	vec.iov_len = huge_page_size;
> +
> +	if (is_thp_always()) {
> +		long initial_huge_pages;
> +
> +		/*
> +		 * When THP is 'always', khugepaged may pre-emptively
> +		 * collapse the pages before our MADV_COLLAPSE call. Check
> +		 * the initial state to provide a more accurate test report.
> +		 */
> +		initial_huge_pages =
> +			get_smaps_anon_huge_pages(self->child_pid, info.map_addr);
> +
> +		if (initial_huge_pages == 2 * huge_page_size) {
> +			/*
> +			 * The pages were already collapsed by khugepaged.
> +			 * The test goal narrows to verifying that MADV_COLLAPSE
> +			 * correctly returns success on an already-collapsed
> +			 * region, as documented.
> +			 */
> +			ksft_test_result_skip(
> +				"THP is 'always' and pages were pre-collapsed; verifying success on already-collapsed page.\n");
> +
> +			ret = sys_process_madvise(pidfd, &vec, 1, MADV_COLLAPSE,
> +						  0);
> +			ASSERT_EQ(ret, huge_page_size);
> +			goto cleanup;
> +		}

Yeah this is asking for a flake. khugepaged can operate at any time.

> +
> +		/*
> +		 * Pages are still small, creating a race between our call
> +		 * and khugepaged. This is the main test scenario for 'always'.
> +		 */
> +		ret = sys_process_madvise(pidfd, &vec, 1, MADV_COLLAPSE, 0);
> +
> +		if (ret == -1) {
> +			/*
> +			 * MADV_COLLAPSE lost the race to khugepaged, which
> +			 * likely held a page lock. The kernel correctly
> +			 * reports this temporary contention with EAGAIN.
> +			 */
> +			if (errno == EAGAIN) {
> +				ksft_test_result_skip(
> +					"THP is 'always', process_madvise returned EAGAIN due to an expected race with khugepaged.\n");
> +			} else {
> +				ksft_test_result_fail(
> +					"process_madvise failed with unexpected errno %d in 'always' mode.\n",
> +					errno);
> +			}
> +			goto cleanup;
> +		}
> +
> +		/*
> +		 * MADV_COLLAPSE won the race and successfully collapsed
> +		 * the pages. Verify the final state.
> +		 */
> +		ASSERT_EQ(ret, huge_page_size);
> +		ASSERT_EQ(get_smaps_anon_huge_pages(self->child_pid, info.map_addr),
> +			  huge_page_size);
> +		ksft_test_result_pass(
> +			"THP is 'always', MADV_COLLAPSE won race and collapsed pages.\n");
> +		goto cleanup;
> +	}
> +
> +	/*
> +	 * THP is 'madvise' or 'never'. No race is expected with khugepaged.
> +	 * We can perform a straightforward state-change verification.
> +	 */
> +	ASSERT_EQ(get_smaps_anon_huge_pages(self->child_pid, info.map_addr), 0);
> +
> +	ret = sys_process_madvise(pidfd, &vec, 1, MADV_COLLAPSE, 0);
> +	if (ret == -1) {
> +		if (errno == EINVAL)
> +			ksft_exit_skip(
> +				"PROCESS_MADV_ADVISE is not supported.\n");
> +		else if (errno == EPERM)
> +			ksft_exit_skip(
> +				"No process_madvise() permissions, try running as root.\n");
> +		goto cleanup;
> +	}
> +	ASSERT_EQ(ret, huge_page_size);
> +
> +	ASSERT_EQ(get_smaps_anon_huge_pages(self->child_pid, info.map_addr),
> +		  huge_page_size);
> +
> +	ksft_test_result_pass(
> +		"MADV_COLLAPSE successfully verified via smaps.\n");
> +
> +cleanup:
> +	/* Cleanup */
> +	kill(self->child_pid, SIGKILL);
> +	waitpid(self->child_pid, NULL, 0);
> +	if (pidfd >= 0)
> +		close(pidfd);
> +}
> +
> +/*
> + * Test process_madvise() with various invalid pidfds to ensure correct error
> + * handling. This includes negative fds, non-pidfd fds, and pidfds for
> + * processes that no longer exist.
> + */
> +TEST_F(process_madvise, invalid_pidfd)
> +{
> +	struct iovec vec;
> +	pid_t child_pid;
> +	ssize_t ret;
> +	int pidfd;
> +
> +	vec.iov_base = (void *)0x1234;
> +	vec.iov_len = 4096;
> +
> +	/* Using an invalid fd number (-1) should fail with EBADF. */
> +	ret = sys_process_madvise(-1, &vec, 1, MADV_DONTNEED, 0);
> +	ASSERT_EQ(ret, -1);
> +	ASSERT_EQ(errno, EBADF);
> +
> +	/*
> +	 * Using a valid fd that is not a pidfd (e.g. stdin) should fail
> +	 * with EBADF.
> +	 */
> +	ret = sys_process_madvise(STDIN_FILENO, &vec, 1, MADV_DONTNEED, 0);
> +	ASSERT_EQ(ret, -1);
> +	ASSERT_EQ(errno, EBADF);
> +
> +	/*
> +	 * Using a pidfd for a process that has already exited should fail
> +	 * with ESRCH.
> +	 */
> +	child_pid = fork();
> +	ASSERT_NE(child_pid, -1);
> +
> +	if (child_pid == 0)
> +		exit(0);
> +
> +	pidfd = syscall(__NR_pidfd_open, child_pid, 0);
> +	ASSERT_GE(pidfd, 0);
> +
> +	/* Wait for the child to ensure it has terminated. */
> +	waitpid(child_pid, NULL, 0);
> +
> +	ret = sys_process_madvise(pidfd, &vec, 1, MADV_DONTNEED, 0);
> +	ASSERT_EQ(ret, -1);
> +	ASSERT_EQ(errno, ESRCH);
> +	close(pidfd);
> +}
> +
> +/*
> + * Test process_madvise() with an invalid flag value. Now we only support flag=0
> + * future we will use it support sync so reserve this test.
> + */
> +TEST_F(process_madvise, flag)
> +{
> +	const unsigned long pagesize = (unsigned long)sysconf(_SC_PAGESIZE);
> +	unsigned int invalid_flag;
> +	struct iovec vec;
> +	char *map;
> +	ssize_t ret;
> +
> +	map = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1,
> +		   0);
> +	if (map == MAP_FAILED)
> +		ksft_exit_skip("mmap failed, not enough memory.\n");
> +
> +	vec.iov_base = map;
> +	vec.iov_len = pagesize;
> +
> +	invalid_flag = 0x80000000;
> +
> +	ret = sys_process_madvise(PIDFD_SELF, &vec, 1, MADV_DONTNEED,
> +				  invalid_flag);
> +	ASSERT_EQ(ret, -1);
> +	ASSERT_EQ(errno, EINVAL);
> +
> +	/* Cleanup. */
> +	ASSERT_EQ(munmap(map, pagesize), 0);
> +}
> +
> +TEST_HARNESS_MAIN
> diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh
> index dddd1dd8af14..84fb51902c3e 100755
> --- a/tools/testing/selftests/mm/run_vmtests.sh
> +++ b/tools/testing/selftests/mm/run_vmtests.sh
> @@ -65,6 +65,8 @@ separated by spaces:
>  	test pagemap_scan IOCTL
>  - pfnmap
>  	tests for VM_PFNMAP handling
> +- process_madv
> +	test process_madvise
>  - cow
>  	test copy-on-write semantics
>  - thp
> @@ -422,6 +424,9 @@ CATEGORY="hmm" run_test bash ./test_hmm.sh smoke
>  # MADV_GUARD_INSTALL and MADV_GUARD_REMOVE tests
>  CATEGORY="madv_guard" run_test ./guard-regions
>
> +# PROCESS_MADVISE TEST
> +CATEGORY="process_madv" run_test ./process_madv
> +
>  # MADV_POPULATE_READ and MADV_POPULATE_WRITE tests
>  CATEGORY="madv_populate" run_test ./madv_populate
>
> diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c
> index 5492e3f784df..85b209260e5a 100644
> --- a/tools/testing/selftests/mm/vm_util.c
> +++ b/tools/testing/selftests/mm/vm_util.c
> @@ -20,6 +20,9 @@
>  unsigned int __page_size;
>  unsigned int __page_shift;
>
> +volatile sig_atomic_t signal_jump_set;
> +sigjmp_buf signal_jmp_buf;
> +
>  uint64_t pagemap_get_entry(int fd, char *start)
>  {
>  	const unsigned long pfn = (unsigned long)start / getpagesize();
> @@ -524,3 +527,35 @@ int read_sysfs(const char *file_path, unsigned long *val)
>
>  	return 0;
>  }
> +
> +static void handle_fatal(int c)
> +{
> +	if (!signal_jump_set)
> +		return;
> +
> +	siglongjmp(signal_jmp_buf, c);
> +}
> +
> +void setup_sighandler(void)
> +{
> +	struct sigaction act = {
> +		.sa_handler = &handle_fatal,
> +		.sa_flags = SA_NODEFER,
> +	};
> +
> +	sigemptyset(&act.sa_mask);
> +	if (sigaction(SIGSEGV, &act, NULL))
> +		ksft_exit_fail_perror("sigaction in setup");
> +}
> +
> +void teardown_sighandler(void)
> +{
> +	struct sigaction act = {
> +		.sa_handler = SIG_DFL,
> +		.sa_flags = SA_NODEFER,
> +	};
> +
> +	sigemptyset(&act.sa_mask);
> +	if (sigaction(SIGSEGV, &act, NULL))
> +		ksft_exit_fail_perror("sigaction in teardown");
> +}

As stated above, please do not abstract these.

> diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h
> index b8136d12a0f8..6bc4177a2807 100644
> --- a/tools/testing/selftests/mm/vm_util.h
> +++ b/tools/testing/selftests/mm/vm_util.h
> @@ -8,6 +8,8 @@
>  #include <unistd.h> /* _SC_PAGESIZE */
>  #include "../kselftest.h"
>  #include <linux/fs.h>
> +#include <setjmp.h>
> +#include <signal.h>
>
>  #define BIT_ULL(nr)                   (1ULL << (nr))
>  #define PM_SOFT_DIRTY                 BIT_ULL(55)
> @@ -61,6 +63,24 @@ static inline void skip_test_dodgy_fs(const char *op_name)
>  	ksft_test_result_skip("%s failed with ENOENT. Filesystem might be buggy (9pfs?)\n", op_name);
>  }
>
> +/*
> + * Ignore the checkpatch warning, as per the C99 standard, section 7.14.1.1:
> + *
> + * "If the signal occurs other than as the result of calling the abort or raise
> + *  function, the behavior is undefined if the signal handler refers to any
> + *  object with static storage duration other than by assigning a value to an
> + *  object declared as volatile sig_atomic_t"
> + */
> +extern volatile sig_atomic_t signal_jump_set;
> +extern sigjmp_buf signal_jmp_buf;
> +
> +/*
> + * Ignore the checkpatch warning, we must read from x but don't want to do
> + * anything with it in order to trigger a read page fault. We therefore must use
> + * volatile to stop the compiler from optimising this away.
> + */
> +#define FORCE_READ(x) (*(volatile typeof(x) *)x)
> +
>  uint64_t pagemap_get_entry(int fd, char *start);
>  bool pagemap_is_softdirty(int fd, char *start);
>  bool pagemap_is_swapped(int fd, char *start);
> @@ -90,6 +110,8 @@ bool find_vma_procmap(struct procmap_fd *procmap, void *address);
>  int close_procmap(struct procmap_fd *procmap);
>  int write_sysfs(const char *file_path, unsigned long val);
>  int read_sysfs(const char *file_path, unsigned long *val);
> +void setup_sighandler(void);
> +void teardown_sighandler(void);

And as stated previously, please un-abstract all these.

>
>  static inline int open_self_procmap(struct procmap_fd *procmap_out)
>  {
> --
> 2.43.0
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ