[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <78c3a163-551b-ef53-4018-7b6ba0640757@linuxfoundation.org>
Date: Mon, 16 May 2022 14:29:29 -0600
From: Shuah Khan <skhan@...uxfoundation.org>
To: Suren Baghdasaryan <surenb@...gle.com>, akpm@...ux-foundation.org
Cc: mhocko@...e.com, rientjes@...gle.com, willy@...radead.org,
hannes@...xchg.org, guro@...com, minchan@...nel.org,
kirill@...temov.name, aarcange@...hat.com, brauner@...nel.org,
hch@...radead.org, oleg@...hat.com, david@...hat.com,
jannh@...gle.com, shakeelb@...gle.com, peterx@...hat.com,
jhubbard@...dia.com, shuah@...nel.org,
linux-kernel@...r.kernel.org, linux-mm@...ck.org,
linux-kselftest@...r.kernel.org, kernel-team@...roid.com,
Shuah Khan <skhan@...uxfoundation.org>
Subject: Re: [PATCH v2 1/1] selftests: vm: add process_mrelease tests
On 5/16/22 1:55 AM, Suren Baghdasaryan wrote:
> Introduce process_mrelease syscall sanity tests which include tests
> which expect to fail:
> - process_mrelease with invalid pidfd and flags inputs
> - process_mrelease on a live process with no pending signals
> and valid process_mrelease usage which is expected to succeed.
> Because process_mrelease has to be used against a process with a pending
> SIGKILL, it's possible that the process exits before process_mrelease
> gets called. In such cases we retry the test with a victim that allocates
> twice as much memory, up to 1GB. This would require the victim process to
> spend more time during exit and process_mrelease has a better chance of
> catching the process before it exits and succeeding.
>
> On success the test reports the amount of memory the child had to
> allocate for reaping to succeed. Sample output:
> Success reaping a child with 1MB of memory allocations
>
> On failure the test reports the failure. Sample outputs:
> All process_mrelease attempts failed!
> process_mrelease: Invalid argument
>
Nit: Please format this better - include actual example output from the
command, along with examples of how to run the test.
> Signed-off-by: Suren Baghdasaryan <surenb@...gle.com>
> ---
> tools/testing/selftests/vm/.gitignore | 1 +
> tools/testing/selftests/vm/Makefile | 1 +
> tools/testing/selftests/vm/mrelease_test.c | 214 +++++++++++++++++++++
> tools/testing/selftests/vm/run_vmtests.sh | 16 ++
> 4 files changed, 232 insertions(+)
> create mode 100644 tools/testing/selftests/vm/mrelease_test.c
>
> diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
> index d7507f3c7c76..c019e53f24f9 100644
> --- a/tools/testing/selftests/vm/.gitignore
> +++ b/tools/testing/selftests/vm/.gitignore
> @@ -10,6 +10,7 @@ map_populate
> thuge-gen
> compaction_test
> mlock2-tests
> +mrelease_test
> mremap_dontunmap
> mremap_test
> on-fault-limit
> diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
> index 04a49e876a46..733fccbff0ef 100644
> --- a/tools/testing/selftests/vm/Makefile
> +++ b/tools/testing/selftests/vm/Makefile
> @@ -43,6 +43,7 @@ TEST_GEN_FILES += map_populate
> TEST_GEN_FILES += memfd_secret
> TEST_GEN_FILES += mlock-random-test
> TEST_GEN_FILES += mlock2-tests
> +TEST_GEN_FILES += mrelease_test
> TEST_GEN_FILES += mremap_dontunmap
> TEST_GEN_FILES += mremap_test
> TEST_GEN_FILES += on-fault-limit
> diff --git a/tools/testing/selftests/vm/mrelease_test.c b/tools/testing/selftests/vm/mrelease_test.c
> new file mode 100644
> index 000000000000..99680676069b
> --- /dev/null
> +++ b/tools/testing/selftests/vm/mrelease_test.c
> @@ -0,0 +1,214 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright 2022 Google LLC
> + */
> +#define _GNU_SOURCE
> +#include <errno.h>
> +#include <stdbool.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <sys/wait.h>
> +#include <unistd.h>
> +
> +#include "util.h"
> +
> +#include "../kselftest.h"
> +
> +#if defined(__NR_pidfd_open) && defined(__NR_process_mrelease)
> +
> +static inline int pidfd_open(pid_t pid, unsigned int flags)
> +{
> + return syscall(__NR_pidfd_open, pid, flags);
> +}
> +
> +static inline int process_mrelease(int pidfd, unsigned int flags)
> +{
> + return syscall(__NR_process_mrelease, pidfd, flags);
> +}
> +
> +static void write_fault_pages(char *addr, unsigned long nr_pages)
> +{
> + unsigned long i;
> +
> + for (i = 0; i < nr_pages; i++)
> + *((unsigned long *)(addr + (i * PAGE_SIZE))) = i;
> +}
> +
Okay, the above 3 routines are each called only once. I don't see any point
in making them separate routines. I made the same comment on v1.
> +static int alloc_noexit(unsigned long nr_pages, int pipefd)
> +{
> + int timeout = 10; /* 10sec timeout to get killed */
> + int ppid = getppid();
> + void *buf;
> +
> + buf = mmap(NULL, nr_pages * PAGE_SIZE, PROT_READ | PROT_WRITE,
> + MAP_PRIVATE | MAP_ANON, 0, 0);
> + if (buf == MAP_FAILED) {
> + perror("mmap failed, halting the test");
> + return KSFT_FAIL;
> + }
> +
> + write_fault_pages((char *)buf, nr_pages);
> +
> + /* Signal the parent that the child is ready */
> + if (write(pipefd, "", 1) < 0) {
> + perror("write");
> + return KSFT_FAIL;
> + }
> +
> + /* Wait to be killed (when reparenting happens) */
> + while (getppid() == ppid && timeout > 0) {
> + sleep(1);
> + timeout--;
> + }
> +
> + munmap(buf, nr_pages * PAGE_SIZE);
> +
> + return (timeout > 0) ? KSFT_PASS : KSFT_FAIL;
> +}
> +
> +/* The process_mrelease calls in this test are expected to fail */
> +void run_negative_tests(int pidfd)
> +{
> + /* Test invalid flags. Expect to fail with EINVAL error code. */
> + if (!process_mrelease(pidfd, (unsigned int)-1) || errno != EINVAL) {
> + perror("process_mrelease with wrong flags");
> + exit(KSFT_FAIL);
> + }
> + /*
> + * Test reaping while process is alive with no pending SIGKILL.
> + * Expect to fail with EINVAL error code.
> + */
> + if (!process_mrelease(pidfd, 0) || errno != EINVAL) {
> + perror("process_mrelease on a live process");
> + exit(KSFT_FAIL);
> + }
> +}
> +
> +#define MB(x) (x << 20)
> +#define MAX_SIZE_MB 1024
> +
> +int main(void)
> +{
> + int pipefd[2], pidfd;
> + bool success, retry;
> + size_t size;
> + pid_t pid;
> + char byte;
> + int res;
> +
> + /* Test a wrong pidfd */
> + if (!process_mrelease(-1, 0) || errno != EBADF) {
> + perror("process_mrelease with wrong pidfd");
> + exit(KSFT_FAIL);
> + }
> +
> + /* Start the test with 1MB child memory allocation */
> + size = 1;
> +retry:
> + /*
> + * Pipe for the child to signal when it's done allocating
> + * memory
> + */
> + if (pipe(pipefd)) {
> + perror("pipe");
> + exit(KSFT_FAIL);
> + }
> + pid = fork();
> + if (pid < 0) {
> + perror("fork");
> + close(pipefd[0]);
> + close(pipefd[1]);
> + exit(KSFT_FAIL);
> + }
> +
> + if (pid == 0) {
> + /*
> + * Child main routine:
> + * Allocate and fault-in memory and wait to be killed
> + */
> + close(pipefd[0]);
> + res = alloc_noexit(MB(size) / PAGE_SIZE, pipefd[1]);
> + close(pipefd[1]);
> + exit(res);
> + }
> +
Now, the above code can be a separate function, which will make it more readable.
> + /*
> + * Parent main routine:
> + * Wait for the child to finish allocations, then kill and reap
> + */
> + close(pipefd[1]);
> + /* Block until the child is ready */
> + res = read(pipefd[0], &byte, 1);
> + close(pipefd[0]);
> + if (res < 0) {
> + perror("read");
> + if (!kill(pid, SIGKILL))
> + waitpid(pid, NULL, 0);
> + exit(KSFT_FAIL);
> + }
> +
> + pidfd = pidfd_open(pid, 0);
> + if (pidfd < 0) {
> + perror("pidfd_open");
> + if (!kill(pid, SIGKILL))
> + waitpid(pid, NULL, 0);
> + exit(KSFT_FAIL);
> + }
> +
> + /* Run negative tests which require a live child */
> + run_negative_tests(pidfd);
> +
> + if (kill(pid, SIGKILL)) {
> + perror("kill");
> + exit(KSFT_FAIL);
> + }
> +
> + success = (process_mrelease(pidfd, 0) == 0);
> + if (!success) {
> + /*
> + * If we failed to reap because the child exited too soon,
> + * before we could call process_mrelease. Double child's memory
> + * which causes it to spend more time on cleanup and increases
> + * our chances of reaping its memory before it exits.
> + * Retry until we succeed or reach MAX_SIZE_MB.
> + */
> + if (errno == ESRCH) {
> + retry = (size <= MAX_SIZE_MB);
> + } else {
> + perror("process_mrelease");
> + waitpid(pid, NULL, 0);
> + exit(KSFT_FAIL);
> + }
> + }
> +
> + /* Cleanup to prevent zombies */
> + if (waitpid(pid, NULL, 0) < 0) {
> + perror("waitpid");
> + exit(KSFT_FAIL);
> + }
> + close(pidfd);
> +
> + if (!success) {
> + if (retry) {
> + size *= 2;
> + goto retry;
> + }
> + printf("All process_mrelease attempts failed!\n");
> + exit(KSFT_FAIL);
> + }
> +
> + printf("Success reaping a child with %zuMB of memory allocations\n",
> + size);
> + return KSFT_PASS;
> +}
> +
> +#else /* defined(__NR_pidfd_open) && defined(__NR_process_mrelease) */
> +
> +int main(int argc, char *argv[])
> +{
> + printf("skip: skipping process_mrelease test " \
> + "(missing __NR_pidfd_open and/or __NR_process_mrelease)\n");
> + return KSFT_SKIP;
> +}
> +
Why do you need these ifdefs? syscall() will fail with errno set to ENOSYS,
and you can key off that. Please take a look at other usages of syscall() in
the repo.
> +#endif /* defined(__NR_pidfd_open) && defined(__NR_process_mrelease) */
> diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh
> index 352ba00cf26b..1986162fea39 100755
> --- a/tools/testing/selftests/vm/run_vmtests.sh
> +++ b/tools/testing/selftests/vm/run_vmtests.sh
> @@ -287,6 +287,22 @@ else
> echo "[PASS]"
> fi
>
> +echo "---------------------"
> +echo "running mrelease_test"
> +echo "---------------------"
> +./mrelease_test
> +ret_val=$?
> +
> +if [ $ret_val -eq 0 ]; then
> + echo "[PASS]"
> +elif [ $ret_val -eq $ksft_skip ]; then
> + echo "[SKIP]"
> + exitcode=$ksft_skip
> +else
> + echo "[FAIL]"
> + exitcode=1
> +fi
> +
> echo "-------------------"
> echo "running mremap_test"
> echo "-------------------"
>
thanks,
-- Shuah
Powered by blists - more mailing lists