[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <f0585ae4-5642-361f-11d6-9399bd9cc550@linuxfoundation.org>
Date: Wed, 9 Feb 2022 18:22:18 -0700
From: Shuah Khan <skhan@...uxfoundation.org>
To: Michal Koutný <mkoutny@...e.com>,
Eric Biederman <ebiederm@...ssion.com>,
Alexey Gladkov <legion@...nel.org>
Cc: Kees Cook <keescook@...omium.org>, Shuah Khan <shuah@...nel.org>,
Christian Brauner <brauner@...nel.org>,
Solar Designer <solar@...nwall.com>,
Ran Xiaokai <ran.xiaokai@....com.cn>,
linux-kernel@...r.kernel.org, linux-kselftest@...r.kernel.org,
Linux Containers <containers@...ts.linux-foundation.org>,
Shuah Khan <skhan@...uxfoundation.org>
Subject: Re: [RFC PATCH 5/6] selftests: Challenge RLIMIT_NPROC in user
namespaces
On 2/7/22 5:17 AM, Michal Koutný wrote:
> The services are started in descendant user namepaces, each of them
> should honor the RLIMIT_NPROC that's passed during user namespace
> creation.
>
> main [user_ns_0]
> ` service [user_ns_1]
> ` worker 1
> ` worker 2
> ...
> ` worker k
> ...
> ` service [user_ns_n]
> ` worker 1
> ` worker 2
> ...
> ` worker k
>
> Test uses explicit synchronization, to make sure original parent's limit
> does not interfere with descendants.
>
Thank you for updating the test with the kernel updates. Please see
comments below. A bit of a concern with how long this test will run.
Did you time it?
> Signed-off-by: Michal Koutný <mkoutny@...e.com>
> ---
> .../selftests/rlimits/rlimits-per-userns.c | 154 ++++++++++++++----
> 1 file changed, 125 insertions(+), 29 deletions(-)
>
> diff --git a/tools/testing/selftests/rlimits/rlimits-per-userns.c b/tools/testing/selftests/rlimits/rlimits-per-userns.c
> index 26dc949e93ea..54c1b345e42b 100644
> --- a/tools/testing/selftests/rlimits/rlimits-per-userns.c
> +++ b/tools/testing/selftests/rlimits/rlimits-per-userns.c
> @@ -9,7 +9,9 @@
> #include <sys/resource.h>
> #include <sys/prctl.h>
> #include <sys/stat.h>
> +#include <sys/socket.h>
>
> +#include <assert.h>
> #include <unistd.h>
> #include <stdlib.h>
> #include <stdio.h>
> @@ -21,38 +23,74 @@
> #include <errno.h>
> #include <err.h>
>
> -#define NR_CHILDS 2
> +#define THE_LIMIT 4
> +#define NR_CHILDREN 5
> +
> +static_assert(NR_CHILDREN >= THE_LIMIT-1, "Need slots for limit-1 children.");
>
> static char *service_prog;
> static uid_t user = 60000;
> static uid_t group = 60000;
> +static struct rlimit saved_limit;
> +
> +/* Two uses: main and service */
> +static pid_t child[NR_CHILDREN];
> +static pid_t pid;
>
> static void setrlimit_nproc(rlim_t n)
> {
> - pid_t pid = getpid();
> struct rlimit limit = {
> .rlim_cur = n,
> .rlim_max = n
> };
> -
> - warnx("(pid=%d): Setting RLIMIT_NPROC=%ld", pid, n);
> + if (getrlimit(RLIMIT_NPROC, &saved_limit) < 0)
> + err(EXIT_FAILURE, "(pid=%d): getrlimit(RLIMIT_NPROC)", pid);
>
> if (setrlimit(RLIMIT_NPROC, &limit) < 0)
> err(EXIT_FAILURE, "(pid=%d): setrlimit(RLIMIT_NPROC)", pid);
> +
> + warnx("(pid=%d): Set RLIMIT_NPROC=%ld", pid, n);
> +}
> +
> +static void restore_rlimit_nproc(void)
> +{
> + if (setrlimit(RLIMIT_NPROC, &saved_limit) < 0)
> + err(EXIT_FAILURE, "(pid=%d): setrlimit(RLIMIT_NPROC, saved)", pid);
> + warnx("(pid=%d) Restored RLIMIT_NPROC", pid);
> }
>
> -static pid_t fork_child(void)
> +enum msg_sync {
> + UNSHARE,
> + RLIMIT_RESTORE,
> +};
> +
> +static void sync_notify(int fd, enum msg_sync m)
> {
> - pid_t pid = fork();
> + char tmp = m;
> +
> + if (write(fd, &tmp, 1) < 0)
> + warnx("(pid=%d): failed sync-write", pid);
> +}
>
> - if (pid < 0)
> +static void sync_wait(int fd, enum msg_sync m)
> +{
> + char tmp;
> +
> + if (read(fd, &tmp, 1) < 0)
> + warnx("(pid=%d): failed sync-read", pid);
> +}
> +
> +static pid_t fork_child(int control_fd)
> +{
> + pid_t new_pid = fork();
> +
> + if (new_pid < 0)
> err(EXIT_FAILURE, "fork");
>
> - if (pid > 0)
> - return pid;
> + if (new_pid > 0)
> + return new_pid;
>
> pid = getpid();
> -
> warnx("(pid=%d): New process starting ...", pid);
>
> if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0)
> @@ -73,6 +111,9 @@ static pid_t fork_child(void)
> if (unshare(CLONE_NEWUSER) < 0)
> err(EXIT_FAILURE, "unshare(CLONE_NEWUSER)");
>
> + sync_notify(control_fd, UNSHARE);
> + sync_wait(control_fd, RLIMIT_RESTORE);
> +
> char *const argv[] = { "service", NULL };
> char *const envp[] = { "I_AM_SERVICE=1", NULL };
>
> @@ -82,37 +123,92 @@ static pid_t fork_child(void)
> err(EXIT_FAILURE, "(pid=%d): execve", pid);
> }
>
> +static void run_service(void)
> +{
> + size_t i;
> + int ret = EXIT_SUCCESS;
> + struct rlimit limit;
> + char user_ns[PATH_MAX];
> +
> + if (getrlimit(RLIMIT_NPROC, &limit) < 0)
> + err(EXIT_FAILURE, "(pid=%d) failed getrlimit", pid);
> + if (readlink("/proc/self/ns/user", user_ns, PATH_MAX) < 0)
> + err(EXIT_FAILURE, "(pid=%d) failed readlink", pid);
> +
> + warnx("(pid=%d) Service instance attempts %i children, limit %lu:%lu, ns=%s",
> + pid, THE_LIMIT, limit.rlim_cur, limit.rlim_max, user_ns);
> +
> + /* test rlimit inside the service, effectively THE_LIMIT-1 becaue of service itself */
> + for (i = 0; i < THE_LIMIT; i++) {
> + child[i] = fork();
> + if (child[i] == 0) {
> + /* service child */
> + pause();
> + exit(EXIT_SUCCESS);
> + }
> + if (child[i] < 0) {
> + warnx("(pid=%d) service fork %lu failed, errno = %i", pid, i+1, errno);
> + if (!(i == THE_LIMIT-1 && errno == EAGAIN))
> + ret = EXIT_FAILURE;
> + } else if (i == THE_LIMIT-1) {
> + warnx("(pid=%d) RLIMIT_NPROC not honored", pid);
> + ret = EXIT_FAILURE;
> + }
> + }
> +
> + /* service cleanup */
> + for (i = 0; i < THE_LIMIT; i++)
> + if (child[i] > 0)
> + kill(child[i], SIGUSR1);
> +
> + for (i = 0; i < THE_LIMIT; i++)
> + if (child[i] > 0)
> + waitpid(child[i], NULL, WNOHANG);
> +
> + if (ret)
> + exit(ret);
> + pause();
> +}
> +
> int main(int argc, char **argv)
> {
> size_t i;
> - pid_t child[NR_CHILDS];
> - int wstatus[NR_CHILDS];
> - int childs = NR_CHILDS;
> - pid_t pid;
> + int control_fd[NR_CHILDREN];
> + int wstatus[NR_CHILDREN];
> + int children = NR_CHILDREN;
> + int sockets[2];
> +
> + pid = getpid();
>
> if (getenv("I_AM_SERVICE")) {
> - pause();
> - exit(EXIT_SUCCESS);
> + run_service();
> + exit(EXIT_FAILURE);
Why is this a failure unconditionally?
> }
>
> service_prog = argv[0];
> - pid = getpid();
>
> warnx("(pid=%d) Starting testcase", pid);
>
> - /*
> - * This rlimit is not a problem for root because it can be exceeded.
> - */
> - setrlimit_nproc(1);
> -
> - for (i = 0; i < NR_CHILDS; i++) {
> - child[i] = fork_child();
> + setrlimit_nproc(THE_LIMIT);
> + for (i = 0; i < NR_CHILDREN; i++) {
> + if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0, sockets) < 0)
> + err(EXIT_FAILURE, "(pid=%d) socketpair failed", pid);
> + control_fd[i] = sockets[0];
> + child[i] = fork_child(sockets[1]);
> wstatus[i] = 0;
> + }
> +
> + for (i = 0; i < NR_CHILDREN; i++)
> + sync_wait(control_fd[i], UNSHARE);
> + restore_rlimit_nproc();
> +
> + for (i = 0; i < NR_CHILDREN; i++) {
> + sync_notify(control_fd[i], RLIMIT_RESTORE);
> usleep(250000);
How long does this test now run for with this loop?
> }
>
> while (1) {
> - for (i = 0; i < NR_CHILDS; i++) {
> + for (i = 0; i < NR_CHILDREN; i++) {
> if (child[i] <= 0)
> continue;
>
> @@ -126,22 +222,22 @@ int main(int argc, char **argv)
> warn("(pid=%d): waitpid(%d)", pid, child[i]);
>
> child[i] *= -1;
> - childs -= 1;
> + children -= 1;
> }
>
> - if (!childs)
> + if (!children)
> break;
>
> usleep(250000);
>
> - for (i = 0; i < NR_CHILDS; i++) {
> + for (i = 0; i < NR_CHILDREN; i++) {
> if (child[i] <= 0)
> continue;
> kill(child[i], SIGUSR1);
> }
> }
>
> - for (i = 0; i < NR_CHILDS; i++) {
> + for (i = 0; i < NR_CHILDREN; i++) {
> if (WIFEXITED(wstatus[i]))
> warnx("(pid=%d): pid %d exited, status=%d",
> pid, -child[i], WEXITSTATUS(wstatus[i]));
>
Please a add few more comments in the code path.
thanks,
-- Shuah
Powered by blists - more mailing lists