[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240925-herziehen-unerbittlich-23c5845fed06@brauner>
Date: Wed, 25 Sep 2024 10:31:04 +0200
From: Christian Brauner <brauner@...nel.org>
To: Tycho Andersen <tycho@...ho.pizza>
Cc: Alexander Viro <viro@...iv.linux.org.uk>, Jan Kara <jack@...e.cz>,
Eric Biederman <ebiederm@...ssion.com>, Kees Cook <kees@...nel.org>, Jeff Layton <jlayton@...nel.org>,
Chuck Lever <chuck.lever@...cle.com>, Alexander Aring <alex.aring@...il.com>,
linux-fsdevel@...r.kernel.org, linux-mm@...ck.org, linux-kernel@...r.kernel.org,
Tycho Andersen <tandersen@...flix.com>, Zbigniew Jędrzejewski-Szmek <zbyszek@...waw.pl>,
Aleksa Sarai <cyphar@...har.com>
Subject: Re: [RFC] exec: add a flag for "reasonable" execveat() comm
On Tue, Sep 24, 2024 at 08:10:01AM GMT, Tycho Andersen wrote:
> From: Tycho Andersen <tandersen@...flix.com>
>
> Zbigniew mentioned at Linux Plumbers that systemd is interested in
> switching to execveat() for service execution, but can't, because the
> contents of /proc/pid/comm are the file descriptor which was used,
> instead of the path to the binary. This makes the output of tools like
> top and ps useless, especially in a world where most fds are opened
> CLOEXEC so the number is truly meaningless.
>
> This patch adds an AT_ flag to fix up /proc/pid/comm to be the
> contents of argv[0], instead of the fdno.
>
> Signed-off-by: Tycho Andersen <tandersen@...flix.com>
> Suggested-by: Zbigniew Jędrzejewski-Szmek <zbyszek@...waw.pl>
> CC: Aleksa Sarai <cyphar@...har.com>
> ---
> There is some question about what to name the flag; it seems to me that
> "everyone wants this" instead of the fdno, but probably "REASONABLE" is not
> a good choice.
>
> Also, requiring the arg to alloc_bprm() is a bit ugly: kernel-based execs
> will never use this, so they just have to pass an empty thing. We could
> introduce a bprm_fixup_comm() to do the munging there, but then the code
> paths start to diverge, which is maybe not nice. I left it this way because
> this is the smallest patch in terms of size, but I'm happy to change it.
>
> Finally, here is a small set of test programs; I'm happy to turn them into
> kselftests if we agree on an API.
>
> #include <stdio.h>
> #include <unistd.h>
> #include <stdlib.h>
> #include <sys/types.h>
> #include <sys/stat.h>
> #include <fcntl.h>
>
> int main(void)
> {
> int fd;
> char buf[128];
>
> fd = open("/proc/self/comm", O_RDONLY);
> if (fd < 0) {
> perror("open comm");
> exit(1);
> }
>
> if (read(fd, buf, 128) < 0) {
> perror("read");
> exit(1);
> }
>
> printf("comm: %s", buf);
> exit(0);
> }
>
> #define _GNU_SOURCE
> #include <stdio.h>
> #include <syscall.h>
> #include <stdbool.h>
> #include <unistd.h>
> #include <fcntl.h>
> #include <stdlib.h>
> #include <errno.h>
> #include <sys/wait.h>
>
> #ifndef AT_EMPTY_PATH
> #define AT_EMPTY_PATH 0x1000 /* Allow empty relative */
> #endif
>
> #ifndef AT_EXEC_REASONABLE_COMM
> #define AT_EXEC_REASONABLE_COMM 0x200
> #endif
>
> int main(int argc, char *argv[])
> {
> pid_t pid;
> int status;
> bool wants_reasonable_comm = argc > 1;
>
> pid = fork();
> if (pid < 0) {
> perror("fork");
> exit(1);
> }
>
> if (pid == 0) {
> int fd;
> long ret, flags;
>
> fd = open("./catprocselfcomm", O_PATH);
> if (fd < 0) {
> perror("open catprocselfname");
> exit(1);
> }
>
> flags = AT_EMPTY_PATH;
> if (wants_reasonable_comm)
> flags |= AT_EXEC_REASONABLE_COMM;
> syscall(__NR_execveat, fd, "", (char *[]){"./catprocselfcomm", NULL}, NULL, flags);
Yes, that one is the actually palatable solution that I mentioned during
the session and not the questionable version where the path argument is
overloaded by the flag.
Please add a:
Link: https://github.com/uapi-group/kernel-features#set-comm-field-before-exec
to the commit where this originated from.
> fprintf(stderr, "execveat failed %d\n", errno);
> exit(1);
> }
>
> if (waitpid(pid, &status, 0) != pid) {
> fprintf(stderr, "wrong child\n");
> exit(1);
> }
>
> if (!WIFEXITED(status)) {
> fprintf(stderr, "exit status %x\n", status);
> exit(1);
> }
>
> if (WEXITSTATUS(status) != 0) {
> fprintf(stderr, "child failed\n");
> exit(1);
> }
>
> return 0;
> }
> ---
> fs/exec.c | 22 ++++++++++++++++++----
> include/uapi/linux/fcntl.h | 3 ++-
> 2 files changed, 20 insertions(+), 5 deletions(-)
>
> diff --git a/fs/exec.c b/fs/exec.c
> index dad402d55681..36434feddb7b 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -1569,11 +1569,15 @@ static void free_bprm(struct linux_binprm *bprm)
> kfree(bprm);
> }
>
> -static struct linux_binprm *alloc_bprm(int fd, struct filename *filename, int flags)
> +static struct linux_binprm *alloc_bprm(int fd, struct filename *filename,
> + struct user_arg_ptr argv, int flags)
> {
> struct linux_binprm *bprm;
> struct file *file;
> int retval = -ENOMEM;
> + bool needs_comm_fixup = flags & AT_EXEC_REASONABLE_COMM;
> +
> + flags &= ~AT_EXEC_REASONABLE_COMM;
>
> file = do_open_execat(fd, filename, flags);
> if (IS_ERR(file))
> @@ -1590,11 +1594,20 @@ static struct linux_binprm *alloc_bprm(int fd, struct filename *filename, int fl
> if (fd == AT_FDCWD || filename->name[0] == '/') {
> bprm->filename = filename->name;
> } else {
> - if (filename->name[0] == '\0')
> + if (needs_comm_fixup) {
> + const char __user *p = get_user_arg_ptr(argv, 0);
> +
> + retval = -EFAULT;
> + if (!p)
> + goto out_free;
> +
> + bprm->fdpath = strndup_user(p, MAX_ARG_STRLEN);
> + } else if (filename->name[0] == '\0')
> bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d", fd);
> else
> bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d/%s",
> fd, filename->name);
> + retval = -ENOMEM;
> if (!bprm->fdpath)
> goto out_free;
>
> @@ -1969,7 +1982,7 @@ static int do_execveat_common(int fd, struct filename *filename,
> * further execve() calls fail. */
> current->flags &= ~PF_NPROC_EXCEEDED;
>
> - bprm = alloc_bprm(fd, filename, flags);
> + bprm = alloc_bprm(fd, filename, argv, flags);
> if (IS_ERR(bprm)) {
> retval = PTR_ERR(bprm);
> goto out_ret;
> @@ -2034,6 +2047,7 @@ int kernel_execve(const char *kernel_filename,
> struct linux_binprm *bprm;
> int fd = AT_FDCWD;
> int retval;
> + struct user_arg_ptr user_argv = {};
>
> /* It is non-sense for kernel threads to call execve */
> if (WARN_ON_ONCE(current->flags & PF_KTHREAD))
> @@ -2043,7 +2057,7 @@ int kernel_execve(const char *kernel_filename,
> if (IS_ERR(filename))
> return PTR_ERR(filename);
>
> - bprm = alloc_bprm(fd, filename, 0);
> + bprm = alloc_bprm(fd, filename, user_argv, 0);
> if (IS_ERR(bprm)) {
> retval = PTR_ERR(bprm);
> goto out_ret;
> diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
> index 87e2dec79fea..7178d1e4a3de 100644
> --- a/include/uapi/linux/fcntl.h
> +++ b/include/uapi/linux/fcntl.h
> @@ -100,7 +100,8 @@
> /* Reserved for per-syscall flags 0xff. */
> #define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic
> links. */
> -/* Reserved for per-syscall flags 0x200 */
> +#define AT_EXEC_REASONABLE_COMM 0x200 /* Use argv[0] for comm in
> + execveat */
> #define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */
> #define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount
> traversal. */
>
> base-commit: baeb9a7d8b60b021d907127509c44507539c15e5
> --
> 2.34.1
>
Powered by blists - more mailing lists