[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20240924141001.116584-1-tycho@tycho.pizza>
Date: Tue, 24 Sep 2024 08:10:01 -0600
From: Tycho Andersen <tycho@...ho.pizza>
To: Alexander Viro <viro@...iv.linux.org.uk>,
Christian Brauner <brauner@...nel.org>,
Jan Kara <jack@...e.cz>,
Eric Biederman <ebiederm@...ssion.com>,
Kees Cook <kees@...nel.org>,
Jeff Layton <jlayton@...nel.org>,
Chuck Lever <chuck.lever@...cle.com>,
Alexander Aring <alex.aring@...il.com>
Cc: linux-fsdevel@...r.kernel.org,
linux-mm@...ck.org,
linux-kernel@...r.kernel.org,
Tycho Andersen <tycho@...ho.pizza>,
Tycho Andersen <tandersen@...flix.com>,
Zbigniew Jędrzejewski-Szmek <zbyszek@...waw.pl>,
Aleksa Sarai <cyphar@...har.com>
Subject: [RFC] exec: add a flag for "reasonable" execveat() comm
From: Tycho Andersen <tandersen@...flix.com>
Zbigniew mentioned at Linux Plumber's that systemd is interested in
switching to execveat() for service execution, but can't, because the
contents of /proc/pid/comm are the file descriptor which was used,
instead of the path to the binary. This makes the output of tools like
top and ps useless, especially in a world where most fds are opened
CLOEXEC so the number is truly meaningless.
This patch adds an AT_ flag to fix up /proc/pid/comm to instead be the
contents of argv[0], instead of the fdno.
Signed-off-by: Tycho Andersen <tandersen@...flix.com>
Suggested-by: Zbigniew Jędrzejewski-Szmek <zbyszek@...waw.pl>
CC: Aleksa Sarai <cyphar@...har.com>
---
There is some question about what to name the flag; it seems to me that
"everyone wants this" instead of the fdno, but probably "REASONABLE" is not
a good choice.
Also, requiring the arg to alloc_bprm() is a bit ugly: kernel-based execs
will never use this, so they just have to pass an empty thing. We could
introduce a bprm_fixup_comm() to do the munging there, but then the code
paths start to diverge, which is maybe not nice. I left it this way because
this is the smallest patch in terms of size, but I'm happy to change it.
Finally, here is a small set of test programs, I'm happy to turn them into
kselftests if we agree on an API
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
int main(void)
{
int fd;
char buf[128];
fd = open("/proc/self/comm", O_RDONLY);
if (fd < 0) {
perror("open comm");
exit(1);
}
if (read(fd, buf, 128) < 0) {
perror("read");
exit(1);
}
printf("comm: %s", buf);
exit(0);
}
#define _GNU_SOURCE
#include <stdio.h>
#include <syscall.h>
#include <stdbool.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <errno.h>
#include <sys/wait.h>
#ifndef AT_EMPTY_PATH
#define AT_EMPTY_PATH 0x1000 /* Allow empty relative */
#endif
#ifndef AT_EXEC_REASONABLE_COMM
#define AT_EXEC_REASONABLE_COMM 0x200
#endif
int main(int argc, char *argv[])
{
pid_t pid;
int status;
bool wants_reasonable_comm = argc > 1;
pid = fork();
if (pid < 0) {
perror("fork");
exit(1);
}
if (pid == 0) {
int fd;
long ret, flags;
fd = open("./catprocselfcomm", O_PATH);
if (fd < 0) {
perror("open catprocselfname");
exit(1);
}
flags = AT_EMPTY_PATH;
if (wants_reasonable_comm)
flags |= AT_EXEC_REASONABLE_COMM;
syscall(__NR_execveat, fd, "", (char *[]){"./catprocselfcomm", NULL}, NULL, flags);
fprintf(stderr, "execveat failed %d\n", errno);
exit(1);
}
if (waitpid(pid, &status, 0) != pid) {
fprintf(stderr, "wrong child\n");
exit(1);
}
if (!WIFEXITED(status)) {
fprintf(stderr, "exit status %x\n", status);
exit(1);
}
if (WEXITSTATUS(status) != 0) {
fprintf(stderr, "child failed\n");
exit(1);
}
return 0;
}
---
fs/exec.c | 22 ++++++++++++++++++----
include/uapi/linux/fcntl.h | 3 ++-
2 files changed, 20 insertions(+), 5 deletions(-)
diff --git a/fs/exec.c b/fs/exec.c
index dad402d55681..36434feddb7b 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1569,11 +1569,15 @@ static void free_bprm(struct linux_binprm *bprm)
kfree(bprm);
}
-static struct linux_binprm *alloc_bprm(int fd, struct filename *filename, int flags)
+static struct linux_binprm *alloc_bprm(int fd, struct filename *filename,
+ struct user_arg_ptr argv, int flags)
{
struct linux_binprm *bprm;
struct file *file;
int retval = -ENOMEM;
+ bool needs_comm_fixup = flags & AT_EXEC_REASONABLE_COMM;
+
+ flags &= ~AT_EXEC_REASONABLE_COMM;
file = do_open_execat(fd, filename, flags);
if (IS_ERR(file))
@@ -1590,11 +1594,20 @@ static struct linux_binprm *alloc_bprm(int fd, struct filename *filename, int fl
if (fd == AT_FDCWD || filename->name[0] == '/') {
bprm->filename = filename->name;
} else {
- if (filename->name[0] == '\0')
+ if (needs_comm_fixup) {
+ const char __user *p = get_user_arg_ptr(argv, 0);
+
+ retval = -EFAULT;
+ if (!p)
+ goto out_free;
+
+ bprm->fdpath = strndup_user(p, MAX_ARG_STRLEN);
+ } else if (filename->name[0] == '\0')
bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d", fd);
else
bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d/%s",
fd, filename->name);
+ retval = -ENOMEM;
if (!bprm->fdpath)
goto out_free;
@@ -1969,7 +1982,7 @@ static int do_execveat_common(int fd, struct filename *filename,
* further execve() calls fail. */
current->flags &= ~PF_NPROC_EXCEEDED;
- bprm = alloc_bprm(fd, filename, flags);
+ bprm = alloc_bprm(fd, filename, argv, flags);
if (IS_ERR(bprm)) {
retval = PTR_ERR(bprm);
goto out_ret;
@@ -2034,6 +2047,7 @@ int kernel_execve(const char *kernel_filename,
struct linux_binprm *bprm;
int fd = AT_FDCWD;
int retval;
+ struct user_arg_ptr user_argv = {};
/* It is non-sense for kernel threads to call execve */
if (WARN_ON_ONCE(current->flags & PF_KTHREAD))
@@ -2043,7 +2057,7 @@ int kernel_execve(const char *kernel_filename,
if (IS_ERR(filename))
return PTR_ERR(filename);
- bprm = alloc_bprm(fd, filename, 0);
+ bprm = alloc_bprm(fd, filename, user_argv, 0);
if (IS_ERR(bprm)) {
retval = PTR_ERR(bprm);
goto out_ret;
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index 87e2dec79fea..7178d1e4a3de 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -100,7 +100,8 @@
/* Reserved for per-syscall flags 0xff. */
#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic
links. */
-/* Reserved for per-syscall flags 0x200 */
+#define AT_EXEC_REASONABLE_COMM 0x200 /* Use argv[0] for comm in
+ execveat */
#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */
#define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount
traversal. */
base-commit: baeb9a7d8b60b021d907127509c44507539c15e5
--
2.34.1
Powered by blists - more mailing lists