[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250509-work-coredump-socket-v5-5-23c5b14df1bc@kernel.org>
Date: Fri, 09 May 2025 12:25:37 +0200
From: Christian Brauner <brauner@...nel.org>
To: linux-fsdevel@...r.kernel.org, Jann Horn <jannh@...gle.com>,
Daniel Borkmann <daniel@...earbox.net>,
Kuniyuki Iwashima <kuniyu@...zon.com>
Cc: Eric Dumazet <edumazet@...gle.com>, Oleg Nesterov <oleg@...hat.com>,
"David S. Miller" <davem@...emloft.net>,
Alexander Viro <viro@...iv.linux.org.uk>,
Daan De Meyer <daan.j.demeyer@...il.com>,
David Rheinsberg <david@...dahead.eu>, Jakub Kicinski <kuba@...nel.org>,
Jan Kara <jack@...e.cz>, Lennart Poettering <lennart@...ttering.net>,
Luca Boccassi <bluca@...ian.org>, Mike Yuan <me@...dnzj.com>,
Paolo Abeni <pabeni@...hat.com>, Simon Horman <horms@...nel.org>,
Zbigniew Jędrzejewski-Szmek <zbyszek@...waw.pl>,
linux-kernel@...r.kernel.org, netdev@...r.kernel.org,
linux-security-module@...r.kernel.org,
Christian Brauner <brauner@...nel.org>,
Alexander Mikhalitsyn <alexander@...alicyn.com>
Subject: [PATCH v5 5/9] pidfs, coredump: add PIDFD_INFO_COREDUMP
Extend the PIDFD_GET_INFO ioctl() with the new PIDFD_INFO_COREDUMP
mask flag. This adds the fields @coredump_mask and @coredump_cookie to
struct pidfd_info.
When a task coredumps the kernel will provide the following information
to userspace in @coredump_mask:
* PIDFD_COREDUMPED is raised if the task did actually coredump.
* PIDFD_COREDUMP_SKIP is raised if the task skipped coredumping (e.g.,
undumpable).
* PIDFD_COREDUMP_USER is raised if this is a regular coredump and
doesn't need special care by the coredump server.
* PIDFD_COREDUMP_ROOT is raised if the generated coredump should be
treated as sensitive and the coredump server should restrict access to
the generated coredump to sufficiently privileged users.
If userspace uses the coredump socket to process coredumps it needs to
be able to discern connections from the kernel from connections from
userspace (e.g., Python generating its own coredumps and forwarding
them to systemd). The @coredump_cookie extension uses the SO_COOKIE of
the new connection. This allows userspace to validate that the
connection has been made from the kernel by a crashing task:
fd_coredump = accept4(fd_socket, NULL, NULL, SOCK_CLOEXEC);
getsockopt(fd_coredump, SOL_SOCKET, SO_PEERPIDFD, &fd_peer_pidfd, &fd_peer_pidfd_len);
struct pidfd_info info = {
.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP,
};
ioctl(fd_peer_pidfd, PIDFD_GET_INFO, &info);
/* Refuse connections that aren't from a crashing task. */
if (!(info.mask & PIDFD_INFO_COREDUMP) || !(info.coredump_mask & PIDFD_COREDUMPED) )
close(fd_coredump);
/*
* Make sure that the coredump cookie matches the connection cookie.
* If they don't it's not the coredump connection from the kernel.
* We'll get another connection request in a bit.
*/
getsockopt(fd_coredump, SOL_SOCKET, SO_COOKIE, &peer_cookie, &peer_cookie_len);
if (!info.coredump_cookie || (info.coredump_cookie != peer_cookie))
close(fd_coredump);
The kernel guarantees that by the time the connection is made all
PIDFD_INFO_COREDUMP info is available.
Signed-off-by: Christian Brauner <brauner@...nel.org>
---
fs/coredump.c | 37 ++++++++++++++++++++++-
fs/pidfs.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++
include/linux/net.h | 1 +
include/linux/pidfs.h | 10 +++++++
include/uapi/linux/pidfd.h | 22 ++++++++++++++
net/unix/af_unix.c | 7 +++++
6 files changed, 151 insertions(+), 1 deletion(-)
diff --git a/fs/coredump.c b/fs/coredump.c
index d3599d671c51..ff42688ec9ac 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -46,7 +46,9 @@
#include <linux/pidfs.h>
#include <linux/net.h>
#include <linux/socket.h>
+#include <net/af_unix.h>
#include <net/net_namespace.h>
+#include <net/sock.h>
#include <uapi/linux/pidfd.h>
#include <uapi/linux/un.h>
@@ -599,6 +601,8 @@ static int umh_coredump_setup(struct subprocess_info *info, struct cred *new)
if (IS_ERR(pidfs_file))
return PTR_ERR(pidfs_file);
+ pidfs_coredump(cp);
+
/*
* Usermode helpers are childen of either
* system_unbound_wq or of kthreadd. So we know that
@@ -899,15 +903,46 @@ void do_coredump(const kernel_siginfo_t *siginfo)
goto close_fail;
}
+ /*
+ * Set the thread-group leader pid which is used for the
+ * peer credentials during connect() below. Then
+ * immediately register it in pidfs...
+ */
+ cprm.pid = task_tgid(current);
+ retval = pidfs_register_pid(cprm.pid);
+ if (retval) {
+ sock_release(socket);
+ goto close_fail;
+ }
+
+ /*
+ * ... and set the coredump information so userspace
+ * has it available after connect()...
+ */
+ pidfs_coredump(&cprm);
+
+ /*
+ * ... On connect() the peer credentials are recorded
+ * and @cprm.pid registered in pidfs...
+ */
retval = kernel_connect(socket,
(struct sockaddr *)(&coredump_unix_socket),
- COREDUMP_UNIX_SOCKET_ADDR_SIZE, O_NONBLOCK);
+ COREDUMP_UNIX_SOCKET_ADDR_SIZE, O_NONBLOCK |
+ SOCK_COREDUMP);
+
+ /* ... So we can safely put our pidfs reference now... */
+ pidfs_put_pid(cprm.pid);
+
if (retval) {
if (retval == -EAGAIN)
coredump_report_failure("Skipping as coredump socket connection %s couldn't complete immediately", cn.corename);
goto close_fail;
}
+ /* ... and validate that @sk_peer_pid matches @cprm.pid. */
+ if (WARN_ON_ONCE(unix_peer(socket->sk)->sk_peer_pid != cprm.pid))
+ goto close_fail;
+
cprm.limit = RLIM_INFINITY;
cprm.file = no_free_ptr(file);
#else
diff --git a/fs/pidfs.c b/fs/pidfs.c
index 3b39e471840b..848a12c8f9cf 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -20,6 +20,7 @@
#include <linux/time_namespace.h>
#include <linux/utsname.h>
#include <net/net_namespace.h>
+#include <linux/coredump.h>
#include "internal.h"
#include "mount.h"
@@ -33,6 +34,8 @@ static struct kmem_cache *pidfs_cachep __ro_after_init;
struct pidfs_exit_info {
__u64 cgroupid;
__s32 exit_code;
+ __u32 coredump_mask;
+ __u64 coredump_cookie;
};
struct pidfs_inode {
@@ -240,6 +243,22 @@ static inline bool pid_in_current_pidns(const struct pid *pid)
return false;
}
+static __u32 pidfs_coredump_mask(unsigned long mm_flags)
+{
+ switch (__get_dumpable(mm_flags)) {
+ case SUID_DUMP_USER:
+ return PIDFD_COREDUMP_USER;
+ case SUID_DUMP_ROOT:
+ return PIDFD_COREDUMP_ROOT;
+ case SUID_DUMP_DISABLE:
+ return PIDFD_COREDUMP_SKIP;
+ default:
+ WARN_ON_ONCE(true);
+ }
+
+ return 0;
+}
+
static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
{
struct pidfd_info __user *uinfo = (struct pidfd_info __user *)arg;
@@ -280,6 +299,12 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
}
}
+ if (mask & PIDFD_INFO_COREDUMP) {
+ kinfo.mask |= PIDFD_INFO_COREDUMP;
+ kinfo.coredump_cookie = READ_ONCE(pidfs_i(inode)->__pei.coredump_cookie);
+ kinfo.coredump_mask = READ_ONCE(pidfs_i(inode)->__pei.coredump_mask);
+ }
+
task = get_pid_task(pid, PIDTYPE_PID);
if (!task) {
/*
@@ -296,6 +321,15 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
if (!c)
return -ESRCH;
+ if (!(kinfo.mask & PIDFD_INFO_COREDUMP)) {
+ task_lock(task);
+ if (task->mm) {
+ kinfo.coredump_cookie = READ_ONCE(pidfs_i(inode)->__pei.coredump_cookie);
+ kinfo.coredump_mask = pidfs_coredump_mask(task->mm->flags);
+ }
+ task_unlock(task);
+ }
+
/* Unconditionally return identifiers and credentials, the rest only on request */
user_ns = current_user_ns();
@@ -559,6 +593,47 @@ void pidfs_exit(struct task_struct *tsk)
}
}
+#if defined(CONFIG_COREDUMP) && defined(CONFIG_UNIX)
+void pidfs_coredump_cookie(struct pid *pid, u64 coredump_cookie)
+{
+ struct pidfs_exit_info *exit_info;
+ struct dentry *dentry = pid->stashed;
+ struct inode *inode;
+
+ if (WARN_ON_ONCE(!dentry))
+ return;
+
+ inode = d_inode(dentry);
+ exit_info = &pidfs_i(inode)->__pei;
+ smp_store_release(&exit_info->coredump_cookie, coredump_cookie);
+}
+#endif
+
+#ifdef CONFIG_COREDUMP
+void pidfs_coredump(const struct coredump_params *cprm)
+{
+ struct pid *pid = cprm->pid;
+ struct pidfs_exit_info *exit_info;
+ struct dentry *dentry;
+ struct inode *inode;
+ __u32 coredump_mask = 0;
+
+ dentry = pid->stashed;
+ if (WARN_ON_ONCE(!dentry))
+ return;
+
+ inode = d_inode(dentry);
+ exit_info = &pidfs_i(inode)->__pei;
+ /* Note how we were coredumped. */
+ coredump_mask = pidfs_coredump_mask(cprm->mm_flags);
+ /* Note that we actually did coredump. */
+ coredump_mask |= PIDFD_COREDUMPED;
+ /* If coredumping is set to skip we should never end up here. */
+ VFS_WARN_ON_ONCE(coredump_mask & PIDFD_COREDUMP_SKIP);
+ smp_store_release(&exit_info->coredump_mask, coredump_mask);
+}
+#endif
+
static struct vfsmount *pidfs_mnt __ro_after_init;
/*
diff --git a/include/linux/net.h b/include/linux/net.h
index 0ff950eecc6b..139c85d0f2ea 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -81,6 +81,7 @@ enum sock_type {
#ifndef SOCK_NONBLOCK
#define SOCK_NONBLOCK O_NONBLOCK
#endif
+#define SOCK_COREDUMP O_NOCTTY
#endif /* ARCH_HAS_SOCKET_TYPES */
diff --git a/include/linux/pidfs.h b/include/linux/pidfs.h
index 2676890c4d0d..497997bc5e34 100644
--- a/include/linux/pidfs.h
+++ b/include/linux/pidfs.h
@@ -2,11 +2,21 @@
#ifndef _LINUX_PID_FS_H
#define _LINUX_PID_FS_H
+struct coredump_params;
+
struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags);
void __init pidfs_init(void);
void pidfs_add_pid(struct pid *pid);
void pidfs_remove_pid(struct pid *pid);
void pidfs_exit(struct task_struct *tsk);
+#ifdef CONFIG_COREDUMP
+void pidfs_coredump(const struct coredump_params *cprm);
+#endif
+#if defined(CONFIG_COREDUMP) && defined(CONFIG_UNIX)
+void pidfs_coredump_cookie(struct pid *pid, u64 coredump_cookie);
+#elif defined(CONFIG_UNIX)
+static inline void pidfs_coredump_cookie(struct pid *pid, u64 coredump_cookie) { }
+#endif
extern const struct dentry_operations pidfs_dentry_operations;
int pidfs_register_pid(struct pid *pid);
void pidfs_get_pid(struct pid *pid);
diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h
index 8c1511edd0e9..69267c5ae6d0 100644
--- a/include/uapi/linux/pidfd.h
+++ b/include/uapi/linux/pidfd.h
@@ -25,9 +25,28 @@
#define PIDFD_INFO_CREDS (1UL << 1) /* Always returned, even if not requested */
#define PIDFD_INFO_CGROUPID (1UL << 2) /* Always returned if available, even if not requested */
#define PIDFD_INFO_EXIT (1UL << 3) /* Only returned if requested. */
+#define PIDFD_INFO_COREDUMP (1UL << 4) /* Only returned if requested. */
#define PIDFD_INFO_SIZE_VER0 64 /* sizeof first published struct */
+/*
+ * Values for @coredump_mask in pidfd_info.
+ * Only valid if PIDFD_INFO_COREDUMP is set in @mask.
+ *
+ * Note, the @PIDFD_COREDUMP_ROOT flag indicates that the generated
+ * coredump should be treated as sensitive and access should only be
+ * granted to privileged users.
+ *
+ * If the coredump AF_UNIX socket is used for processing coredumps
+ * @coredump_cookie will be set to the socket SO_COOKIE of the receiver's
+ * client socket. This allows the coredump handler to detect whether an
+ * incoming coredump connection was initiated from the crashing task.
+ */
+#define PIDFD_COREDUMPED (1U << 0) /* Did crash and... */
+#define PIDFD_COREDUMP_SKIP (1U << 1) /* coredumping generation was skipped. */
+#define PIDFD_COREDUMP_USER (1U << 2) /* coredump was done as the user. */
+#define PIDFD_COREDUMP_ROOT (1U << 3) /* coredump was done as root. */
+
/*
* The concept of process and threads in userland and the kernel is a confusing
* one - within the kernel every thread is a 'task' with its own individual PID,
@@ -92,6 +111,9 @@ struct pidfd_info {
__u32 fsuid;
__u32 fsgid;
__s32 exit_code;
+ __u32 coredump_mask;
+ __u32 __spare1;
+ __u64 coredump_cookie;
};
#define PIDFS_IOCTL_MAGIC 0xFF
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 05e5a4737333..294667b572ee 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -101,6 +101,7 @@
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/pidfs.h>
+#include <linux/sock_diag.h>
#include <linux/coredump.h>
#include <net/af_unix.h>
#include <net/net_namespace.h>
@@ -753,6 +754,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
struct unix_peercred {
struct pid *peer_pid;
+ u64 cookie;
const struct cred *peer_cred;
};
@@ -788,6 +790,8 @@ static void drop_peercred(struct unix_peercred *peercred)
static inline void init_peercred(struct sock *sk,
const struct unix_peercred *peercred)
{
+ if (peercred->cookie)
+ pidfs_coredump_cookie(peercred->peer_pid, peercred->cookie);
sk->sk_peer_pid = peercred->peer_pid;
sk->sk_peer_cred = peercred->peer_cred;
}
@@ -1699,6 +1703,9 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
unix_peer(newsk) = sk;
newsk->sk_state = TCP_ESTABLISHED;
newsk->sk_type = sk->sk_type;
+ /* Prepare a new socket cookie for the receiver. */
+ if (flags & SOCK_COREDUMP)
+ peercred.cookie = sock_gen_cookie(newsk);
init_peercred(newsk, &peercred);
newu = unix_sk(newsk);
newu->listener = other;
--
2.47.2
Powered by blists - more mailing lists