lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <6a45d8bf01c241a9a87c8f71e83a0c79@AcuMS.aculab.com>
Date: Wed, 9 Oct 2024 14:01:17 +0000
From: David Laight <David.Laight@...LAB.COM>
To: "'luca.boccassi@...il.com'" <luca.boccassi@...il.com>,
	"linux-fsdevel@...r.kernel.org" <linux-fsdevel@...r.kernel.org>
CC: "christian@...uner.io" <christian@...uner.io>,
	"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
	"oleg@...hat.com" <oleg@...hat.com>
Subject: RE: [PATCH v10] pidfd: add ioctl to retrieve pid info

From: luca.boccassi@...il.com
> Sent: 08 October 2024 14:20
> 
> A common pattern when using pid fds is having to get information
> about the process, which currently requires /proc being mounted,
> resolving the fd to a pid, and then do manual string parsing of
> /proc/N/status and friends. This needs to be reimplemented over
> and over in all userspace projects (e.g.: I have reimplemented
> resolving in systemd, dbus, dbus-daemon, polkit so far), and
> requires additional care in checking that the fd is still valid
> after having parsed the data, to avoid races.
> 
> Having a programmatic API that can be used directly removes all
> these requirements, including having /proc mounted.
> 
> As discussed at LPC24, add an ioctl with an extensible struct
> so that more parameters can be added later if needed. Start with
> returning pid/tgid/ppid and creds unconditionally, and cgroupid
> optionally.
...
>  fs/pidfs.c                                    | 91 ++++++++++++++++++-
>  include/uapi/linux/pidfd.h                    | 31 +++++++
>  .../testing/selftests/pidfd/pidfd_open_test.c | 78 +++++++++++++++-
>  3 files changed, 196 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/pidfs.c b/fs/pidfs.c
> index 80675b6bf884..3b24124a4f41 100644
> --- a/fs/pidfs.c
> +++ b/fs/pidfs.c
> @@ -2,6 +2,7 @@
>  #include <linux/anon_inodes.h>
>  #include <linux/file.h>
>  #include <linux/fs.h>
> +#include <linux/cgroup.h>
>  #include <linux/magic.h>
>  #include <linux/mount.h>
>  #include <linux/pid.h>
> @@ -114,6 +115,86 @@ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
>  	return poll_flags;
>  }
> 
> +static long pidfd_info(struct task_struct *task, unsigned int cmd, unsigned long arg)
> +{
> +	struct pidfd_info __user *uinfo = (struct pidfd_info __user *)arg;
> +	size_t usize = _IOC_SIZE(cmd);
> +	struct pidfd_info kinfo = {};
> +	struct user_namespace *user_ns;
> +	const struct cred *c;
> +	__u64 request_mask;
> +
> +	if (!uinfo)
> +		return -EINVAL;
> +	if (usize < sizeof(struct pidfd_info))
> +		return -EINVAL; /* First version, no smaller struct possible */

That is the wrong test, if/when someone allocates an extra field it will
be wrong.
You could use an explicit 64 (the size of the initial version) or something
that ends up being 8.

> +
> +	if (copy_from_user(&request_mask, &uinfo->request_mask, sizeof(request_mask)))
> +		return -EFAULT;

You should probably use get_user() here.

> +
> +	c = get_task_cred(task);
> +	if (!c)
> +		return -ESRCH;
> +
> +	/* Unconditionally return identifiers and credentials, the rest only on request */
> +
> +	user_ns = current_user_ns();
> +	kinfo.ruid = from_kuid_munged(user_ns, c->uid);
> +	kinfo.rgid = from_kgid_munged(user_ns, c->gid);
> +	kinfo.euid = from_kuid_munged(user_ns, c->euid);
> +	kinfo.egid = from_kgid_munged(user_ns, c->egid);
> +	kinfo.suid = from_kuid_munged(user_ns, c->suid);
> +	kinfo.sgid = from_kgid_munged(user_ns, c->sgid);
> +	kinfo.fsuid = from_kuid_munged(user_ns, c->fsuid);
> +	kinfo.fsgid = from_kgid_munged(user_ns, c->fsgid);
> +	put_cred(c);
> +
> +#ifdef CONFIG_CGROUPS
> +	if (request_mask & PIDFD_INFO_CGROUPID) {
> +		/*
> +		 * The cgroup id cannot be retrieved anymore after the task has exited
> +		 * (even if it has not been reaped yet), contrary to other fields. Set
> +		 * the flag only if we can still access it. */
> +		struct cgroup *cgrp;
> +
> +		guard(rcu)();
> +		cgrp = task_cgroup(task, pids_cgrp_id);
> +		if (cgrp) {
> +			kinfo.cgroupid = cgroup_id(cgrp);
> +			kinfo.request_mask |= PIDFD_INFO_CGROUPID;
> +		}
> +	}
> +#endif
> +
> +	/*
> +	 * Copy pid/tgid last, to reduce the chances the information might be
> +	 * stale. Note that it is not possible to ensure it will be valid as the
> +	 * task might return as soon as the copy_to_user finishes, but that's ok
> +	 * and userspace expects that might happen and can act accordingly, so
> +	 * this is just best-effort. What we can do however is checking that all
> +	 * the fields are set correctly, or return ESRCH to avoid providing
> +	 * incomplete information. */
> +
> +	kinfo.ppid = task_ppid_nr_ns(task, NULL);
> +	kinfo.tgid = task_tgid_vnr(task);
> +	kinfo.pid = task_pid_vnr(task);
> +
> +	if (kinfo.pid == 0 || kinfo.tgid == 0 || (kinfo.ppid == 0 && kinfo.pid != 1))
> +		return -ESRCH;
> +
> +	/*
> +	 * If userspace and the kernel have the same struct size it can just
> +	 * be copied. If userspace provides an older struct, only the bits that
> +	 * userspace knows about will be copied. If userspace provides a new
> +	 * struct, only the bits that the kernel knows about will be copied and
> +	 * the size value will be set to the size the kernel knows about.

There is no 'size value'.
I'm not 100% sure but I think you should zero the rest of the user structure.

Also if more 'unconditional' fields are added the application won't be
able to easily tell if they are unsupported or just happen to be zero.
So maybe for consistency there should be PIDFD_INFO_xxx bits for the uids
and pids that indicate that the values are valid - even if they are always
set in the response.

> +	 */
> +	if (copy_to_user(uinfo, &kinfo, min(usize, sizeof(kinfo))))
> +		return -EFAULT;
> +
> +	return 0;
> +}
> +
>  static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
>  {
>  	struct task_struct *task __free(put_task) = NULL;
> @@ -122,13 +203,17 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
>  	struct ns_common *ns_common = NULL;
>  	struct pid_namespace *pid_ns;
> 
> -	if (arg)
> -		return -EINVAL;
> -
>  	task = get_pid_task(pid, PIDTYPE_PID);
>  	if (!task)
>  		return -ESRCH;
> 
> +	/* Extensible IOCTL that does not open namespace FDs, take a shortcut */
> +	if (_IOC_NR(cmd) == _IOC_NR(PIDFD_GET_INFO))
> +		return pidfd_info(task, cmd, arg);
> +
> +	if (arg)
> +		return -EINVAL;
> +
>  	scoped_guard(task_lock, task) {
>  		nsp = task->nsproxy;
>  		if (nsp)
> diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h
> index 565fc0629fff..5501cd808517 100644
> --- a/include/uapi/linux/pidfd.h
> +++ b/include/uapi/linux/pidfd.h
> @@ -16,6 +16,36 @@
>  #define PIDFD_SIGNAL_THREAD_GROUP	(1UL << 1)
>  #define PIDFD_SIGNAL_PROCESS_GROUP	(1UL << 2)
> 
> +/* Flags for pidfd_info. */
> +#define PIDFD_INFO_CGROUPID		(1UL << 0)
> +
> +struct pidfd_info {
> +	/* Let userspace request expensive stuff explictly, and let the kernel
> +	 * indicate whether it knows about it, and/or whether it can still retrieve
> +	 * it (e.g.: the cgroup id is not available after the task has exited). */
> +	__u64 request_mask;
> +	/*
> +	 * The information contained in the following fields might be stale at the
> +	 * time it is received, as the target process might have exited as soon as
> +	 * the IOCTL was processed, and there is no way to avoid that. However, it
> +	 * is guaranteed that if the call was successful, then the information was
> +	 * correct and referred to the intended process at the time the work was
> +	 * performed. */
> +	__u64 cgroupid;
> +	__u32 pid;
> +	__u32 tgid;
> +	__u32 ppid;
> +	__u32 ruid;
> +	__u32 rgid;
> +	__u32 euid;
> +	__u32 egid;
> +	__u32 suid;
> +	__u32 sgid;
> +	__u32 fsuid;
> +	__u32 fsgid;
> +	__u32 spare0[1];

That should be __u32 pad;
Although it may not even be needed.
It shouldn't matter if the ioctl from a 32bit app is 4 bytes short.
Any extension can safely add extra fields at the end.

	David

> +};
> +
>  #define PIDFS_IOCTL_MAGIC 0xFF
> 
>  #define PIDFD_GET_CGROUP_NAMESPACE            _IO(PIDFS_IOCTL_MAGIC, 1)
> @@ -28,5 +58,6 @@
>  #define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8)
>  #define PIDFD_GET_USER_NAMESPACE              _IO(PIDFS_IOCTL_MAGIC, 9)
>  #define PIDFD_GET_UTS_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 10)
> +#define PIDFD_GET_INFO                        _IOWR(PIDFS_IOCTL_MAGIC, 11, struct pidfd_info)
> 
>  #endif /* _UAPI_LINUX_PIDFD_H */
> diff --git a/tools/testing/selftests/pidfd/pidfd_open_test.c
> b/tools/testing/selftests/pidfd/pidfd_open_test.c
> index c62564c264b1..036e517246a3 100644
> --- a/tools/testing/selftests/pidfd/pidfd_open_test.c
> +++ b/tools/testing/selftests/pidfd/pidfd_open_test.c
> @@ -13,6 +13,7 @@
>  #include <stdlib.h>
>  #include <string.h>
>  #include <syscall.h>
> +#include <sys/ioctl.h>
>  #include <sys/mount.h>
>  #include <sys/prctl.h>
>  #include <sys/wait.h>
> @@ -21,6 +22,32 @@
>  #include "pidfd.h"
>  #include "../kselftest.h"
> 

I'd comment that the definitions below are needed to compile
with old userspace.
(Do the selftests actually need that?)

	David

> +#ifndef PIDFS_IOCTL_MAGIC
> +#define PIDFS_IOCTL_MAGIC 0xFF
> +#endif
> +
> +#ifndef PIDFD_GET_INFO
> +#define PIDFD_GET_INFO _IOWR(PIDFS_IOCTL_MAGIC, 11, struct pidfd_info)
> +#define PIDFD_INFO_CGROUPID		(1UL << 0)
> +
> +struct pidfd_info {
> +	__u64 request_mask;
> +	__u64 cgroupid;
> +	__u32 pid;
> +	__u32 tgid;
> +	__u32 ppid;
> +	__u32 ruid;
> +	__u32 rgid;
> +	__u32 euid;
> +	__u32 egid;
> +	__u32 suid;
> +	__u32 sgid;
> +	__u32 fsuid;
> +	__u32 fsgid;
> +	__u32 spare0[1];
> +};
> +#endif
> +
>  static int safe_int(const char *numstr, int *converted)
>  {
>  	char *err = NULL;
> @@ -120,10 +147,13 @@ static pid_t get_pid_from_fdinfo_file(int pidfd, const char *key, size_t keylen)
> 
>  int main(int argc, char **argv)
>  {
> +	struct pidfd_info info = {
> +		.request_mask = PIDFD_INFO_CGROUPID,
> +	};
>  	int pidfd = -1, ret = 1;
>  	pid_t pid;
> 
> -	ksft_set_plan(3);
> +	ksft_set_plan(4);
> 
>  	pidfd = sys_pidfd_open(-1, 0);
>  	if (pidfd >= 0) {
> @@ -153,6 +183,52 @@ int main(int argc, char **argv)
>  	pid = get_pid_from_fdinfo_file(pidfd, "Pid:", sizeof("Pid:") - 1);
>  	ksft_print_msg("pidfd %d refers to process with pid %d\n", pidfd, pid);
> 
> +	if (ioctl(pidfd, PIDFD_GET_INFO, &info) < 0) {
> +		ksft_print_msg("%s - failed to get info from pidfd\n", strerror(errno));
> +		goto on_error;
> +	}
> +	if (info.pid != pid) {
> +		ksft_print_msg("pid from fdinfo file %d does not match pid from ioctl %d\n",
> +			       pid, info.pid);
> +		goto on_error;
> +	}
> +	if (info.ppid != getppid()) {
> +		ksft_print_msg("ppid %d does not match ppid from ioctl %d\n",
> +			       pid, info.pid);
> +		goto on_error;
> +	}
> +	if (info.ruid != getuid()) {
> +		ksft_print_msg("uid %d does not match uid from ioctl %d\n",
> +			       getuid(), info.ruid);
> +		goto on_error;
> +	}
> +	if (info.rgid != getgid()) {
> +		ksft_print_msg("gid %d does not match gid from ioctl %d\n",
> +			       getgid(), info.rgid);
> +		goto on_error;
> +	}
> +	if (info.euid != geteuid()) {
> +		ksft_print_msg("euid %d does not match euid from ioctl %d\n",
> +			       geteuid(), info.euid);
> +		goto on_error;
> +	}
> +	if (info.egid != getegid()) {
> +		ksft_print_msg("egid %d does not match egid from ioctl %d\n",
> +			       getegid(), info.egid);
> +		goto on_error;
> +	}
> +	if (info.suid != geteuid()) {
> +		ksft_print_msg("suid %d does not match suid from ioctl %d\n",
> +			       geteuid(), info.suid);
> +		goto on_error;
> +	}
> +	if (info.sgid != getegid()) {
> +		ksft_print_msg("sgid %d does not match sgid from ioctl %d\n",
> +			       getegid(), info.sgid);
> +		goto on_error;
> +	}
> +	ksft_test_result_pass("get info from pidfd test: passed\n");
> +
>  	ret = 0;
> 
>  on_error:
> 
> base-commit: 8cf0b93919e13d1e8d4466eb4080a4c4d9d66d7b
> --
> 2.45.2
> 

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ