[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <94c6ecc5c659b54f62fd7930dd9026a7ebf8e53b.1727644404.git.lorenzo.stoakes@oracle.com>
Date: Mon, 30 Sep 2024 10:22:29 +0100
From: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
To: Christian Brauner <christian@...uner.io>
Cc: Shuah Khan <shuah@...nel.org>,
"Liam R . Howlett" <Liam.Howlett@...cle.com>,
Suren Baghdasaryan <surenb@...gle.com>,
Vlastimil Babka <vbabka@...e.cz>, pedro.falcato@...il.com,
linux-kselftest@...r.kernel.org, linux-mm@...ck.org,
linux-fsdevel@...r.kernel.org, linux-api@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: [RFC PATCH 2/3] pidfd: add PIDFD_SELF sentinel to refer to own process
Add a PIDFD_SELF special sentinel value which can be passed as a pidfd to
indicate that the current process is to be targeted (as this is a process
from the userland perspective, this means from the kernel's perspective the
current thread group leader is to be targeted).
Due to the refactoring of the central __pidfd_get_pid() function we can
implement this functionality centrally, providing the use of this sentinel
in most functionality which utilises pidfds.
We need to explicitly adjust kernel_waitid_prepare() to permit this (though
it wouldn't really make sense to use this there, we provide the ability for
consistency).
We explicitly disallow use of this in setns(), which would otherwise have
required explicit custom handling, as it doesn't make sense to set the
current calling thread to join the namespace of itself.
As the callers of pidfd_get_pid() expect an increased reference count on
the pid we do so in this case even for the current thread group leader, reducing
churn and avoiding any breakage from existing logic which decrements this
reference count.
In the pidfd_send_signal() system call, we can continue to fdput() the
struct fd output by pidfs_to_pid_proc() even if PIDFD_SELF is specified, as
this will be empty and the invocation will be a no-op.
This change implicitly provides PIDFD_SELF support in the waitid(P_PIDFD,
...), process_madvise(), process_mrelease(), pidfd_send_signal(), and
pidfd_getfd() system calls.
Things such as polling a pidfd and general fd operations are not supported;
this strictly provides the sentinel for APIs which explicitly accept a
pidfd.
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
---
include/linux/pid.h | 9 +++---
include/uapi/linux/pidfd.h | 3 ++
kernel/exit.c | 3 +-
kernel/nsproxy.c | 1 +
kernel/pid.c | 63 ++++++++++++++++++++++----------------
5 files changed, 47 insertions(+), 32 deletions(-)
diff --git a/include/linux/pid.h b/include/linux/pid.h
index 68b02eab7509..e95dd7b81ae2 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -77,18 +77,19 @@ struct file;
/**
* __pidfd_get_pid() - Retrieve a pid associated with the specified pidfd.
*
- * @pidfd: The pidfd whose pid we want, or the fd of a /proc/<pid> file if
- * @alloc_proc is also set.
+ * @pidfd: The pidfd whose pid we want, the fd of a /proc/<pid> file if
+ * @allow_proc is also set, or PIDFD_SELF if the pid we require
+ * is the current thread group leader.
* @pin_pid: If set, then the reference counter of the returned pid is
* incremented. If not set, then @fd should be provided to pin the
* pidfd.
* @allow_proc: If set, then an fd of a /proc/<pid> file can be passed instead
* of a pidfd, and this will be used to determine the pid.
* @flags: Output variable, if non-NULL, then the file->f_flags of the
- * pidfd will be set here.
+ * pidfd will be set here. If PIDFD_SELF set, this is zero.
* @fd: Output variable, if non-NULL, then the pidfd reference will
* remain elevated and the caller will need to decrement it
- * themselves.
+ * themselves. If PIDFD_SELF set, this is empty.
*
* Returns: If successful, the pid associated with the pidfd, otherwise an
* error.
diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h
index 565fc0629fff..0bff5276b51d 100644
--- a/include/uapi/linux/pidfd.h
+++ b/include/uapi/linux/pidfd.h
@@ -29,4 +29,7 @@
#define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9)
#define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10)
+#define PIDFD_SELF -200 /* Special sentinel value which can be passed as a pidfd
+ * to refer to the current thread group leader. */
+
#endif /* _UAPI_LINUX_PIDFD_H */
diff --git a/kernel/exit.c b/kernel/exit.c
index 619f0014c33b..fc10eb2b180c 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -71,6 +71,7 @@
#include <linux/user_events.h>
#include <linux/uaccess.h>
+#include <uapi/linux/pidfd.h>
#include <uapi/linux/wait.h>
#include <asm/unistd.h>
@@ -1739,7 +1740,7 @@ int kernel_waitid_prepare(struct wait_opts *wo, int which, pid_t upid,
break;
case P_PIDFD:
type = PIDTYPE_PID;
- if (upid < 0)
+ if (upid < 0 && upid != PIDFD_SELF)
return -EINVAL;
pid = pidfd_get_pid(upid, &f_flags);
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index dc952c3b05af..aee86fcaf670 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -550,6 +550,7 @@ SYSCALL_DEFINE2(setns, int, fd, int, flags)
struct nsset nsset = {};
int err = 0;
+ /* If fd is PIDFD_SELF, implicitly fail here, as invalid. */
if (!fd_file(f))
return -EBADF;
diff --git a/kernel/pid.c b/kernel/pid.c
index 26e2581210c4..9c037441afb8 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -539,23 +539,28 @@ struct pid *__pidfd_get_pid(unsigned int pidfd, bool pin_pid,
bool allow_proc, unsigned int *flags,
struct fd *fd)
{
- struct fd f;
struct pid *pid;
- struct file *file;
-
- f = fdget(fd);
- file = fd_file(f);
- if (!file)
- return ERR_PTR(-EBADF);
-
- pid = pidfd_pid(file);
- /* If we allow opening a pidfd via /proc/<pid>, do so. */
- if (IS_ERR(pid) && allow_proc)
- pid = tgid_pidfd_to_pid(file);
-
- if (IS_ERR(pid)) {
- fdput(f);
- return pid;
+ struct fd f = {};
+ struct file *file = NULL;
+ unsigned int f_flags = 0;
+
+ if (pidfd == PIDFD_SELF) {
+ pid = *task_pid_ptr(current, PIDTYPE_TGID);
+ } else {
+ f = fdget(pidfd);
+ file = fd_file(f);
+ if (!file)
+ return ERR_PTR(-EBADF);
+
+ pid = pidfd_pid(file);
+ /* If we allow opening a pidfd via /proc/<pid>, do so. */
+ if (IS_ERR(pid) && allow_proc)
+ pid = tgid_pidfd_to_pid(file);
+
+ if (IS_ERR(pid)) {
+ fdput(f);
+ return pid;
+ }
}
if (pin_pid)
@@ -563,18 +568,22 @@ struct pid *__pidfd_get_pid(unsigned int pidfd, bool pin_pid,
else
WARN_ON_ONCE(!fd); /* Nothing to keep pid/pidfd around? */
- if (flags)
- *flags = file->f_flags;
+ if (file) {
+ f_flags = file->f_flags;
- /*
- * If the user provides an fd output then it will handle decrementing
- * its reference counter.
- */
- if (fd)
- *fd = f;
- else
- /* Otherwise we release it. */
- fdput(f);
+ /*
+ * If the user provides an fd output then it will handle decrementing
+ * its reference counter.
+ */
+ if (fd)
+ *fd = f;
+ else
+ /* Otherwise we release it. */
+ fdput(f);
+ }
+
+ if (flags)
+ *flags = f_flags;
return pid;
}
--
2.46.2
Powered by blists - more mailing lists