lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250409-rohstoff-ungnade-d1afa571f32c@brauner>
Date: Wed,  9 Apr 2025 20:18:58 +0200
From: Christian Brauner <brauner@...nel.org>
To: Oleg Nesterov <oleg@...hat.com>
Cc: Christian Brauner <brauner@...nel.org>,
	linux-fsdevel@...r.kernel.org,
	Jeff Layton <jlayton@...nel.org>,
	Lennart Poettering <lennart@...ttering.net>,
	Daan De Meyer <daan.j.demeyer@...il.com>,
	Mike Yuan <me@...dnzj.com>,
	linux-kernel@...r.kernel.org,
	Peter Ziljstra <peterz@...radead.org>
Subject: [RFC PATCH] pidfs: ensure consistent ENOENT/ESRCH reporting

In a prior patch series we tried to cleanly differentiate between:

(1) The task has already been reaped.
(2) The caller requested a pidfd for a thread-group leader but the pid
    actually references a struct pid that isn't used as a thread-group
    leader.

as this was causing issues for non-threaded workloads.

But there's cases where the current simple logic is wrong. Specifically,
if the pid was a leader pid and the check races with __unhash_process().

Stabilize this by using a sequence counter associated with tasklist_lock
and retry while we're in __unhash_process(). The seqcounter might be
useful independent of pidfs.

Signed-off-by: Christian Brauner <brauner@...nel.org>
---
 include/linux/pid.h |  1 +
 kernel/exit.c       | 11 +++++++++++
 kernel/fork.c       | 22 ++++++++++++----------
 kernel/pid.c        |  1 +
 4 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/include/linux/pid.h b/include/linux/pid.h
index 311ecebd7d56..b54a4c1ef602 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -65,6 +65,7 @@ struct pid
 	struct hlist_head inodes;
 	/* wait queue for pidfd notifications */
 	wait_queue_head_t wait_pidfd;
+	seqcount_rwlock_t pid_seq;
 	struct rcu_head rcu;
 	struct upid numbers[];
 };
diff --git a/kernel/exit.c b/kernel/exit.c
index 1b51dc099f1e..8050572fe682 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -133,17 +133,28 @@ struct release_task_post {
 static void __unhash_process(struct release_task_post *post, struct task_struct *p,
 			     bool group_dead)
 {
+	struct pid *pid;
+
+	lockdep_assert_held_write(&tasklist_lock);
+
 	nr_threads--;
+
+	pid = task_pid(p);
+	raw_write_seqcount_begin(&pid->pid_seq);
 	detach_pid(post->pids, p, PIDTYPE_PID);
 	if (group_dead) {
 		detach_pid(post->pids, p, PIDTYPE_TGID);
 		detach_pid(post->pids, p, PIDTYPE_PGID);
 		detach_pid(post->pids, p, PIDTYPE_SID);
+	}
+	raw_write_seqcount_end(&pid->pid_seq);
 
+	if (group_dead) {
 		list_del_rcu(&p->tasks);
 		list_del_init(&p->sibling);
 		__this_cpu_dec(process_counts);
 	}
+
 	list_del_rcu(&p->thread_node);
 }
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 4a2080b968c8..1480bf6f5f38 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2109,24 +2109,26 @@ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **re
 int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret)
 {
 	int err = 0;
+	unsigned int seq;
 
-	if (!(flags & PIDFD_THREAD)) {
+	do {
+		seq = raw_seqcount_begin(&pid->pid_seq);
 		/*
 		 * If this is struct pid isn't used as a thread-group
 		 * leader pid but the caller requested to create a
 		 * thread-group leader pidfd then report ENOENT to the
 		 * caller as a hint.
 		 */
-		if (!pid_has_task(pid, PIDTYPE_TGID))
+		if (!(flags & PIDFD_THREAD) && !pid_has_task(pid, PIDTYPE_TGID))
 			err = -ENOENT;
-	}
-
-	/*
-	 * If this wasn't a thread-group leader struct pid or the task
-	 * got reaped in the meantime report -ESRCH to userspace.
-	 */
-	if (!pid_has_task(pid, PIDTYPE_PID))
-		err = -ESRCH;
+		/*
+		 * If this wasn't a thread-group leader struct pid or
+		 * the task got reaped in the meantime report -ESRCH to
+		 * userspace.
+		 */
+		if (!pid_has_task(pid, PIDTYPE_PID))
+			err = -ESRCH;
+	} while (read_seqcount_retry(&pid->pid_seq, seq));
 	if (err)
 		return err;
 
diff --git a/kernel/pid.c b/kernel/pid.c
index 4ac2ce46817f..bbca61f62faa 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -271,6 +271,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
 	upid = pid->numbers + ns->level;
 	idr_preload(GFP_KERNEL);
 	spin_lock(&pidmap_lock);
+	seqcount_rwlock_init(&pid->pid_seq, &tasklist_lock);
 	if (!(ns->pid_allocated & PIDNS_ADDING))
 		goto out_unlock;
 	pidfs_add_pid(pid);
-- 
2.47.2


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ