lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date:   Thu, 11 Mar 2021 17:38:23 -0600
From:   Jim Newsome <jnewsome@...project.org>
To:     Andrew Morton <akpm@...ux-foundation.org>
Cc:     Oleg Nesterov <oleg@...hat.com>,
        "Eric W . Biederman" <ebiederm@...ssion.com>,
        Christian Brauner <christian@...uner.io>,
        linux-kernel@...r.kernel.org, Jim Newsome <jnewsome@...project.org>
Subject: [PATCH v4] do_wait: make PIDTYPE_PID case O(1) instead of O(n)

do_wait is an internal function used to implement waitpid, waitid,
wait4, etc. To handle the general case, it does an O(n) linear scan of
the thread group's children and tracees.

This patch adds a special-case when waiting on a pid to skip these scans
and instead do an O(1) lookup. This improves performance when waiting on
a pid from a thread group with many children and/or tracees.

Signed-off-by: James Newsome <jnewsome@...project.org>
Reviewed-by: Oleg Nesterov <oleg@...hat.com>
---

v3: https://lkml.org/lkml/2021/3/9/1134

Oleg - I kept your "reviewed-by", but LMK if I should drop it; wasn't
sure whether these changes were enough to have to drop it or not. Also
while making the other requested changes I found the code was cleaner
with a helper function after all, which I named `is_effectively_child`.

 kernel/exit.c | 73 ++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 63 insertions(+), 10 deletions(-)

diff --git a/kernel/exit.c b/kernel/exit.c
index 04029e35e69a..e0fd782463c5 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1439,9 +1439,54 @@ void __wake_up_parent(struct task_struct *p, struct task_struct *parent)
 			   TASK_INTERRUPTIBLE, p);
 }
 
+static bool is_effectively_child(struct wait_opts *wo, bool ptrace,
+				 struct task_struct *target)
+{
+	struct task_struct *parent =
+		!ptrace ? target->real_parent : target->parent;
+
+	return current == parent || (!(wo->wo_flags & __WNOTHREAD) &&
+				     same_thread_group(current, parent));
+}
+
+/*
+ * Optimization for waiting on PIDTYPE_PID. No need to iterate through child
+ * and tracee lists to find the target task.
+ */
+static int do_wait_pid(struct wait_opts *wo)
+{
+	bool ptrace;
+	struct task_struct *target;
+	int retval;
+
+	ptrace = false;
+
+	/* A non-ptrace wait can only be performed on a thread group leader. */
+	target = pid_task(wo->wo_pid, PIDTYPE_TGID);
+
+	if (target && is_effectively_child(wo, ptrace, target)) {
+		retval = wait_consider_task(wo, ptrace, target);
+		if (retval)
+			return retval;
+	}
+
+	ptrace = true;
+
+	/* A ptrace wait can be done on non-thread-group-leaders. */
+	if (!target)
+		target = pid_task(wo->wo_pid, PIDTYPE_PID);
+
+	if (target && is_effectively_child(wo, ptrace, target)) {
+		retval = wait_consider_task(wo, ptrace, target);
+		if (retval)
+			return retval;
+	}
+
+	return 0;
+}
+
 static long do_wait(struct wait_opts *wo)
 {
-	struct task_struct *tsk;
 	int retval;
 
 	trace_sched_process_wait(wo->wo_pid);
@@ -1463,19 +1508,27 @@ static long do_wait(struct wait_opts *wo)
 
 	set_current_state(TASK_INTERRUPTIBLE);
 	read_lock(&tasklist_lock);
-	tsk = current;
-	do {
-		retval = do_wait_thread(wo, tsk);
-		if (retval)
-			goto end;
 
-		retval = ptrace_do_wait(wo, tsk);
+	if (wo->wo_type == PIDTYPE_PID) {
+		retval = do_wait_pid(wo);
 		if (retval)
 			goto end;
+	} else {
+		struct task_struct *tsk = current;
+
+		do {
+			retval = do_wait_thread(wo, tsk);
+			if (retval)
+				goto end;
 
-		if (wo->wo_flags & __WNOTHREAD)
-			break;
-	} while_each_thread(current, tsk);
+			retval = ptrace_do_wait(wo, tsk);
+			if (retval)
+				goto end;
+
+			if (wo->wo_flags & __WNOTHREAD)
+				break;
+		} while_each_thread(current, tsk);
+	}
 	read_unlock(&tasklist_lock);
 
 notask:
-- 
2.30.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ