lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date:   Wed, 7 Jul 2021 14:49:41 +0200
From:   Vasily Gorbik <gor@...ux.ibm.com>
To:     Josh Poimboeuf <jpoimboe@...hat.com>,
        Jiri Kosina <jikos@...nel.org>,
        Miroslav Benes <mbenes@...e.cz>, Petr Mladek <pmladek@...e.com>
Cc:     Joe Lawrence <joe.lawrence@...hat.com>,
        Heiko Carstens <hca@...ux.ibm.com>,
        Sven Schnelle <svens@...ux.ibm.com>,
        Sumanth Korikkar <sumanthk@...ux.ibm.com>,
        live-patching@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: [RFC PATCH] livepatch: Speed up transition retries

For now this is just a racy hack, for demonstration purposes.

On an s390 system with a large number of CPUs,
klp_try_complete_transition() often cannot "complete" on the first
attempt. klp_try_complete_transition() then reschedules itself as delayed
work with a one-second delay. This accumulates to a significant amount of
time when there is a large number of livepatching transitions.

This patch tries to minimize this delay by counting the processes which
still need to be transitioned and scheduling
klp_try_complete_transition() right away once that count drops to zero.

For an s390 LPAR with 128 CPUs this reduces the livepatch kselftest run time
from
real    1m11.837s
user    0m0.603s
sys     0m10.940s

to
real    0m14.550s
user    0m0.420s
sys     0m5.779s

And qa_test_klp run time from
real    5m15.950s
user    0m34.447s
sys     15m11.345s

to
real    3m51.987s
user    0m27.074s
sys     9m37.301s

Would something like that be useful for production use cases?
Any ideas on how to approach this more gracefully?

Signed-off-by: Vasily Gorbik <gor@...ux.ibm.com>
---
 kernel/livepatch/transition.c | 41 +++++++++++++++++++++++++----------
 1 file changed, 30 insertions(+), 11 deletions(-)

diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c
index 793eba46e970..fc4bb7a4a116 100644
--- a/kernel/livepatch/transition.c
+++ b/kernel/livepatch/transition.c
@@ -26,6 +26,8 @@ static int klp_target_state = KLP_UNDEFINED;
 
 static unsigned int klp_signals_cnt;
 
+static atomic_t klp_procs;
+
 /*
  * This work can be performed periodically to finish patching or unpatching any
  * "straggler" tasks which failed to transition in the first attempt.
@@ -181,8 +183,15 @@ void klp_update_patch_state(struct task_struct *task)
 	 *    of func->transition, if klp_ftrace_handler() is called later on
 	 *    the same CPU.  See __klp_disable_patch().
 	 */
-	if (test_and_clear_tsk_thread_flag(task, TIF_PATCH_PENDING))
+	if (test_and_clear_tsk_thread_flag(task, TIF_PATCH_PENDING)) {
 		task->patch_state = READ_ONCE(klp_target_state);
+		if (atomic_read(&klp_procs) == 0)
+			pr_err("klp_procs misaccounting\n");
+		else if (atomic_sub_return(1, &klp_procs) == 0) {
+			if (delayed_work_pending(&klp_transition_work))
+				mod_delayed_work(system_wq, &klp_transition_work, 0);
+		}
+	}
 
 	preempt_enable_notrace();
 }
@@ -320,7 +329,8 @@ static bool klp_try_switch_task(struct task_struct *task)
 
 	success = true;
 
-	clear_tsk_thread_flag(task, TIF_PATCH_PENDING);
+	if (test_and_clear_tsk_thread_flag(task, TIF_PATCH_PENDING))
+		atomic_sub(1, &klp_procs);
 	task->patch_state = klp_target_state;
 
 done:
@@ -402,11 +412,6 @@ void klp_try_complete_transition(void)
 	 * Usually this will transition most (or all) of the tasks on a system
 	 * unless the patch includes changes to a very common function.
 	 */
-	read_lock(&tasklist_lock);
-	for_each_process_thread(g, task)
-		if (!klp_try_switch_task(task))
-			complete = false;
-	read_unlock(&tasklist_lock);
 
 	/*
 	 * Ditto for the idle "swapper" tasks.
@@ -424,10 +429,17 @@ void klp_try_complete_transition(void)
 			/* offline idle tasks can be switched immediately */
 			clear_tsk_thread_flag(task, TIF_PATCH_PENDING);
 			task->patch_state = klp_target_state;
+			atomic_sub(1, &klp_procs);
 		}
 	}
 	put_online_cpus();
 
+	read_lock(&tasklist_lock);
+	for_each_process_thread(g, task)
+		if (!klp_try_switch_task(task))
+			complete = false;
+	read_unlock(&tasklist_lock);
+
 	if (!complete) {
 		if (klp_signals_cnt && !(klp_signals_cnt % SIGNALS_TIMEOUT))
 			klp_send_signals();
@@ -438,8 +450,8 @@ void klp_try_complete_transition(void)
 		 * later and/or wait for other methods like kernel exit
 		 * switching.
 		 */
-		schedule_delayed_work(&klp_transition_work,
-				      round_jiffies_relative(HZ));
+		schedule_delayed_work(&klp_transition_work, atomic_read(&klp_procs) ?
+				      round_jiffies_relative(HZ) : 0);
 		return;
 	}
 
@@ -473,6 +485,7 @@ void klp_start_transition(void)
 		  klp_transition_patch->mod->name,
 		  klp_target_state == KLP_PATCHED ? "patching" : "unpatching");
 
+	atomic_set(&klp_procs, 0);
 	/*
 	 * Mark all normal tasks as needing a patch state update.  They'll
 	 * switch either in klp_try_complete_transition() or as they exit the
@@ -480,8 +493,10 @@ void klp_start_transition(void)
 	 */
 	read_lock(&tasklist_lock);
 	for_each_process_thread(g, task)
-		if (task->patch_state != klp_target_state)
+		if (task->patch_state != klp_target_state) {
 			set_tsk_thread_flag(task, TIF_PATCH_PENDING);
+			atomic_inc(&klp_procs);
+		}
 	read_unlock(&tasklist_lock);
 
 	/*
@@ -491,8 +506,10 @@ void klp_start_transition(void)
 	 */
 	for_each_possible_cpu(cpu) {
 		task = idle_task(cpu);
-		if (task->patch_state != klp_target_state)
+		if (task->patch_state != klp_target_state) {
 			set_tsk_thread_flag(task, TIF_PATCH_PENDING);
+			atomic_inc(&klp_procs);
+		}
 	}
 
 	klp_signals_cnt = 0;
@@ -614,6 +631,8 @@ void klp_reverse_transition(void)
 void klp_copy_process(struct task_struct *child)
 {
 	child->patch_state = current->patch_state;
+	if (child->patch_state != klp_target_state)
+		atomic_add(1, &klp_procs);
 
 	/* TIF_PATCH_PENDING gets copied in setup_thread_stack() */
 }
-- 
2.25.4

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ