linux-kernel - [PATCH v2 5/5] kmod: throttle kmod thread limit

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20170526001630.19203-6-mcgrof@kernel.org>
Date:   Thu, 25 May 2017 17:16:30 -0700
From:   "Luis R. Rodriguez" <mcgrof@...nel.org>
To:     akpm@...ux-foundation.org, jeyu@...hat.com, shuah@...nel.org,
        rusty@...tcorp.com.au, ebiederm@...ssion.com,
        dmitry.torokhov@...il.com, acme@...hat.com, corbet@....net
Cc:     martin.wilck@...e.com, mmarek@...e.com, pmladek@...e.com,
        hare@...e.com, rwright@....com, jeffm@...e.com, DSterba@...e.com,
        fdmanana@...e.com, neilb@...e.com, linux@...ck-us.net,
        rgoldwyn@...e.com, subashab@...eaurora.org, xypron.glpk@....de,
        keescook@...omium.org, atomlin@...hat.com, mbenes@...e.cz,
        paulmck@...ux.vnet.ibm.com, dan.j.williams@...el.com,
        jpoimboe@...hat.com, davem@...emloft.net, mingo@...hat.com,
        alan@...ux.intel.com, tytso@....edu, gregkh@...uxfoundation.org,
        torvalds@...ux-foundation.org, linux-kselftest@...r.kernel.org,
        linux-doc@...r.kernel.org, linux-kernel@...r.kernel.org,
        "Luis R. Rodriguez" <mcgrof@...nel.org>
Subject: [PATCH v2 5/5] kmod: throttle kmod thread limit

If we reach the limit of modprobe_limit threads running the next
request_module() call will fail. The original reason for adding
a kill was to do away with possible issues with in old circumstances
which would create a recursive series of request_module() calls.
We can do better than just be super aggressive and reject calls
once we've reached the limit by simply making pending callers wait
until the threshold has been reduced.

The only difference is the clutch helps with avoiding making
request_module() requests fatal more often. With x86_64 qemu,
with 4 cores, 4 GiB of RAM it takes the following run time to
run both tests:

time kmod.sh -t 0008
real    0m14.066s
user    0m1.403s
sys     0m5.837s

time kmod.sh -t 0009
real    0m53.928s
user    0m1.271s
sys     0m7.343s

Signed-off-by: Luis R. Rodriguez <mcgrof@...nel.org>
---
 kernel/kmod.c                        | 15 +++++----------
 tools/testing/selftests/kmod/kmod.sh | 24 ++----------------------
 2 files changed, 7 insertions(+), 32 deletions(-)

diff --git a/kernel/kmod.c b/kernel/kmod.c
index 17de776cf368..6cd4c88ab98d 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -59,6 +59,7 @@ static DECLARE_RWSEM(umhelper_sem);
 #ifdef CONFIG_MODULES
 static atomic_t kmod_concurrent_max = ATOMIC_INIT(0);
 #define MAX_KMOD_CONCURRENT 50	/* Completely arbitrary value - KAO */
+static DECLARE_WAIT_QUEUE_HEAD(kmod_wq);
 
 /*
 	modprobe_path is set via /proc/sys.
@@ -142,7 +143,6 @@ int __request_module(bool wait, const char *fmt, ...)
 	va_list args;
 	char module_name[MODULE_NAME_LEN];
 	int ret;
-	static int kmod_loop_msg;
 
 	/*
 	 * We don't allow synchronous module loading from async.  Module
@@ -166,15 +166,9 @@ int __request_module(bool wait, const char *fmt, ...)
 		return ret;
 
 	if (atomic_dec_if_positive(&kmod_concurrent_max) < 0) {
-		/* We may be blaming an innocent here, but unlikely */
-		if (kmod_loop_msg < 5) {
-			printk(KERN_ERR
-			       "request_module: runaway loop modprobe %s\n",
-			       module_name);
-			kmod_loop_msg++;
-		}
-		atomic_dec(&kmod_concurrent);
-		return -ENOMEM;
+		pr_warn_ratelimited("request_module: kmod_concurrent_max (%u) close to 0 (max_modprobes: %u), for module %s\n, throttling...",
+				    atomic_read(&kmod_concurrent_max), max_modprobes, module_name);
+		wait_event_interruptible(kmod_wq, atomic_dec_if_positive(&kmod_concurrent_max) >= 0);
 	}
 
 	trace_module_request(module_name, wait, _RET_IP_);
@@ -182,6 +176,7 @@ int __request_module(bool wait, const char *fmt, ...)
 	ret = call_modprobe(module_name, wait ? UMH_WAIT_PROC : UMH_WAIT_EXEC);
 
 	atomic_inc(&kmod_concurrent_max);
+	wake_up_all(&kmod_wq);
 
 	return ret;
 }
diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh
index 10196a62ed09..8cecae9a8bca 100755
--- a/tools/testing/selftests/kmod/kmod.sh
+++ b/tools/testing/selftests/kmod/kmod.sh
@@ -59,28 +59,8 @@ ALL_TESTS="$ALL_TESTS 0004:1:1"
 ALL_TESTS="$ALL_TESTS 0005:10:1"
 ALL_TESTS="$ALL_TESTS 0006:10:1"
 ALL_TESTS="$ALL_TESTS 0007:5:1"
-
-# Disabled tests:
-#
-# 0008 x 150 -  multithreaded - push kmod_concurrent over max_modprobes for request_module()"
-# Current best-effort failure interpretation:
-# Enough module requests get loaded in place fast enough to reach over the
-# max_modprobes limit and trigger a failure -- before we're even able to
-# start processing pending requests.
-ALL_TESTS="$ALL_TESTS 0008:150:0"
-
-# 0009 x 150 - multithreaded - push kmod_concurrent over max_modprobes for get_fs_type()"
-# Current best-effort failure interpretation:
-#
-# get_fs_type() requests modules using aliases as such the optimization in
-# place today to look for already loaded modules will not take effect and
-# we end up requesting a new module to load, this bumps the kmod_concurrent,
-# and in certain circumstances can lead to pushing the kmod_concurrent over
-# the max_modprobe limit.
-#
-# This test fails much easier than test 0008 since the alias optimizations
-# are not in place.
-ALL_TESTS="$ALL_TESTS 0009:150:0"
+ALL_TESTS="$ALL_TESTS 0008:150:1"
+ALL_TESTS="$ALL_TESTS 0009:150:1"
 
 test_modprobe()
 {
-- 
2.11.0