lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20240213055554.1802415-2-ankur.a.arora@oracle.com>
Date: Mon, 12 Feb 2024 21:55:25 -0800
From: Ankur Arora <ankur.a.arora@...cle.com>
To: linux-kernel@...r.kernel.org
Cc: tglx@...utronix.de, peterz@...radead.org, torvalds@...ux-foundation.org,
        paulmck@...nel.org, akpm@...ux-foundation.org, luto@...nel.org,
        bp@...en8.de, dave.hansen@...ux.intel.com, hpa@...or.com,
        mingo@...hat.com, juri.lelli@...hat.com, vincent.guittot@...aro.org,
        willy@...radead.org, mgorman@...e.de, jpoimboe@...nel.org,
        mark.rutland@....com, jgross@...e.com, andrew.cooper3@...rix.com,
        bristot@...nel.org, mathieu.desnoyers@...icios.com,
        geert@...ux-m68k.org, glaubitz@...sik.fu-berlin.de,
        anton.ivanov@...bridgegreys.com, mattst88@...il.com,
        krypton@...ich-teichert.org, rostedt@...dmis.org,
        David.Laight@...LAB.COM, richard@....at, mjguzik@...il.com,
        jon.grimm@....com, bharata@....com, raghavendra.kt@....com,
        boris.ostrovsky@...cle.com, konrad.wilk@...cle.com,
        Ankur Arora <ankur.a.arora@...cle.com>,
        Jonathan Corbet <corbet@....net>
Subject: [PATCH 01/30] preempt: introduce CONFIG_PREEMPT_AUTO

PREEMPT_AUTO adds a new scheduling model which, like PREEMPT_DYNAMIC,
allows dynamic switching between a none/voluntary/full preemption
model. However, unlike PREEMPT_DYNAMIC, it doesn't use explicit
preemption points for the voluntary models.

It works by depending on CONFIG_PREEMPTION (and thus PREEMPT_COUNT),
allowing the scheduler to always know when it is safe to preempt
for all three preemption models.

In addition, it uses an additional need-resched bit
(TIF_NEED_RESCHED_LAZY) which, with TIF_NEED_RESCHED allows the
scheduler to express two kinds of rescheduling intent: schedule at
the earliest opportunity (the usual TIF_NEED_RESCHED semantics), or
express a need for rescheduling while allowing the task on the
runqueue to run to timeslice completion (TIF_NEED_RESCHED_LAZY).

Based on the preemption model in use, the scheduler chooses
need-resched in the following manner:

		TIF_NEED_RESCHED 	TIF_NEED_RESCHED_LAZY

none		never   		always [*]
voluntary       higher sched class	other tasks [*]
full 		always                  never

[*] when preempting idle, or for kernel tasks that are 'urgent' in
some way (ex. resched_cpu() used as an RCU hammer), we use
TIF_NEED_RESCHED.

As mentioned above, the other part is when preemption happens -- when
are the need-resched flags checked:

                 exit-to-user    ret-to-kernel    preempt_count()
NEED_RESCHED_LAZY     Y               N                N
NEED_RESCHED          Y               Y                Y

Exposed under CONFIG_EXPERT for now.

Cc: Peter Ziljstra <peterz@...radead.org>
Cc: Jonathan Corbet <corbet@....net>
Originally-by: Thomas Gleixner <tglx@...utronix.de>
Link: https://lore.kernel.org/lkml/87jzshhexi.ffs@tglx/
Signed-off-by: Ankur Arora <ankur.a.arora@...cle.com>
---
 .../admin-guide/kernel-parameters.txt         |  1 +
 include/linux/thread_info.h                   |  8 ++++
 init/Makefile                                 |  1 +
 kernel/Kconfig.preempt                        | 37 +++++++++++++++++--
 4 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 31b3a25680d0..5d2bd21f98e1 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4662,6 +4662,7 @@
 
 	preempt=	[KNL]
 			Select preemption mode if you have CONFIG_PREEMPT_DYNAMIC
+			or CONFIG_PREEMPT_AUTO.
 			none - Limited to cond_resched() calls
 			voluntary - Limited to cond_resched() and might_sleep() calls
 			full - Any section that isn't explicitly preempt disabled
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index 9ea0b28068f4..7b1d9185aac6 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -59,6 +59,14 @@ enum syscall_work_bit {
 
 #include <asm/thread_info.h>
 
+/*
+ * Fall back to the default behaviour if we don't have CONFIG_PREEMPT_AUTO.
+ */
+#ifndef CONFIG_PREEMPT_AUTO
+#define TIF_NEED_RESCHED_LAZY TIF_NEED_RESCHED
+#define _TIF_NEED_RESCHED_LAZY _TIF_NEED_RESCHED
+#endif
+
 #ifdef __KERNEL__
 
 #ifndef arch_set_restart_data
diff --git a/init/Makefile b/init/Makefile
index cbac576c57d6..da1dba3116dc 100644
--- a/init/Makefile
+++ b/init/Makefile
@@ -27,6 +27,7 @@ smp-flag-$(CONFIG_SMP)			:= SMP
 preempt-flag-$(CONFIG_PREEMPT_BUILD)	:= PREEMPT
 preempt-flag-$(CONFIG_PREEMPT_DYNAMIC)	:= PREEMPT_DYNAMIC
 preempt-flag-$(CONFIG_PREEMPT_RT)	:= PREEMPT_RT
+preempt-flag-$(CONFIG_PREEMPT_AUTO)	:= PREEMPT_AUTO
 
 build-version = $(or $(KBUILD_BUILD_VERSION), $(build-version-auto))
 build-timestamp = $(or $(KBUILD_BUILD_TIMESTAMP), $(build-timestamp-auto))
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index c2f1fd95a821..fe83040ad755 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -11,13 +11,17 @@ config PREEMPT_BUILD
 	select PREEMPTION
 	select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
 
+config HAVE_PREEMPT_AUTO
+	bool
+
 choice
 	prompt "Preemption Model"
 	default PREEMPT_NONE
 
 config PREEMPT_NONE
 	bool "No Forced Preemption (Server)"
-	select PREEMPT_NONE_BUILD if !PREEMPT_DYNAMIC
+	select PREEMPT_NONE_BUILD if (!PREEMPT_DYNAMIC && !PREEMPT_AUTO)
+
 	help
 	  This is the traditional Linux preemption model, geared towards
 	  throughput. It will still provide good latencies most of the
@@ -32,7 +36,7 @@ config PREEMPT_NONE
 config PREEMPT_VOLUNTARY
 	bool "Voluntary Kernel Preemption (Desktop)"
 	depends on !ARCH_NO_PREEMPT
-	select PREEMPT_VOLUNTARY_BUILD if !PREEMPT_DYNAMIC
+	select PREEMPT_VOLUNTARY_BUILD if (!PREEMPT_DYNAMIC && !PREEMPT_AUTO)
 	help
 	  This option reduces the latency of the kernel by adding more
 	  "explicit preemption points" to the kernel code. These new
@@ -95,7 +99,7 @@ config PREEMPTION
 
 config PREEMPT_DYNAMIC
 	bool "Preemption behaviour defined on boot"
-	depends on HAVE_PREEMPT_DYNAMIC && !PREEMPT_RT
+	depends on HAVE_PREEMPT_DYNAMIC && !PREEMPT_RT && !PREEMPT_AUTO
 	select JUMP_LABEL if HAVE_PREEMPT_DYNAMIC_KEY
 	select PREEMPT_BUILD
 	default y if HAVE_PREEMPT_DYNAMIC_CALL
@@ -115,6 +119,33 @@ config PREEMPT_DYNAMIC
 	  Interesting if you want the same pre-built kernel should be used for
 	  both Server and Desktop workloads.
 
+config PREEMPT_AUTO
+	bool "Scheduler controlled preemption model"
+	depends on EXPERT && HAVE_PREEMPT_AUTO && !ARCH_NO_PREEMPT
+	select PREEMPT_BUILD
+	help
+	  This option allows to define the preemption model on the kernel
+	  command line parameter and thus override the default preemption
+	  model selected during compile time.
+
+	  However, note that the compile time choice of preemption model
+	  might impact other kernel options like the specific RCU model.
+
+	  This feature makes the latency of the kernel configurable by
+	  allowing the scheduler to choose when to preempt based on
+	  the preemption policy in effect. It does this without needing
+	  voluntary preemption points.
+
+	  With PREEMPT_NONE: the scheduler allows a task (executing in
+	  user or kernel context) to run to completion, at least until
+	  its current tick expires.
+
+	  With PREEMPT_VOLUNTARY: similar to PREEMPT_NONE, but the scheduler
+	  will also preempt for higher priority class of processes but not
+	  lower.
+
+	  With PREEMPT: the scheduler preempts at the earliest opportunity.
+
 config SCHED_CORE
 	bool "Core Scheduling for SMT"
 	depends on SCHED_SMT
-- 
2.31.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ