Message-ID: <04CA2D22-4DE2-4DE1-A2BC-AACE666F5F93@zytor.com>
Date: Sat, 08 Nov 2025 14:14:44 -0800
From: "H. Peter Anvin" <hpa@...or.com>
To: Xie Yuanbin <qq570070308@...il.com>, david@...hat.com, tglx@...utronix.de,
        segher@...nel.crashing.org, riel@...riel.com, peterz@...radead.org,
        linux@...linux.org.uk, mathieu.desnoyers@...icios.com,
        paulmck@...nel.org, pjw@...nel.org, palmer@...belt.com,
        aou@...s.berkeley.edu, alex@...ti.fr, hca@...ux.ibm.com,
        gor@...ux.ibm.com, agordeev@...ux.ibm.com, borntraeger@...ux.ibm.com,
        svens@...ux.ibm.com, davem@...emloft.net, andreas@...sler.com,
        luto@...nel.org, mingo@...hat.com, bp@...en8.de,
        dave.hansen@...ux.intel.com, acme@...nel.org, namhyung@...nel.org,
        mark.rutland@....com, alexander.shishkin@...ux.intel.com,
        jolsa@...nel.org, irogers@...gle.com, adrian.hunter@...el.com,
        james.clark@...aro.org, anna-maria@...utronix.de, frederic@...nel.org,
        juri.lelli@...hat.com, vincent.guittot@...aro.org,
        dietmar.eggemann@....com, rostedt@...dmis.org, bsegall@...gle.com,
        mgorman@...e.de, vschneid@...hat.com, nathan@...nel.org,
        nick.desaulniers+lkml@...il.com, morbo@...gle.com,
        justinstitt@...gle.com, qq570070308@...il.com, thuth@...hat.com,
        brauner@...nel.org, arnd@...db.de, jlayton@...nel.org,
        aalbersh@...hat.com, akpm@...ux-foundation.org, david@...nel.org,
        lorenzo.stoakes@...cle.com, max.kellermann@...os.com,
        ryan.roberts@....com, nysal@...ux.ibm.com, urezki@...il.com
CC: x86@...nel.org, linux-arm-kernel@...ts.infradead.org,
        linux-kernel@...r.kernel.org, linux-riscv@...ts.infradead.org,
        linux-s390@...r.kernel.org, sparclinux@...r.kernel.org,
        linux-perf-users@...r.kernel.org, llvm@...ts.linux.dev,
        will@...nel.org
Subject: Re: [PATCH v2 3/4] Provide the always inline version of some functions

On November 8, 2025 9:23:45 AM PST, Xie Yuanbin <qq570070308@...il.com> wrote:
>On critical hot code paths, inlining functions can improve performance.
>However, current compilers provide no way to request that a function be
>inlined at a specific call site.
>
>Add an always-inline version of some functions, so that it can be chosen
>when they are called in hot paths.
>
>Signed-off-by: Xie Yuanbin <qq570070308@...il.com>
>Cc: Thomas Gleixner <tglx@...utronix.de>
>Cc: Rik van Riel <riel@...riel.com>
>Cc: Segher Boessenkool <segher@...nel.crashing.org>
>Cc: David Hildenbrand <david@...hat.com>
>Cc: Peter Zijlstra <peterz@...radead.org>
>---
> arch/arm/include/asm/mmu_context.h      | 12 +++++++-
> arch/s390/include/asm/mmu_context.h     | 12 +++++++-
> arch/sparc/include/asm/mmu_context_64.h | 12 +++++++-
> kernel/sched/core.c                     | 38 ++++++++++++++++++++++---
> 4 files changed, 67 insertions(+), 7 deletions(-)
>
>diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h
>index db2cb06aa8cf..e77b271570c1 100644
>--- a/arch/arm/include/asm/mmu_context.h
>+++ b/arch/arm/include/asm/mmu_context.h
>@@ -80,7 +80,12 @@ static inline void check_and_switch_context(struct mm_struct *mm,
> #ifndef MODULE
> #define finish_arch_post_lock_switch \
> 	finish_arch_post_lock_switch
>-static inline void finish_arch_post_lock_switch(void)
>+/*
>+ * finish_arch_post_lock_switch_ainline - the always inline version of
>+ * finish_arch_post_lock_switch, used for performance sensitive paths.
>+ * If unsure, use finish_arch_post_lock_switch instead.
>+ */
>+static __always_inline void finish_arch_post_lock_switch_ainline(void)
> {
> 	struct mm_struct *mm = current->mm;
> 
>@@ -99,6 +104,11 @@ static inline void finish_arch_post_lock_switch(void)
> 		preempt_enable_no_resched();
> 	}
> }
>+
>+static inline void finish_arch_post_lock_switch(void)
>+{
>+	finish_arch_post_lock_switch_ainline();
>+}
> #endif /* !MODULE */
> 
> #endif	/* CONFIG_MMU */
>diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
>index d9b8501bc93d..577062834906 100644
>--- a/arch/s390/include/asm/mmu_context.h
>+++ b/arch/s390/include/asm/mmu_context.h
>@@ -97,7 +97,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
> }
> 
> #define finish_arch_post_lock_switch finish_arch_post_lock_switch
>-static inline void finish_arch_post_lock_switch(void)
>+/*
>+ * finish_arch_post_lock_switch_ainline - the always inline version of
>+ * finish_arch_post_lock_switch, used for performance sensitive paths.
>+ * If unsure, use finish_arch_post_lock_switch instead.
>+ */
>+static __always_inline void finish_arch_post_lock_switch_ainline(void)
> {
> 	struct task_struct *tsk = current;
> 	struct mm_struct *mm = tsk->mm;
>@@ -120,6 +125,11 @@ static inline void finish_arch_post_lock_switch(void)
> 	local_irq_restore(flags);
> }
> 
>+static inline void finish_arch_post_lock_switch(void)
>+{
>+	finish_arch_post_lock_switch_ainline();
>+}
>+
> #define activate_mm activate_mm
> static inline void activate_mm(struct mm_struct *prev,
>                                struct mm_struct *next)
>diff --git a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h
>index 78bbacc14d2d..ca7019080574 100644
>--- a/arch/sparc/include/asm/mmu_context_64.h
>+++ b/arch/sparc/include/asm/mmu_context_64.h
>@@ -160,7 +160,12 @@ static inline void arch_start_context_switch(struct task_struct *prev)
> }
> 
> #define finish_arch_post_lock_switch	finish_arch_post_lock_switch
>-static inline void finish_arch_post_lock_switch(void)
>+/*
>+ * finish_arch_post_lock_switch_ainline - the always inline version of
>+ * finish_arch_post_lock_switch, used for performance sensitive paths.
>+ * If unsure, use finish_arch_post_lock_switch instead.
>+ */
>+static __always_inline void finish_arch_post_lock_switch_ainline(void)
> {
> 	/* Restore the state of MCDPER register for the new process
> 	 * just switched to.
>@@ -185,6 +190,11 @@ static inline void finish_arch_post_lock_switch(void)
> 	}
> }
> 
>+static inline void finish_arch_post_lock_switch(void)
>+{
>+	finish_arch_post_lock_switch_ainline();
>+}
>+
> #define mm_untag_mask mm_untag_mask
> static inline unsigned long mm_untag_mask(struct mm_struct *mm)
> {
>diff --git a/kernel/sched/core.c b/kernel/sched/core.c
>index 0e50ef3d819a..c50e672e22c4 100644
>--- a/kernel/sched/core.c
>+++ b/kernel/sched/core.c
>@@ -4884,7 +4884,13 @@ static inline void finish_task(struct task_struct *prev)
> 	smp_store_release(&prev->on_cpu, 0);
> }
> 
>-static void do_balance_callbacks(struct rq *rq, struct balance_callback *head)
>+/*
>+ * do_balance_callbacks_ainline - the always inline version of
>+ * do_balance_callbacks, used for performance sensitive paths.
>+ * If unsure, use do_balance_callbacks instead.
>+ */
>+static __always_inline void do_balance_callbacks_ainline(struct rq *rq,
>+		struct balance_callback *head)
> {
> 	void (*func)(struct rq *rq);
> 	struct balance_callback *next;
>@@ -4901,6 +4907,11 @@ static void do_balance_callbacks(struct rq *rq, struct balance_callback *head)
> 	}
> }
> 
>+static void do_balance_callbacks(struct rq *rq, struct balance_callback *head)
>+{
>+	do_balance_callbacks_ainline(rq, head);
>+}
>+
> static void balance_push(struct rq *rq);
> 
> /*
>@@ -4949,11 +4960,21 @@ struct balance_callback *splice_balance_callbacks(struct rq *rq)
> 	return __splice_balance_callbacks(rq, true);
> }
> 
>-static void __balance_callbacks(struct rq *rq)
>+/*
>+ * __balance_callbacks_ainline - the always inline version of
>+ * __balance_callbacks, used for performance sensitive paths.
>+ * If unsure, use __balance_callbacks instead.
>+ */
>+static __always_inline void __balance_callbacks_ainline(struct rq *rq)
> {
> 	do_balance_callbacks(rq, __splice_balance_callbacks(rq, false));
> }
> 
>+static void __balance_callbacks(struct rq *rq)
>+{
>+	__balance_callbacks_ainline(rq);
>+}
>+
> void balance_callbacks(struct rq *rq, struct balance_callback *head)
> {
> 	unsigned long flags;
>@@ -5003,7 +5024,8 @@ static inline void finish_lock_switch(struct rq *rq)
> #endif
> 
> #ifndef finish_arch_post_lock_switch
>-# define finish_arch_post_lock_switch()	do { } while (0)
>+# define finish_arch_post_lock_switch()		do { } while (0)
>+# define finish_arch_post_lock_switch_ainline()	do { } while (0)
> #endif
> 
> static inline void kmap_local_sched_out(void)
>@@ -5050,6 +5072,9 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
> 
> /**
>  * finish_task_switch - clean up after a task-switch
>+ * finish_task_switch_ainline - the always inline version of this func
>+ * used for performance sensitive paths
>+ *
>  * @prev: the thread we just switched away from.
>  *
>  * finish_task_switch must be called after the context switch, paired
>@@ -5067,7 +5092,7 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
>  * past. 'prev == current' is still correct but we need to recalculate this_rq
>  * because prev may have moved to another CPU.
>  */
>-static struct rq *finish_task_switch(struct task_struct *prev)
>+static __always_inline struct rq *finish_task_switch_ainline(struct task_struct *prev)
> 	__releases(rq->lock)
> {
> 	struct rq *rq = this_rq();
>@@ -5159,6 +5184,11 @@ static struct rq *finish_task_switch(struct task_struct *prev)
> 	return rq;
> }
> 
>+static struct rq *finish_task_switch(struct task_struct *prev)
>+{
>+	return finish_task_switch_ainline(prev);
>+}
>+
> /**
>  * schedule_tail - first thing a freshly forked thread must call.
>  * @prev: the thread we just switched away from.

There is, in fact: you have to have an always_inline version and wrap it in a noinline version.
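
For illustration, a minimal sketch of that pattern (hypothetical names, not code from the patch; the patch above uses a plain static wrapper rather than a noinline one):

	/*
	 * The body lives in an always-inline helper; performance-critical
	 * callers invoke this directly and get it inlined at their call site.
	 */
	static __always_inline void do_foo_ainline(void)
	{
		/* ... the actual work ... */
	}

	/*
	 * Out-of-line wrapper, marked noinline so all other callers share a
	 * single copy of the body instead of duplicating it.
	 */
	static noinline void do_foo(void)
	{
		do_foo_ainline();
	}

Non-critical callers use do_foo() and share one out-of-line copy, while the hot path (e.g. the context-switch code above) calls the _ainline variant and gets it inlined where it matters.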
