Message-ID: <20250520110632.168981626@infradead.org>
Date: Tue, 20 May 2025 12:55:44 +0200
From: Peter Zijlstra <peterz@...radead.org>
To: x86@...nel.org
Cc: linux-kernel@...r.kernel.org,
peterz@...radead.org,
kys@...rosoft.com,
haiyangz@...rosoft.com,
wei.liu@...nel.org,
decui@...rosoft.com,
tglx@...utronix.de,
mingo@...hat.com,
bp@...en8.de,
dave.hansen@...ux.intel.com,
hpa@...or.com,
luto@...nel.org,
linux-hyperv@...r.kernel.org
Subject: [PATCH 2/3] x86/mm: Avoid repeated this_cpu_*() ops in switch_mm_irqs_off()

Aside from generating slightly better code by not having to use %fs
prefixed ops, the real purpose is to make it easier to clarify the code
later on by switching some of these accesses to smp_store_release().

Notably, this_cpu_{read,write}() already imply {READ,WRITE}_ONCE(), so
converting them to READ_ONCE()/WRITE_ONCE() on a cached this_cpu_ptr()
does not weaken the accesses.

Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
---
 arch/x86/mm/tlb.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)
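
For reference, a minimal sketch (not part of the patch) of the conversion
this patch applies: instead of repeated this_cpu_read()/this_cpu_write()
ops, take the per-CPU pointer once with this_cpu_ptr() and use plain
READ_ONCE()/WRITE_ONCE() through it. The per-CPU variable 'foo_state' and
the two helpers below are made up for illustration; like
switch_mm_irqs_off(), the caller is assumed to run with preemption (here,
IRQs) disabled so the CPU cannot change under the cached pointer.

#include <linux/percpu.h>
#include <linux/compiler.h>

struct foo_state {
        int             count;
        unsigned long   last;
};

static DEFINE_PER_CPU(struct foo_state, foo_state);

/* Old style: each access is a separate segment-prefixed per-CPU op. */
static void foo_update_old(unsigned long val)
{
        int count = this_cpu_read(foo_state.count);

        this_cpu_write(foo_state.count, count + 1);
        this_cpu_write(foo_state.last, val);
}

/* New style: take the per-CPU pointer once, then READ_ONCE()/WRITE_ONCE(). */
static void foo_update_new(unsigned long val)
{
        struct foo_state *fs = this_cpu_ptr(&foo_state);
        int count = READ_ONCE(fs->count);

        WRITE_ONCE(fs->count, count + 1);
        WRITE_ONCE(fs->last, val);
}
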
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -51,7 +51,7 @@
/*
* Bits to mangle the TIF_SPEC_* state into the mm pointer which is
- * stored in cpu_tlb_state.last_user_mm_spec.
+ * stored in cpu_tlbstate.last_user_mm_spec.
*/
#define LAST_USER_MM_IBPB 0x1UL
#define LAST_USER_MM_L1D_FLUSH 0x2UL
@@ -782,8 +782,9 @@ static inline void cr4_update_pce_mm(str
void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
struct task_struct *tsk)
{
- struct mm_struct *prev = this_cpu_read(cpu_tlbstate.loaded_mm);
- u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+ struct tlb_state *this_tlbstate = this_cpu_ptr(&cpu_tlbstate);
+ struct mm_struct *prev = READ_ONCE(this_tlbstate->loaded_mm);
+ u16 prev_asid = READ_ONCE(this_tlbstate->loaded_mm_asid);
bool was_lazy = this_cpu_read(cpu_tlbstate_shared.is_lazy);
unsigned cpu = smp_processor_id();
unsigned long new_lam;
@@ -840,7 +841,7 @@ void switch_mm_irqs_off(struct mm_struct
if (prev == next) {
/* Not actually switching mm's */
VM_WARN_ON(is_dyn_asid(prev_asid) &&
- this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
+ READ_ONCE(this_tlbstate->ctxs[prev_asid].ctx_id) !=
next->context.ctx_id);
/*
@@ -888,7 +889,7 @@ void switch_mm_irqs_off(struct mm_struct
*/
smp_mb();
next_tlb_gen = atomic64_read(&next->context.tlb_gen);
- if (this_cpu_read(cpu_tlbstate.ctxs[prev_asid].tlb_gen) ==
+ if (READ_ONCE(this_tlbstate->ctxs[prev_asid].tlb_gen) ==
next_tlb_gen)
return;
@@ -910,7 +911,7 @@ void switch_mm_irqs_off(struct mm_struct
* and others are sensitive to the window where mm_cpumask(),
* CR3 and cpu_tlbstate.loaded_mm are not all in sync.
*/
- this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING);
+ WRITE_ONCE(this_tlbstate->loaded_mm, LOADED_MM_SWITCHING);
barrier();
/* Start receiving IPIs and then read tlb_gen (and LAM below) */
@@ -925,8 +926,8 @@ void switch_mm_irqs_off(struct mm_struct
new_lam = mm_lam_cr3_mask(next);
if (ns.need_flush) {
VM_WARN_ON_ONCE(is_global_asid(ns.asid));
- this_cpu_write(cpu_tlbstate.ctxs[ns.asid].ctx_id, next->context.ctx_id);
- this_cpu_write(cpu_tlbstate.ctxs[ns.asid].tlb_gen, next_tlb_gen);
+ WRITE_ONCE(this_tlbstate->ctxs[ns.asid].ctx_id, next->context.ctx_id);
+ WRITE_ONCE(this_tlbstate->ctxs[ns.asid].tlb_gen, next_tlb_gen);
load_new_mm_cr3(next->pgd, ns.asid, new_lam, true);
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
@@ -940,8 +941,8 @@ void switch_mm_irqs_off(struct mm_struct
/* Make sure we write CR3 before loaded_mm. */
barrier();
- this_cpu_write(cpu_tlbstate.loaded_mm, next);
- this_cpu_write(cpu_tlbstate.loaded_mm_asid, ns.asid);
+ WRITE_ONCE(this_tlbstate->loaded_mm, next);
+ WRITE_ONCE(this_tlbstate->loaded_mm_asid, ns.asid);
cpu_tlbstate_update_lam(new_lam, mm_untag_mask(next));
if (next != prev) {