Message-ID: <20251127141117.87420-7-luxu.kernel@bytedance.com>
Date: Thu, 27 Nov 2025 22:11:14 +0800
From: Xu Lu <luxu.kernel@...edance.com>
To: pjw@...nel.org,
palmer@...belt.com,
aou@...s.berkeley.edu,
alex@...ti.fr,
kees@...nel.org,
mingo@...hat.com,
peterz@...radead.org,
juri.lelli@...hat.com,
vincent.guittot@...aro.org,
akpm@...ux-foundation.org,
david@...hat.com,
apatel@...tanamicro.com,
guoren@...nel.org
Cc: linux-riscv@...ts.infradead.org,
linux-kernel@...r.kernel.org,
linux-mm@...ck.org,
Xu Lu <luxu.kernel@...edance.com>
Subject: [RFC PATCH v2 6/9] riscv: mm: Introduce percpu TLB Flush queue
When the memory mapping of an mm is modified, instead of sending an IPI
to every CPU recorded in its mm_cpumask, we check whether each target
CPU is currently using this mm. If it is not, we simply record the TLB
flush information in that CPU's percpu TLB flush queue and skip the
IPI.
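To illustrate the queueing side for reviewers, here is a minimal
userspace model of the decision path (plain C11 with stdatomic; the
helper names queue_or_ipi() and fetch_add_unless_full() are
illustrative only and do not exist in this patch). It only shows how a
deferred flush either claims a slot in the target CPU's queue or, when
the queue is full, degrades to a whole-ASID flush flag; the real
should_ipi_flush() below additionally takes the per-CPU tlbinfo rwlock
and marks the matching tlb_context as need_flush.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define MAX_TLB_FLUSH_TASK      32
#define FLUSH_TLB_ALL_ASID      0x1

struct tlb_flush_task {
        unsigned long start;
        unsigned long size;
        unsigned long stride;
};

struct tlb_flush_queue {
        atomic_uint len;
        unsigned int flag;
        struct tlb_flush_task tasks[MAX_TLB_FLUSH_TASK];
};

/* Model of atomic_fetch_add_unless(&len, 1, MAX_TLB_FLUSH_TASK):
 * bump len unless it already reached the limit, return the old value. */
static unsigned int fetch_add_unless_full(atomic_uint *len)
{
        unsigned int old = atomic_load(len);

        while (old < MAX_TLB_FLUSH_TASK &&
               !atomic_compare_exchange_weak(len, &old, old + 1))
                ;
        return old;
}

/* Returns true when an IPI is still required, false when the flush
 * request was deferred into the target CPU's queue. */
static bool queue_or_ipi(struct tlb_flush_queue *q, bool mm_is_active,
                         unsigned long start, unsigned long size,
                         unsigned long stride)
{
        unsigned int idx;

        if (mm_is_active)
                return true;    /* target CPU runs this mm right now */

        idx = fetch_add_unless_full(&q->len);
        if (idx < MAX_TLB_FLUSH_TASK)
                q->tasks[idx] = (struct tlb_flush_task){ start, size, stride };
        else
                q->flag |= FLUSH_TLB_ALL_ASID;  /* overflow: flush whole ASID */

        return false;
}

int main(void)
{
        struct tlb_flush_queue q = { 0 };

        /* One 8KiB range deferred for an inactive mm, no IPI sent. */
        bool ipi = queue_or_ipi(&q, false, 0x10000, 0x2000, 0x1000);

        printf("ipi=%d queued=%u flag=%#x\n",
               ipi, atomic_load(&q.len), q.flag);
        return 0;
}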
Signed-off-by: Xu Lu <luxu.kernel@...edance.com>
---
arch/riscv/include/asm/tlbflush.h | 19 +++++++++
arch/riscv/mm/context.c | 2 +
arch/riscv/mm/tlbflush.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 89 insertions(+), 2 deletions(-)
diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
index e7365a53265a6..c9630267c58cd 100644
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -75,23 +75,40 @@ extern unsigned long tlb_flush_all_threshold;
#ifdef CONFIG_RISCV_LAZY_TLB_FLUSH
#define MAX_LOADED_MM 6
+#define MAX_TLB_FLUSH_TASK 32
+#define FLUSH_TLB_ALL_ASID 0x1
struct tlb_context {
struct mm_struct *mm;
unsigned int gen;
+ bool need_flush;
};
+struct tlb_flush_task {
+ unsigned long start;
+ unsigned long size;
+ unsigned long stride;
+};
+
+struct tlb_flush_queue {
+ atomic_t len;
+ unsigned int flag;
+ struct tlb_flush_task tasks[MAX_TLB_FLUSH_TASK];
+} ____cacheline_aligned_in_smp;
+
struct tlb_info {
rwlock_t rwlock;
struct mm_struct *active_mm;
unsigned int next_gen;
struct tlb_context contexts[MAX_LOADED_MM];
+ struct tlb_flush_queue *flush_queues;
};
DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_info, tlbinfo);
void local_load_tlb_mm(struct mm_struct *mm);
void local_flush_tlb_mm(struct mm_struct *mm);
+void __init lazy_tlb_flush_init(void);
#else /* CONFIG_RISCV_LAZY_TLB_FLUSH */
@@ -102,6 +119,8 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm)
local_flush_tlb_all_asid(get_mm_asid(mm));
}
+static inline void lazy_tlb_flush_init(void) {}
+
#endif /* CONFIG_RISCV_LAZY_TLB_FLUSH */
#else /* CONFIG_MMU */
diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c
index 3335080e5f720..c381c4ed46bfb 100644
--- a/arch/riscv/mm/context.c
+++ b/arch/riscv/mm/context.c
@@ -263,6 +263,8 @@ static int __init asids_init(void)
__set_bit(0, context_asid_map);
+ lazy_tlb_flush_init();
+
static_branch_enable(&use_asid_allocator);
pr_info("ASID allocator using %lu bits (%lu entries)\n",
diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
index a47bacf5801ab..b5a2d9874d62b 100644
--- a/arch/riscv/mm/tlbflush.c
+++ b/arch/riscv/mm/tlbflush.c
@@ -97,6 +97,7 @@ void flush_tlb_all(void)
}
struct flush_tlb_range_data {
+ struct mm_struct *mm;
unsigned long asid;
unsigned long start;
unsigned long size;
@@ -109,7 +110,7 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_info, tlbinfo) = {
.rwlock = __RW_LOCK_UNLOCKED(tlbinfo.rwlock),
.active_mm = NULL,
.next_gen = 1,
- .contexts = { { NULL, 0, }, },
+ .contexts = { { NULL, 0, false, }, },
};
static DEFINE_PER_CPU(mm_context_t *, mmdrop_victims);
@@ -155,6 +156,47 @@ static inline void mmdrop_lazy_mm(struct mm_struct *mm)
}
}
+static bool should_ipi_flush(int cpu, void *data)
+{
+ struct tlb_info *info = per_cpu_ptr(&tlbinfo, cpu);
+ struct tlb_context *contexts = info->contexts;
+ struct tlb_flush_queue *queue = NULL;
+ struct flush_tlb_range_data *ftd = data;
+ unsigned int i, index;
+ unsigned long flags;
+
+ if (info->active_mm == ftd->mm)
+ return true;
+
+ read_lock_irqsave(&info->rwlock, flags);
+
+ if (info->active_mm == ftd->mm) {
+ read_unlock_irqrestore(&info->rwlock, flags);
+ return true;
+ }
+
+ for (i = 0; i < MAX_LOADED_MM; i++) {
+ if (contexts[i].mm != ftd->mm)
+ continue;
+
+ queue = &info->flush_queues[i];
+ index = atomic_fetch_add_unless(&queue->len, 1, MAX_TLB_FLUSH_TASK);
+ if (index < MAX_TLB_FLUSH_TASK) {
+ queue->tasks[index].start = ftd->start;
+ queue->tasks[index].stride = ftd->stride;
+ queue->tasks[index].size = ftd->size;
+ } else {
+ queue->flag |= FLUSH_TLB_ALL_ASID;
+ }
+ contexts[i].need_flush = true;
+ break;
+ }
+
+ read_unlock_irqrestore(&info->rwlock, flags);
+
+ return false;
+}
+
#endif /* CONFIG_RISCV_LAZY_TLB_FLUSH */
static void __ipi_flush_tlb_range_asid(void *info)
@@ -185,11 +227,20 @@ static void __flush_tlb_range(struct mm_struct *mm,
} else {
struct flush_tlb_range_data ftd;
+ ftd.mm = mm;
ftd.asid = asid;
ftd.start = start;
ftd.size = size;
ftd.stride = stride;
- on_each_cpu_mask(cmask, __ipi_flush_tlb_range_asid, &ftd, 1);
+#ifdef CONFIG_RISCV_LAZY_TLB_FLUSH
+ if (static_branch_unlikely(&use_asid_allocator) && mm)
+ on_each_cpu_cond_mask(should_ipi_flush,
+ __ipi_flush_tlb_range_asid,
+ &ftd, 1, cmask);
+ else
+#endif
+ on_each_cpu_mask(cmask, __ipi_flush_tlb_range_asid,
+ &ftd, 1);
}
put_cpu();
@@ -376,4 +427,19 @@ void local_flush_tlb_mm(struct mm_struct *mm)
local_flush_tlb_all_asid(asid);
}
+void __init lazy_tlb_flush_init(void)
+{
+ struct tlb_flush_queue *queue;
+ unsigned int cpu, size;
+
+ size = MAX_LOADED_MM * sizeof(struct tlb_flush_queue);
+ for_each_possible_cpu(cpu) {
+ queue = kzalloc_node(size, GFP_KERNEL, cpu_to_node(cpu));
+ if (!queue)
+ panic("Failed to alloc per cpu tlb flush queue\n");
+
+ per_cpu(tlbinfo, cpu).flush_queues = queue;
+ }
+}
+
#endif /* CONFIG_RISCV_LAZY_TLB_FLUSH */
--
2.20.1