Message-Id: <c1eeca7e95433f2e51eeae63375f41d7fafd4c5b.1748594840.git.libo.gcs85@bytedance.com>
Date: Fri, 30 May 2025 17:27:41 +0800
From: Bo Li <libo.gcs85@...edance.com>
To: tglx@...utronix.de,
mingo@...hat.com,
bp@...en8.de,
dave.hansen@...ux.intel.com,
x86@...nel.org,
luto@...nel.org,
kees@...nel.org,
akpm@...ux-foundation.org,
david@...hat.com,
juri.lelli@...hat.com,
vincent.guittot@...aro.org,
peterz@...radead.org
Cc: dietmar.eggemann@....com,
hpa@...or.com,
acme@...nel.org,
namhyung@...nel.org,
mark.rutland@....com,
alexander.shishkin@...ux.intel.com,
jolsa@...nel.org,
irogers@...gle.com,
adrian.hunter@...el.com,
kan.liang@...ux.intel.com,
viro@...iv.linux.org.uk,
brauner@...nel.org,
jack@...e.cz,
lorenzo.stoakes@...cle.com,
Liam.Howlett@...cle.com,
vbabka@...e.cz,
rppt@...nel.org,
surenb@...gle.com,
mhocko@...e.com,
rostedt@...dmis.org,
bsegall@...gle.com,
mgorman@...e.de,
vschneid@...hat.com,
jannh@...gle.com,
pfalcato@...e.de,
riel@...riel.com,
harry.yoo@...cle.com,
linux-kernel@...r.kernel.org,
linux-perf-users@...r.kernel.org,
linux-fsdevel@...r.kernel.org,
linux-mm@...ck.org,
duanxiongchun@...edance.com,
yinhongbo@...edance.com,
dengliang.1214@...edance.com,
xieyongji@...edance.com,
chaiwen.cc@...edance.com,
songmuchun@...edance.com,
yuanzhu@...edance.com,
chengguozhu@...edance.com,
sunjiadong.lff@...edance.com,
Bo Li <libo.gcs85@...edance.com>
Subject: [RFC v2 13/35] RPAL: add tlb flushing support
When a thread flushes the TLB, the memory affected by the flush may be
accessed not only by other threads in the current process but also by
other processes that share the address space. Therefore, the cpumask
used for TLB flushing should be the union of the mm_cpumasks of all
processes that share the address space.

This patch extends flush_tlb_info to store the mm_structs of the other
processes that share the address space. When a CPU in the union of the
mm_cpumasks is invoked to handle TLB flushing, it checks whether
cpu_tlbstate.loaded_mm matches any of the mm_structs stored in
flush_tlb_info. If a match is found, the CPU performs a local TLB flush
for that mm_struct.
Signed-off-by: Bo Li <libo.gcs85@...edance.com>
---
 arch/x86/include/asm/tlbflush.h |  10 ++
 arch/x86/mm/tlb.c               | 172 ++++++++++++++++++++++++++++++++
 arch/x86/rpal/internal.h        |   3 -
 include/linux/rpal.h            |  12 +++
 mm/rmap.c                       |   4 +
 5 files changed, 198 insertions(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index e9b81876ebe4..f57b745af75c 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -227,6 +227,11 @@ struct flush_tlb_info {
u8 stride_shift;
u8 freed_tables;
u8 trim_cpumask;
+#ifdef CONFIG_RPAL
+ struct mm_struct **mm_list;
+ u64 *tlb_gen_list;
+ int nr_mm;
+#endif
};
void flush_tlb_local(void);
@@ -356,6 +361,11 @@ static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *b
mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
}
+#ifdef CONFIG_RPAL
+void rpal_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+ struct mm_struct *mm);
+#endif
+
static inline void arch_flush_tlb_batched_pending(struct mm_struct *mm)
{
flush_tlb_mm(mm);
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 39f80111e6f1..a0fe17b13887 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -12,6 +12,7 @@
#include <linux/task_work.h>
#include <linux/mmu_notifier.h>
#include <linux/mmu_context.h>
+#include <linux/rpal.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
@@ -1361,6 +1362,169 @@ void flush_tlb_multi(const struct cpumask *cpumask,
__flush_tlb_multi(cpumask, info);
}
+#ifdef CONFIG_RPAL
+static void rpal_flush_tlb_func_remote(void *info)
+{
+ struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
+ struct flush_tlb_info *f = info;
+ struct flush_tlb_info tf = *f;
+ int i;
+
+ /* As it comes from RPAL path, f->mm cannot be NULL */
+ if (f->mm == loaded_mm) {
+ flush_tlb_func(f);
+ return;
+ }
+
+ for (i = 0; i < f->nr_mm; i++) {
+ /* We always have f->mm_list[i] != NULL */
+ if (f->mm_list[i] == loaded_mm) {
+ tf.mm = f->mm_list[i];
+ tf.new_tlb_gen = f->tlb_gen_list[i];
+ flush_tlb_func(&tf);
+ return;
+ }
+ }
+}
+
+static void rpal_flush_tlb_func_multi(const struct cpumask *cpumask,
+ const struct flush_tlb_info *info)
+{
+ count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
+ if (info->end == TLB_FLUSH_ALL)
+ trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL);
+ else
+ trace_tlb_flush(TLB_REMOTE_SEND_IPI,
+ (info->end - info->start) >> PAGE_SHIFT);
+
+ if (info->freed_tables || mm_in_asid_transition(info->mm))
+ on_each_cpu_mask(cpumask, rpal_flush_tlb_func_remote,
+ (void *)info, true);
+ else
+ on_each_cpu_cond_mask(should_flush_tlb,
+ rpal_flush_tlb_func_remote, (void *)info,
+ 1, cpumask);
+}
+
+static void rpal_flush_tlb_func_local(struct mm_struct *mm, int cpu,
+ struct flush_tlb_info *info,
+ u64 new_tlb_gen)
+{
+ struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
+
+ if (loaded_mm == info->mm) {
+ lockdep_assert_irqs_enabled();
+ local_irq_disable();
+ flush_tlb_func(info);
+ local_irq_enable();
+ } else {
+ int i;
+
+ for (i = 0; i < info->nr_mm; i++) {
+ if (info->mm_list[i] == loaded_mm) {
+ lockdep_assert_irqs_enabled();
+ local_irq_disable();
+ info->mm = info->mm_list[i];
+ info->new_tlb_gen = info->tlb_gen_list[i];
+ flush_tlb_func(info);
+ info->mm = mm;
+ info->new_tlb_gen = new_tlb_gen;
+ local_irq_enable();
+ }
+ }
+ }
+}
+
+static void rpal_flush_tlb_mm_range(struct mm_struct *mm, int cpu,
+ struct flush_tlb_info *info, u64 new_tlb_gen)
+{
+ struct rpal_service *cur = mm->rpal_rs;
+ cpumask_t merged_mask;
+ struct mm_struct *mm_list[MAX_REQUEST_SERVICE];
+ u64 tlb_gen_list[MAX_REQUEST_SERVICE];
+ int nr_mm = 0;
+ int i;
+
+ cpumask_copy(&merged_mask, mm_cpumask(mm));
+ if (cur) {
+ struct rpal_service *tgt;
+ struct mm_struct *tgt_mm;
+
+ rpal_for_each_requested_service(cur, i) {
+ struct rpal_mapped_service *node;
+
+ if (i == cur->id)
+ continue;
+ node = rpal_get_mapped_node(cur, i);
+ if (!rpal_service_mapped(node))
+ continue;
+
+ tgt = rpal_get_service(node->rs);
+ if (!tgt)
+ continue;
+ tgt_mm = tgt->mm;
+ if (!mmget_not_zero(tgt_mm)) {
+ rpal_put_service(tgt);
+ continue;
+ }
+ mm_list[nr_mm] = tgt_mm;
+ tlb_gen_list[nr_mm] = inc_mm_tlb_gen(tgt_mm);
+
+ nr_mm++;
+ cpumask_or(&merged_mask, &merged_mask,
+ mm_cpumask(tgt_mm));
+ rpal_put_service(tgt);
+ }
+ info->mm_list = mm_list;
+ info->tlb_gen_list = tlb_gen_list;
+ info->nr_mm = nr_mm;
+ }
+
+ if (cpumask_any_but(&merged_mask, cpu) < nr_cpu_ids)
+ rpal_flush_tlb_func_multi(&merged_mask, info);
+ else
+ rpal_flush_tlb_func_local(mm, cpu, info, new_tlb_gen);
+
+ for (i = 0; i < nr_mm; i++)
+ mmput_async(mm_list[i]);
+}
+
+void rpal_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+ struct mm_struct *mm)
+{
+ struct rpal_service *cur = mm->rpal_rs;
+ struct rpal_service *tgt;
+ struct mm_struct *tgt_mm;
+ int i;
+
+ rpal_for_each_requested_service(cur, i) {
+ struct rpal_mapped_service *node;
+
+ if (i == cur->id)
+ continue;
+
+ node = rpal_get_mapped_node(cur, i);
+ if (!rpal_service_mapped(node))
+ continue;
+
+ tgt = rpal_get_service(node->rs);
+ if (!tgt)
+ continue;
+ tgt_mm = tgt->mm;
+ if (!mmget_not_zero(tgt_mm)) {
+ rpal_put_service(tgt);
+ continue;
+ }
+ inc_mm_tlb_gen(tgt_mm);
+ cpumask_or(&batch->cpumask, &batch->cpumask,
+ mm_cpumask(tgt_mm));
+ mmu_notifier_arch_invalidate_secondary_tlbs(tgt_mm, 0, -1UL);
+ rpal_put_service(tgt);
+ mmput_async(tgt_mm);
+ }
+}
+#endif
+
/*
* See Documentation/arch/x86/tlb.rst for details. We choose 33
* because it is large enough to cover the vast majority (at
@@ -1439,6 +1603,11 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
info = get_flush_tlb_info(mm, start, end, stride_shift, freed_tables,
new_tlb_gen);
+#if IS_ENABLED(CONFIG_RPAL)
+ if (mm->rpal_rs)
+ rpal_flush_tlb_mm_range(mm, cpu, info, new_tlb_gen);
+ else {
+#endif
/*
* flush_tlb_multi() is not optimized for the common case in which only
* a local TLB flush is needed. Optimize this use-case by calling
@@ -1456,6 +1625,9 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
flush_tlb_func(info);
local_irq_enable();
}
+#if IS_ENABLED(CONFIG_RPAL)
+ }
+#endif
put_flush_tlb_info();
put_cpu();
diff --git a/arch/x86/rpal/internal.h b/arch/x86/rpal/internal.h
index c504b6efff64..cf6d608a994a 100644
--- a/arch/x86/rpal/internal.h
+++ b/arch/x86/rpal/internal.h
@@ -12,9 +12,6 @@
#include <linux/mm.h>
#include <linux/file.h>
-#define RPAL_REQUEST_MAP 0x1
-#define RPAL_REVERSE_MAP 0x2
-
extern bool rpal_inited;
/* service.c */
diff --git a/include/linux/rpal.h b/include/linux/rpal.h
index b9622f0235bf..36be1ab6a9f3 100644
--- a/include/linux/rpal.h
+++ b/include/linux/rpal.h
@@ -80,6 +80,11 @@
/* No more than 15 services can be requested due to limitation of MPK. */
#define MAX_REQUEST_SERVICE 15
+enum {
+ RPAL_REQUEST_MAP,
+ RPAL_REVERSE_MAP,
+};
+
extern unsigned long rpal_cap;
enum rpal_task_flag_bits {
@@ -326,6 +331,13 @@ rpal_get_mapped_node(struct rpal_service *rs, int id)
return &rs->service_map[id];
}
+static inline bool rpal_service_mapped(struct rpal_mapped_service *node)
+{
+ unsigned long type = (1 << RPAL_REQUEST_MAP) | (1 << RPAL_REVERSE_MAP);
+
+ return (node->type & type) == type;
+}
+
#ifdef CONFIG_RPAL
static inline struct rpal_service *rpal_current_service(void)
{
diff --git a/mm/rmap.c b/mm/rmap.c
index 67bb273dfb80..e68384f97ab9 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -682,6 +682,10 @@ static void set_tlb_ubc_flush_pending(struct mm_struct *mm, pte_t pteval,
return;
arch_tlbbatch_add_pending(&tlb_ubc->arch, mm, start, end);
+#ifdef CONFIG_RPAL
+ if (mm->rpal_rs)
+ rpal_tlbbatch_add_pending(&tlb_ubc->arch, mm);
+#endif
tlb_ubc->flush_required = true;
/*
--
2.20.1