lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <47c919a7d65cb5def07c561e29305d39d9df925f.1748594841.git.libo.gcs85@bytedance.com>
Date: Fri, 30 May 2025 17:27:52 +0800
From: Bo Li <libo.gcs85@...edance.com>
To: tglx@...utronix.de,
	mingo@...hat.com,
	bp@...en8.de,
	dave.hansen@...ux.intel.com,
	x86@...nel.org,
	luto@...nel.org,
	kees@...nel.org,
	akpm@...ux-foundation.org,
	david@...hat.com,
	juri.lelli@...hat.com,
	vincent.guittot@...aro.org,
	peterz@...radead.org
Cc: dietmar.eggemann@....com,
	hpa@...or.com,
	acme@...nel.org,
	namhyung@...nel.org,
	mark.rutland@....com,
	alexander.shishkin@...ux.intel.com,
	jolsa@...nel.org,
	irogers@...gle.com,
	adrian.hunter@...el.com,
	kan.liang@...ux.intel.com,
	viro@...iv.linux.org.uk,
	brauner@...nel.org,
	jack@...e.cz,
	lorenzo.stoakes@...cle.com,
	Liam.Howlett@...cle.com,
	vbabka@...e.cz,
	rppt@...nel.org,
	surenb@...gle.com,
	mhocko@...e.com,
	rostedt@...dmis.org,
	bsegall@...gle.com,
	mgorman@...e.de,
	vschneid@...hat.com,
	jannh@...gle.com,
	pfalcato@...e.de,
	riel@...riel.com,
	harry.yoo@...cle.com,
	linux-kernel@...r.kernel.org,
	linux-perf-users@...r.kernel.org,
	linux-fsdevel@...r.kernel.org,
	linux-mm@...ck.org,
	duanxiongchun@...edance.com,
	yinhongbo@...edance.com,
	dengliang.1214@...edance.com,
	xieyongji@...edance.com,
	chaiwen.cc@...edance.com,
	songmuchun@...edance.com,
	yuanzhu@...edance.com,
	chengguozhu@...edance.com,
	sunjiadong.lff@...edance.com,
	Bo Li <libo.gcs85@...edance.com>
Subject: [RFC v2 24/35] RPAL: critical section optimization

The critical section is defined as the user mode code segment within the
receiver that executes when control returns from the receiver to the
sender. This code segment, located in the receiver, involves operations
such as switching the fsbase register and changing the stack pointer.
Handling the critical section can be categorized into two scenarios:

- First Scenario: If no lazy switch has occurred prior to the return and
  the fsbase switch is incomplete, a lazy switch is triggered to
  transition the kernel context from the sender to the receiver. After
  the fsbase is updated in user mode, another lazy switch occurs to revert
  the kernel context from the receiver back to the sender. This results in
  two unnecessary lazy switches.

- Second Scenario: If a lazy switch has already occurred during execution
  of the critical section, the lazy switch can be preemptively triggered.
  This avoids re-entering the kernel solely to initiate another lazy
  switch.

The implementation of the critical section involves modifying the fsbase
register in kernel mode and setting the sender's user mode context to a
predefined state. These steps minimize redundant user/kernel transitions
and lazy switches.

Signed-off-by: Bo Li <libo.gcs85@...edance.com>
---
 arch/x86/rpal/core.c    | 88 ++++++++++++++++++++++++++++++++++++++++-
 arch/x86/rpal/service.c | 12 ++++++
 include/linux/rpal.h    |  6 +++
 3 files changed, 104 insertions(+), 2 deletions(-)

diff --git a/arch/x86/rpal/core.c b/arch/x86/rpal/core.c
index c48df1ce4324..406d54788bac 100644
--- a/arch/x86/rpal/core.c
+++ b/arch/x86/rpal/core.c
@@ -219,14 +219,98 @@ static inline struct task_struct *rpal_misidentify(void)
 	return next;
 }
 
+static bool in_ret_section(struct rpal_service *rs, unsigned long ip)
+{
+	return ip >= rs->rsm.rcs.ret_begin && ip < rs->rsm.rcs.ret_end;
+}
+
+/*
+ * rpal_update_fsbase - fastpath when RPAL call returns
+ * @regs: pt_regs saved in kernel entry
+ *
+ * If the user is executing rpal call return code and has not
+ * updated fsbase yet, force the fsbase update so that a lazy
+ * switch can be performed immediately.
+ */
+static inline void rpal_update_fsbase(struct pt_regs *regs)
+{
+	struct rpal_service *cur = rpal_current_service();
+	struct task_struct *sender = current->rpal_rd->sender;
+
+	if (in_ret_section(cur, regs->ip))
+		wrfsbase(sender->thread.fsbase);
+}
+
+/*
+ * rpal_skip_receiver_code - skip rpal call return code
+ * @next: the next task to be lazy switched to.
+ * @regs: pt_regs saved in kernel entry
+ *
+ * If the user is executing rpal call return code and we are about
+ * to perform a lazy switch, skip the remaining return code to
+ * release the receiver's stack. This avoids stack conflicts when
+ * more than one sender calls the receiver.
+ */
+static inline void rpal_skip_receiver_code(struct task_struct *next,
+					   struct pt_regs *regs)
+{
+	rebuild_sender_stack(next->rpal_sd, regs);
+}
+
+/*
+ * rpal_skip_lazy_switch - skip lazy switch when rpal call returns
+ * @next: the next task to be lazy switched to.
+ * @regs: pt_regs saved in kernel entry
+ *
+ * If the user is executing rpal call return code and we have not
+ * performed a lazy switch, there is no need to perform one now.
+ * Update fsbase and other state to avoid the lazy switch.
+ */
+static inline struct task_struct *
+rpal_skip_lazy_switch(struct task_struct *next, struct pt_regs *regs)
+{
+	struct rpal_service *tgt;
+
+	tgt = next->rpal_rs;
+	if (in_ret_section(tgt, regs->ip)) {
+		wrfsbase(current->thread.fsbase);
+		rebuild_sender_stack(current->rpal_sd, regs);
+		rpal_clear_task_thread_flag(next, RPAL_LAZY_SWITCHED_BIT);
+		next->rpal_rd->sender = NULL;
+		next = NULL;
+	}
+	return next;
+}
+
+static struct task_struct *rpal_fix_critical_section(struct task_struct *next,
+						     struct pt_regs *regs)
+{
+	struct rpal_service *cur = rpal_current_service();
+
+	/* sender->receiver */
+	if (rpal_test_task_thread_flag(next, RPAL_LAZY_SWITCHED_BIT))
+		next = rpal_skip_lazy_switch(next, regs);
+	/* receiver->sender */
+	else if (rpal_is_correct_address(cur, regs->ip))
+		rpal_skip_receiver_code(next, regs);
+
+	return next;
+}
+
 static inline struct task_struct *
 rpal_kernel_context_switch(struct pt_regs *regs)
 {
 	struct task_struct *next = NULL;
 
+	if (rpal_test_current_thread_flag(RPAL_LAZY_SWITCHED_BIT))
+		rpal_update_fsbase(regs);
+
 	next = rpal_misidentify();
-	if (unlikely(next != NULL))
-		next = rpal_do_kernel_context_switch(next, regs);
+	if (unlikely(next != NULL)) {
+		next = rpal_fix_critical_section(next, regs);
+		if (next)
+			next = rpal_do_kernel_context_switch(next, regs);
+	}
 
 	return next;
 }
diff --git a/arch/x86/rpal/service.c b/arch/x86/rpal/service.c
index 49458321e7dc..16e94d710445 100644
--- a/arch/x86/rpal/service.c
+++ b/arch/x86/rpal/service.c
@@ -545,6 +545,13 @@ int rpal_release_service(u64 key)
 	return ret;
 }
 
+static bool rpal_check_critical_section(struct rpal_service *rs,
+			     struct rpal_critical_section *rcs)
+{
+	return rpal_is_correct_address(rs, rcs->ret_begin) &&
+	       rpal_is_correct_address(rs, rcs->ret_end);
+}
+
 int rpal_enable_service(unsigned long arg)
 {
 	struct rpal_service *cur = rpal_current_service();
@@ -562,6 +569,11 @@ int rpal_enable_service(unsigned long arg)
 		goto out;
 	}
 
+	if (!rpal_check_critical_section(cur, &rsm.rcs)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
 	mutex_lock(&cur->mutex);
 	if (!cur->enabled) {
 		cur->rsm = rsm;
diff --git a/include/linux/rpal.h b/include/linux/rpal.h
index b24176f3f245..4f1d92053818 100644
--- a/include/linux/rpal.h
+++ b/include/linux/rpal.h
@@ -122,12 +122,18 @@ enum rpal_sender_state {
 	RPAL_SENDER_STATE_KERNEL_RET,
 };
 
+struct rpal_critical_section {
+	unsigned long ret_begin;
+	unsigned long ret_end;
+};
+
 /*
  * user_meta will be sent to other service when requested.
  */
 struct rpal_service_metadata {
 	unsigned long version;
 	void __user *user_meta;
+	struct rpal_critical_section rcs;
 };
 
 struct rpal_request_arg {
-- 
2.20.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ