Message-Id: <aed20a6acacb2646fe45ed2ba5ada800095b5dbf.1748594840.git.libo.gcs85@bytedance.com>
Date: Fri, 30 May 2025 17:27:32 +0800
From: Bo Li <libo.gcs85@...edance.com>
To: tglx@...utronix.de,
mingo@...hat.com,
bp@...en8.de,
dave.hansen@...ux.intel.com,
x86@...nel.org,
luto@...nel.org,
kees@...nel.org,
akpm@...ux-foundation.org,
david@...hat.com,
juri.lelli@...hat.com,
vincent.guittot@...aro.org,
peterz@...radead.org
Cc: dietmar.eggemann@....com,
hpa@...or.com,
acme@...nel.org,
namhyung@...nel.org,
mark.rutland@....com,
alexander.shishkin@...ux.intel.com,
jolsa@...nel.org,
irogers@...gle.com,
adrian.hunter@...el.com,
kan.liang@...ux.intel.com,
viro@...iv.linux.org.uk,
brauner@...nel.org,
jack@...e.cz,
lorenzo.stoakes@...cle.com,
Liam.Howlett@...cle.com,
vbabka@...e.cz,
rppt@...nel.org,
surenb@...gle.com,
mhocko@...e.com,
rostedt@...dmis.org,
bsegall@...gle.com,
mgorman@...e.de,
vschneid@...hat.com,
jannh@...gle.com,
pfalcato@...e.de,
riel@...riel.com,
harry.yoo@...cle.com,
linux-kernel@...r.kernel.org,
linux-perf-users@...r.kernel.org,
linux-fsdevel@...r.kernel.org,
linux-mm@...ck.org,
duanxiongchun@...edance.com,
yinhongbo@...edance.com,
dengliang.1214@...edance.com,
xieyongji@...edance.com,
chaiwen.cc@...edance.com,
songmuchun@...edance.com,
yuanzhu@...edance.com,
chengguozhu@...edance.com,
sunjiadong.lff@...edance.com,
Bo Li <libo.gcs85@...edance.com>
Subject: [RFC v2 04/35] RPAL: add member to task_struct and mm_struct
In lazy switch and in memory-related operations, the kernel needs to
quickly locate the rpal_service that a task or its address space belongs
to. Therefore, rpal_service members are added to task_struct and
mm_struct.

This patch adds an rpal_service member to both task_struct and mm_struct,
and introduces the corresponding initialization. Meanwhile, rpal_service is
augmented with references to the group leader's task_struct and mm_struct.
For threads created via fork(), the kernel acquires a reference to
rpal_service and assigns it to the new task_struct; the reference is
released when the thread exits.

Regarding the deallocation of rpal_service: since rpal_put_service() may be
called from atomic context (where mmdrop() cannot be invoked), this patch
releases the structure via delayed work. The delay is set to 30 seconds,
which also ensures that a service ID is not recycled shortly after release,
so racing processes cannot confuse a reallocated ID with the previous one.

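In outline, the release path then becomes (see the arch/x86/rpal/service.c
hunk below):

	if (atomic_dec_and_test(&rs->refcnt)) {
		INIT_DELAYED_WORK(&rs->delayed_put_work,
				  rpal_put_service_async_fn);
		schedule_delayed_work(&rs->delayed_put_work, HZ * 30);
	}

The work item runs __rpal_put_service(), which drops the mm and group
leader references and frees the service ID.
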
Signed-off-by: Bo Li <libo.gcs85@...edance.com>
---
 arch/x86/rpal/service.c  | 77 +++++++++++++++++++++++++++++++++++++---
 fs/exec.c                | 11 ++++++
 include/linux/mm_types.h |  3 ++
 include/linux/rpal.h     | 29 +++++++++++++++
 include/linux/sched.h    |  5 +++
 init/init_task.c         |  3 ++
 kernel/exit.c            |  5 +++
 kernel/fork.c            | 16 +++++++++
 8 files changed, 145 insertions(+), 4 deletions(-)
diff --git a/arch/x86/rpal/service.c b/arch/x86/rpal/service.c
index 609c9550540d..55ecb7e0ef8c 100644
--- a/arch/x86/rpal/service.c
+++ b/arch/x86/rpal/service.c
@@ -26,9 +26,24 @@ static inline void rpal_free_service_id(int id)
static void __rpal_put_service(struct rpal_service *rs)
{
+ pr_debug("rpal: free service %d, tgid: %d\n", rs->id,
+ rs->group_leader->pid);
+
+ rs->mm->rpal_rs = NULL;
+ mmdrop(rs->mm);
+ put_task_struct(rs->group_leader);
+ rpal_free_service_id(rs->id);
kmem_cache_free(service_cache, rs);
}
+static void rpal_put_service_async_fn(struct work_struct *work)
+{
+ struct rpal_service *rs =
+ container_of(work, struct rpal_service, delayed_put_work.work);
+
+ __rpal_put_service(rs);
+}
+
static int rpal_alloc_service_id(void)
{
int id;
@@ -75,9 +90,16 @@ void rpal_put_service(struct rpal_service *rs)
{
if (!rs)
return;
-
- if (atomic_dec_and_test(&rs->refcnt))
- __rpal_put_service(rs);
+ /*
+ * Since __rpal_put_service() calls mmdrop() (which
+ * cannot be invoked in atomic context), we use
+ * delayed work to release rpal_service.
+ */
+ if (atomic_dec_and_test(&rs->refcnt)) {
+ INIT_DELAYED_WORK(&rs->delayed_put_work,
+ rpal_put_service_async_fn);
+ schedule_delayed_work(&rs->delayed_put_work, HZ * 30);
+ }
}
static u32 get_hash_key(u64 key)
@@ -128,6 +150,12 @@ struct rpal_service *rpal_register_service(void)
if (!rpal_inited)
return NULL;
+ if (!thread_group_leader(current)) {
+ rpal_err("task %d is not group leader %d\n", current->pid,
+ current->tgid);
+ goto alloc_fail;
+ }
+
rs = kmem_cache_zalloc(service_cache, GFP_KERNEL);
if (!rs)
goto alloc_fail;
@@ -140,10 +168,27 @@ struct rpal_service *rpal_register_service(void)
if (unlikely(rs->key == RPAL_INVALID_KEY))
goto key_fail;
- atomic_set(&rs->refcnt, 1);
+ current->rpal_rs = rs;
+
+ rs->group_leader = get_task_struct(current);
+ mmgrab(current->mm);
+ current->mm->rpal_rs = rs;
+ rs->mm = current->mm;
+
+ /*
+ * The references come from:
+ * 1. a registered service always holds one reference
+ * 2. the group leader's task_struct holds one reference
+ * 3. the mm_struct also holds one reference
+ */
+ atomic_set(&rs->refcnt, 3);
insert_service(rs);
+ pr_debug(
+ "rpal: register service, key: %llx, id: %d, command: %s, tgid: %d\n",
+ rs->key, rs->id, current->comm, current->tgid);
+
return rs;
key_fail:
@@ -161,7 +206,31 @@ void rpal_unregister_service(struct rpal_service *rs)
delete_service(rs);
+ pr_debug("rpal: unregister service, id: %d, tgid: %d\n", rs->id,
+ rs->group_leader->tgid);
+
+ rpal_put_service(rs);
+}
+
+void copy_rpal(struct task_struct *p)
+{
+ struct rpal_service *cur = rpal_current_service();
+
+ p->rpal_rs = rpal_get_service(cur);
+}
+
+void exit_rpal(bool group_dead)
+{
+ struct rpal_service *rs = rpal_current_service();
+
+ if (!rs)
+ return;
+
+ current->rpal_rs = NULL;
rpal_put_service(rs);
+
+ if (group_dead)
+ rpal_unregister_service(rs);
}
int __init rpal_service_init(void)
diff --git a/fs/exec.c b/fs/exec.c
index cfbb2b9ee3c9..922728aebebe 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -68,6 +68,7 @@
#include <linux/user_events.h>
#include <linux/rseq.h>
#include <linux/ksm.h>
+#include <linux/rpal.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
@@ -1076,6 +1077,16 @@ static int de_thread(struct task_struct *tsk)
/* we have changed execution domain */
tsk->exit_signal = SIGCHLD;
+#if IS_ENABLED(CONFIG_RPAL)
+ /*
+ * This RPAL process is about to load another binary, so
+ * unregister RPAL here, as the task will effectively
+ * become a different process. All other threads have
+ * already exited by this point, so group_dead must be true.
+ */
+ exit_rpal(true);
+#endif
+
BUG_ON(!thread_group_leader(tsk));
return 0;
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 32ba5126e221..b29adef082c6 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1172,6 +1172,9 @@ struct mm_struct {
#ifdef CONFIG_MM_ID
mm_id_t mm_id;
#endif /* CONFIG_MM_ID */
+#ifdef CONFIG_RPAL
+ struct rpal_service *rpal_rs;
+#endif
} __randomize_layout;
/*
diff --git a/include/linux/rpal.h b/include/linux/rpal.h
index 75c5acf33844..7b9d90b62b3f 100644
--- a/include/linux/rpal.h
+++ b/include/linux/rpal.h
@@ -11,6 +11,8 @@
#include <linux/sched.h>
#include <linux/types.h>
+#include <linux/sched/mm.h>
+#include <linux/workqueue.h>
#include <linux/hashtable.h>
#include <linux/atomic.h>
@@ -29,6 +31,9 @@
#define RPAL_INVALID_KEY _AC(0, UL)
/*
* Each RPAL process (a.k.a. RPAL service) should have a pointer to
+ * struct rpal_service in all its tasks' task_struct.
+ *
* Each RPAL service has a 64-bit key as its unique identifier, and
* the 64-bit length ensures that the key will never repeat before
* the kernel reboot.
@@ -39,10 +44,23 @@
* is released, allowing newly started RPAL services to reuse the ID.
*/
struct rpal_service {
+ /* The task_struct of thread group leader. */
+ struct task_struct *group_leader;
+ /* mm_struct of thread group */
+ struct mm_struct *mm;
/* Unique identifier for RPAL service */
u64 key;
/* virtual address space id */
int id;
+
+ /*
+ * Fields above should never change after initialization.
+ * Fields below may change after initialization.
+ */
+
+ /* delayed service put work */
+ struct delayed_work delayed_put_work;
+
/* Hashtable list for this struct */
struct hlist_node hlist;
/* reference count of this struct */
@@ -68,7 +86,18 @@ struct rpal_service *rpal_get_service(struct rpal_service *rs);
*/
void rpal_put_service(struct rpal_service *rs);
+#ifdef CONFIG_RPAL
+static inline struct rpal_service *rpal_current_service(void)
+{
+ return current->rpal_rs;
+}
+#else
+static inline struct rpal_service *rpal_current_service(void) { return NULL; }
+#endif
+
void rpal_unregister_service(struct rpal_service *rs);
struct rpal_service *rpal_register_service(void);
struct rpal_service *rpal_get_service_by_key(u64 key);
+void copy_rpal(struct task_struct *p);
+void exit_rpal(bool group_dead);
#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 45e5953b8f32..ad35b197543c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -72,6 +72,7 @@ struct rcu_node;
struct reclaim_state;
struct robust_list_head;
struct root_domain;
+struct rpal_service;
struct rq;
struct sched_attr;
struct sched_dl_entity;
@@ -1645,6 +1646,10 @@ struct task_struct {
struct user_event_mm *user_event_mm;
#endif
+#ifdef CONFIG_RPAL
+ struct rpal_service *rpal_rs;
+#endif
+
/* CPU-specific state of this task: */
struct thread_struct thread;
diff --git a/init/init_task.c b/init/init_task.c
index e557f622bd90..0c5b1927da41 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -220,6 +220,9 @@ struct task_struct init_task __aligned(L1_CACHE_BYTES) = {
#ifdef CONFIG_SECCOMP_FILTER
.seccomp = { .filter_count = ATOMIC_INIT(0) },
#endif
+#ifdef CONFIG_RPAL
+ .rpal_rs = NULL,
+#endif
};
EXPORT_SYMBOL(init_task);
diff --git a/kernel/exit.c b/kernel/exit.c
index 38645039dd8f..0c8387da59da 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -70,6 +70,7 @@
#include <linux/user_events.h>
#include <linux/uaccess.h>
#include <linux/pidfs.h>
+#include <linux/rpal.h>
#include <uapi/linux/wait.h>
@@ -944,6 +945,10 @@ void __noreturn do_exit(long code)
taskstats_exit(tsk, group_dead);
trace_sched_process_exit(tsk, group_dead);
+#if IS_ENABLED(CONFIG_RPAL)
+ exit_rpal(group_dead);
+#endif
+
exit_mm();
if (group_dead)
diff --git a/kernel/fork.c b/kernel/fork.c
index 85afccfdf3b1..1d1c8484a8f2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -105,6 +105,7 @@
#include <uapi/linux/pidfd.h>
#include <linux/pidfs.h>
#include <linux/tick.h>
+#include <linux/rpal.h>
#include <asm/pgalloc.h>
#include <linux/uaccess.h>
@@ -1216,6 +1217,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
tsk->mm_cid_active = 0;
tsk->migrate_from_cpu = -1;
#endif
+
+#ifdef CONFIG_RPAL
+ tsk->rpal_rs = NULL;
+#endif
return tsk;
free_stack:
@@ -1312,6 +1317,9 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
#endif
mm_init_uprobes_state(mm);
hugetlb_count_init(mm);
+#ifdef CONFIG_RPAL
+ mm->rpal_rs = NULL;
+#endif
if (current->mm) {
mm->flags = mmf_init_flags(current->mm->flags);
@@ -2651,6 +2659,14 @@ __latent_entropy struct task_struct *copy_process(
current->signal->nr_threads++;
current->signal->quick_threads++;
atomic_inc(¤t->signal->live);
+#if IS_ENABLED(CONFIG_RPAL)
+ /*
+ * For an RPAL process, each child thread inherits
+ * rpal_rs from its parent, so struct rpal_service
+ * can be reached from any thread of the process.
+ */
+ copy_rpal(p);
+#endif
refcount_inc(¤t->signal->sigcnt);
task_join_group_stop(p);
list_add_tail_rcu(&p->thread_node,
--
2.20.1