Message-Id: <198278a03d91ab7e0e17d782c657da85cff741bb.1748594841.git.libo.gcs85@bytedance.com>
Date: Fri, 30 May 2025 17:27:59 +0800
From: Bo Li <libo.gcs85@...edance.com>
To: tglx@...utronix.de,
mingo@...hat.com,
bp@...en8.de,
dave.hansen@...ux.intel.com,
x86@...nel.org,
luto@...nel.org,
kees@...nel.org,
akpm@...ux-foundation.org,
david@...hat.com,
juri.lelli@...hat.com,
vincent.guittot@...aro.org,
peterz@...radead.org
Cc: dietmar.eggemann@....com,
hpa@...or.com,
acme@...nel.org,
namhyung@...nel.org,
mark.rutland@....com,
alexander.shishkin@...ux.intel.com,
jolsa@...nel.org,
irogers@...gle.com,
adrian.hunter@...el.com,
kan.liang@...ux.intel.com,
viro@...iv.linux.org.uk,
brauner@...nel.org,
jack@...e.cz,
lorenzo.stoakes@...cle.com,
Liam.Howlett@...cle.com,
vbabka@...e.cz,
rppt@...nel.org,
surenb@...gle.com,
mhocko@...e.com,
rostedt@...dmis.org,
bsegall@...gle.com,
mgorman@...e.de,
vschneid@...hat.com,
jannh@...gle.com,
pfalcato@...e.de,
riel@...riel.com,
harry.yoo@...cle.com,
linux-kernel@...r.kernel.org,
linux-perf-users@...r.kernel.org,
linux-fsdevel@...r.kernel.org,
linux-mm@...ck.org,
duanxiongchun@...edance.com,
yinhongbo@...edance.com,
dengliang.1214@...edance.com,
xieyongji@...edance.com,
chaiwen.cc@...edance.com,
songmuchun@...edance.com,
yuanzhu@...edance.com,
chengguozhu@...edance.com,
sunjiadong.lff@...edance.com,
Bo Li <libo.gcs85@...edance.com>
Subject: [RFC v2 31/35] RPAL: add receiver waker
During an RPAL call, the receiver thread is in the TASK_INTERRUPTIBLE state
but cannot be woken up, so wakeups that arrive in this window may be lost.
For example, if no kernel event occurs for the whole duration of the RPAL
call, the receiver thread stays in the TASK_INTERRUPTIBLE state even after
the RPAL call has completed.

To address this, RPAL sets a flag (RPAL_WAKE_BIT) on the receiver whenever
a wakeup has to be rejected, and introduces a "waker" delayed work. The
waker work reschedules itself on every tick and scans for receiver threads
that have missed a wakeup; any it finds are woken up. For epoll, the waker
also checks for pending user mode events and wakes the receiver thread if
such events exist.
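
For context, the mechanism is a self-rescheduling delayed work that runs
once per tick. The sketch below is illustrative only, with hypothetical
names (struct waiter, waker_fn, ...); unlike this simplified version, the
patch collects candidate tasks under the lock and calls wake_up_process()
only after dropping it:

#include <linux/kernel.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/sched.h>

struct waiter {
	struct list_head node;
	struct task_struct *task;	/* thread that may have missed a wakeup */
};

struct waker {
	spinlock_t lock;		/* protects head */
	struct list_head head;		/* list of struct waiter */
	struct delayed_work work;
};

static void waker_fn(struct work_struct *work)
{
	struct waker *w = container_of(work, struct waker, work.work);
	struct waiter *it;
	unsigned long flags;

	spin_lock_irqsave(&w->lock, flags);
	list_for_each_entry(it, &w->head, node)
		wake_up_process(it->task);	/* legal in atomic context */
	spin_unlock_irqrestore(&w->lock, flags);

	/* re-arm for the next tick (delay is in jiffies) */
	schedule_delayed_work(&w->work, 1);
}

static void waker_start(struct waker *w)
{
	spin_lock_init(&w->lock);
	INIT_LIST_HEAD(&w->head);
	INIT_DELAYED_WORK(&w->work, waker_fn);
	schedule_delayed_work(&w->work, 1);
}

static void waker_stop(struct waker *w)
{
	/* handles self-requeueing work: waits until it is neither
	 * pending nor running, and blocks the re-arm in waker_fn() */
	cancel_delayed_work_sync(&w->work);
}
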
Signed-off-by: Bo Li <libo.gcs85@...edance.com>
---
arch/x86/rpal/internal.h | 4 ++
arch/x86/rpal/service.c | 98 ++++++++++++++++++++++++++++++++++++++++
arch/x86/rpal/thread.c | 3 ++
include/linux/rpal.h | 11 +++++
kernel/sched/core.c | 3 ++
5 files changed, 119 insertions(+)
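
Note for reviewers: the race-free part of the wake decision is the atomic
claim of the receiver state. A minimal sketch of that step follows, using
made-up state values (the real ones live in enum rpal_receiver_state) and
hypothetical helper names; atomic_cmpxchg() succeeds for exactly one of
the paths racing on the same receiver, so the waker and a concurrent
wakeup cannot both claim it:

#include <linux/atomic.h>
#include <linux/sched.h>

#define STATE_WAIT	1	/* illustrative values only */
#define STATE_RUNNING	2

/* returns true for exactly one caller racing on the same receiver */
static bool claim_waiting_receiver(atomic_t *state)
{
	return atomic_cmpxchg(state, STATE_WAIT, STATE_RUNNING) == STATE_WAIT;
}

static void maybe_wake(atomic_t *state, struct task_struct *task,
		       bool has_pending_events)
{
	if (has_pending_events && claim_waiting_receiver(state))
		wake_up_process(task);	/* we won the claim; wake it */
}
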
diff --git a/arch/x86/rpal/internal.h b/arch/x86/rpal/internal.h
index e03f8a90619d..117357dabdec 100644
--- a/arch/x86/rpal/internal.h
+++ b/arch/x86/rpal/internal.h
@@ -22,6 +22,10 @@ int rpal_enable_service(unsigned long arg);
int rpal_disable_service(void);
int rpal_request_service(unsigned long arg);
int rpal_release_service(u64 key);
+void rpal_insert_wake_list(struct rpal_service *rs,
+ struct rpal_receiver_data *rrd);
+void rpal_remove_wake_list(struct rpal_service *rs,
+ struct rpal_receiver_data *rrd);
/* mm.c */
static inline struct rpal_shared_page *
diff --git a/arch/x86/rpal/service.c b/arch/x86/rpal/service.c
index 9fd568fa9a29..6fefb7a7729c 100644
--- a/arch/x86/rpal/service.c
+++ b/arch/x86/rpal/service.c
@@ -143,6 +143,99 @@ static void delete_service(struct rpal_service *rs)
spin_unlock_irqrestore(&hash_table_lock, flags);
}
+void rpal_insert_wake_list(struct rpal_service *rs,
+ struct rpal_receiver_data *rrd)
+{
+ unsigned long flags;
+ struct rpal_waker_struct *waker = &rs->waker;
+
+ spin_lock_irqsave(&waker->lock, flags);
+ list_add_tail(&rrd->wake_list, &waker->wake_head);
+ spin_unlock_irqrestore(&waker->lock, flags);
+ pr_debug("rpal debug: [%d] insert wake list\n", current->pid);
+}
+
+void rpal_remove_wake_list(struct rpal_service *rs,
+ struct rpal_receiver_data *rrd)
+{
+ unsigned long flags;
+ struct rpal_waker_struct *waker = &rs->waker;
+
+ spin_lock_irqsave(&waker->lock, flags);
+ list_del(&rrd->wake_list);
+ spin_unlock_irqrestore(&waker->lock, flags);
+ pr_debug("rpal debug: [%d] remove wake list\n", current->pid);
+}
+
+/* takes waker->lock itself; do not call with it held */
+static inline void rpal_wake_all(struct rpal_waker_struct *waker)
+{
+ struct task_struct *wake_list[RPAL_MAX_RECEIVER_NUM];
+ struct list_head *list;
+ unsigned long flags;
+ int i, cnt = 0;
+
+ spin_lock_irqsave(&waker->lock, flags);
+ list_for_each(list, &waker->wake_head) {
+ struct task_struct *task;
+ struct rpal_receiver_call_context *rcc;
+ struct rpal_receiver_data *rrd;
+ int pending;
+
+ rrd = list_entry(list, struct rpal_receiver_data, wake_list);
+ task = rrd->rcd.bp_task;
+ rcc = rrd->rcc;
+
+ pending = atomic_read(&rcc->ep_pending) & RPAL_USER_PENDING;
+
+ if (rpal_test_task_thread_flag(task, RPAL_WAKE_BIT) ||
+ (pending && atomic_cmpxchg(&rcc->receiver_state,
+ RPAL_RECEIVER_STATE_WAIT,
+ RPAL_RECEIVER_STATE_RUNNING) ==
+ RPAL_RECEIVER_STATE_WAIT)) {
+ wake_list[cnt] = task;
+ cnt++;
+ }
+ }
+ spin_unlock_irqrestore(&waker->lock, flags);
+
+ for (i = 0; i < cnt; i++)
+ wake_up_process(wake_list[i]);
+}
+
+static void rpal_wake_callback(struct work_struct *work)
+{
+ struct rpal_waker_struct *waker =
+ container_of(work, struct rpal_waker_struct, waker_work.work);
+
+ rpal_wake_all(waker);
+	/* re-check once per tick */
+ schedule_delayed_work(&waker->waker_work, 1);
+}
+
+static void rpal_enable_waker(struct rpal_waker_struct *waker)
+{
+ INIT_DELAYED_WORK(&waker->waker_work, rpal_wake_callback);
+ schedule_delayed_work(&waker->waker_work, 1);
+ pr_debug("rpal debug: [%d] enable waker\n", current->pid);
+}
+
+static void rpal_disable_waker(struct rpal_waker_struct *waker)
+{
+ unsigned long flags;
+ struct list_head *p, *n;
+
+ cancel_delayed_work_sync(&waker->waker_work);
+ rpal_wake_all(waker);
+ spin_lock_irqsave(&waker->lock, flags);
+ list_for_each_safe(p, n, &waker->wake_head) {
+ list_del_init(p);
+ }
+ INIT_LIST_HEAD(&waker->wake_head);
+ spin_unlock_irqrestore(&waker->lock, flags);
+ pr_debug("rpal debug: [%d] disable waker\n", current->pid);
+}
+
static inline unsigned long calculate_base_address(int id)
{
return RPAL_ADDRESS_SPACE_LOW + RPAL_ADDR_SPACE_SIZE * id;
@@ -213,6 +306,10 @@ struct rpal_service *rpal_register_service(void)
rs->pku_on = PKU_ON_FALSE;
rpal_service_pku_init();
#endif
+ spin_lock_init(&rs->waker.lock);
+ INIT_LIST_HEAD(&rs->waker.wake_head);
+	/* a receiver may miss a wakeup during a lazy switch; wake it up later */
+ rpal_enable_waker(&rs->waker);
rs->bad_service = false;
rs->base = calculate_base_address(rs->id);
@@ -257,6 +354,7 @@ void rpal_unregister_service(struct rpal_service *rs)
schedule();
delete_service(rs);
+ rpal_disable_waker(&rs->waker);
pr_debug("rpal: unregister service, id: %d, tgid: %d\n", rs->id,
rs->group_leader->tgid);
diff --git a/arch/x86/rpal/thread.c b/arch/x86/rpal/thread.c
index fcc592baaac0..51c9eec639cb 100644
--- a/arch/x86/rpal/thread.c
+++ b/arch/x86/rpal/thread.c
@@ -186,6 +186,8 @@ int rpal_register_receiver(unsigned long addr)
current->rpal_rd = rrd;
rpal_set_current_thread_flag(RPAL_RECEIVER_BIT);
+ rpal_insert_wake_list(cur, rrd);
+
atomic_inc(&cur->thread_cnt);
return 0;
@@ -214,6 +216,7 @@ int rpal_unregister_receiver(void)
clear_fs_tsk_map();
rpal_put_shared_page(rrd->rsp);
+ rpal_remove_wake_list(cur, rrd);
rpal_clear_current_thread_flag(RPAL_RECEIVER_BIT);
rpal_free_thread_pending(&rrd->rcd);
kfree(rrd);
diff --git a/include/linux/rpal.h b/include/linux/rpal.h
index 16a3c80383f7..1d8c1bdc90f2 100644
--- a/include/linux/rpal.h
+++ b/include/linux/rpal.h
@@ -116,6 +116,7 @@ enum rpal_task_flag_bits {
RPAL_RECEIVER_BIT,
RPAL_CPU_LOCKED_BIT,
RPAL_LAZY_SWITCHED_BIT,
+ RPAL_WAKE_BIT,
};
enum rpal_receiver_state {
@@ -189,6 +190,12 @@ struct rpal_fsbase_tsk_map {
struct task_struct *tsk;
};
+struct rpal_waker_struct {
+ spinlock_t lock;
+ struct list_head wake_head;
+ struct delayed_work waker_work;
+};
+
/*
* Each RPAL process (a.k.a RPAL service) should have a pointer to
* struct rpal_service in all its tasks' task_struct.
@@ -255,6 +262,9 @@ struct rpal_service {
int pkey;
#endif
+ /* receiver thread waker */
+ struct rpal_waker_struct waker;
+
/* delayed service put work */
struct delayed_work delayed_put_work;
@@ -347,6 +357,7 @@ struct rpal_receiver_data {
struct fd f;
struct hrtimer_sleeper ep_sleeper;
wait_queue_entry_t ep_wait;
+ struct list_head wake_list;
};
struct rpal_sender_data {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 486d59bdd3fc..c219ada29d34 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3943,6 +3943,7 @@ static bool rpal_check_state(struct task_struct *p)
struct rpal_receiver_call_context *rcc = p->rpal_rd->rcc;
int state;
+ rpal_clear_task_thread_flag(p, RPAL_WAKE_BIT);
retry:
state = atomic_read(&rcc->receiver_state) & RPAL_RECEIVER_STATE_MASK;
switch (state) {
@@ -3957,6 +3958,7 @@ static bool rpal_check_state(struct task_struct *p)
case RPAL_RECEIVER_STATE_RUNNING:
break;
case RPAL_RECEIVER_STATE_CALL:
+ rpal_set_task_thread_flag(p, RPAL_WAKE_BIT);
ret = false;
break;
default:
@@ -4522,6 +4524,7 @@ int rpal_try_to_wake_up(struct task_struct *p)
BUG_ON(READ_ONCE(p->__state) == TASK_RUNNING);
+ rpal_clear_task_thread_flag(p, RPAL_WAKE_BIT);
scoped_guard (raw_spinlock_irqsave, &p->pi_lock) {
smp_mb__after_spinlock();
if (!ttwu_state_match(p, TASK_NORMAL, &success))
--
2.20.1