[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20241202140735.56368-3-gmonaco@redhat.com>
Date: Mon, 2 Dec 2024 15:07:35 +0100
From: Gabriele Monaco <gmonaco@...hat.com>
To: Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
Ingo Molnar <mingo@...hat.com>,
Peter Zijlstra <peterz@...radead.org>,
Juri Lelli <juri.lelli@...hat.com>,
Vincent Guittot <vincent.guittot@...aro.org>,
Dietmar Eggemann <dietmar.eggemann@....com>,
Steven Rostedt <rostedt@...dmis.org>,
Ben Segall <bsegall@...gle.com>,
Mel Gorman <mgorman@...e.de>,
Valentin Schneider <vschneid@...hat.com>,
linux-kernel@...r.kernel.org
Cc: Gabriele Monaco <gmonaco@...hat.com>
Subject: [PATCH 2/2] sched: Move task_mm_cid_work to RCU callback
Currently, the task_mm_cid_work function is called in a task work
triggered by a scheduler tick. This can delay the execution of the
task for the entire duration of the function.
This patch runs the task_mm_cid_work in the RCU callback thread rather
than in the task context before returning to userspace.
The main advantage of this change is that the function can be offloaded
to a different CPU and even preempted by RT tasks.
On a busy system, this may mean the function gets called less often, but
the current behaviour already doesn't provide guarantees.
Signed-off-by: Gabriele Monaco <gmonaco@...hat.com>
---
include/linux/sched.h | 1 -
kernel/sched/core.c | 17 ++++++-----------
2 files changed, 6 insertions(+), 12 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d380bffee2ef..5d141c310917 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1374,7 +1374,6 @@ struct task_struct {
int last_mm_cid; /* Most recent cid in mm */
int migrate_from_cpu;
int mm_cid_active; /* Whether cid bitmap is active */
- struct callback_head cid_work;
#endif
struct tlbflush_unmap_batch tlb_ubc;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 57b50b5952fa..0fc1a972fd4f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -10520,17 +10520,15 @@ static void sched_mm_cid_remote_clear_weight(struct mm_struct *mm, int cpu,
sched_mm_cid_remote_clear(mm, pcpu_cid, cpu);
}
-static void task_mm_cid_work(struct callback_head *work)
+static void task_mm_cid_work(struct rcu_head *rhp)
{
unsigned long now = jiffies, old_scan, next_scan;
- struct task_struct *t = current;
+ struct task_struct *t = container_of(rhp, struct task_struct, rcu);
struct cpumask *cidmask;
struct mm_struct *mm;
int weight, cpu;
- SCHED_WARN_ON(t != container_of(work, struct task_struct, cid_work));
-
- work->next = work; /* Prevent double-add */
+ rhp->next = rhp; /* Prevent double-add */
if (t->flags & PF_EXITING)
return;
mm = t->mm;
@@ -10574,23 +10572,20 @@ void init_sched_mm_cid(struct task_struct *t)
if (mm_users == 1)
mm->mm_cid_next_scan = jiffies + msecs_to_jiffies(MM_CID_SCAN_DELAY);
}
- t->cid_work.next = &t->cid_work; /* Protect against double add */
- init_task_work(&t->cid_work, task_mm_cid_work);
}
void task_tick_mm_cid(struct rq *rq, struct task_struct *curr)
{
- struct callback_head *work = &curr->cid_work;
+ struct rcu_head *rhp = &curr->rcu;
unsigned long now = jiffies;
if (!curr->mm || (curr->flags & (PF_EXITING | PF_KTHREAD)) ||
- work->next != work)
+ rhp->next != rhp)
return;
if (time_before(now, READ_ONCE(curr->mm->mm_cid_next_scan)))
return;
- /* No page allocation under rq lock */
- task_work_add(curr, work, TWA_RESUME | TWAF_NO_ALLOC);
+ call_rcu(rhp, task_mm_cid_work);
}
void sched_mm_cid_exit_signals(struct task_struct *t)
--
2.47.0
Powered by blists - more mailing lists