linux-kernel - [PATCH 2/2] sched: Move task_mm_cid_work to RCU callback

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives

Hash Suite: Windows password security audit tool. GUI, reports in PDF.

[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]

Message-ID: <20241202140735.56368-3-gmonaco@redhat.com>
Date: Mon,  2 Dec 2024 15:07:35 +0100
From: Gabriele Monaco <gmonaco@...hat.com>
To: Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
	Ingo Molnar <mingo@...hat.com>,
	Peter Zijlstra <peterz@...radead.org>,
	Juri Lelli <juri.lelli@...hat.com>,
	Vincent Guittot <vincent.guittot@...aro.org>,
	Dietmar Eggemann <dietmar.eggemann@....com>,
	Steven Rostedt <rostedt@...dmis.org>,
	Ben Segall <bsegall@...gle.com>,
	Mel Gorman <mgorman@...e.de>,
	Valentin Schneider <vschneid@...hat.com>,
	linux-kernel@...r.kernel.org
Cc: Gabriele Monaco <gmonaco@...hat.com>
Subject: [PATCH 2/2] sched: Move task_mm_cid_work to RCU callback

Currently, task_mm_cid_work() runs as a task work item queued by the
scheduler tick, so it executes in the context of the task itself. This
can delay the execution of the task for the entire duration of the
function.

This patch runs task_mm_cid_work() in the RCU callback thread rather
than in the task context before returning to userspace.

The main advantage of this change is that the function can be offloaded
to a different CPU and even preempted by RT tasks.

On a busy system, this may mean the function gets called less often, but
the current behaviour does not provide any guarantees either.

Signed-off-by: Gabriele Monaco <gmonaco@...hat.com>
---
 include/linux/sched.h |  1 -
 kernel/sched/core.c   | 17 ++++++-----------
 2 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d380bffee2ef..5d141c310917 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1374,7 +1374,6 @@ struct task_struct {
 	int				last_mm_cid;	/* Most recent cid in mm */
 	int				migrate_from_cpu;
 	int				mm_cid_active;	/* Whether cid bitmap is active */
-	struct callback_head		cid_work;
 #endif
 
 	struct tlbflush_unmap_batch	tlb_ubc;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 57b50b5952fa..0fc1a972fd4f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -10520,17 +10520,15 @@ static void sched_mm_cid_remote_clear_weight(struct mm_struct *mm, int cpu,
 	sched_mm_cid_remote_clear(mm, pcpu_cid, cpu);
 }
 
-static void task_mm_cid_work(struct callback_head *work)
+static void task_mm_cid_work(struct rcu_head *rhp)
 {
 	unsigned long now = jiffies, old_scan, next_scan;
-	struct task_struct *t = current;
+	struct task_struct *t = container_of(rhp, struct task_struct, rcu);
 	struct cpumask *cidmask;
 	struct mm_struct *mm;
 	int weight, cpu;
 
-	SCHED_WARN_ON(t != container_of(work, struct task_struct, cid_work));
-
-	work->next = work;	/* Prevent double-add */
+	rhp->next = rhp;	/* Prevent double-add */
 	if (t->flags & PF_EXITING)
 		return;
 	mm = t->mm;
@@ -10574,23 +10572,20 @@ void init_sched_mm_cid(struct task_struct *t)
 		if (mm_users == 1)
 			mm->mm_cid_next_scan = jiffies + msecs_to_jiffies(MM_CID_SCAN_DELAY);
 	}
-	t->cid_work.next = &t->cid_work;	/* Protect against double add */
-	init_task_work(&t->cid_work, task_mm_cid_work);
 }
 
 void task_tick_mm_cid(struct rq *rq, struct task_struct *curr)
 {
-	struct callback_head *work = &curr->cid_work;
+	struct rcu_head *rhp = &curr->rcu;
 	unsigned long now = jiffies;
 
 	if (!curr->mm || (curr->flags & (PF_EXITING | PF_KTHREAD)) ||
-	    work->next != work)
+	    rhp->next != rhp)
 		return;
 	if (time_before(now, READ_ONCE(curr->mm->mm_cid_next_scan)))
 		return;
 
-	/* No page allocation under rq lock */
-	task_work_add(curr, work, TWA_RESUME | TWAF_NO_ALLOC);
+	call_rcu(rhp, task_mm_cid_work);
 }
 
 void sched_mm_cid_exit_signals(struct task_struct *t)
-- 
2.47.0