[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20231216001801.3015832-2-chris.hyser@oracle.com>
Date: Fri, 15 Dec 2023 19:18:00 -0500
From: chris hyser <chris.hyser@...cle.com>
To: "Chris Hyser" <chris.hyser@...cle.com>,
"Peter Zijlstra" <peterz@...radead.org>,
"Mel Gorman" <mgorman@...e.de>, linux-kernel@...r.kernel.org
Cc: "Konrad Wilk" <konrad.wilk@...cle.com>
Subject: [RFC/POC 1/2] sched/numa: Add ability to override a task's numa_preferred_nid.
EXPERIMENTAL - NOT INTENDED FOR SUBMISSION
This "patch" is a proof of concept for overriding a task's "Preferred Node
Affinity". It is not intended for submission, but simply to show the code
used in generating the included results.
Signed-off-by: Chris Hyser <chris.hyser@...cle.com>
---
include/linux/sched.h | 1 +
init/init_task.c | 1 +
kernel/sched/core.c | 5 ++++-
kernel/sched/debug.c | 1 +
kernel/sched/fair.c | 17 ++++++++++++++---
5 files changed, 21 insertions(+), 4 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8d258162deb0..6e7290468fa5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1279,6 +1279,7 @@ struct task_struct {
short pref_node_fork;
#endif
#ifdef CONFIG_NUMA_BALANCING
+ int numa_preferred_nid_force;
int numa_scan_seq;
unsigned int numa_scan_period;
unsigned int numa_scan_period_max;
diff --git a/init/init_task.c b/init/init_task.c
index 5727d42149c3..a1797037af7e 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -174,6 +174,7 @@ struct task_struct init_task
.vtime.state = VTIME_SYS,
#endif
#ifdef CONFIG_NUMA_BALANCING
+ .numa_preferred_nid_force = NUMA_NO_NODE,
.numa_preferred_nid = NUMA_NO_NODE,
.numa_group = NULL,
.numa_faults = NULL,
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index db4be4921e7f..1444dd0207aa 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -9380,7 +9380,10 @@ void sched_setnuma(struct task_struct *p, int nid)
if (running)
put_prev_task(rq, p);
- p->numa_preferred_nid = nid;
+ if (p->numa_preferred_nid_force != NUMA_NO_NODE)
+ p->numa_preferred_nid = p->numa_preferred_nid_force;
+ else
+ p->numa_preferred_nid = nid;
if (queued)
enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 168eecc209b4..fecf529c9dc7 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -978,6 +978,7 @@ static void sched_show_numa(struct task_struct *p, struct seq_file *m)
P(mm->numa_scan_seq);
P(numa_pages_migrated);
+ P(numa_preferred_nid_force);
P(numa_preferred_nid);
P(total_numa_faults);
SEQ_printf(m, "current_node=%d, numa_group_id=%d\n",
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index bcea3d55d95d..988b3285f40c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2541,9 +2541,14 @@ static void numa_migrate_preferred(struct task_struct *p)
unsigned long interval = HZ;
/* This task has no NUMA fault statistics yet */
- if (unlikely(p->numa_preferred_nid == NUMA_NO_NODE || !p->numa_faults))
+ if (unlikely(p->numa_preferred_nid == NUMA_NO_NODE))
return;
+ if (p->numa_preferred_nid_force == NUMA_NO_NODE) {
+ if (unlikely(!p->numa_faults))
+ return;
+ }
+
/* Periodically retry migrating the task to the preferred node */
interval = min(interval, msecs_to_jiffies(p->numa_scan_period) / 16);
p->numa_migrate_retry = jiffies + interval;
@@ -3462,7 +3467,10 @@ void init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
/* New address space, reset the preferred nid */
if (!(clone_flags & CLONE_VM)) {
- p->numa_preferred_nid = NUMA_NO_NODE;
+ if (p->numa_preferred_nid_force == NUMA_NO_NODE)
+ p->numa_preferred_nid = NUMA_NO_NODE;
+ else
+ p->numa_preferred_nid = p->numa_preferred_nid_force;
return;
}
@@ -8828,7 +8836,10 @@ static int migrate_degrades_locality(struct task_struct *p, struct lb_env *env)
if (!static_branch_likely(&sched_numa_balancing))
return -1;
- if (!p->numa_faults || !(env->sd->flags & SD_NUMA))
+ if (p->numa_preferred_nid_force == NUMA_NO_NODE && !p->numa_faults)
+ return -1;
+
+ if (!(env->sd->flags & SD_NUMA))
return -1;
src_nid = cpu_to_node(env->src_cpu);
--
2.39.3
Powered by blists - more mailing lists