[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230208073533.715-4-bharata@amd.com>
Date: Wed, 8 Feb 2023 13:05:31 +0530
From: Bharata B Rao <bharata@....com>
To: <linux-kernel@...r.kernel.org>, <linux-mm@...ck.org>
CC: <mgorman@...e.de>, <peterz@...radead.org>, <mingo@...hat.com>,
<bp@...en8.de>, <dave.hansen@...ux.intel.com>, <x86@...nel.org>,
<akpm@...ux-foundation.org>, <luto@...nel.org>,
<tglx@...utronix.de>, <yue.li@...verge.com>,
<Ravikumar.Bangoria@....com>, Bharata B Rao <bharata@....com>
Subject: [RFC PATCH 3/5] x86/ibs: Enable per-process IBS from sched switch path
Program IBS for access profiling for threads from the
task sched switch path. IBS is programmed with a period
that corresponds to the incoming thread. Kernel threads are
excluded from this.
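The sample period is currently kept at a fixed value of 10000: the
per-task numa_sample_period is initialized to 0 in this patch, so the
default IBS_SAMPLE_PERIOD is what gets programmed.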
Signed-off-by: Bharata B Rao <bharata@....com>
---
arch/x86/mm/ibs.c | 27 +++++++++++++++++++++++++++
include/linux/sched.h | 1 +
kernel/sched/core.c | 1 +
kernel/sched/fair.c | 1 +
kernel/sched/sched.h | 5 +++++
5 files changed, 35 insertions(+)
diff --git a/arch/x86/mm/ibs.c b/arch/x86/mm/ibs.c
index adbc587b1767..a479029e9262 100644
--- a/arch/x86/mm/ibs.c
+++ b/arch/x86/mm/ibs.c
@@ -8,6 +8,7 @@
#include <asm/perf_event.h> /* TODO: Move defns like IBS_OP_ENABLE into non-perf header */
#include <asm/apic.h>
+#define IBS_SAMPLE_PERIOD 10000
static u64 ibs_config __read_mostly;
struct ibs_access_work {
@@ -15,6 +16,31 @@ struct ibs_access_work {
u64 laddr, paddr;
};
+void hw_access_sched_in(struct task_struct *prev, struct task_struct *curr)
+{
+ u64 config = 0;
+ unsigned int period;
+ /* Program MSR_AMD64_IBSOPCTL for the incoming task @curr; @prev is not used. */
+ if (!static_branch_unlikely(&hw_access_hints))
+ return;
+
+ /* Disable IBS for kernel thread: no mm, so config stays 0 and 0 is written at "out" */
+ if (!curr->mm)
+ goto out;
+ /* Use the task's own numa_sample_period when non-zero, else IBS_SAMPLE_PERIOD. */
+ if (curr->numa_sample_period)
+ period = curr->numa_sample_period;
+ else
+ period = IBS_SAMPLE_PERIOD;
+ /* Encode: (period >> 4) masked by IBS_OP_MAX_CNT, plus the period bits */
+ /* selected by IBS_OP_MAX_CNT_EXT_MASK, plus the file-level ibs_config. */
+ config = (period >> 4) & IBS_OP_MAX_CNT;
+ config |= (period & IBS_OP_MAX_CNT_EXT_MASK);
+ config |= ibs_config;
+out:
+ wrmsrl(MSR_AMD64_IBSOPCTL, config);
+}
+
void task_ibs_access_work(struct callback_head *work)
{
struct ibs_access_work *iwork = container_of(work, struct ibs_access_work, work);
@@ -198,6 +224,7 @@ int __init ibs_access_profiling_init(void)
x86_amd_ibs_access_profile_startup,
x86_amd_ibs_access_profile_teardown);
+ static_branch_enable(&hw_access_hints);
pr_info("IBS access profiling setup for NUMA Balancing\n");
return 0;
}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 19dd4ee07436..66c532418d38 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1254,6 +1254,7 @@ struct task_struct {
int numa_scan_seq;
unsigned int numa_scan_period;
unsigned int numa_scan_period_max;
+ unsigned int numa_sample_period;
int numa_preferred_nid;
unsigned long numa_migrate_retry;
/* Migration stamp: */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e838feb6adc5..1c13fed8bebc 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5165,6 +5165,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
prev_state = READ_ONCE(prev->__state);
vtime_task_switch(prev);
perf_event_task_sched_in(prev, current);
+ hw_access_sched_in(prev, current);
finish_task(prev);
tick_nohz_task_switch();
finish_lock_switch(rq);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c9b9e62da779..3f617c799821 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3094,6 +3094,7 @@ void init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
p->node_stamp = 0;
p->numa_scan_seq = mm ? mm->numa_scan_seq : 0;
p->numa_scan_period = sysctl_numa_balancing_scan_delay;
+ p->numa_sample_period = 0;
p->numa_migrate_retry = 0;
/* Protect against double add, see task_tick_numa and task_numa_work */
p->numa_work.next = &p->numa_work;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 771f8ddb7053..953d16c802d6 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1723,11 +1723,16 @@ extern int migrate_task_to(struct task_struct *p, int cpu);
extern int migrate_swap(struct task_struct *p, struct task_struct *t,
int cpu, int scpu);
extern void init_numa_balancing(unsigned long clone_flags, struct task_struct *p);
+void hw_access_sched_in(struct task_struct *prev, struct task_struct *curr);
#else
static inline void
init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
{
}
+static inline void hw_access_sched_in(struct task_struct *prev,
+ struct task_struct *curr)
+{ /* Empty stub for the #else (!CONFIG_NUMA_BALANCING) branch: does nothing. */
+}
#endif /* CONFIG_NUMA_BALANCING */
#ifdef CONFIG_SMP
--
2.25.1
Powered by blists - more mailing lists