[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20190626224718.21973-4-subhra.mazumdar@oracle.com>
Date: Wed, 26 Jun 2019 15:47:18 -0700
From: subhra mazumdar <subhra.mazumdar@...cle.com>
To: linux-kernel@...r.kernel.org
Cc: peterz@...radead.org, mingo@...hat.com, tglx@...utronix.de,
prakash.sangappa@...cle.com, dhaval.giani@...cle.com,
daniel.lezcano@...aro.org, vincent.guittot@...aro.org,
viresh.kumar@...aro.org, tim.c.chen@...ux.intel.com,
mgorman@...hsingularity.net
Subject: [RFC PATCH 3/3] sched: introduce tunables to control soft affinity
For different workloads the optimal "softness" of soft affinity can be
different. Introduce tunables sched_allowed and sched_preferred that can
be tuned via /proc. This allows choosing at what utilization difference
the scheduler will choose cpus_allowed over cpus_preferred in the first
level of search. Depending on the extent of data sharing, the cache coherency
overhead of the system, etc., the optimal point may vary.
Signed-off-by: subhra mazumdar <subhra.mazumdar@...cle.com>
---
include/linux/sched/sysctl.h | 2 ++
kernel/sched/fair.c | 19 ++++++++++++++++++-
kernel/sched/sched.h | 2 ++
kernel/sysctl.c | 14 ++++++++++++++
4 files changed, 36 insertions(+), 1 deletion(-)
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 99ce6d7..0e75602 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -41,6 +41,8 @@ extern unsigned int sysctl_numa_balancing_scan_size;
#ifdef CONFIG_SCHED_DEBUG
extern __read_mostly unsigned int sysctl_sched_migration_cost;
extern __read_mostly unsigned int sysctl_sched_nr_migrate;
+extern __read_mostly unsigned int sysctl_sched_preferred;
+extern __read_mostly unsigned int sysctl_sched_allowed;
int sched_proc_update_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *length,
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 53aa7f2..d222d78 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -85,6 +85,8 @@ unsigned int sysctl_sched_wakeup_granularity = 1000000UL;
static unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;
const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
+const_debug unsigned int sysctl_sched_preferred = 1UL;
+const_debug unsigned int sysctl_sched_allowed = 100UL;
#ifdef CONFIG_SMP
/*
@@ -6739,7 +6741,22 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
int new_cpu = prev_cpu;
int want_affine = 0;
int sync = (wake_flags & WF_SYNC) && !(current->flags & PF_EXITING);
- struct cpumask *cpus = &p->cpus_preferred;
+ int cpux, cpuy;
+ struct cpumask *cpus;
+
+ if (!p->affinity_unequal) {
+ cpus = &p->cpus_allowed;
+ } else {
+ cpux = cpumask_any(&p->cpus_preferred);
+ cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
+ cpumask_andnot(cpus, &p->cpus_allowed, &p->cpus_preferred);
+ cpuy = cpumask_any(cpus);
+ if (sysctl_sched_preferred * cpu_rq(cpux)->cfs.avg.util_avg >
+ sysctl_sched_allowed * cpu_rq(cpuy)->cfs.avg.util_avg)
+ cpus = &p->cpus_allowed;
+ else
+ cpus = &p->cpus_preferred;
+ }
if (sd_flag & SD_BALANCE_WAKE) {
record_wakee(p);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index b52ed1a..f856bdb 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1863,6 +1863,8 @@ extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
extern const_debug unsigned int sysctl_sched_nr_migrate;
extern const_debug unsigned int sysctl_sched_migration_cost;
+extern const_debug unsigned int sysctl_sched_preferred;
+extern const_debug unsigned int sysctl_sched_allowed;
#ifdef CONFIG_SCHED_HRTICK
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 7d1008b..bdffb48 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -383,6 +383,20 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "sched_preferred",
+ .data = &sysctl_sched_preferred,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "sched_allowed",
+ .data = &sysctl_sched_allowed,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
#ifdef CONFIG_SCHEDSTATS
{
.procname = "sched_schedstats",
--
2.9.3
Powered by blists - more mailing lists