lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Date:   Fri, 08 Oct 2021 07:06:51 +0200
From:   Mike Galbraith <efault@....de>
To:     Mel Gorman <mgorman@...hsingularity.net>
Cc:     Barry Song <21cnbao@...il.com>,
        Peter Zijlstra <peterz@...radead.org>,
        Ingo Molnar <mingo@...nel.org>,
        Vincent Guittot <vincent.guittot@...aro.org>,
        Valentin Schneider <valentin.schneider@....com>,
        Aubrey Li <aubrey.li@...ux.intel.com>,
        Barry Song <song.bao.hua@...ilicon.com>,
        Srikar Dronamraju <srikar@...ux.vnet.ibm.com>,
        LKML <linux-kernel@...r.kernel.org>
Subject: Re: wakeup_affine_weight() is b0rked - was Re: [PATCH 2/2]
 sched/fair: Scale wakeup granularity relative to nr_running

On Tue, 2021-10-05 at 10:31 +0100, Mel Gorman wrote:
> Ideally, I would do some tracing to confirm that maximum runqueue depth
> is really reduced by the patch.

I would expect your worst case to remain unchanged, mine does.  The
patch mitigates, it does not eradicate.

I dug up a late 2016 mitigation patch, wedged it into 2021 and added a
BFH that does eradicate my stacking depth woes.  I'll probably keep it,
at least for a while. Not because I feel anything in my desktop, rather
because meeting this again (and it being deeper than I recall) reminded
me of measuring impact on NFS etc, making it a tad difficult to ignore.
Oh well, I'll forget about it eventually.. BTDT.

(standard beloved Granny disclaimer)

sched: Add SIS stacking mitigation feature

Select the least loaded LLC CPU for cache cold tasks and kthreads.

Addendum: renamed the feature, and gave it a big brother.

Not-Signed-off-by: Mike Galbraith <efault@....de>
---
 kernel/sched/fair.c     |   54 ++++++++++++++++++++++++++++++++++++++++++++----
 kernel/sched/features.h |    5 ++++
 2 files changed, 55 insertions(+), 4 deletions(-)

--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6261,6 +6261,26 @@ static inline int select_idle_smt(struct

 #endif /* CONFIG_SCHED_SMT */

+static bool task_is_kthread_or_cold(struct task_struct *p)
+{
+	s64 cold = sysctl_sched_migration_cost;
+
+	if (p->flags & PF_KTHREAD)
+		return true;
+	if (cold <= 0)
+		return false;
+	return task_rq(p)->clock_task - p->se.exec_start > cold;
+}
+
+static bool cpu_load_inconsistent(int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+
+	if (rq->cfs.h_nr_running < 4)
+		return false;
+	return cpu_load(rq) << 2 < scale_load_down(rq->cfs.load.weight);
+}
+
 /*
  * Scan the LLC domain for idle CPUs; this is dynamically regulated by
  * comparing the average scan cost (tracked in sd->avg_scan_cost) against the
@@ -6269,7 +6289,7 @@ static inline int select_idle_smt(struct
 static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool has_idle_core, int target)
 {
 	struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
-	int i, cpu, idle_cpu = -1, nr = INT_MAX;
+	int i, cpu, idle_cpu = -1, nr = INT_MAX, ld = -1;
 	struct rq *this_rq = this_rq();
 	int this = smp_processor_id();
 	struct sched_domain *this_sd;
@@ -6309,6 +6329,21 @@ static int select_idle_cpu(struct task_s
 		time = cpu_clock(this);
 	}

+	/*
+	 * Select the least loaded CPU for kthreads and cache cold tasks
+	 * if no idle CPU is found.
+	 */
+	if ((sched_feat(SIS_SPOT) && task_is_kthread_or_cold(p)) ||
+	    (sched_feat(SIS_REXY) && cpu_load_inconsistent(target))) {
+		idle_cpu = task_cpu(p);
+		if (idle_cpu != target && !cpus_share_cache(idle_cpu, target))
+			idle_cpu = target;
+		if (unlikely(!sched_cpu_cookie_match(cpu_rq(idle_cpu), p)))
+			idle_cpu = -1;
+		else
+			ld = scale_load_down(cpu_rq(idle_cpu)->cfs.load.weight);
+	}
+
 	for_each_cpu_wrap(cpu, cpus, target + 1) {
 		if (has_idle_core) {
 			i = select_idle_core(p, cpu, cpus, &idle_cpu);
@@ -6317,10 +6352,21 @@ static int select_idle_cpu(struct task_s

 		} else {
 			if (!--nr)
-				return -1;
-			idle_cpu = __select_idle_cpu(cpu, p);
-			if ((unsigned int)idle_cpu < nr_cpumask_bits)
+				return idle_cpu;
+			i = __select_idle_cpu(cpu, p);
+			if ((unsigned int)i < nr_cpumask_bits) {
+				idle_cpu = i;
 				break;
+			}
+		}
+		if (ld > 0 && sched_cpu_cookie_match(cpu_rq(cpu), p)) {
+			i = scale_load_down(cpu_rq(cpu)->cfs.load.weight);
+			if (i < ld) {
+				idle_cpu = cpu;
+				if (i == 0)
+					break;
+				ld = i;
+			}
 		}
 	}

--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -95,3 +95,8 @@ SCHED_FEAT(LATENCY_WARN, false)

 SCHED_FEAT(ALT_PERIOD, true)
 SCHED_FEAT(BASE_SLICE, true)
+
+/* Mitigate PELT induced stacking.  */
+SCHED_FEAT(SIS_SPOT, true)
+/* Spot's 12 ton big brother. */
+SCHED_FEAT(SIS_REXY, true)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ