Message-Id: <1641894961-9241-3-git-send-email-CruzZhao@linux.alibaba.com>
Date:   Tue, 11 Jan 2022 17:56:00 +0800
From:   Cruz Zhao <CruzZhao@...ux.alibaba.com>
To:     tj@...nel.org, lizefan.x@...edance.com, hannes@...xchg.org,
        mingo@...hat.com, peterz@...radead.org, juri.lelli@...hat.com,
        vincent.guittot@...aro.org, dietmar.eggemann@....com,
        rostedt@...dmis.org, bsegall@...gle.com, mgorman@...e.de,
        bristot@...hat.com, joshdon@...gle.com
Cc:     cgroups@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: [PATCH v2 2/3] sched/core: Forced idle accounting per-cpu

Account for "forced idle" time per cpu, which is the time a cpu is
forced to idle by its SMT sibling.

Measuring the capacity loss only by the forced idle time charged to
cookie'd tasks is not accurate, and tracing the forced idle time caused
by all the uncookie'd tasks is hard, so account the forced idle time
from the perspective of the cpu instead.

Forced idle time per cpu is displayed via /proc/schedstat: the forced
idle time of cpu x is reported in the 10th column of the row for cpu x,
in nanoseconds. This requires schedstats to be enabled.
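
As an illustration only (not part of this patch), a small userspace
sketch that reads this field for a given cpu could look like the
following; the helper name read_forceidle_ns() is made up here and
assumes the column layout described above:

	/* Hypothetical example: print the forced idle time (ns) of a cpu. */
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	static long long read_forceidle_ns(int cpu)
	{
		char line[512], prefix[16];
		long long val = -1;
		FILE *fp = fopen("/proc/schedstat", "r");

		if (!fp)
			return -1;
		snprintf(prefix, sizeof(prefix), "cpu%d ", cpu);
		while (fgets(line, sizeof(line), fp)) {
			char *tok;
			int i;

			if (strncmp(line, prefix, strlen(prefix)))
				continue;
			/* Skip the "cpuN" label, then take the 10th field. */
			tok = strtok(line, " ");
			for (i = 0; tok && i < 10; i++)
				tok = strtok(NULL, " ");
			if (tok)
				val = atoll(tok);
			break;
		}
		fclose(fp);
		return val;
	}

	int main(int argc, char **argv)
	{
		int cpu = argc > 1 ? atoi(argv[1]) : 0;

		printf("cpu%d forced idle: %lld ns\n", cpu, read_forceidle_ns(cpu));
		return 0;
	}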

Signed-off-by: Cruz Zhao <CruzZhao@...ux.alibaba.com>
---
 kernel/sched/core.c       |  7 ++++++-
 kernel/sched/core_sched.c |  7 +++++--
 kernel/sched/sched.h      |  4 ++++
 kernel/sched/stats.c      | 17 +++++++++++++++--
 4 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e8187e7..a224b916 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -285,8 +285,10 @@ static void __sched_core_flip(bool enabled)
 
 		sched_core_lock(cpu, &flags);
 
-		for_each_cpu(t, smt_mask)
+		for_each_cpu(t, smt_mask) {
 			cpu_rq(t)->core_enabled = enabled;
+			cpu_rq(t)->in_forcedidle = false;
+		}
 
 		cpu_rq(cpu)->core->core_forceidle_start = 0;
 
@@ -5690,6 +5692,7 @@ static inline struct task_struct *pick_task(struct rq *rq)
 		 * another cpu during offline.
 		 */
 		rq->core_pick = NULL;
+		rq->in_forcedidle = false;
 		return __pick_next_task(rq, prev, rf);
 	}
 
@@ -5810,9 +5813,11 @@ static inline struct task_struct *pick_task(struct rq *rq)
 
 		rq_i->core_pick = p;
 
+		rq_i->in_forcedidle = false;
 		if (p == rq_i->idle) {
 			if (rq_i->nr_running) {
 				rq->core->core_forceidle_count++;
+				rq_i->in_forcedidle = true;
 				if (!fi_before)
 					rq->core->core_forceidle_seq++;
 			}
diff --git a/kernel/sched/core_sched.c b/kernel/sched/core_sched.c
index c8746a9..fe04805 100644
--- a/kernel/sched/core_sched.c
+++ b/kernel/sched/core_sched.c
@@ -242,7 +242,7 @@ int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type,
 void __sched_core_account_forceidle(struct rq *rq)
 {
 	const struct cpumask *smt_mask = cpu_smt_mask(cpu_of(rq));
-	u64 delta, now = rq_clock(rq->core);
+	u64 delta_per_idlecpu, delta, now = rq_clock(rq->core);
 	struct rq *rq_i;
 	struct task_struct *p;
 	int i;
@@ -254,7 +254,7 @@ void __sched_core_account_forceidle(struct rq *rq)
 	if (rq->core->core_forceidle_start == 0)
 		return;
 
-	delta = now - rq->core->core_forceidle_start;
+	delta_per_idlecpu = delta = now - rq->core->core_forceidle_start;
 	if (unlikely((s64)delta <= 0))
 		return;
 
@@ -277,6 +277,9 @@ void __sched_core_account_forceidle(struct rq *rq)
 		rq_i = cpu_rq(i);
 		p = rq_i->core_pick ?: rq_i->curr;
 
+		if (rq_i->in_forcedidle)
+			rq_i->rq_forceidle_time += delta_per_idlecpu;
+
 		if (p == rq_i->idle)
 			continue;
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index de53be9..9377d91 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1086,6 +1086,9 @@ struct rq {
 	/* try_to_wake_up() stats */
 	unsigned int		ttwu_count;
 	unsigned int		ttwu_local;
+#ifdef CONFIG_SCHED_CORE
+	u64			rq_forceidle_time;
+#endif
 #endif
 
 #ifdef CONFIG_CPU_IDLE
@@ -1115,6 +1118,7 @@ struct rq {
 	unsigned int		core_forceidle_seq;
 	unsigned int		core_forceidle_occupation;
 	u64			core_forceidle_start;
+	bool			in_forcedidle;
 #endif
 };
 
diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c
index 07dde29..ea22a8c 100644
--- a/kernel/sched/stats.c
+++ b/kernel/sched/stats.c
@@ -108,6 +108,16 @@ void __update_stats_enqueue_sleeper(struct rq *rq, struct task_struct *p,
 	}
 }
 
+#ifdef CONFIG_SCHED_CORE
+static inline u64 get_rq_forceidle_time(struct rq *rq) {
+	return rq->rq_forceidle_time;
+}
+#else
+static inline u64 get_rq_forceidle_time(struct rq *rq) {
+	return 0;
+}
+#endif
+
 /*
  * Current schedstat API version.
  *
@@ -125,21 +135,24 @@ static int show_schedstat(struct seq_file *seq, void *v)
 		seq_printf(seq, "timestamp %lu\n", jiffies);
 	} else {
 		struct rq *rq;
+		u64 rq_forceidle_time;
 #ifdef CONFIG_SMP
 		struct sched_domain *sd;
 		int dcount = 0;
 #endif
 		cpu = (unsigned long)(v - 2);
 		rq = cpu_rq(cpu);
+		rq_forceidle_time = get_rq_forceidle_time(rq);
 
 		/* runqueue-specific stats */
 		seq_printf(seq,
-		    "cpu%d %u 0 %u %u %u %u %llu %llu %lu",
+		    "cpu%d %u 0 %u %u %u %u %llu %llu %lu %llu",
 		    cpu, rq->yld_count,
 		    rq->sched_count, rq->sched_goidle,
 		    rq->ttwu_count, rq->ttwu_local,
 		    rq->rq_cpu_time,
-		    rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount);
+		    rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount,
+		    rq_forceidle_time);
 
 		seq_printf(seq, "\n");
 
-- 
1.8.3.1
