Message-ID: <20240228192355.290114-2-axboe@kernel.dk>
Date: Wed, 28 Feb 2024 12:16:56 -0700
From: Jens Axboe <axboe@...nel.dk>
To: linux-kernel@...r.kernel.org
Cc: peterz@...radead.org,
	mingo@...hat.com,
	Jens Axboe <axboe@...nel.dk>
Subject: [PATCH 1/2] sched/core: switch struct rq->nr_iowait to a normal int

In 3 of the 4 spots where we modify rq->nr_iowait we already hold the
rq lock, and hence don't need atomics to modify the current per-rq
iowait count. In the 4th case, where the task is woken up on a
different CPU than the one it was previously running on, we do not
hold the previous rq lock, and hence still need an atomic to adjust
the iowait count.

Rename the existing atomic nr_iowait to nr_iowait_remote and use that
for the 4th case, with a plain (non-atomic) unsigned int taking over
the nr_iowait name. The other three cases can then simply inc/dec
nr_iowait in a non-atomic fashion under the held rq lock.
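
To illustrate the pattern outside the kernel, here is a minimal
userspace sketch of the same split-counter idea. This is illustrative
only: struct split_counter and the sc_*() helpers are invented for
this note, a pthread mutex stands in for the rq lock, and a strictly
conforming C program would want the unlocked read annotated (the
kernel simply tolerates the racy read):

#include <pthread.h>
#include <stdatomic.h>

/*
 * Toy split counter: "local" is only written with ->lock held, so
 * plain inc/dec suffice there. Paths that cannot take ->lock account
 * their wakeups by bumping the atomic "remote" side instead.
 */
struct split_counter {
	pthread_mutex_t lock;
	unsigned int local;	/* blocks minus local wakes; under ->lock */
	atomic_uint remote;	/* remote wakes; no lock needed */
};

/* Task enters iowait; caller holds ->lock. */
static void sc_block(struct split_counter *c)
{
	c->local++;
}

/* Task is woken on the same CPU; caller holds ->lock. */
static void sc_wake_local(struct split_counter *c)
{
	c->local--;
}

/* Task is woken elsewhere; caller does NOT hold ->lock. */
static void sc_wake_remote(struct split_counter *c)
{
	atomic_fetch_add_explicit(&c->remote, 1, memory_order_relaxed);
}

/*
 * Current number of waiters: local count minus remote wakes. Like
 * nr_iowait_cpu() in the patch below, this reads "local" without the
 * lock and tolerates a momentarily stale value.
 */
static unsigned int sc_read(struct split_counter *c)
{
	return c->local - atomic_load_explicit(&c->remote,
					       memory_order_relaxed);
}

The invariant that makes this work is that "local" is only ever
written with the lock held, so those writes need no atomicity, while
lock-free wakers funnel their updates through the atomic "remote"
side.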

The per-rq iowait count now becomes the difference between the two:
the local count minus the remote count.
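
As a quick sanity check with made-up numbers: if 5 tasks block in
iowait on a CPU, 2 are later woken on that same CPU and 1 is woken
remotely, then nr_iowait ends at 5 - 2 = 3 and nr_iowait_remote at 1,
so nr_iowait_cpu() reports 3 - 1 = 2, matching the 2 tasks still in
iowait. Since only the difference is ever reported, wraparound in
either 32-bit counter cancels out in the subtraction.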

Signed-off-by: Jens Axboe <axboe@...nel.dk>
---
 kernel/sched/core.c    | 15 ++++++++++-----
 kernel/sched/cputime.c |  3 +--
 kernel/sched/sched.h   |  8 +++++++-
 3 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9116bcc90346..48d15529a777 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3789,7 +3789,7 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
 #endif
 	if (p->in_iowait) {
 		delayacct_blkio_end(p);
-		atomic_dec(&task_rq(p)->nr_iowait);
+		task_rq(p)->nr_iowait--;
 	}
 
 	activate_task(rq, p, en_flags);
@@ -4354,8 +4354,10 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 		cpu = select_task_rq(p, p->wake_cpu, wake_flags | WF_TTWU);
 		if (task_cpu(p) != cpu) {
 			if (p->in_iowait) {
+				struct rq *__rq = task_rq(p);
+
 				delayacct_blkio_end(p);
-				atomic_dec(&task_rq(p)->nr_iowait);
+				atomic_inc(&__rq->nr_iowait_remote);
 			}
 
 			wake_flags |= WF_MIGRATED;
@@ -5463,7 +5465,9 @@ unsigned long long nr_context_switches(void)
 
 unsigned int nr_iowait_cpu(int cpu)
 {
-	return atomic_read(&cpu_rq(cpu)->nr_iowait);
+	struct rq *rq = cpu_rq(cpu);
+
+	return rq->nr_iowait - atomic_read(&rq->nr_iowait_remote);
 }
 
 /*
@@ -6681,7 +6685,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
 			deactivate_task(rq, prev, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK);
 
 			if (prev->in_iowait) {
-				atomic_inc(&rq->nr_iowait);
+				rq->nr_iowait++;
 				delayacct_blkio_start();
 			}
 		}
@@ -10029,7 +10033,8 @@ void __init sched_init(void)
 #endif
 #endif /* CONFIG_SMP */
 		hrtick_rq_init(rq);
-		atomic_set(&rq->nr_iowait, 0);
+		rq->nr_iowait = 0;
+		atomic_set(&rq->nr_iowait_remote, 0);
 
 #ifdef CONFIG_SCHED_CORE
 		rq->core = rq;
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index af7952f12e6c..0ed81c2d3c3b 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -222,9 +222,8 @@ void account_steal_time(u64 cputime)
 void account_idle_time(u64 cputime)
 {
 	u64 *cpustat = kcpustat_this_cpu->cpustat;
-	struct rq *rq = this_rq();
 
-	if (atomic_read(&rq->nr_iowait) > 0)
+	if (nr_iowait_cpu(smp_processor_id()) > 0)
 		cpustat[CPUTIME_IOWAIT] += cputime;
 	else
 		cpustat[CPUTIME_IDLE] += cputime;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 001fe047bd5d..91fa5b4d45ed 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1049,7 +1049,13 @@ struct rq {
 	u64			clock_idle_copy;
 #endif
 
-	atomic_t		nr_iowait;
+	/*
+	 * Total per-cpu iowait is the difference of the two below. One is
+	 * modified under the rq lock (nr_iowait), and if we don't have the rq
+	 * lock, then nr_iowait_remote is used.
+	 */
+	unsigned int		nr_iowait;
+	atomic_t		nr_iowait_remote;
 
 #ifdef CONFIG_SCHED_DEBUG
 	u64 last_seen_need_resched_ns;
-- 
2.43.0

