lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Wed, 14 Jun 2023 10:22:23 +0000
From:   Swapnil Sapkal <swapnil.sapkal@....com>
To:     <mingo@...hat.com>, <peterz@...radead.org>,
        <juri.lelli@...hat.com>, <vincent.guittot@...aro.org>
CC:     <dietmar.eggemann@....com>, <rostedt@...dmis.org>,
        <bsegall@...gle.com>, <mgorman@...e.de>, <bristot@...hat.com>,
        <vschneid@...hat.com>, <iamjoonsoo.kim@....com>,
        <linux-kernel@...r.kernel.org>, <gautham.shenoy@....com>,
        <kprateek.nayak@....com>, <wyes.karny@....com>,
        Swapnil Sapkal <swapnil.sapkal@....com>
Subject: [PATCH 1/2] sched/fair: Fix the value reported for hot tasks pulled in /proc/schedstat

In /proc/schedstat, lb_hot_gained reports the number of hot tasks pulled
during load balance. This value is incremented in can_migrate_task()
if the task is migratable and hot. After incrementing the value, the
load balancer can still decide not to migrate this task, leading to wrong
accounting. Fix this by incrementing the stats when hot tasks are detached.
This issue only exists in detach_tasks(), where we can decide not to
migrate a hot task even if it is migratable. However, in detach_one_task(),
we migrate it unconditionally.

Fixes: d31980846f96 ("sched: Move up affinity check to mitigate useless redoing overhead")
Reported-by: Gautham R. Shenoy <gautham.shenoy@....com>
Signed-off-by: Swapnil Sapkal <swapnil.sapkal@....com>
---
 kernel/sched/fair.c | 47 +++++++++++++++++++++++++++++----------------
 1 file changed, 30 insertions(+), 17 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 373ff5f55884..9a8e5dcbe7e6 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8507,9 +8507,9 @@ static inline int migrate_degrades_locality(struct task_struct *p,
  * can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
  */
 static
-int can_migrate_task(struct task_struct *p, struct lb_env *env)
+int can_migrate_task(struct task_struct *p, struct lb_env *env, int *tsk_cache_hot)
 {
-	int tsk_cache_hot;
+	int degrades_locality;
 
 	lockdep_assert_rq_held(env->src_rq);
 
@@ -8578,18 +8578,19 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 	if (env->flags & LBF_ACTIVE_LB)
 		return 1;
 
-	tsk_cache_hot = migrate_degrades_locality(p, env);
-	if (tsk_cache_hot == -1)
-		tsk_cache_hot = task_hot(p, env);
+	degrades_locality = migrate_degrades_locality(p, env);
+	if (degrades_locality == -1)
+		*tsk_cache_hot = task_hot(p, env);
+	else
+		*tsk_cache_hot = degrades_locality;
 
-	if (tsk_cache_hot <= 0 ||
-	    env->sd->nr_balance_failed > env->sd->cache_nice_tries) {
-		if (tsk_cache_hot == 1) {
-			schedstat_inc(env->sd->lb_hot_gained[env->idle]);
-			schedstat_inc(p->stats.nr_forced_migrations);
-		}
+	/*
+	 * Can migrate a hot task only after the attempts to reach balance
+	 * without the task have exceeded the cache_nice_tries threshold.
+	 */
+	if (!(*tsk_cache_hot) ||
+		env->sd->nr_balance_failed > env->sd->cache_nice_tries)
 		return 1;
-	}
 
 	schedstat_inc(p->stats.nr_failed_migrations_hot);
 	return 0;
@@ -8598,10 +8599,15 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 /*
  * detach_task() -- detach the task for the migration specified in env
  */
-static void detach_task(struct task_struct *p, struct lb_env *env)
+static void detach_task(struct task_struct *p, struct lb_env *env, int tsk_cache_hot)
 {
 	lockdep_assert_rq_held(env->src_rq);
 
+	if (tsk_cache_hot == 1) {
+		schedstat_inc(env->sd->lb_hot_gained[env->idle]);
+		schedstat_inc(p->stats.nr_forced_migrations);
+	}
+
 	deactivate_task(env->src_rq, p, DEQUEUE_NOCLOCK);
 	set_task_cpu(p, env->dst_cpu);
 }
@@ -8620,10 +8626,12 @@ static struct task_struct *detach_one_task(struct lb_env *env)
 
 	list_for_each_entry_reverse(p,
 			&env->src_rq->cfs_tasks, se.group_node) {
-		if (!can_migrate_task(p, env))
+		int tsk_cache_hot = 0;
+
+		if (!can_migrate_task(p, env, &tsk_cache_hot))
 			continue;
 
-		detach_task(p, env);
+		detach_task(p, env, tsk_cache_hot);
 
 		/*
 		 * Right now, this is only the second place where
@@ -8665,6 +8673,8 @@ static int detach_tasks(struct lb_env *env)
 		return 0;
 
 	while (!list_empty(tasks)) {
+		int tsk_cache_hot = 0;
+
 		/*
 		 * We don't want to steal all, otherwise we may be treated likewise,
 		 * which could at worst lead to a livelock crash.
@@ -8690,7 +8700,7 @@ static int detach_tasks(struct lb_env *env)
 
 		p = list_last_entry(tasks, struct task_struct, se.group_node);
 
-		if (!can_migrate_task(p, env))
+		if (!can_migrate_task(p, env, &tsk_cache_hot))
 			goto next;
 
 		switch (env->migration_type) {
@@ -8742,7 +8752,7 @@ static int detach_tasks(struct lb_env *env)
 			break;
 		}
 
-		detach_task(p, env);
+		detach_task(p, env, tsk_cache_hot);
 		list_add(&p->se.group_node, &env->tasks);
 
 		detached++;
@@ -8766,6 +8776,9 @@ static int detach_tasks(struct lb_env *env)
 
 		continue;
 next:
+		if (tsk_cache_hot == 1)
+			schedstat_inc(p->stats.nr_failed_migrations_hot);
+
 		list_move(&p->se.group_node, tasks);
 	}
 
-- 
2.34.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ