lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <bd9977efff8cc3e002c4b2db02f611167905a99f.1686239016.git.bristot@kernel.org>
Date:   Thu,  8 Jun 2023 17:58:18 +0200
From:   Daniel Bristot de Oliveira <bristot@...nel.org>
To:     Ingo Molnar <mingo@...hat.com>,
        Peter Zijlstra <peterz@...radead.org>,
        Juri Lelli <juri.lelli@...hat.com>,
        Vincent Guittot <vincent.guittot@...aro.org>
Cc:     Dietmar Eggemann <dietmar.eggemann@....com>,
        Steven Rostedt <rostedt@...dmis.org>,
        Ben Segall <bsegall@...gle.com>, Mel Gorman <mgorman@...e.de>,
        Daniel Bristot de Oliveira <bristot@...hat.com>,
        Valentin Schneider <vschneid@...hat.com>,
        linux-kernel@...r.kernel.org,
        Luca Abeni <luca.abeni@...tannapisa.it>,
        Tommaso Cucinotta <tommaso.cucinotta@...tannapisa.it>,
        Thomas Gleixner <tglx@...utronix.de>,
        Joel Fernandes <joel@...lfernandes.org>,
        Vineeth Pillai <vineeth@...byteword.org>,
        Shuah Khan <skhan@...uxfoundation.org>,
        Daniel Bristot de Oliveira <bristot@...nel.org>
Subject: [RFC PATCH V3 6/6] sched/fair: Implement starvation monitor

From: Juri Lelli <juri.lelli@...hat.com>

Starting the deadline server for lower-priority classes right away when
the first task is enqueued might break guarantees, as tasks belonging to
intermediate-priority classes could be needlessly preempted. E.g., a
well-behaved (non-hog) FIFO task can be preempted by NORMAL tasks even if
there are still CPU cycles available for NORMAL tasks to run, as they'll
be running inside the fair deadline server for some period of time.

To prevent this issue, implement a starvation monitor mechanism that
starts the deadline server only if a task (a fair one, in this case)
hasn't been scheduled for some interval of time after it has been
enqueued (FAIR_SERVER_WATCHDOG_INTERVAL, i.e. HZ >> 1 jiffies, about
half a second). Use the pick/put functions to manage the starvation
monitor's status.

Signed-off-by: Juri Lelli <juri.lelli@...hat.com>
Signed-off-by: Daniel Bristot de Oliveira <bristot@...nel.org>
---
 kernel/sched/fair.c  | 57 ++++++++++++++++++++++++++++++++++++++++++--
 kernel/sched/sched.h |  4 ++++
 2 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f493f05c1f84..75eadd85e2b3 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6315,6 +6315,53 @@ static int sched_idle_cpu(int cpu)
 }
 #endif
 
+
+static void fair_server_watchdog(struct timer_list *list)
+{
+	struct rq *rq = container_of(list, struct rq, fair_server_wd);
+	struct rq_flags rf;
+
+	rq_lock_irqsave(rq, &rf);
+	rq->fair_server_wd_running = 0;
+
+	if (!rq->cfs.h_nr_running)
+		goto out;
+
+	update_rq_clock(rq);
+	dl_server_start(&rq->fair_server);
+	rq->fair_server_active = 1;
+	resched_curr(rq);
+
+out:
+	rq_unlock_irqrestore(rq, &rf);
+}
+
+static inline void fair_server_watchdog_start(struct rq *rq)
+{
+	if (rq->fair_server_wd_running || rq->fair_server_active)
+		return;
+
+	timer_setup(&rq->fair_server_wd, fair_server_watchdog, 0);
+	rq->fair_server_wd.expires = jiffies + FAIR_SERVER_WATCHDOG_INTERVAL;
+	add_timer_on(&rq->fair_server_wd, cpu_of(rq));
+	rq->fair_server_active = 0;
+	rq->fair_server_wd_running = 1;
+}
+
+static inline void fair_server_watchdog_stop(struct rq *rq, bool stop_server)
+{
+	if (!rq->fair_server_wd_running && !stop_server)
+		return;
+
+	del_timer(&rq->fair_server_wd);
+	rq->fair_server_wd_running = 0;
+
+	if (stop_server && rq->fair_server_active) {
+		dl_server_stop(&rq->fair_server);
+		rq->fair_server_active = 0;
+	}
+}
+
 /*
  * The enqueue_task method is called before nr_running is
  * increased. Here we update the fair scheduling stats and
@@ -6337,7 +6384,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	util_est_enqueue(&rq->cfs, p);
 
 	if (!rq->cfs.h_nr_running)
-		dl_server_start(&rq->fair_server);
+		fair_server_watchdog_start(rq);
 
 	/*
 	 * If in_iowait is set, the code below may not trigger any cpufreq
@@ -6484,7 +6531,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
 dequeue_throttle:
 	if (!rq->cfs.h_nr_running)
-		dl_server_stop(&rq->fair_server);
+		fair_server_watchdog_stop(rq, true);
 
 	util_est_update(&rq->cfs, p, task_sleep);
 	hrtick_update(rq);
@@ -8193,6 +8240,7 @@ done: __maybe_unused;
 		hrtick_start_fair(rq, p);
 
 	update_misfit_status(p, rq);
+	fair_server_watchdog_stop(rq, false);
 
 	return p;
 
@@ -8248,6 +8296,8 @@ void fair_server_init(struct rq *rq)
 	dl_se->dl_period = 20 * TICK_NSEC;
 
 	dl_server_init(dl_se, rq, fair_server_has_tasks, fair_server_pick);
+
+	rq->fair_server_wd_running = 0;
 }
 
 /*
@@ -8262,6 +8312,9 @@ static void put_prev_task_fair(struct rq *rq, struct task_struct *prev)
 		cfs_rq = cfs_rq_of(se);
 		put_prev_entity(cfs_rq, se);
 	}
+
+	if (rq->cfs.h_nr_running)
+		fair_server_watchdog_start(rq);
 }
 
 /*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index d4a7c0823c53..cab5d2b1e71f 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -353,6 +353,7 @@ extern void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
 		    dl_server_has_tasks_f has_tasks,
 		    dl_server_pick_f pick);
 
+#define FAIR_SERVER_WATCHDOG_INTERVAL (HZ >> 1)
 extern void fair_server_init(struct rq *);
 
 #ifdef CONFIG_CGROUP_SCHED
@@ -1018,6 +1019,9 @@ struct rq {
 	struct dl_rq		dl;
 
 	struct sched_dl_entity	fair_server;
+	int			fair_server_active;
+	struct timer_list	fair_server_wd;
+	int			fair_server_wd_running;
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	/* list of leaf cfs_rq on this CPU: */
-- 
2.40.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ