[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20250422-b4-eevdf-tests-v1-post-v1-3-d005d1c7cf61@gianis.ca>
Date: Wed, 23 Apr 2025 00:11:15 +0000
From: "Dhaval Giani (AMD)" <dhaval@...nis.ca>
Cc: linux-kernel@...r.kernel.org, Dhaval Giani <dhaval.giani@....com>, Gautham Shenoy <gautham.shenoy@....com>, K Prateek Nayak <kprateek.nayak@....com>, "Dhaval Giani (AMD)" <dhaval@...nis.ca>
Subject: [PATCH 3/3] sched/fair: Test that the average lag across the system is zero
Lemma 2 of the EEVDF paper says that the sum of lags of all the
tasks in the system is zero.
The test here is slightly different: it verifies that the sum of lags
across each individual runqueue is zero.
The Linux EEVDF implementation doesn't track a global vruntime.
Instead, it tracks the "zero-lag" vruntime, which can be obtained by
calling avg_vruntime(cfs_rq).
Walk through every single CFS runqueue (per CPU as well as per-cgroup),
and add up the vruntimes. The average should be the same as
avg_vruntime.
Signed-off-by: Dhaval Giani (AMD) <dhaval@...nis.ca>
---
include/linux/sched.h | 7 ++
kernel/sched/eevdf-tests.c | 174 +++++++++++++++++++++++++++++++++++++++++++++
kernel/sched/fair.c | 3 +
3 files changed, 184 insertions(+)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f96ac198289349199b9c671240a20fc7826228ad..72788d51912657919adfad7f451983e80be4fa39 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -610,6 +610,13 @@ struct sched_entity {
*/
struct sched_avg avg;
#endif
+#ifdef CONFIG_SCHED_EEVDF_TESTING
+ /*
+ * Add a list element so that we don't recurse
+ * in the EEVDF unit test
+ */
+ struct list_head tg_entry;
+#endif
};
struct sched_rt_entity {
diff --git a/kernel/sched/eevdf-tests.c b/kernel/sched/eevdf-tests.c
index 8532330769bcc93dbf9cd98ebba75c838f62c045..c343e94b9a44ad32d01a0eedcb61c1bbf5fdbaf6 100644
--- a/kernel/sched/eevdf-tests.c
+++ b/kernel/sched/eevdf-tests.c
@@ -24,6 +24,35 @@
bool eevdf_positive_lag_test;
u8 eevdf_positive_lag_count = 10;
+static int test_total_zero_lag(void *);
+static void launch_test_zero_lag(void);
+
+/*
+ * Reading the debugfs file produces no output; the file exists purely
+ * as a write-trigger for the zero-lag test.
+ */
+static int eevdf_zero_lag_show(struct seq_file *m, void *v)
+{
+ return 0;
+}
+
+/* Standard single_open() boilerplate for the (empty) read side. */
+static int eevdf_zero_lag_open(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, eevdf_zero_lag_show, NULL);
+}
+
+/*
+ * Any write to the debugfs file kicks off the zero-lag test; the
+ * written data itself is ignored.
+ */
+static ssize_t eevdf_zero_lag_write(struct file *filp, const char __user *ubuf,
+				    size_t cnt, loff_t *ppos)
+{
+	launch_test_zero_lag();
+
+	/*
+	 * Claim the whole buffer: returning a short count (the old code
+	 * returned 1) makes userspace retry the remainder, re-launching
+	 * the test once per written byte.
+	 */
+	return cnt;
+}
+
+/* Write triggers the test; reads go through the empty seq_file show. */
+static const struct file_operations eevdf_zero_lag_fops = {
+ .open = eevdf_zero_lag_open,
+ .write = eevdf_zero_lag_write,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
static struct dentry *debugfs_eevdf_testing;
void debugfs_eevdf_testing_init(struct dentry *debugfs_sched)
{
@@ -33,6 +62,8 @@ void debugfs_eevdf_testing_init(struct dentry *debugfs_sched)
debugfs_eevdf_testing, &eevdf_positive_lag_test);
debugfs_create_u8("eevdf_positive_lag_test_count", 0600,
debugfs_eevdf_testing, &eevdf_positive_lag_count);
+	/*
+	 * 0600 to match the sibling eevdf debugfs entries; an execute
+	 * bit (as in the earlier 0700) is meaningless on a debugfs file.
+	 */
+	debugfs_create_file("eevdf_zero_lag_test", 0600, debugfs_eevdf_testing,
+			    NULL, &eevdf_zero_lag_fops);
}
@@ -65,4 +96,147 @@ void test_eevdf_positive_lag(struct cfs_rq *cfs, struct sched_entity *se)
}
}
+/*
+ * we do, what we need to do
+ */
+#define __node_2_se(node) \
+ rb_entry((node), struct sched_entity, run_node)
+
+/*
+ * Recompute the zero-lag vruntime of @cfs from scratch and compare it
+ * against the incrementally tracked value returned by avg_vruntime().
+ *
+ * avg_vruntime() is the *load-weighted* average:
+ *
+ *	min_vruntime + \Sum w_i * (v_i - min_vruntime) / \Sum w_i
+ *
+ * so a plain arithmetic mean of the vruntimes only matches when every
+ * entity has the same weight. Mirror the weighted formula (including
+ * its rounding of negative sums) so the comparison is exact and the
+ * test validates the tracked avg_vruntime/avg_load sums.
+ *
+ * Group entities found on the tree are appended to @tg_se so the
+ * caller can descend into their cfs_rqs without recursing.
+ *
+ * Call with the rq lock held. Returns false on mismatch.
+ */
+static bool test_eevdf_cfs_rq_zero_lag(struct cfs_rq *cfs, struct list_head *tg_se)
+{
+	u64 cfs_avg_vruntime;
+	u64 calculated_avg_vruntime;
+	s64 avg = 0;
+	long load = 0;
+	struct sched_entity *se;
+	struct rb_node *node;
+
+	cfs_avg_vruntime = avg_vruntime(cfs);
+
+	/*
+	 * Walk the rb tree, accumulating each entity's key
+	 * (vruntime - min_vruntime) scaled by its weight -- exactly the
+	 * sums avg_vruntime_add() maintains incrementally.
+	 */
+	for (node = rb_first(&cfs->tasks_timeline.rb_root); node; node = rb_next(node)) {
+		unsigned long weight;
+
+		se = __node_2_se(node);
+		weight = scale_load_down(se->load.weight);
+		avg += (s64)(se->vruntime - cfs->min_vruntime) * weight;
+		load += weight;
+
+		/* Task groups are queued for the caller to inspect later. */
+		if (!entity_is_task(se))
+			list_add_tail(&se->tg_entry, tg_se);
+	}
+
+	/*
+	 * The current entity is not on the tree; avg_vruntime() only
+	 * folds it in while it is on_rq, so do the same here.
+	 */
+	if (cfs->curr && cfs->curr->on_rq) {
+		unsigned long weight = scale_load_down(cfs->curr->load.weight);
+
+		avg += (s64)(cfs->curr->vruntime - cfs->min_vruntime) * weight;
+		load += weight;
+	}
+
+	/* If nothing is queued, there is no lag :-) */
+	if (!load)
+		return true;
+
+	/* Replicate avg_vruntime()'s floor division for negative sums. */
+	if (avg < 0)
+		avg -= (load - 1);
+
+	calculated_avg_vruntime = cfs->min_vruntime + div_s64(avg, load);
+
+	return calculated_avg_vruntime == cfs_avg_vruntime;
+}
+
+/*
+ * Verify the zero-lag property for @cfs and, transitively, for the
+ * cfs_rq of every task group queued anywhere below it.
+ *
+ * Call with the rq lock held.
+ *
+ * Returns false on failure.
+ */
+static bool test_eevdf_zero_lag(struct cfs_rq *cfs)
+{
+	struct list_head tg_se = LIST_HEAD_INIT(tg_se);
+	struct sched_entity *se;
+
+	/*
+	 * Test the root cfs_rq first; this seeds the tg_se list with any
+	 * group entities queued on it. Short-circuit on failure.
+	 */
+	if (!test_eevdf_cfs_rq_zero_lag(cfs, &tg_se))
+		return false;
+
+	/*
+	 * tg_se is used as a work queue: testing a group's cfs_rq may
+	 * append nested group entities at the tail, and the iteration
+	 * below visits those too. The walk terminates because the
+	 * cgroup hierarchy is finite and acyclic.
+	 */
+	list_for_each_entry(se, &tg_se, tg_entry) {
+		if (!test_eevdf_cfs_rq_zero_lag(group_cfs_rq(se), &tg_se))
+			return false;
+	}
+
+	/*
+	 * WOOT! We succeeded!
+	 */
+	return true;
+}
+
+/*
+ * For every online CPU, check that the recomputed average vruntime of
+ * the root cfs_rq (and of all task-group cfs_rqs beneath it) matches
+ * the tracked avg_vruntime(cfs_rq).
+ *
+ * Runs as a kthread; reports the verdict via trace_printk().
+ *
+ * NOTE(review): the online-CPU walk is not under cpus_read_lock(), so
+ * a concurrent hotplug could race with it -- confirm whether that is
+ * acceptable for a debug-only test.
+ */
+static int test_total_zero_lag(void *data)
+{
+	bool success = true;
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		struct rq *rq = cpu_rq(cpu);
+
+		guard(rq_lock_irq)(rq);
+
+		success = test_eevdf_zero_lag(&rq->cfs);
+		if (!success)
+			break;
+	}
+
+	if (!success) {
+		trace_printk("FAILED: tracked average vruntime doesn't match calculated average vruntime\n");
+		return -1;
+	}
+
+	trace_printk("PASS: Tracked average vruntime matches calculated average vruntime\n");
+	return 0;
+}
+
+/*
+ * Spawn the zero-lag test in a kthread so the debugfs write returns
+ * immediately; results are reported through the trace buffer.
+ */
+static void launch_test_zero_lag(void)
+{
+	struct task_struct *kt;
+
+	kt = kthread_create(test_total_zero_lag, NULL, "eevdf-tester-%d",
+			    smp_processor_id());
+	/*
+	 * kthread_create() returns an ERR_PTR() on failure, never NULL,
+	 * so the old "if (!kt)" check could never fire.
+	 */
+	if (IS_ERR(kt)) {
+		trace_printk("Failed to launch kthread\n");
+		return;
+	}
+
+	wake_up_process(kt);
+}
+
#endif /* CONFIG_SCHED_EEVDF_TESTING */
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 924d9d35c2aa937bc0f4ca9565ba774397b90f77..858c4e1b8fac661996d879a8dcab2776db09d1c8 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -13473,6 +13473,9 @@ void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
/* guarantee group entities always have weight */
update_load_set(&se->load, NICE_0_LOAD);
se->parent = parent;
+#ifdef CONFIG_SCHED_EEVDF_TESTING
+	/*
+	 * Initialize the list node this series adds for the EEVDF test.
+	 * Must be tg_entry -- group_node is a pre-existing field used by
+	 * the SMP task lists and must not be re-initialized here.
+	 */
+	INIT_LIST_HEAD(&se->tg_entry);
+#endif
}
static DEFINE_MUTEX(shares_mutex);
--
2.49.0
Powered by blists - more mailing lists