lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20260108023554.1735052-1-suzhidao@xiaomi.com>
Date: Thu,  8 Jan 2026 10:35:54 +0800
From: zhidao su <soolaugust@...il.com>
To: tj@...nel.org,
	void@...ifault.com,
	arighi@...dia.com,
	changwoo@...lia.com
Cc: sched-ext@...ts.linux.dev,
	linux-kernel@...r.kernel.org,
	zhidao su <suzhidao@...omi.com>
Subject: [PATCH] sched_ext: Add DSQ statistics collection functionality

This patch adds statistics collection to the Dispatch Queue (DSQ) system in
the sched_ext scheduler. Statistics tracked:

1. Total enqueue count (number of tasks enqueued to a DSQ)
2. Total dequeue count (number of tasks dequeued from a DSQ)
3. Peak queue length (maximum number of tasks simultaneously queued)

New BPF interface functions:
- scx_bpf_dsq_enqueue_count(dsq_id): Get total enqueue count
- scx_bpf_dsq_dequeue_count(dsq_id): Get total dequeue count
- scx_bpf_dsq_peak_nr(dsq_id): Get peak queue length

Implementation uses atomic operations for concurrency safety and follows
existing kernel patterns for statistics collection. Compatibility macros in
compat.bpf.h let schedulers built against this header still load on older
kernels that lack the new kfuncs.

A comprehensive test case (dsq_stats) is included to verify functionality.

Signed-off-by: zhidao su <suzhidao@...omi.com>
---
 include/linux/sched/ext.h                     |   6 +
 kernel/sched/ext.c                            | 156 +++++++++++++++++-
 tools/sched_ext/include/scx/compat.bpf.h      |  17 ++
 tools/testing/selftests/sched_ext/Makefile    |   1 +
 .../selftests/sched_ext/dsq_stats.bpf.c       | 101 ++++++++++++
 tools/testing/selftests/sched_ext/dsq_stats.c | 147 +++++++++++++++++
 6 files changed, 427 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/sched_ext/dsq_stats.bpf.c
 create mode 100644 tools/testing/selftests/sched_ext/dsq_stats.c

diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h
index bcb962d5ee7d..b79bedc26388 100644
--- a/include/linux/sched/ext.h
+++ b/include/linux/sched/ext.h
@@ -13,6 +13,7 @@
 
 #include <linux/llist.h>
 #include <linux/rhashtable-types.h>
+#include <linux/atomic.h>
 
 enum scx_public_consts {
 	SCX_OPS_NAME_LEN	= 128,
@@ -79,6 +80,11 @@ struct scx_dispatch_q {
 	struct rhash_head	hash_node;
 	struct llist_node	free_node;
 	struct rcu_head		rcu;
+
+	/* Lifetime stats: enqueue/dequeue totals and peak depth, zeroed in init_dsq() */
+	atomic64_t		enqueue_count;
+	atomic64_t		dequeue_count;
+	atomic_t		peak_nr;
 };
 
 /* scx_entity.flags */
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 136b01950a62..a025d68acdef 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -972,8 +972,22 @@ static bool scx_dsq_priq_less(struct rb_node *node_a,
 
 static void dsq_mod_nr(struct scx_dispatch_q *dsq, s32 delta)
 {
+	u32 new_nr;
+
 	/* scx_bpf_dsq_nr_queued() reads ->nr without locking, use WRITE_ONCE() */
-	WRITE_ONCE(dsq->nr, dsq->nr + delta);
+	new_nr = dsq->nr + delta;
+	WRITE_ONCE(dsq->nr, new_nr);
+
+	/*
+	 * Track the peak queue length. The caller holds dsq->lock, so the
+	 * read-then-set on ->peak_nr cannot lose an update for this DSQ;
+	 * the atomic type only serves the lockless readers in the kfuncs.
+	 */
+	if (delta > 0) {
+		u32 peak = atomic_read(&dsq->peak_nr);
+
+		if (new_nr > peak)
+			atomic_set(&dsq->peak_nr, new_nr);
+	}
 }
@@ -1099,6 +1111,7 @@ static void dispatch_enqueue(struct scx_sched *sch, struct scx_dispatch_q *dsq,
 	p->scx.dsq_seq = dsq->seq;
 
 	dsq_mod_nr(dsq, 1);
+	atomic64_inc(&dsq->enqueue_count);  /* lifetime stat, read locklessly by kfuncs */
 	p->scx.dsq = dsq;
 
 	/*
@@ -1136,6 +1149,7 @@ static void task_unlink_from_dsq(struct task_struct *p,
 
 	list_del_init(&p->scx.dsq_list.node);
 	dsq_mod_nr(dsq, -1);
+	atomic64_inc(&dsq->dequeue_count);  /* NOTE(review): also counts unlinks for DSQ-to-DSQ moves — confirm intended */
 
 	if (!(dsq->id & SCX_DSQ_FLAG_BUILTIN) && dsq->first_task == p) {
 		struct task_struct *first_task;
@@ -3396,6 +3410,11 @@ static void init_dsq(struct scx_dispatch_q *dsq, u64 dsq_id)
 	raw_spin_lock_init(&dsq->lock);
 	INIT_LIST_HEAD(&dsq->list);
 	dsq->id = dsq_id;
+
+	/* Statistics start from zero for every (re)initialized DSQ */
+	atomic64_set(&dsq->enqueue_count, 0);
+	atomic64_set(&dsq->dequeue_count, 0);
+	atomic_set(&dsq->peak_nr, 0);
 }
 
 static void free_dsq_irq_workfn(struct irq_work *irq_work)
@@ -6476,6 +6495,138 @@ __bpf_kfunc s32 scx_bpf_dsq_nr_queued(u64 dsq_id)
 	return ret;
 }
 
+/**
+ * scx_bpf_dsq_enqueue_count - Return the total number of enqueued tasks
+ * @dsq_id: id of the DSQ
+ *
+ * Return the lifetime count of tasks enqueued to the DSQ matching @dsq_id.
+ * Returns -%ENOENT if no such DSQ, -%ENODEV if no scheduler is loaded.
+ */
+__bpf_kfunc s64 scx_bpf_dsq_enqueue_count(u64 dsq_id)
+{
+	struct scx_sched *sch;
+	struct scx_dispatch_q *dsq;
+	s64 ret;
+
+	preempt_disable();
+
+	sch = rcu_dereference_sched(scx_root);
+	if (unlikely(!sch)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	if (dsq_id == SCX_DSQ_LOCAL) {
+		ret = atomic64_read(&this_rq()->scx.local_dsq.enqueue_count);
+		goto out;
+	} else if ((dsq_id & SCX_DSQ_LOCAL_ON) == SCX_DSQ_LOCAL_ON) {
+		s32 cpu = dsq_id & SCX_DSQ_LOCAL_CPU_MASK;
+
+		if (ops_cpu_valid(sch, cpu, NULL)) {
+			ret = atomic64_read(&cpu_rq(cpu)->scx.local_dsq.enqueue_count);
+			goto out;
+		}
+	} else {
+		dsq = find_user_dsq(sch, dsq_id);
+		if (dsq) {
+			ret = atomic64_read(&dsq->enqueue_count);
+			goto out;
+		}
+	}
+	ret = -ENOENT;
+out:
+	preempt_enable();
+	return ret;
+}
+
+/**
+ * scx_bpf_dsq_dequeue_count - Return the total number of dequeued tasks
+ * @dsq_id: id of the DSQ
+ *
+ * Return the lifetime count of tasks dequeued from the DSQ matching @dsq_id.
+ * Returns -%ENOENT if no such DSQ, -%ENODEV if no scheduler is loaded.
+ */
+__bpf_kfunc s64 scx_bpf_dsq_dequeue_count(u64 dsq_id)
+{
+	struct scx_sched *sch;
+	struct scx_dispatch_q *dsq;
+	s64 ret;
+
+	preempt_disable();
+
+	sch = rcu_dereference_sched(scx_root);
+	if (unlikely(!sch)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	if (dsq_id == SCX_DSQ_LOCAL) {
+		ret = atomic64_read(&this_rq()->scx.local_dsq.dequeue_count);
+		goto out;
+	} else if ((dsq_id & SCX_DSQ_LOCAL_ON) == SCX_DSQ_LOCAL_ON) {
+		s32 cpu = dsq_id & SCX_DSQ_LOCAL_CPU_MASK;
+
+		if (ops_cpu_valid(sch, cpu, NULL)) {
+			ret = atomic64_read(&cpu_rq(cpu)->scx.local_dsq.dequeue_count);
+			goto out;
+		}
+	} else {
+		dsq = find_user_dsq(sch, dsq_id);
+		if (dsq) {
+			ret = atomic64_read(&dsq->dequeue_count);
+			goto out;
+		}
+	}
+	ret = -ENOENT;
+out:
+	preempt_enable();
+	return ret;
+}
+
+/**
+ * scx_bpf_dsq_peak_nr - Return the peak number of queued tasks
+ * @dsq_id: id of the DSQ
+ *
+ * Return the maximum queue depth the DSQ matching @dsq_id has reached since
+ * creation. Returns -%ENOENT if no such DSQ, -%ENODEV if no scheduler is loaded.
+ */
+__bpf_kfunc s32 scx_bpf_dsq_peak_nr(u64 dsq_id)
+{
+	struct scx_sched *sch;
+	struct scx_dispatch_q *dsq;
+	s32 ret;
+
+	preempt_disable();
+
+	sch = rcu_dereference_sched(scx_root);
+	if (unlikely(!sch)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	if (dsq_id == SCX_DSQ_LOCAL) {
+		ret = atomic_read(&this_rq()->scx.local_dsq.peak_nr);
+		goto out;
+	} else if ((dsq_id & SCX_DSQ_LOCAL_ON) == SCX_DSQ_LOCAL_ON) {
+		s32 cpu = dsq_id & SCX_DSQ_LOCAL_CPU_MASK;
+
+		if (ops_cpu_valid(sch, cpu, NULL)) {
+			ret = atomic_read(&cpu_rq(cpu)->scx.local_dsq.peak_nr);
+			goto out;
+		}
+	} else {
+		dsq = find_user_dsq(sch, dsq_id);
+		if (dsq) {
+			ret = atomic_read(&dsq->peak_nr);
+			goto out;
+		}
+	}
+	ret = -ENOENT;
+out:
+	preempt_enable();
+	return ret;
+}
+
 /**
  * scx_bpf_destroy_dsq - Destroy a custom DSQ
  * @dsq_id: DSQ to destroy
@@ -7221,6 +7372,9 @@ BTF_ID_FLAGS(func, scx_bpf_task_set_slice, KF_RCU);
 BTF_ID_FLAGS(func, scx_bpf_task_set_dsq_vtime, KF_RCU);
 BTF_ID_FLAGS(func, scx_bpf_kick_cpu)
 BTF_ID_FLAGS(func, scx_bpf_dsq_nr_queued)
+BTF_ID_FLAGS(func, scx_bpf_dsq_enqueue_count)
+BTF_ID_FLAGS(func, scx_bpf_dsq_dequeue_count)
+BTF_ID_FLAGS(func, scx_bpf_dsq_peak_nr)
 BTF_ID_FLAGS(func, scx_bpf_destroy_dsq)
 BTF_ID_FLAGS(func, scx_bpf_dsq_peek, KF_RCU_PROTECTED | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_iter_scx_dsq_new, KF_ITER_NEW | KF_RCU_PROTECTED)
diff --git a/tools/sched_ext/include/scx/compat.bpf.h b/tools/sched_ext/include/scx/compat.bpf.h
index f2969c3061a7..8222ef8055df 100644
--- a/tools/sched_ext/include/scx/compat.bpf.h
+++ b/tools/sched_ext/include/scx/compat.bpf.h
@@ -375,6 +375,23 @@ static inline void scx_bpf_reenqueue_local(void)
 		scx_bpf_reenqueue_local___v1();
 }
 
+/*
+ * v6.20: Add DSQ statistics kfuncs. Without kernel support the macros
+ * return -ENOENT, which callers cannot distinguish from "DSQ not found";
+ * use bpf_ksym_exists() directly if the cases must be told apart.
+ * Preserve the following macros until v6.22.
+ */
+#define __COMPAT_scx_bpf_dsq_enqueue_count(dsq_id)		\
+	(bpf_ksym_exists(scx_bpf_dsq_enqueue_count) ?		\
+	 scx_bpf_dsq_enqueue_count(dsq_id) : -ENOENT)
+
+#define __COMPAT_scx_bpf_dsq_dequeue_count(dsq_id)		\
+	(bpf_ksym_exists(scx_bpf_dsq_dequeue_count) ?		\
+	 scx_bpf_dsq_dequeue_count(dsq_id) : -ENOENT)
+
+#define __COMPAT_scx_bpf_dsq_peak_nr(dsq_id)			\
+	(bpf_ksym_exists(scx_bpf_dsq_peak_nr) ?			\
+	 scx_bpf_dsq_peak_nr(dsq_id) : -ENOENT)
+
 /*
  * Define sched_ext_ops. This may be expanded to define multiple variants for
  * backward compatibility. See compat.h::SCX_OPS_LOAD/ATTACH().
diff --git a/tools/testing/selftests/sched_ext/Makefile b/tools/testing/selftests/sched_ext/Makefile
index 5fe45f9c5f8f..546534159d74 100644
--- a/tools/testing/selftests/sched_ext/Makefile
+++ b/tools/testing/selftests/sched_ext/Makefile
@@ -161,6 +161,7 @@ all_test_bpfprogs := $(foreach prog,$(wildcard *.bpf.c),$(INCLUDE_DIR)/$(patsubs
 
 auto-test-targets :=			\
 	create_dsq			\
+	dsq_stats			\
 	enq_last_no_enq_fails		\
 	ddsp_bogus_dsq_fail		\
 	ddsp_vtimelocal_fail		\
diff --git a/tools/testing/selftests/sched_ext/dsq_stats.bpf.c b/tools/testing/selftests/sched_ext/dsq_stats.bpf.c
new file mode 100644
index 000000000000..bd3f8ac78456
--- /dev/null
+++ b/tools/testing/selftests/sched_ext/dsq_stats.bpf.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A BPF program for testing DSQ statistics functionality.
+ *
+ * Copyright (c) 2026 Xiaomi Corporation.
+ */
+
+#include <scx/common.bpf.h>
+#include <scx/compat.bpf.h>
+
+char _license[] SEC("license") = "GPL";
+
+UEI_DEFINE(uei); /* Error handling */
+
+#define TEST_DSQ_ID 1234
+
+/* Results read by the userspace side of the test */
+s64 test_enqueue_count = -1;
+s64 test_dequeue_count = -1;
+s32 test_peak_nr = -1;
+s32 current_nr = -1;
+
+/* Set once a sample of the statistics has been taken */
+int test_completed;
+
+/* Sample the DSQ statistics kfuncs into the result variables. */
+static void test_dsq_stats(void)
+{
+	test_enqueue_count = __COMPAT_scx_bpf_dsq_enqueue_count(TEST_DSQ_ID);
+	test_dequeue_count = __COMPAT_scx_bpf_dsq_dequeue_count(TEST_DSQ_ID);
+	test_peak_nr = __COMPAT_scx_bpf_dsq_peak_nr(TEST_DSQ_ID);
+	current_nr = scx_bpf_dsq_nr_queued(TEST_DSQ_ID);
+
+	test_completed = 1;
+}
+
+void BPF_STRUCT_OPS(dsq_stats_enqueue, struct task_struct *p, u64 enq_flags)
+{
+	/*
+	 * Route every task through the test DSQ so statistics keep
+	 * accumulating and no task is left without a destination, which
+	 * would otherwise stall the system.
+	 */
+	scx_bpf_dsq_insert(p, TEST_DSQ_ID, SCX_SLICE_DFL, enq_flags);
+}
+
+void BPF_STRUCT_OPS(dsq_stats_dispatch, s32 cpu, struct task_struct *prev)
+{
+	/* Always drain the test DSQ so queued tasks keep running */
+	scx_bpf_dsq_move_to_local(TEST_DSQ_ID);
+
+	/*
+	 * Sample once, when the DSQ is observed empty, so the enqueue and
+	 * dequeue counters have had a chance to converge.
+	 */
+	if (!test_completed && !scx_bpf_dsq_nr_queued(TEST_DSQ_ID))
+		test_dsq_stats();
+}
+
+s32 BPF_STRUCT_OPS_SLEEPABLE(dsq_stats_init)
+{
+	/*
+	 * scx_bpf_create_dsq() may only be called from sleepable context,
+	 * so create the DSQ here instead of in ops.enqueue().
+	 */
+	return scx_bpf_create_dsq(TEST_DSQ_ID, -1);
+}
+
+void BPF_STRUCT_OPS(dsq_stats_exit, struct scx_exit_info *ei)
+{
+	scx_bpf_destroy_dsq(TEST_DSQ_ID);
+
+	UEI_RECORD(uei, ei);
+}
+
+SEC(".struct_ops.link")
+struct sched_ext_ops dsq_stats_ops = {
+	.enqueue = (void *)dsq_stats_enqueue,
+	.dispatch = (void *)dsq_stats_dispatch,
+	.init = (void *)dsq_stats_init,
+	.exit = (void *)dsq_stats_exit,
+	.name = "dsq_stats",
+};
diff --git a/tools/testing/selftests/sched_ext/dsq_stats.c b/tools/testing/selftests/sched_ext/dsq_stats.c
new file mode 100644
index 000000000000..b54912a0d2ca
--- /dev/null
+++ b/tools/testing/selftests/sched_ext/dsq_stats.c
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for DSQ statistics functionality.
+ *
+ * Copyright (c) 2026 Xiaomi Corporation.
+ */
+#include <bpf/bpf.h>
+#include <scx/common.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <stdatomic.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <sched.h>
+#include "dsq_stats.bpf.skel.h"
+#include "scx_test.h"
+
+#define NUM_WORKERS 2
+
+static atomic_bool workload_running = true;
+static pthread_t workload_threads[NUM_WORKERS];
+static int workers_started;
+
+/*
+ * Background workload thread that exercises the scheduler to trigger
+ * DSQ operations and statistics collection.
+ */
+static void *workload_thread_fn(void *arg)
+{
+	while (atomic_load(&workload_running)) {
+		usleep(1000); /* 1ms sleep to trigger scheduler activity */
+		sched_yield(); /* force a pass through the scheduler */
+	}
+	return NULL;
+}
+
+static enum scx_test_status setup(void **ctx)
+{
+	struct dsq_stats *skel;
+	int i, ret;
+
+	skel = dsq_stats__open();
+	SCX_FAIL_IF(!skel, "Failed to open");
+
+	SCX_ENUM_INIT(skel);
+	SCX_FAIL_IF(dsq_stats__load(skel), "Failed to load skel");
+
+	/* Start background workload threads */
+	for (i = 0; i < NUM_WORKERS; i++) {
+		ret = pthread_create(&workload_threads[i], NULL,
+				     workload_thread_fn, NULL);
+		SCX_FAIL_IF(ret, "Failed to create workload thread %d", i);
+		workers_started++;
+	}
+
+	*ctx = skel;
+
+	return SCX_TEST_PASS;
+}
+
+static enum scx_test_status run(void *ctx)
+{
+	struct dsq_stats *skel = ctx;
+	struct bpf_link *link;
+	int timeout = 50; /* 5 seconds in 100ms steps */
+
+	link = bpf_map__attach_struct_ops(skel->maps.dsq_stats_ops);
+	SCX_FAIL_IF(!link, "Failed to attach scheduler");
+
+	/*
+	 * Wait for the BPF side to take its sample while the scheduler is
+	 * still attached; once the link is destroyed it no longer runs.
+	 */
+	while (!skel->bss->test_completed && --timeout > 0)
+		usleep(100000); /* 100ms */
+
+	atomic_store(&workload_running, false);
+	bpf_link__destroy(link);
+
+	SCX_FAIL_IF(!skel->bss->test_completed,
+		    "Timed out waiting for statistics sample");
+
+	return SCX_TEST_PASS;
+}
+
+static enum scx_test_status check_results(void *ctx)
+{
+	struct dsq_stats *skel = ctx;
+
+	/* Check that statistics were collected */
+	SCX_FAIL_IF(skel->bss->test_enqueue_count < 0,
+		    "Enqueue count not collected: %lld",
+		    (long long)skel->bss->test_enqueue_count);
+
+	SCX_FAIL_IF(skel->bss->test_dequeue_count < 0,
+		    "Dequeue count not collected: %lld",
+		    (long long)skel->bss->test_dequeue_count);
+
+	SCX_FAIL_IF(skel->bss->test_peak_nr < 0,
+		    "Peak NR not collected: %d",
+		    skel->bss->test_peak_nr);
+
+	/*
+	 * Sanity checks. The sample is not taken atomically wrt concurrent
+	 * enqueues, so only inequalities are guaranteed to hold.
+	 */
+	SCX_FAIL_IF(skel->bss->test_enqueue_count < skel->bss->test_dequeue_count,
+		    "Enqueue count (%lld) < dequeue count (%lld)",
+		    (long long)skel->bss->test_enqueue_count,
+		    (long long)skel->bss->test_dequeue_count);
+
+	SCX_FAIL_IF(skel->bss->test_peak_nr < skel->bss->current_nr,
+		    "Peak NR (%d) < current NR (%d)",
+		    skel->bss->test_peak_nr, skel->bss->current_nr);
+
+	printf("DSQ stats: enqueue=%lld dequeue=%lld peak=%d current=%d\n",
+	       (long long)skel->bss->test_enqueue_count,
+	       (long long)skel->bss->test_dequeue_count,
+	       skel->bss->test_peak_nr, skel->bss->current_nr);
+
+	return SCX_TEST_PASS;
+}
+
+static void cleanup(void *ctx)
+{
+	struct dsq_stats *skel = ctx;
+	int i;
+
+	/* Stop and reap only the threads that were actually started */
+	atomic_store(&workload_running, false);
+	for (i = 0; i < workers_started; i++)
+		pthread_join(workload_threads[i], NULL);
+
+	dsq_stats__destroy(skel);
+}
+
+struct scx_test dsq_stats = {
+	.name = "dsq_stats",
+	.description = "Test DSQ statistics functionality",
+	.setup = setup,
+	.run = run,
+	.check_results = check_results,
+	.cleanup = cleanup,
+};
+REGISTER_SCX_TEST(&dsq_stats)
-- 
2.43.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ