Message-ID: <957660bfb5c291b5bece9e557f30866728b9aed0.camel@redhat.com>
Date: Thu, 07 Aug 2025 15:33:35 +0200
From: Gabriele Monaco <gmonaco@...hat.com>
To: Nam Cao <namcao@...utronix.de>
Cc: Steven Rostedt <rostedt@...dmis.org>, Masami Hiramatsu
 <mhiramat@...nel.org>,  Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
 linux-trace-kernel@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2 5/5] rv: Add rts monitor

On Wed, 2025-08-06 at 10:01 +0200, Nam Cao wrote:
> Add "real-time scheduling" monitor, which validates that SCHED_RR and
> SCHED_FIFO tasks are scheduled before tasks with normal and
> extensible scheduling policies
> 
> Signed-off-by: Nam Cao <namcao@...utronix.de>
> ---
> v2:
>   - use the new tracepoints
>   - move to be under the rtapp container monitor
>   - re-generate with the modified scripts
>   - fixup incorrect enqueued status
> ---
>  Documentation/trace/rv/monitor_sched.rst |  19 +++
>  tools/verification/models/sched/rts.ltl  |   5 +

Since you moved the monitor under rtapp, you probably want to move the LTL
model and the documentation there too.
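
Something like the following, assuming the rts files should land next to the
other rtapp monitors (the exact target paths here are just my guess):

  tools/verification/models/sched/rts.ltl  -> tools/verification/models/rtapp/rts.ltl
  "Monitor rts" section of monitor_sched.rst -> Documentation/trace/rv/monitor_rtapp.rst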

Thanks,
Gabriele

> 
> diff --git a/Documentation/trace/rv/monitor_sched.rst b/Documentation/trace/rv/monitor_sched.rst
> index 3f8381ad9ec7..2f9d62a1af1f 100644
> --- a/Documentation/trace/rv/monitor_sched.rst
> +++ b/Documentation/trace/rv/monitor_sched.rst
> @@ -396,6 +396,25 @@ preemption is always disabled. On non-``PREEMPT_RT`` kernels, the interrupts
>  might invoke a softirq to set ``need_resched`` and wake up a task. This is
>  another special case that is currently not supported by the monitor.
>  
> +Monitor rts
> +-----------
> +
> +The real-time scheduling monitor validates that tasks with real-time scheduling
> +policies (`SCHED_FIFO` and `SCHED_RR`) are always scheduled before tasks with
> +normal and extensible scheduling policies (`SCHED_OTHER`, `SCHED_BATCH`,
> +`SCHED_IDLE`, `SCHED_EXT`):
> +
> +.. literalinclude:: ../../../tools/verification/models/sched/rts.ltl
> +
> +Note that this monitor may report errors if real-time throttling or the fair
> +deadline server is enabled. These mechanisms prevent real-time tasks from
> +monopolizing the CPU by giving tasks with normal and extensible scheduling
> +policies a chance to run. They give system administrators a chance to kill a
> +misbehaving real-time task. However, they violate the scheduling priorities and
> +may cause latency to well-behaved real-time tasks. Thus, if you see errors from
> +this monitor, consider disabling real-time throttling and the fair deadline
> +server.
> +
>  References
>  ----------
>  
> diff --git a/kernel/trace/rv/Kconfig b/kernel/trace/rv/Kconfig
> index 7ef89006ed50..e9007ed32aea 100644
> --- a/kernel/trace/rv/Kconfig
> +++ b/kernel/trace/rv/Kconfig
> @@ -67,6 +67,7 @@ source "kernel/trace/rv/monitors/opid/Kconfig"
>  source "kernel/trace/rv/monitors/rtapp/Kconfig"
>  source "kernel/trace/rv/monitors/pagefault/Kconfig"
>  source "kernel/trace/rv/monitors/sleep/Kconfig"
> +source "kernel/trace/rv/monitors/rts/Kconfig"
>  # Add new rtapp monitors here
>  
>  # Add new monitors here
> diff --git a/kernel/trace/rv/Makefile b/kernel/trace/rv/Makefile
> index 750e4ad6fa0f..d7bfc7ae6677 100644
> --- a/kernel/trace/rv/Makefile
> +++ b/kernel/trace/rv/Makefile
> @@ -17,6 +17,7 @@ obj-$(CONFIG_RV_MON_STS) += monitors/sts/sts.o
>  obj-$(CONFIG_RV_MON_NRP) += monitors/nrp/nrp.o
>  obj-$(CONFIG_RV_MON_SSSW) += monitors/sssw/sssw.o
>  obj-$(CONFIG_RV_MON_OPID) += monitors/opid/opid.o
> +obj-$(CONFIG_RV_MON_RTS) += monitors/rts/rts.o
>  # Add new monitors here
>  obj-$(CONFIG_RV_REACTORS) += rv_reactors.o
>  obj-$(CONFIG_RV_REACT_PRINTK) += reactor_printk.o
> diff --git a/kernel/trace/rv/monitors/rts/Kconfig b/kernel/trace/rv/monitors/rts/Kconfig
> new file mode 100644
> index 000000000000..5481b371bce1
> --- /dev/null
> +++ b/kernel/trace/rv/monitors/rts/Kconfig
> @@ -0,0 +1,17 @@
> +# SPDX-License-Identifier: GPL-2.0-only
> +#
> +config RV_MON_RTS
> +	depends on RV
> +	select RV_LTL_MONITOR
> +	depends on RV_MON_RTAPP
> +	default y
> +	select LTL_MON_EVENTS_CPU
> +	bool "rts monitor"
> +	help
> +	  Add support for the RTS (real-time scheduling) monitor, which validates
> +	  that real-time-priority tasks are scheduled before SCHED_OTHER tasks.
> +
> +	  This monitor may report an error if RT throttling or the deadline
> +	  server is enabled.
> +
> +	  Say Y if you are debugging or testing a real-time system.
> diff --git a/kernel/trace/rv/monitors/rts/rts.c b/kernel/trace/rv/monitors/rts/rts.c
> new file mode 100644
> index 000000000000..b4c3d3a4671d
> --- /dev/null
> +++ b/kernel/trace/rv/monitors/rts/rts.c
> @@ -0,0 +1,144 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <linux/ftrace.h>
> +#include <linux/tracepoint.h>
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/init.h>
> +#include <linux/rv.h>
> +#include <linux/sched/deadline.h>
> +#include <linux/sched/rt.h>
> +#include <rv/instrumentation.h>
> +
> +#define MODULE_NAME "rts"
> +
> +#include <trace/events/sched.h>
> +#include <rv_trace.h>
> +#include <monitors/rtapp/rtapp.h>
> +
> +#include "rts.h"
> +#include <rv/ltl_monitor.h>
> +
> +static DEFINE_PER_CPU(unsigned int, nr_queued);
> +
> +static void ltl_atoms_fetch(unsigned int cpu, struct ltl_monitor *mon)
> +{
> +}
> +
> +static void ltl_atoms_init(unsigned int cpu, struct ltl_monitor *mon,
> +			   bool target_creation)
> +{
> +	ltl_atom_set(mon, LTL_SCHED_SWITCH, false);
> +	ltl_atom_set(mon, LTL_SCHED_SWITCH_DL, false);
> +	ltl_atom_set(mon, LTL_SCHED_SWITCH_RT, false);
> +
> +	/*
> +	 * This may not be accurate, there may be enqueued RT tasks. But that's
> +	 * okay, the worst we get is a false negative. It will be accurate as
> +	 * soon as the CPU no longer has any queued RT task.
> +	 */
> +	ltl_atom_set(mon, LTL_RT_TASK_ENQUEUED, false);
> +}
> +
> +static void handle_enqueue_task(void *data, int cpu, struct task_struct *task)
> +{
> +	unsigned int *queued = per_cpu_ptr(&nr_queued, cpu);
> +
> +	if (!rt_task(task))
> +		return;
> +
> +	(*queued)++;
> +	ltl_atom_update(cpu, LTL_RT_TASK_ENQUEUED, true);
> +}
> +
> +static void handle_dequeue_task(void *data, int cpu, struct task_struct *task)
> +{
> +	unsigned int *queued = per_cpu_ptr(&nr_queued, cpu);
> +
> +	if (!rt_task(task))
> +		return;
> +
> +	/*
> +	 * This may not be accurate for a short time after the monitor is
> +	 * enabled, because there may be enqueued RT tasks which are not counted
> +	 * toward nr_queued. But that's okay, the worst we get is a false
> +	 * negative. nr_queued will be accurate as soon as the CPU no longer has
> +	 * any queued RT task.
> +	 */
> +	if (*queued)
> +		(*queued)--;
> +	if (!*queued)
> +		ltl_atom_update(cpu, LTL_RT_TASK_ENQUEUED, false);
> +}
> +
> +static void handle_sched_switch(void *data, bool preempt, struct task_struct *prev,
> +				struct task_struct *next, unsigned int prev_state)
> +{
> +	unsigned int cpu = smp_processor_id();
> +	struct ltl_monitor *mon = ltl_get_monitor(cpu);
> +
> +	ltl_atom_set(mon, LTL_SCHED_SWITCH_RT, rt_task(next));
> +	ltl_atom_set(mon, LTL_SCHED_SWITCH_DL, dl_task(next));
> +	ltl_atom_update(cpu, LTL_SCHED_SWITCH, true);
> +
> +	ltl_atom_set(mon, LTL_SCHED_SWITCH_RT, false);
> +	ltl_atom_set(mon, LTL_SCHED_SWITCH_DL, false);
> +	ltl_atom_update(cpu, LTL_SCHED_SWITCH, false);
> +}
> +
> +static int enable_rts(void)
> +{
> +	unsigned int cpu;
> +	int retval;
> +
> +	retval = ltl_monitor_init();
> +	if (retval)
> +		return retval;
> +
> +	for_each_possible_cpu(cpu) {
> +		unsigned int *queued = per_cpu_ptr(&nr_queued, cpu);
> +
> +		*queued = 0;
> +	}
> +
> +	rv_attach_trace_probe("rts", dequeue_task_tp, handle_dequeue_task);
> +	rv_attach_trace_probe("rts", enqueue_task_tp, handle_enqueue_task);
> +	rv_attach_trace_probe("rts", sched_switch, handle_sched_switch);
> +
> +	return 0;
> +}
> +
> +static void disable_rts(void)
> +{
> +	rv_detach_trace_probe("rts", sched_switch, handle_sched_switch);
> +	rv_detach_trace_probe("rts", enqueue_task_tp, handle_enqueue_task);
> +	rv_detach_trace_probe("rts", dequeue_task_tp, handle_dequeue_task);
> +
> +	ltl_monitor_destroy();
> +}
> +
> +/*
> + * This is the monitor register section.
> + */
> +static struct rv_monitor rv_rts = {
> +	.name = "rts",
> +	.description = "Validate that real-time tasks are scheduled before lower-priority tasks",
> +	.enable = enable_rts,
> +	.disable = disable_rts,
> +};
> +
> +static int __init register_rts(void)
> +{
> +	return rv_register_monitor(&rv_rts, &rv_rtapp);
> +}
> +
> +static void __exit unregister_rts(void)
> +{
> +	rv_unregister_monitor(&rv_rts);
> +}
> +
> +module_init(register_rts);
> +module_exit(unregister_rts);
> +
> +MODULE_LICENSE("GPL");
> +MODULE_AUTHOR("Nam Cao <namcao@...utronix.de>");
> +MODULE_DESCRIPTION("rts: Validate that real-time tasks are scheduled before lower-priority tasks");
> diff --git a/kernel/trace/rv/monitors/rts/rts.h b/kernel/trace/rv/monitors/rts/rts.h
> new file mode 100644
> index 000000000000..5881f30a38ce
> --- /dev/null
> +++ b/kernel/trace/rv/monitors/rts/rts.h
> @@ -0,0 +1,126 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +/*
> + * C implementation of Buchi automaton, automatically generated by
> + * tools/verification/rvgen from the linear temporal logic specification.
> + * For further information, see kernel documentation:
> + *   Documentation/trace/rv/linear_temporal_logic.rst
> + */
> +
> +#include <linux/rv.h>
> +
> +#define MONITOR_NAME rts
> +
> +#define LTL_MONITOR_TYPE RV_MON_PER_CPU
> +
> +enum ltl_atom {
> +	LTL_RT_TASK_ENQUEUED,
> +	LTL_SCHED_SWITCH,
> +	LTL_SCHED_SWITCH_DL,
> +	LTL_SCHED_SWITCH_RT,
> +	LTL_NUM_ATOM
> +};
> +static_assert(LTL_NUM_ATOM <= RV_MAX_LTL_ATOM);
> +
> +static const char *ltl_atom_str(enum ltl_atom atom)
> +{
> +	static const char *const names[] = {
> +		"rt_ta_en",
> +		"sc_sw",
> +		"sc_sw_dl",
> +		"sc_sw_rt",
> +	};
> +
> +	return names[atom];
> +}
> +
> +enum ltl_buchi_state {
> +	S0,
> +	S1,
> +	S2,
> +	S3,
> +	S4,
> +	RV_NUM_BA_STATES
> +};
> +static_assert(RV_NUM_BA_STATES <= RV_MAX_BA_STATES);
> +
> +static void ltl_start(unsigned int cpu, struct ltl_monitor *mon)
> +{
> +	bool sched_switch_rt = test_bit(LTL_SCHED_SWITCH_RT, mon->atoms);
> +	bool sched_switch_dl = test_bit(LTL_SCHED_SWITCH_DL, mon->atoms);
> +	bool sched_switch = test_bit(LTL_SCHED_SWITCH, mon->atoms);
> +	bool rt_task_enqueued = test_bit(LTL_RT_TASK_ENQUEUED, mon->atoms);
> +	bool val13 = !rt_task_enqueued;
> +	bool val8 = sched_switch_dl || val13;
> +	bool val9 = sched_switch_rt || val8;
> +	bool val6 = !sched_switch;
> +	bool val1 = !rt_task_enqueued;
> +
> +	if (val1)
> +		__set_bit(S0, mon->states);
> +	if (val6)
> +		__set_bit(S1, mon->states);
> +	if (val9)
> +		__set_bit(S4, mon->states);
> +}
> +
> +static void
> +ltl_possible_next_states(struct ltl_monitor *mon, unsigned int state, unsigned long *next)
> +{
> +	bool sched_switch_rt = test_bit(LTL_SCHED_SWITCH_RT, mon->atoms);
> +	bool sched_switch_dl = test_bit(LTL_SCHED_SWITCH_DL, mon->atoms);
> +	bool sched_switch = test_bit(LTL_SCHED_SWITCH, mon->atoms);
> +	bool rt_task_enqueued = test_bit(LTL_RT_TASK_ENQUEUED, mon->atoms);
> +	bool val13 = !rt_task_enqueued;
> +	bool val8 = sched_switch_dl || val13;
> +	bool val9 = sched_switch_rt || val8;
> +	bool val6 = !sched_switch;
> +	bool val1 = !rt_task_enqueued;
> +
> +	switch (state) {
> +	case S0:
> +		if (val1)
> +			__set_bit(S0, next);
> +		if (val6)
> +			__set_bit(S1, next);
> +		if (val9)
> +			__set_bit(S4, next);
> +		break;
> +	case S1:
> +		if (val6)
> +			__set_bit(S1, next);
> +		if (val1 && val6)
> +			__set_bit(S2, next);
> +		if (val1 && val9)
> +			__set_bit(S3, next);
> +		if (val9)
> +			__set_bit(S4, next);
> +		break;
> +	case S2:
> +		if (val6)
> +			__set_bit(S1, next);
> +		if (val1 && val6)
> +			__set_bit(S2, next);
> +		if (val1 && val9)
> +			__set_bit(S3, next);
> +		if (val9)
> +			__set_bit(S4, next);
> +		break;
> +	case S3:
> +		if (val1)
> +			__set_bit(S0, next);
> +		if (val6)
> +			__set_bit(S1, next);
> +		if (val9)
> +			__set_bit(S4, next);
> +		break;
> +	case S4:
> +		if (val1)
> +			__set_bit(S0, next);
> +		if (val6)
> +			__set_bit(S1, next);
> +		if (val9)
> +			__set_bit(S4, next);
> +		break;
> +	}
> +}
> diff --git a/kernel/trace/rv/monitors/rts/rts_trace.h b/kernel/trace/rv/monitors/rts/rts_trace.h
> new file mode 100644
> index 000000000000..ac4ea84162f7
> --- /dev/null
> +++ b/kernel/trace/rv/monitors/rts/rts_trace.h
> @@ -0,0 +1,15 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +/*
> + * Snippet to be included in rv_trace.h
> + */
> +
> +#ifdef CONFIG_RV_MON_RTS
> +DEFINE_EVENT(event_ltl_monitor_cpu, event_rts,
> +	TP_PROTO(unsigned int cpu, char *states, char *atoms, char *next),
> +	TP_ARGS(cpu, states, atoms, next));
> +
> +DEFINE_EVENT(error_ltl_monitor_cpu, error_rts,
> +	TP_PROTO(unsigned int cpu),
> +	TP_ARGS(cpu));
> +#endif /* CONFIG_RV_MON_RTS */
> diff --git a/kernel/trace/rv/rv_trace.h b/kernel/trace/rv/rv_trace.h
> index bf7cca6579ec..7b3a6fb8ca6f 100644
> --- a/kernel/trace/rv/rv_trace.h
> +++ b/kernel/trace/rv/rv_trace.h
> @@ -221,6 +221,7 @@ DECLARE_EVENT_CLASS(error_ltl_monitor_cpu,
>  
>  	TP_printk("cpu%u: violation detected", __entry->cpu)
>  );
> +#include <monitors/rts/rts_trace.h>
>  // Add new monitors based on CONFIG_LTL_MON_EVENTS_CPU here
>  
>  #endif /* CONFIG_LTL_MON_EVENTS_CPU */
> diff --git a/tools/verification/models/sched/rts.ltl b/tools/verification/models/sched/rts.ltl
> new file mode 100644
> index 000000000000..90872bca46b1
> --- /dev/null
> +++ b/tools/verification/models/sched/rts.ltl
> @@ -0,0 +1,5 @@
> +RULE = always (RT_TASK_ENQUEUED imply SCHEDULE_RT_NEXT)
> +
> +SCHEDULE_RT_NEXT = (not SCHED_SWITCH) until (SCHED_SWITCH_RT or EXCEPTIONS)
> +
> +EXCEPTIONS = SCHED_SWITCH_DL or not RT_TASK_ENQUEUED

