lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <000000000000cdb895061c52bf0e@google.com>
Date: Wed, 03 Jul 2024 00:26:31 -0700
From: syzbot <syzbot+c4c6c3dc10cc96bcf723@...kaller.appspotmail.com>
To: linux-kernel@...r.kernel.org, syzkaller-bugs@...glegroups.com
Subject: Re: [syzbot] Test

For archival purposes, forwarding an incoming command email to
linux-kernel@...r.kernel.org, syzkaller-bugs@...glegroups.com.

***

Subject: Test
Author: radoslaw.zielonek@...il.com

#syz test https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git fe46a7dd189e25604716c03576d05ac8a5209743

---
 include/linux/sched.h |  7 +++++++
 kernel/sched/core.c   |  1 +
 kernel/sched/rt.c     | 26 +++++++++++++++++++++++---
 kernel/signal.c       |  9 +++++++++
 4 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 17cb0761ff65..123bc16ad3d0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1121,6 +1121,13 @@ struct task_struct {
 	size_t				sas_ss_size;
 	unsigned int			sas_ss_flags;
 
+	/*
+	 * Number of signals received by an RT task between scheduling ticks.
+	 * This counter is used to throttle RT tasks when too many signals
+	 * (e.g., POSIX timers) are sent to the task, which can cause an RCU stall.
+	 */
+	atomic_t rt_signals_recv_count; /* used outside of the rq lock */
+
 	struct callback_head		*task_works;
 
 #ifdef CONFIG_AUDIT
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d44efa0d0611..9def826bd35f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4779,6 +4779,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
 			p->policy = SCHED_NORMAL;
 			p->static_prio = NICE_TO_PRIO(0);
 			p->rt_priority = 0;
+			atomic_set(&p->rt_signals_recv_count, 0);
 		} else if (PRIO_TO_NICE(p->static_prio) < 0)
 			p->static_prio = NICE_TO_PRIO(0);
 
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 3261b067b67e..9b22d67d1746 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -24,6 +24,15 @@ int sysctl_sched_rt_period = 1000000;
  */
 int sysctl_sched_rt_runtime = 950000;
 
+/*
+ * To avoid an RCU stall due to a large number of signals received by RT tasks
+ * (e.g., POSIX timers), the RT task needs to be throttled.
+ * When the number of signals received by an RT task during a scheduling
+ * tick period exceeds the threshold, the RT task will be throttled.
+ * The value of 100 has not been thoroughly tested and may need adjustment.
+ */
+#define RT_RECV_SGINAL_THROTTLE_THRESHOLD 100
+
 #ifdef CONFIG_SYSCTL
 static int sysctl_sched_rr_timeslice = (MSEC_PER_SEC * RR_TIMESLICE) / HZ;
 static int sched_rt_handler(struct ctl_table *table, int write, void *buffer,
@@ -951,7 +960,7 @@ static inline int rt_se_prio(struct sched_rt_entity *rt_se)
 	return rt_task_of(rt_se)->prio;
 }
 
-static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
+static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq, int rt_signal_recv)
 {
 	u64 runtime = sched_rt_runtime(rt_rq);
 
@@ -966,7 +975,15 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 	if (runtime == RUNTIME_INF)
 		return 0;
 
-	if (rt_rq->rt_time > runtime) {
+	/*
+	 * When a large number of signals are sent to this task (e.g., POSIX timers)
+	 * the delta time deviates significantly from real time due to the overhead
+	 * of handling signals. For RT tasks, this can cause an RCU stall.
+	 * To avoid this, throttle the task when the number of signals received
+	 * exceeds a certain threshold.
+	 */
+	if (rt_rq->rt_time > runtime ||
+		rt_signal_recv >= RT_RECV_SGINAL_THROTTLE_THRESHOLD) {
 		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
 
 		/*
@@ -1021,7 +1038,9 @@ static void update_curr_rt(struct rq *rq)
 		if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
 			raw_spin_lock(&rt_rq->rt_runtime_lock);
 			rt_rq->rt_time += delta_exec;
-			exceeded = sched_rt_runtime_exceeded(rt_rq);
+			exceeded = sched_rt_runtime_exceeded(
+						rt_rq,
+						atomic_read(&curr->rt_signals_recv_count));
 			if (exceeded)
 				resched_curr(rq);
 			raw_spin_unlock(&rt_rq->rt_runtime_lock);
@@ -1029,6 +1048,7 @@ static void update_curr_rt(struct rq *rq)
 				do_start_rt_bandwidth(sched_rt_bandwidth(rt_rq));
 		}
 	}
+	atomic_set(&curr->rt_signals_recv_count, 0);
 }
 
 static void
diff --git a/kernel/signal.c b/kernel/signal.c
index bdca529f0f7b..d58e0ba9336c 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -629,6 +629,15 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask,
 	bool resched_timer = false;
 	int signr;
 
+	/*
+	 * To prevent an RCU stall due to receiving too many signals by RT tasks,
+	 * count all signals regardless of their type.
+	 * Based on this counter, the RT scheduler will decide whether the task
+	 * should be throttled or not.
+	 */
+	if (tsk->policy == SCHED_FIFO || tsk->policy == SCHED_RR)
+		atomic_inc(&tsk->rt_signals_recv_count);
+
 	/* We only dequeue private signals from ourselves, we don't let
 	 * signalfd steal them
 	 */
-- 
2.43.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ