[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20260125135848.3356585-2-atomlin@atomlin.com>
Date: Sun, 25 Jan 2026 08:58:47 -0500
From: Aaron Tomlin <atomlin@...mlin.com>
To: akpm@...ux-foundation.org,
lance.yang@...ux.dev,
mhiramat@...nel.org,
gregkh@...uxfoundation.org,
pmladek@...e.com,
joel.granados@...nel.org
Cc: neelx@...e.com,
sean@...e.io,
mproche@...il.com,
chjohnst@...il.com,
nick.lange@...il.com,
linux-kernel@...r.kernel.org
Subject: [v7 PATCH 1/2] hung_task: Refactor detection logic and atomicise detection count
The check_hung_task() function currently conflates two distinct
responsibilities: validating whether a task is hung and handling the
subsequent reporting (printing warnings, triggering panics, or
tracepoints).
This patch refactors the logic by introducing hung_task_info(), a
function dedicated solely to reporting. The actual detection check,
task_is_hung(), is hoisted into the primary loop within
check_hung_uninterruptible_tasks(). This separation clearly decouples
the mechanism of detection from the policy of reporting.
Furthermore, to facilitate future support for concurrent hung task
detection, the global sysctl_hung_task_detect_count variable is
converted from unsigned long to atomic_long_t. Consequently, the
counting logic is updated to accumulate the number of hung tasks locally
(this_round_count) during the iteration. Once the loop concludes, the
global counter is advanced by this_round_count in a single
atomic_long_cmpxchg_relaxed() against the value sampled at the start of
the scan, rather than being incremented once per hung task during the scan.
These changes are strictly preparatory and introduce no functional
change to the system's runtime behaviour.
Signed-off-by: Aaron Tomlin <atomlin@...mlin.com>
---
kernel/hung_task.c | 58 ++++++++++++++++++++++++++--------------------
1 file changed, 33 insertions(+), 25 deletions(-)
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index d2254c91450b..df10830ed9ef 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -36,7 +36,7 @@ static int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
/*
* Total number of tasks detected as hung since boot:
*/
-static unsigned long __read_mostly sysctl_hung_task_detect_count;
+static atomic_long_t sysctl_hung_task_detect_count = ATOMIC_LONG_INIT(0);
/*
* Limit number of tasks checked in a batch.
@@ -223,31 +223,29 @@ static inline void debug_show_blocker(struct task_struct *task, unsigned long ti
}
#endif
-static void check_hung_task(struct task_struct *t, unsigned long timeout,
- unsigned long prev_detect_count)
+/**
+ * hung_task_info - Print diagnostic details for a hung task
+ * @t: Pointer to the detected hung task.
+ * @timeout: Timeout threshold for detecting hung tasks
+ * @this_round_count: Count of hung tasks detected in the current iteration
+ *
+ * Print structured information about the specified hung task, if warnings
+ * are enabled or if the panic batch threshold is exceeded.
+ */
+static void hung_task_info(struct task_struct *t, unsigned long timeout,
+ unsigned long this_round_count)
{
- unsigned long total_hung_task;
-
- if (!task_is_hung(t, timeout))
- return;
-
- /*
- * This counter tracks the total number of tasks detected as hung
- * since boot.
- */
- sysctl_hung_task_detect_count++;
-
- total_hung_task = sysctl_hung_task_detect_count - prev_detect_count;
trace_sched_process_hang(t);
- if (sysctl_hung_task_panic && total_hung_task >= sysctl_hung_task_panic) {
+ if (sysctl_hung_task_panic && this_round_count >= sysctl_hung_task_panic) {
console_verbose();
hung_task_call_panic = true;
}
/*
- * Ok, the task did not get scheduled for more than 2 minutes,
- * complain:
+ * The given task did not get scheduled for longer than the
+ * hung task timeout passed in by the caller. Therefore,
+ * complain accordingly.
*/
if (sysctl_hung_task_warnings || hung_task_call_panic) {
if (sysctl_hung_task_warnings > 0)
@@ -297,18 +295,18 @@ static bool rcu_lock_break(struct task_struct *g, struct task_struct *t)
/*
* Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
- * a really long time (120 seconds). If that happens, print out
- * a warning.
+ * a really long time. If that happens, print out a warning.
*/
static void check_hung_uninterruptible_tasks(unsigned long timeout)
{
int max_count = sysctl_hung_task_check_count;
unsigned long last_break = jiffies;
struct task_struct *g, *t;
- unsigned long prev_detect_count = sysctl_hung_task_detect_count;
+ unsigned long total_count, this_round_count;
int need_warning = sysctl_hung_task_warnings;
unsigned long si_mask = hung_task_si_mask;
+ total_count = atomic_long_read(&sysctl_hung_task_detect_count);
/*
* If the system crashed already then all bets are off,
* do not report extra hung tasks:
@@ -316,10 +314,9 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
if (test_taint(TAINT_DIE) || did_panic)
return;
-
+ this_round_count = 0;
rcu_read_lock();
for_each_process_thread(g, t) {
-
if (!max_count--)
goto unlock;
if (time_after(jiffies, last_break + HUNG_TASK_LOCK_BREAK)) {
@@ -328,14 +325,25 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
last_break = jiffies;
}
- check_hung_task(t, timeout, prev_detect_count);
+ if (task_is_hung(t, timeout)) {
+ this_round_count++;
+ hung_task_info(t, timeout, this_round_count);
+ }
}
unlock:
rcu_read_unlock();
- if (!(sysctl_hung_task_detect_count - prev_detect_count))
+ if (!this_round_count)
return;
+ /*
+ * This counter tracks the total number of tasks detected as hung
+ * since boot.
+ */
+ atomic_long_cmpxchg_relaxed(&sysctl_hung_task_detect_count,
+ total_count, total_count +
+ this_round_count);
+
if (need_warning || hung_task_call_panic) {
si_mask |= SYS_INFO_LOCKS;
--
2.51.0
Powered by blists - more mailing lists