[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250511085254.61446-3-feng.tang@linux.alibaba.com>
Date: Sun, 11 May 2025 16:52:53 +0800
From: Feng Tang <feng.tang@...ux.alibaba.com>
To: Andrew Morton <akpm@...ux-foundation.org>,
Petr Mladek <pmladek@...e.com>,
Steven Rostedt <rostedt@...dmis.org>,
Lance Yang <lance.yang@...ux.dev>,
linux-kernel@...r.kernel.org
Cc: mhiramat@...nel.org,
llong@...hat.com,
Feng Tang <feng.tang@...ux.alibaba.com>
Subject: [PATCH v1 2/3] kernel/hung_task: add option to dump system info when hung task detected
The kernel panic code uses sys_show_info() to dump system information
that helps debugging. Add the same debug option for the hung task
case: 'hungtask_print_mask' is the control knob, a bitmask that
selects which information is printed:
bit 0: print all tasks info
bit 1: print system memory info
bit 2: print timer info
bit 3: print locks info if CONFIG_LOCKDEP is on
bit 4: print ftrace buffer
bit 5: print all printk messages in buffer
bit 6: print all CPUs backtrace (if available in the arch)
bit 7: print only tasks in uninterruptible (blocked) state
Also simplify the code that dumps locks and triggers backtraces on
all CPUs by leveraging sys_show_info().
Signed-off-by: Feng Tang <feng.tang@...ux.alibaba.com>
---
.../admin-guide/kernel-parameters.txt | 5 +++
kernel/hung_task.c | 42 +++++++++++++------
2 files changed, 34 insertions(+), 13 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index d9fd26b95b34..d35d8101bee9 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4488,6 +4488,11 @@
Use this option carefully, maybe worth to setup a
bigger log buffer with "log_buf_len" along with this.
+ hungtask_print_mask=
+ Bitmask for printing system info when hung task is detected.
+ The bit definitions are the same as those of panic_print
+ above.
+
parkbd.port= [HW] Parallel port number the keyboard adapter is
connected to, default is 0.
Format: <parport#>
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index dc898ec93463..3907e3c6fefa 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -58,12 +58,29 @@ static unsigned long __read_mostly sysctl_hung_task_check_interval_secs;
static int __read_mostly sysctl_hung_task_warnings = 10;
static int __read_mostly did_panic;
-static bool hung_task_show_lock;
static bool hung_task_call_panic;
-static bool hung_task_show_all_bt;
static struct task_struct *watchdog_task;
+/*
+ * A bitmask controlling which kinds of system info are printed when a
+ * hung task is detected: task, memory, lock, etc. The bit definitions
+ * (from panic.h) are:
+ *
+ * #define SYS_PRINT_TASK_INFO 0x00000001
+ * #define SYS_PRINT_MEM_INFO 0x00000002
+ * #define SYS_PRINT_TIMER_INFO 0x00000004
+ * #define SYS_PRINT_LOCK_INFO 0x00000008
+ * #define SYS_PRINT_FTRACE_INFO 0x00000010
+ * #define SYS_PRINT_ALL_PRINTK_MSG 0x00000020
+ * #define SYS_PRINT_ALL_CPU_BT 0x00000040
+ * #define SYS_PRINT_BLOCKED_TASKS 0x00000080
+ */
+unsigned long hungtask_print_mask;
+core_param(hungtask_print_mask, hungtask_print_mask, ulong, 0644);
+
+static unsigned long cur_print_mask;
+
#ifdef CONFIG_SMP
/*
* Should we dump all CPUs backtraces in a hung task event?
@@ -163,11 +180,16 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
*/
sysctl_hung_task_detect_count++;
+ cur_print_mask = hungtask_print_mask;
+
+ if (!sysctl_hung_task_all_cpu_backtrace)
+ cur_print_mask &= ~SYS_PRINT_ALL_CPU_BT;
+
trace_sched_process_hang(t);
if (sysctl_hung_task_panic) {
console_verbose();
- hung_task_show_lock = true;
+ cur_print_mask |= SYS_PRINT_LOCK_INFO;
hung_task_call_panic = true;
}
@@ -190,10 +212,10 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
" disables this message.\n");
sched_show_task(t);
debug_show_blocker(t);
- hung_task_show_lock = true;
+ cur_print_mask |= SYS_PRINT_LOCK_INFO;
if (sysctl_hung_task_all_cpu_backtrace)
- hung_task_show_all_bt = true;
+ cur_print_mask |= SYS_PRINT_ALL_CPU_BT;
if (!sysctl_hung_task_warnings)
pr_info("Future hung task reports are suppressed, see sysctl kernel.hung_task_warnings\n");
}
@@ -242,7 +264,7 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
if (test_taint(TAINT_DIE) || did_panic)
return;
- hung_task_show_lock = false;
+ cur_print_mask = 0;
rcu_read_lock();
for_each_process_thread(g, t) {
unsigned int state;
@@ -266,14 +288,8 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
}
unlock:
rcu_read_unlock();
- if (hung_task_show_lock)
- debug_show_all_locks();
-
- if (hung_task_show_all_bt) {
- hung_task_show_all_bt = false;
- trigger_all_cpu_backtrace();
- }
+ sys_show_info(cur_print_mask);
if (hung_task_call_panic)
panic("hung_task: blocked tasks");
}
--
2.39.5 (Apple Git-154)
Powered by blists - more mailing lists