Message-Id: <1620997011-106951-1-git-send-email-zhouchuangao@vivo.com>
Date:   Fri, 14 May 2021 05:56:49 -0700
From:   zhouchuangao <zhouchuangao@...o.com>
To:     Ingo Molnar <mingo@...hat.com>,
        Peter Zijlstra <peterz@...radead.org>,
        Juri Lelli <juri.lelli@...hat.com>,
        Vincent Guittot <vincent.guittot@...aro.org>,
        Dietmar Eggemann <dietmar.eggemann@....com>,
        Steven Rostedt <rostedt@...dmis.org>,
        Ben Segall <bsegall@...gle.com>, Mel Gorman <mgorman@...e.de>,
        Daniel Bristot de Oliveira <bristot@...hat.com>,
        Andrew Morton <akpm@...ux-foundation.org>,
        Kees Cook <keescook@...omium.org>,
        Stephen Rothwell <sfr@...b.auug.org.au>,
        "Guilherme G. Piccoli" <gpiccoli@...onical.com>,
        Michal Hocko <mhocko@...e.com>,
        Tetsuo Handa <penguin-kernel@...ove.sakura.ne.jp>,
        Lukas Bulwahn <lukas.bulwahn@...il.com>,
        zhouchuangao <zhouchuangao@...o.com>,
        Vlastimil Babka <vbabka@...e.cz>, linux-kernel@...r.kernel.org
Subject: [PATCH] kernel/hung_task: Report top CPU consumers

1. If a task has been blocked for more than 2 minutes (the default
hung_task timeout) and the detector is configured to panic, report
the top 3 (NUM_CONSUMERS) CPU consumers before panicking.

2. The CPU utilization of each process is measured over a one-second
window: usage is snapshotted, the watchdog sleeps for one second
(msleep(1000)), and the delta is reported.

3. Add a new member, last_cpu_time, to task_struct to record the
task's cumulative CPU time at the start of the measurement window.
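
For illustration, the report emitted just before the panic would look
roughly like this (task names, PIDs, and tick values are made up; the
format follows the pr_info() calls in show_top_cpu_consumers() below):

  hung_task: reporting top 3 CPU consumers:
  TOP    COMM    PID    [TASK_CPU_TIME/ALL_CPU_TIME]
  Top1   stress    1234    [98/402]
  Top2   kworker/0:1    42    [3/402]
  Top3   rcu_sched    11    [1/402]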

Signed-off-by: zhouchuangao <zhouchuangao@...o.com>
---
 include/linux/sched.h |   1 +
 kernel/hung_task.c    | 171 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 171 insertions(+), 1 deletion(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8d5264b..103f98f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -973,6 +973,7 @@ struct task_struct {
 	unsigned long			last_switch_count;
 	unsigned long			last_switch_time;
 	unsigned long			killed_time;
+	u64				last_cpu_time;
 #endif
 	/* Filesystem information: */
 	struct fs_struct		*fs;
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index bb2e3e1..fb5f944 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -10,6 +10,7 @@
 #include <linux/cpu.h>
 #include <linux/nmi.h>
 #include <linux/init.h>
+#include <linux/tick.h>
 #include <linux/delay.h>
 #include <linux/freezer.h>
 #include <linux/kthread.h>
@@ -21,7 +22,8 @@
 #include <linux/sched/signal.h>
 #include <linux/sched/debug.h>
 #include <linux/sched/sysctl.h>
+#include <linux/sched/cputime.h>
 
 #include <trace/events/sched.h>
 
 /*
@@ -55,6 +57,17 @@ static bool hung_task_show_lock;
 static bool hung_task_call_panic;
 static bool hung_task_show_all_bt;
 
+static u64	last_cpu_usage;
+static u64	interval_cpu_usage;
+
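+/* One slot of the top-N table of CPU consumers. */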
+#define NUM_CONSUMERS   3
+struct cpu_consumer {
+	char	comm[TASK_COMM_LEN];
+	pid_t	pid;
+	u64	cpu_used;
+};
+
 static struct task_struct *watchdog_task;
 
 #ifdef CONFIG_SMP
@@ -72,6 +85,154 @@ unsigned int __read_mostly sysctl_hung_task_all_cpu_backtrace;
 unsigned int __read_mostly sysctl_hung_task_panic =
 				CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE;
 
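+/* These mirror get_idle_time()/get_iowait_time() in fs/proc/stat.c. */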
+#ifdef arch_idle_time
+static u64 get_idle_time(struct kernel_cpustat *kcs, int cpu)
+{
+	u64 idle;
+
+	idle = kcs->cpustat[CPUTIME_IDLE];
+	if (cpu_online(cpu) && !nr_iowait_cpu(cpu))
+		idle += arch_idle_time(cpu);
+	return idle;
+}
+
+static u64 get_iowait_time(struct kernel_cpustat *kcs, int cpu)
+{
+	u64 iowait;
+
+	iowait = kcs->cpustat[CPUTIME_IOWAIT];
+	if (cpu_online(cpu) && nr_iowait_cpu(cpu))
+		iowait += arch_idle_time(cpu);
+	return iowait;
+}
+#else
+static u64 get_idle_time(struct kernel_cpustat *kcs, int cpu)
+{
+	u64 idle, idle_usecs = -1ULL;
+
+	if (cpu_online(cpu))
+		idle_usecs = get_cpu_idle_time_us(cpu, NULL);
+
+	if (idle_usecs == -1ULL)
+		/* !NO_HZ or cpu offline so we can rely on cpustat.idle */
+		idle = kcs->cpustat[CPUTIME_IDLE];
+	else
+		idle = idle_usecs * NSEC_PER_USEC;
+
+	return idle;
+}
+
+static u64 get_iowait_time(struct kernel_cpustat *kcs, int cpu)
+{
+	u64 iowait, iowait_usecs = -1ULL;
+
+	if (cpu_online(cpu))
+		iowait_usecs = get_cpu_iowait_time_us(cpu, NULL);
+
+	if (iowait_usecs == -1ULL)
+		/* !NO_HZ or cpu offline so we can rely on cpustat.iowait */
+		iowait = kcs->cpustat[CPUTIME_IOWAIT];
+	else
+		iowait = iowait_usecs * NSEC_PER_USEC;
+
+	return iowait;
+}
+#endif
+
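+/*
+ * compute == false: snapshot each task's CPU time in ->last_cpu_time.
+ * compute == true: rank tasks by time used since then, log the top ones.
+ */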
+static void show_top_cpu_consumers(bool compute)
+{
+	int i, j;
+	struct task_struct *g, *t;
+	struct signal_struct *sig;
+	u64 cutime, cstime, utime, stime;
+	u64 task_cpu_time, interval_time;
+	struct cpu_consumer tcc[NUM_CONSUMERS];
+
+	memset(tcc, 0, sizeof(tcc));
+
+	rcu_read_lock();
+	for_each_process_thread(g, t) {
+		sig = t->signal;
+		cutime = sig->cutime;
+		cstime = sig->cstime;
+		task_cputime_adjusted(t, &utime, &stime);
+		task_cpu_time = cutime + cstime + utime + stime;
+
+		if (compute) {
+			interval_time = task_cpu_time - t->last_cpu_time;
+			for (i = 0; i < NUM_CONSUMERS; i++) {
+				if (interval_time > tcc[i].cpu_used) {
+				for (j = NUM_CONSUMERS - 1; j > i; j--)
+					tcc[j] = tcc[j - 1];
+				strscpy(tcc[i].comm, t->comm, TASK_COMM_LEN);
+					tcc[i].pid = t->pid;
+					tcc[i].cpu_used = interval_time;
+					break;
+				}
+			}
+		} else {
+			t->last_cpu_time = task_cpu_time;
+		}
+	}
+	rcu_read_unlock();
+
+	if (compute) {
+		pr_info("hung_task: reporting top %d CPU consumers:\n", NUM_CONSUMERS);
+		pr_info("TOP    COMM    PID    [TASK_CPU_TIME/ALL_CPU_TIME]\n");
+		for (i = 0; i < NUM_CONSUMERS; i++)
+			pr_info("Top%d   %s    %d    [%llu/%llu]\n", i + 1,
+				tcc[i].comm,
+				tcc[i].pid,
+				nsec_to_clock_t(tcc[i].cpu_used),
+				nsec_to_clock_t(interval_cpu_usage));
+	}
+}
+
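+/*
+ * Same two-pass scheme for aggregate CPU time: first snapshot the sum
+ * of all cpustat fields, then compute the delta over the window.
+ */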
+static void all_cpu_usage(bool compute)
+{
+	int i;
+	u64 user, nice, system, idle, iowait, irq, softirq, steal;
+	u64 guest, guest_nice;
+	u64 current_cpu_usage = 0;
+
+	user = nice = system = idle = iowait = irq = 0;
+	softirq = steal = guest = guest_nice = 0;
+
+	for_each_possible_cpu(i) {
+		struct kernel_cpustat kcpustat;
+		u64 *cpustat = kcpustat.cpustat;
+
+		kcpustat_cpu_fetch(&kcpustat, i);
+
+		user += cpustat[CPUTIME_USER];
+		nice += cpustat[CPUTIME_NICE];
+		system += cpustat[CPUTIME_SYSTEM];
+		idle += get_idle_time(&kcpustat, i);
+		iowait += get_iowait_time(&kcpustat, i);
+		irq += cpustat[CPUTIME_IRQ];
+		softirq += cpustat[CPUTIME_SOFTIRQ];
+		steal += cpustat[CPUTIME_STEAL];
+		guest += cpustat[CPUTIME_GUEST];
+		guest_nice += cpustat[CPUTIME_GUEST_NICE];
+	}
+	current_cpu_usage = user + nice + system + idle + iowait +
+				irq + softirq + steal + guest + guest_nice;
+
+	if (compute)
+		interval_cpu_usage = current_cpu_usage - last_cpu_usage;
+	else
+		last_cpu_usage = current_cpu_usage;
+}
+
 static int
 hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr)
 {
@@ -253,8 +414,16 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
 		trigger_all_cpu_backtrace();
 	}
 
-	if (hung_task_call_panic)
+	if (hung_task_call_panic) {
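+		/* Snapshot CPU usage, wait one second, then report the deltas. */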
+		all_cpu_usage(false);
+		show_top_cpu_consumers(false);
+		msleep(1000);
+		all_cpu_usage(true);
+		show_top_cpu_consumers(true);
+
 		panic("hung_task: blocked tasks");
+	}
 }
 
 static long hung_timeout_jiffies(unsigned long last_checked,
-- 
2.7.4
