linux-kernel - Re: [RFC] hung task: check specific tasks for long uninterruptible sleep state

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20171109015429.GI22894@wotan.suse.de>
Date:   Thu, 9 Nov 2017 02:54:29 +0100
From:   "Luis R. Rodriguez" <mcgrof@...nel.org>
To:     Lingutla Chandrasekhar <clingutla@...eaurora.org>
Cc:     mingo@...nel.org, peterz@...radead.org, mcgrof@...nel.org,
        keescook@...omium.org, shile.zhang@...ia.com,
        matt@...eblueprint.co.uk, akpm@...ux-foundation.org,
        vegard.nossum@...cle.com, penguin-kernel@...ove.SAKURA.ne.jp,
        jsiddle@...hat.com, linux-kernel@...r.kernel.org,
        linux-fsdevel@...r.kernel.org, Imran Khan <kimran@...eaurora.org>
Subject: Re: [RFC] hung task: check specific tasks for long uninterruptible
 sleep state

On Wed, Nov 08, 2017 at 11:58:13AM +0530, Lingutla Chandrasekhar wrote:
> khungtask by default monitors all tasks for long unterruptible sleep.
> This change introduces a sysctl option, /proc/sys/kernel/
> hung_task_selective_monitoring, to enable monitoring selected tasks.
> If this sysctl option is enabled then only the tasks with
> /proc/$PID/hang_detection_enabled 

You did precisely the opposite as to what Peter had suggested, which was to
have an per pid exclude flag [0] and this is precisely what I was going to
suggest as well. Exclusion should be an exemption, not the norm, the way
you currently have it right now each process would have to be white-listed,
and that's a lot of work.

I'll in your previous iteration you had tried adding a
/proc/hung_task_monitor_list and only PIDs added there were monitored
otherwise all tasks are monitored, just like the default case, so this
is your V2 attempt, please clarify that in your next iteration it will
be v3.

Upon suspend we try to address putting to sleep such processes, in fact we have
a CAP_BLOCK_SUSPEND which can enable to employ features that can block system
suspend (epoll(7) EPOLLWAKEUP, /proc/sys/wake_lock), so shouldn't we
have a check for CAP_BLOCK_SUSPEND before enabling this?

This gets me wondering if this should instead be controlled via capabilities
and prtctl() such as with PR_CAPBSET_DROP.

What if the process forks?

[0] https://lkml.kernel.org/r/20170822203632.GQ32112@worktop.programming.kicks-ass.net

  Luis

> set are to be monitored,
> otherwise all tasks are monitored as default case.
> 
> Some tasks may intentionally moves to uninterruptable sleep state,
> which shouldn't leads to khungtask panics, as those are recoverable
> hungs. So to avoid false hung reports, add an option to select tasks
> to be monitored and report/panic them only.
> 
> Signed-off-by: Imran Khan <kimran@...eaurora.org>
> Signed-off-by: Lingutla Chandrasekhar <clingutla@...eaurora.org>
> 
> diff --git a/fs/proc/base.c b/fs/proc/base.c
> index 9d357b2ea6cb..810f9a5e209e 100644
> --- a/fs/proc/base.c
> +++ b/fs/proc/base.c
> @@ -2733,6 +2733,52 @@ static int proc_tgid_io_accounting(struct seq_file *m, struct pid_namespace *ns,
>  }
>  #endif /* CONFIG_TASK_IO_ACCOUNTING */
>  
> +#ifdef CONFIG_DETECT_HUNG_TASK
> +static ssize_t proc_hung_task_detection_enabled_read(struct file *file,
> +				char __user *buf, size_t count, loff_t *ppos)
> +{
> +	struct task_struct *task = get_proc_task(file_inode(file));
> +	char buffer[PROC_NUMBUF];
> +	size_t len;
> +	bool hang_detection_enabled;
> +
> +	if (!task)
> +		return -ESRCH;
> +	hang_detection_enabled = task->hang_detection_enabled;
> +	put_task_struct(task);
> +
> +	len = snprintf(buffer, sizeof(buffer), "%d\n", hang_detection_enabled);
> +
> +	return simple_read_from_buffer(buf, sizeof(buffer), ppos, buffer, len);
> +}
> +
> +static ssize_t proc_hung_task_detection_enabled_write(struct file *file,
> +			const char __user *buf, size_t count, loff_t *ppos)
> +{
> +	struct task_struct *task;
> +	bool hang_detection_enabled;
> +	int rv;
> +
> +	rv = kstrtobool_from_user(buf, count, &hang_detection_enabled);
> +	if (rv < 0)
> +		return rv;
> +
> +	task = get_proc_task(file_inode(file));
> +	if (!task)
> +		return -ESRCH;
> +	task->hang_detection_enabled = hang_detection_enabled;
> +	put_task_struct(task);
> +
> +	return count;
> +}
> +
> +static const struct file_operations proc_hung_task_detection_enabled_operations = {
> +	.read		= proc_hung_task_detection_enabled_read,
> +	.write		= proc_hung_task_detection_enabled_write,
> +	.llseek		= generic_file_llseek,
> +};
> +#endif
> +
>  #ifdef CONFIG_USER_NS
>  static int proc_id_map_open(struct inode *inode, struct file *file,
>  	const struct seq_operations *seq_ops)
> @@ -2976,6 +3022,10 @@ static const struct pid_entry tgid_base_stuff[] = {
>  #ifdef CONFIG_HARDWALL
>  	ONE("hardwall",   S_IRUGO, proc_pid_hardwall),
>  #endif
> +#ifdef CONFIG_DETECT_HUNG_TASK
> +	REG("hang_detection_enabled", 0666,
> +		proc_hung_task_detection_enabled_operations),
> +#endif
>  #ifdef CONFIG_USER_NS
>  	REG("uid_map",    S_IRUGO|S_IWUSR, proc_uid_map_operations),
>  	REG("gid_map",    S_IRUGO|S_IWUSR, proc_gid_map_operations),
> @@ -3367,6 +3417,10 @@ static const struct pid_entry tid_base_stuff[] = {
>  #ifdef CONFIG_HARDWALL
>  	ONE("hardwall",   S_IRUGO, proc_pid_hardwall),
>  #endif
> +#ifdef CONFIG_DETECT_HUNG_TASK
> +	REG("hang_detection_enabled", 0666,
> +		proc_hung_task_detection_enabled_operations),
> +#endif
>  #ifdef CONFIG_USER_NS
>  	REG("uid_map",    S_IRUGO|S_IWUSR, proc_uid_map_operations),
>  	REG("gid_map",    S_IRUGO|S_IWUSR, proc_gid_map_operations),
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index fdf74f27acf1..1280df0a6708 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -767,6 +767,7 @@ struct task_struct {
>  #endif
>  #ifdef CONFIG_DETECT_HUNG_TASK
>  	unsigned long			last_switch_count;
> +	bool				hang_detection_enabled;
>  #endif
>  	/* Filesystem information: */
>  	struct fs_struct		*fs;
> diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
> index d6a18a3839cc..09d9f1c65fd2 100644
> --- a/include/linux/sched/sysctl.h
> +++ b/include/linux/sched/sysctl.h
> @@ -11,6 +11,7 @@ extern int	     sysctl_hung_task_check_count;
>  extern unsigned int  sysctl_hung_task_panic;
>  extern unsigned long sysctl_hung_task_timeout_secs;
>  extern int sysctl_hung_task_warnings;
> +extern int sysctl_hung_task_selective_monitoring;
>  extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
>  					 void __user *buffer,
>  					 size_t *lenp, loff_t *ppos);
> diff --git a/kernel/hung_task.c b/kernel/hung_task.c
> index 751593ed7c0b..321bcfa2e34a 100644
> --- a/kernel/hung_task.c
> +++ b/kernel/hung_task.c
> @@ -20,12 +20,21 @@
>  #include <linux/sched/debug.h>
>  
>  #include <trace/events/sched.h>
> +#include <linux/sched/sysctl.h>
>  
>  /*
>   * The number of tasks checked:
>   */
>  int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
>  
> +/*
> + * Selective monitoring of hung tasks.
> + *
> + * if set to 1, khungtaskd only monitors tasks with 'hang_detection_enabled'
> + * value set, else monitors all tasks.
> + */
> +int sysctl_hung_task_selective_monitoring;
> +
>  /*
>   * Limit number of tasks checked in a batch.
>   *
> @@ -187,7 +196,10 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
>  		}
>  		/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
>  		if (t->state == TASK_UNINTERRUPTIBLE)
> -			check_hung_task(t, timeout);
> +			/* Check for selective monitoring */
> +			if (!sysctl_hung_task_selective_monitoring ||
> +			    t->hang_detection_enabled)
> +				check_hung_task(t, timeout);
>  	}
>   unlock:
>  	rcu_read_unlock();
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> index d9c31bc2eaea..30cc108ce2cc 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -1093,6 +1093,16 @@ static struct ctl_table kern_table[] = {
>  		.proc_handler	= proc_dointvec_minmax,
>  		.extra1		= &neg_one,
>  	},
> +	{
> +		.procname	= "hung_task_selective_monitoring",
> +		.data		= &sysctl_hung_task_selective_monitoring,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= &zero,
> +		.extra2		= &one,
> +	},
> +
>  #endif
>  #ifdef CONFIG_RT_MUTEXES
>  	{
> -- 
> QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
>  a Linux Foundation Collaborative Project.
> 
> 

-- 
Luis Rodriguez, SUSE LINUX GmbH
Maxfeldstrasse 5; D-90409 Nuernberg