lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20180110192513.GB3837@localhost.localdomain>
Date:   Wed, 10 Jan 2018 11:25:14 -0800
From:   Eduardo Valentin <edubezval@...il.com>
To:     Viresh Kumar <viresh.kumar@...aro.org>
Cc:     Zhang Rui <rui.zhang@...el.com>,
        Vincent Guittot <vincent.guittot@...aro.org>,
        linux-kernel@...r.kernel.org, linux-pm@...r.kernel.org
Subject: Re: [PATCH] thermal: Add debugfs support for cooling devices

On Thu, Jan 04, 2018 at 12:32:04PM +0530, Viresh Kumar wrote:
> This implements the debugfs interface for thermal cooling devices and
> exposes some pretty useful statistics. These statistics have proven to
> be quite useful specially while doing benchmarks related to the task
> scheduler, where we want to make sure that nothing has disrupted the
> test, specially the cooling device which may have put constraints on the
> CPUs. The information exposed here tells us to what extent the CPUs were
> constrained by the thermal framework.
> 
> The read-only "transitions" file is per cooling device and it shows the
> total number of cooling state transitions the device has gone through
> since the time the cooling device is registered or the time when
> statistics were reset last.
> 
> The read-only "time_in_state/stateN" file is per cooling state and it
> shows the time spent by the device in the respective cooling state.
> 
> The write-only "reset" file is used to reset the statistics.
> 
> This is how the directory structure looks like for a single cooling
> device:
> 
> $ ls -R /sys/kernel/debug/thermal/
> /sys/kernel/debug/thermal/:
> cooling_device0
> 
> /sys/kernel/debug/thermal/cooling_device0:
> reset  time_in_state_ms  transitions
> 
> /sys/kernel/debug/thermal/cooling_device0/time_in_state_ms:
> state0  state1  state2  state3

I would prefer this to go into sysfs. Reason is mainly because
such stats are also useful on production systems, specially for
collecting how a policy / deployment behaves across production
population.

Cheers,

> 
> This is tested on ARM 32-bit Hisilicon hikey620 board running Ubuntu and
> ARM 64-bit Hisilicon hikey960 board running Android.
> 
> Signed-off-by: Viresh Kumar <viresh.kumar@...aro.org>
> ---
>  drivers/thermal/Makefile          |   1 +
>  drivers/thermal/thermal_core.c    |   6 ++
>  drivers/thermal/thermal_core.h    |  18 ++++
>  drivers/thermal/thermal_debugfs.c | 167 ++++++++++++++++++++++++++++++++++++++
>  drivers/thermal/thermal_helpers.c |   5 +-
>  drivers/thermal/thermal_sysfs.c   |   1 +
>  include/linux/thermal.h           |   1 +
>  7 files changed, 198 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/thermal/thermal_debugfs.c
> 
> diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
> index 610344eb3e03..629f74e73871 100644
> --- a/drivers/thermal/Makefile
> +++ b/drivers/thermal/Makefile
> @@ -6,6 +6,7 @@
>  obj-$(CONFIG_THERMAL)		+= thermal_sys.o
>  thermal_sys-y			+= thermal_core.o thermal_sysfs.o \
>  					thermal_helpers.o
> +obj-$(CONFIG_DEBUG_FS)		+= thermal_debugfs.o
>  
>  # interface to/from other layers providing sensors
>  thermal_sys-$(CONFIG_THERMAL_HWMON)		+= thermal_hwmon.o
> diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
> index 2b1b0ba393a4..bcc34648580f 100644
> --- a/drivers/thermal/thermal_core.c
> +++ b/drivers/thermal/thermal_core.c
> @@ -997,6 +997,8 @@ __thermal_cooling_device_register(struct device_node *np,
>  						   THERMAL_EVENT_UNSPECIFIED);
>  	mutex_unlock(&thermal_list_lock);
>  
> +	thermal_cooling_device_setup_debugfs(cdev);
> +
>  	return cdev;
>  }
>  
> @@ -1104,6 +1106,7 @@ void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev)
>  
>  	mutex_unlock(&thermal_list_lock);
>  
> +	thermal_cooling_device_remove_debugfs(cdev);
>  	ida_simple_remove(&thermal_cdev_ida, cdev->id);
>  	device_unregister(&cdev->device);
>  }
> @@ -1544,6 +1547,8 @@ static int __init thermal_init(void)
>  		pr_warn("Thermal: Can not register suspend notifier, return %d\n",
>  			result);
>  
> +	thermal_debugfs_register();
> +
>  	return 0;
>  
>  exit_netlink:
> @@ -1563,6 +1568,7 @@ static int __init thermal_init(void)
>  
>  static void __exit thermal_exit(void)
>  {
> +	thermal_debugfs_unregister();
>  	unregister_pm_notifier(&thermal_pm_nb);
>  	of_thermal_destroy_zones();
>  	genetlink_exit();
> diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h
> index 27e3b1df7360..3a8d50aa32dc 100644
> --- a/drivers/thermal/thermal_core.h
> +++ b/drivers/thermal/thermal_core.h
> @@ -151,4 +151,22 @@ of_thermal_get_trip_points(struct thermal_zone_device *tz)
>  }
>  #endif
>  
> +#ifdef CONFIG_DEBUG_FS
> +void thermal_debugfs_register(void);
> +void thermal_debugfs_unregister(void);
> +void thermal_cooling_device_setup_debugfs(struct thermal_cooling_device *cdev);
> +void thermal_cooling_device_remove_debugfs(struct thermal_cooling_device *cdev);
> +void thermal_cooling_device_debugfs_update(struct thermal_cooling_device *cdev,
> +					   unsigned long new_state);
> +#else
> +static inline void thermal_debugfs_register(void) {}
> +static inline void thermal_debugfs_unregister(void) {}
> +static inline void
> +thermal_cooling_device_setup_debugfs(struct thermal_cooling_device *cdev) {}
> +static inline void
> +thermal_cooling_device_remove_debugfs(struct thermal_cooling_device *cdev) {}
> +static inline void
> +thermal_cooling_device_debugfs_update(struct thermal_cooling_device *cdev,
> +				      unsigned long new_state) {}
> +#endif /* debugfs */
>  #endif /* __THERMAL_CORE_H__ */
> diff --git a/drivers/thermal/thermal_debugfs.c b/drivers/thermal/thermal_debugfs.c
> new file mode 100644
> index 000000000000..077684197250
> --- /dev/null
> +++ b/drivers/thermal/thermal_debugfs.c
> @@ -0,0 +1,167 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Manages debugfs interface for thermal devices.
> + *
> + * Copyright (C) 2018 Linaro.
> + * Viresh Kumar <viresh.kumar@...aro.org>
> + */
> +
> +#include <linux/device.h>
> +#include <linux/debugfs.h>
> +#include <linux/ktime.h>
> +#include <linux/slab.h>
> +#include <linux/spinlock.h>
> +
> +#include "thermal_core.h"
> +
> +struct cooling_time_in_state {
> +	ktime_t time;
> +	struct cooling_dev_debugfs_data *data;
> +};
> +
> +struct cooling_dev_debugfs_data {
> +	struct dentry *dentry;
> +	unsigned int total_trans;
> +	unsigned long state;
> +	unsigned long max_states;
> +	ktime_t last_time;
> +	struct cooling_time_in_state *time_in_state;
> +};
> +
> +static struct dentry *rootdir;
> +static DEFINE_SPINLOCK(lock);
> +
> +static void update_time_in_state(struct cooling_dev_debugfs_data *data)
> +{
> +	ktime_t now = ktime_get(), delta;
> +
> +	delta = ktime_sub(now, data->last_time);
> +	data->time_in_state[data->state].time =
> +		ktime_add(data->time_in_state[data->state].time, delta);
> +	data->last_time = now;
> +}
> +
> +void thermal_cooling_device_debugfs_update(struct thermal_cooling_device *cdev,
> +					   unsigned long new_state)
> +{
> +	struct cooling_dev_debugfs_data *data = cdev->debugfs;
> +
> +	if (!data)
> +		return;
> +
> +	spin_lock(&lock);
> +	update_time_in_state(data);
> +	data->state = new_state;
> +	data->total_trans++;
> +	spin_unlock(&lock);
> +}
> +
> +static int read_time_in_state(void *ptr, u64 *val)
> +{
> +	struct cooling_time_in_state *time_in_state = ptr;
> +
> +	spin_lock(&lock);
> +	update_time_in_state(time_in_state->data);
> +	*val = ktime_to_ms(time_in_state->time);
> +	spin_unlock(&lock);
> +
> +	return 0;
> +}
> +
> +DEFINE_DEBUGFS_ATTRIBUTE(time_in_state_fops, read_time_in_state, NULL, "%llu\n");
> +
> +static int reset_stats(void *ptr, u64 val)
> +{
> +	struct cooling_dev_debugfs_data *data = ptr;
> +	int i;
> +
> +	spin_lock(&lock);
> +
> +	data->total_trans = 0;
> +	data->last_time = ktime_get();
> +
> +	for (i = 0; i < data->max_states; i++)
> +		data->time_in_state[i].time = ktime_set(0, 0);
> +
> +	spin_unlock(&lock);
> +
> +	return 0;
> +}
> +
> +DEFINE_DEBUGFS_ATTRIBUTE(reset_fops, NULL, reset_stats, "%llu\n");
> +
> +void thermal_cooling_device_setup_debugfs(struct thermal_cooling_device *cdev)
> +{
> +	struct cooling_dev_debugfs_data *data;
> +	struct dentry *dentry;
> +	unsigned long states;
> +	int ret, size, i;
> +	char name[NAME_MAX];
> +
> +	ret = cdev->ops->get_max_state(cdev, &states);
> +	if (ret)
> +		return;
> +
> +	size = sizeof(*data);
> +	size += sizeof(*data->time_in_state) * states;
> +
> +	data = kzalloc(size, GFP_KERNEL);
> +	if (!data)
> +		return;
> +
> +	data->time_in_state = (struct cooling_time_in_state *)(data + 1);
> +	cdev->debugfs = data;
> +	data->last_time = ktime_get();
> +	data->max_states = states;
> +
> +	data->dentry = debugfs_create_dir(dev_name(&cdev->device), rootdir);
> +	if (!data->dentry)
> +		goto free_data;
> +
> +	debugfs_create_file_unsafe("reset", 0666, data->dentry, data, &reset_fops);
> +	debugfs_create_u32("transitions", 0444, data->dentry,
> +			   &data->total_trans);
> +
> +	dentry = debugfs_create_dir("time_in_state_ms", data->dentry);
> +	if (!dentry)
> +		goto free_data_dentry;
> +
> +	for (i = 0; i < states; i++) {
> +		data->time_in_state[i].data = data;
> +		snprintf(name, NAME_MAX, "state%u", i);
> +		debugfs_create_file_unsafe(name, 0444, dentry,
> +					   &data->time_in_state[i],
> +					   &time_in_state_fops);
> +	}
> +
> +	return;
> +
> +free_data_dentry:
> +	debugfs_remove_recursive(data->dentry);
> +free_data:
> +	kfree(data);
> +	cdev->debugfs = NULL;
> +}
> +
> +void thermal_cooling_device_remove_debugfs(struct thermal_cooling_device *cdev)
> +{
> +	struct cooling_dev_debugfs_data *data = cdev->debugfs;
> +
> +	if (!data)
> +		return;
> +
> +	debugfs_remove_recursive(data->dentry);
> +	kfree(data);
> +	cdev->debugfs = NULL;
> +}
> +
> +void thermal_debugfs_register(void)
> +{
> +	rootdir = debugfs_create_dir("thermal", NULL);
> +}
> +
> +void thermal_debugfs_unregister(void)
> +{
> +	debugfs_remove_recursive(rootdir);
> +	rootdir = NULL;
> +}
> diff --git a/drivers/thermal/thermal_helpers.c b/drivers/thermal/thermal_helpers.c
> index 8cdf75adcce1..8cfb2f4e7ea3 100644
> --- a/drivers/thermal/thermal_helpers.c
> +++ b/drivers/thermal/thermal_helpers.c
> @@ -187,7 +187,10 @@ void thermal_cdev_update(struct thermal_cooling_device *cdev)
>  		if (instance->target > target)
>  			target = instance->target;
>  	}
> -	cdev->ops->set_cur_state(cdev, target);
> +
> +	if (!cdev->ops->set_cur_state(cdev, target))
> +		thermal_cooling_device_debugfs_update(cdev, target);
> +
>  	cdev->updated = true;
>  	mutex_unlock(&cdev->lock);
>  	trace_cdev_update(cdev, target);
> diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c
> index fb80c96d8f73..be997ce3f32d 100644
> --- a/drivers/thermal/thermal_sysfs.c
> +++ b/drivers/thermal/thermal_sysfs.c
> @@ -722,6 +722,7 @@ thermal_cooling_device_cur_state_store(struct device *dev,
>  	result = cdev->ops->set_cur_state(cdev, state);
>  	if (result)
>  		return result;
> +	thermal_cooling_device_debugfs_update(cdev, state);
>  	return count;
>  }
>  
> diff --git a/include/linux/thermal.h b/include/linux/thermal.h
> index 8c5302374eaa..338d165cefef 100644
> --- a/include/linux/thermal.h
> +++ b/include/linux/thermal.h
> @@ -148,6 +148,7 @@ struct thermal_cooling_device {
>  	struct device device;
>  	struct device_node *np;
>  	void *devdata;
> +	void *debugfs;
>  	const struct thermal_cooling_device_ops *ops;
>  	bool updated; /* true if the cooling device does not need update */
>  	struct mutex lock; /* protect thermal_instances list */
> -- 
> 2.15.0.194.g9af6a3dea062
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ