lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20191126080315.zc5rllbsc5utuhzq@vireshk-i7>
Date:   Tue, 26 Nov 2019 13:33:15 +0530
From:   Viresh Kumar <viresh.kumar@...aro.org>
To:     Daniel Lezcano <daniel.lezcano@...aro.org>
Cc:     rjw@...ysocki.net, edubezval@...il.com, rui.zhang@...el.com,
        linux-pm@...r.kernel.org, amit.kucheria@...aro.org,
        linux-kernel@...r.kernel.org
Subject: Re: [PATCH 3/3] thermal/drivers/cpu_cooling: Introduce the cpu idle
 cooling driver

On 13-11-19, 09:40, Daniel Lezcano wrote:
> The cpu idle cooling device offers a new method to cool down a CPU by
> injecting idle cycles at runtime.
> 
> It has some similarities with the intel power clamp driver but it is
> actually designed to be more generic and relying on the idle injection
> powercap framework.
> 
> The idle injection cycle is fixed while the running cycle is variable. That
> allows to have control on the device reactivity for the user experience.
> 
> An idle state powering down the CPU or the cluster will allow to drop
> the static leakage, thus restoring the heat capacity of the SoC. It
> can be set with a trip point between the hot and the critical points,
> giving the opportunity to prevent a hard reset of the system when the
> cpufreq cooling fails to cool down the CPU.
> 
> With more sophisticated boards having a per core sensor, the idle
> cooling device allows to cool down a single core without throttling
> the compute capacity of several cpus belonging to the same clock line,
> so it could be used in collaboration with the cpufreq cooling device.
> 
> Signed-off-by: Daniel Lezcano <daniel.lezcano@...aro.org>
> ---
>  drivers/thermal/Kconfig           |   7 +
>  drivers/thermal/Makefile          |   1 +
>  drivers/thermal/cpuidle_cooling.c | 233 ++++++++++++++++++++++++++++++
>  include/linux/cpu_cooling.h       |  22 +++
>  4 files changed, 263 insertions(+)
>  create mode 100644 drivers/thermal/cpuidle_cooling.c
> 
> diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
> index 2b82c4861091..00d69906c508 100644
> --- a/drivers/thermal/Kconfig
> +++ b/drivers/thermal/Kconfig
> @@ -168,6 +168,13 @@ config CPU_FREQ_THERMAL
>  	  This will be useful for platforms using the generic thermal interface
>  	  and not the ACPI interface.
>  
> +config CPU_IDLE_THERMAL
> +	bool "CPU idle cooling device"
> +	depends on IDLE_INJECT
> +	help
> +	  This implements the CPU cooling mechanism through
> +	  idle injection. This will throttle the CPU by injecting
> +	  idle cycle.
>  endif
>  
>  config CLOCK_THERMAL
> diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
> index d3b01cc96981..9c8aa2d4bd28 100644
> --- a/drivers/thermal/Makefile
> +++ b/drivers/thermal/Makefile
> @@ -20,6 +20,7 @@ thermal_sys-$(CONFIG_THERMAL_GOV_POWER_ALLOCATOR)	+= power_allocator.o
>  
>  # cpufreq cooling
>  thermal_sys-$(CONFIG_CPU_FREQ_THERMAL)	+= cpu_cooling.o

We should really rename this as cpufreq_cooling now :)

> +thermal_sys-$(CONFIG_CPU_IDLE_THERMAL)	+= cpuidle_cooling.o
>  
>  # clock cooling
>  thermal_sys-$(CONFIG_CLOCK_THERMAL)	+= clock_cooling.o
> diff --git a/drivers/thermal/cpuidle_cooling.c b/drivers/thermal/cpuidle_cooling.c
> new file mode 100644
> index 000000000000..6e911fa87c47
> --- /dev/null
> +++ b/drivers/thermal/cpuidle_cooling.c
> @@ -0,0 +1,233 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + *  Copyright (C) 2019 Linaro Limited.
> + *
> + *  Author: Daniel Lezcano <daniel.lezcano@...aro.org>
> + *
> + */
> +#include <linux/cpu_cooling.h>
> +#include <linux/cpuidle.h>
> +#include <linux/err.h>
> +#include <linux/idle_inject.h>
> +#include <linux/idr.h>
> +#include <linux/slab.h>
> +#include <linux/thermal.h>
> +
> +/**
> + * struct cpuidle_cooling_device - data for the idle cooling device
> + * @ii_dev: an atomic to keep track of the last task exiting the idle cycle
> + * @idle_duration_us: an integer defining the duration of the idle injection
> + * @state: an normalized integer giving the state of the cooling device
> + */
> +struct cpuidle_cooling_device {
> +	struct idle_inject_device *ii_dev;
> +	unsigned int idle_duration_us;

This field is set with TICK_USEC and nothing else. Why not just use TICK_USEC
instead at all the places and remove this field?

> +	unsigned long state;
> +};
> +
> +static DEFINE_IDA(cpuidle_ida);
> +
> +/**
> + * cpuidle_cooling_runtime - Running time computation
> + * @idle_duration_us: the idle cooling device
> + * @state: a percentile based number
> + *
> + * The running duration is computed from the idle injection duration
> + * which is fixed. If we reach 100% of idle injection ratio, that
> + * means the running duration is zero. If we have a 50% ratio
> + * injection, that means we have equal duration for idle and for
> + * running duration.
> + *
> + * The formula is deduced as the following:
> + *
> + *  running = idle x ((100 / ratio) - 1)
> + *
> + * For precision purpose for integer math, we use the following:
> + *
> + *  running = (idle x 100) / ratio - idle
> + *
> + * For example, if we have an injected duration of 50%, then we end up
> + * with 10ms of idle injection and 10ms of running duration.
> + *
> + * Returns an unsigned int for an usec based runtime duration.
> + */
> +static unsigned int cpuidle_cooling_runtime(unsigned int idle_duration_us,
> +					    unsigned long state)
> +{
> +	if (!state)
> +		return 0;
> +
> +	return ((idle_duration_us * 100) / state) - idle_duration_us;
> +}
> +
> +/**
> + * cpuidle_cooling_get_max_state - Get the maximum state
> + * @cdev  : the thermal cooling device
> + * @state : a pointer to the state variable to be filled
> + *
> + * The function always gives 100 as the injection ratio is percentile
> + * based for consistency accros different platforms.
> + *
> + * The function can not fail, it always returns zero.
> + */
> +static int cpuidle_cooling_get_max_state(struct thermal_cooling_device *cdev,
> +					 unsigned long *state)
> +{
> +	/*
> +	 * Depending on the configuration or the hardware, the running
> +	 * cycle and the idle cycle could be different. We want unify
> +	 * that to an 0..100 interval, so the set state interface will
> +	 * be the same whatever the platform is.
> +	 *
> +	 * The state 100% will make the cluster 100% ... idle. A 0%
> +	 * injection ratio means no idle injection at all and 50%
> +	 * means for 10ms of idle injection, we have 10ms of running
> +	 * time.
> +	 */
> +	*state = 100;
> +
> +	return 0;
> +}
> +
> +/**
> + * cpuidle_cooling_get_cur_state - Get the current cooling state
> + * @cdev: the thermal cooling device
> + * @state: a pointer to the state
> + *
> + * The function just copy the state value from the private thermal
> + * cooling device structure, the mapping is 1 <-> 1.
> + *
> + * The function can not fail, it always returns zero.
> + */
> +static int cpuidle_cooling_get_cur_state(struct thermal_cooling_device *cdev,
> +					 unsigned long *state)
> +{
> +	struct cpuidle_cooling_device *idle_cdev = cdev->devdata;
> +
> +	*state = idle_cdev->state;
> +
> +	return 0;
> +}
> +
> +/**
> + * cpuidle_cooling_set_cur_state - Set the current cooling state
> + * @cdev: the thermal cooling device
> + * @state: the target state
> + *
> + * The function checks first if we are initiating the mitigation which
> + * in turn wakes up all the idle injection tasks belonging to the idle
> + * cooling device. In any case, it updates the internal state for the
> + * cooling device.
> + *
> + * The function can not fail, it always returns zero.
> + */
> +static int cpuidle_cooling_set_cur_state(struct thermal_cooling_device *cdev,
> +					 unsigned long state)
> +{
> +	struct cpuidle_cooling_device *idle_cdev = cdev->devdata;
> +	struct idle_inject_device *ii_dev = idle_cdev->ii_dev;
> +	unsigned long current_state = idle_cdev->state;
> +	unsigned int runtime_us;
> +
> +	idle_cdev->state = state;
> +
> +	runtime_us = cpuidle_cooling_runtime(idle_cdev->idle_duration_us,
> +					     state);
> +
> +	idle_inject_set_duration(ii_dev, runtime_us,
> +				 idle_cdev->idle_duration_us);
> +
> +	if (current_state == 0 && state > 0) {
> +		idle_inject_start(ii_dev);
> +	} else if (current_state > 0 && !state)  {
> +		idle_inject_stop(ii_dev);
> +	}
> +
> +	return 0;
> +}
> +
> +/**
> + * cpuidle_cooling_ops - thermal cooling device ops
> + */
> +static struct thermal_cooling_device_ops cpuidle_cooling_ops = {
> +	.get_max_state = cpuidle_cooling_get_max_state,
> +	.get_cur_state = cpuidle_cooling_get_cur_state,
> +	.set_cur_state = cpuidle_cooling_set_cur_state,
> +};
> +
> +/**
> + * cpuidle_of_cooling_register - Idle cooling device initialization function
> + * @drv: a cpuidle driver structure pointer
> + *
> + * This function is in charge of creating a cooling device per cpuidle
> + * driver and register it to thermal framework.
> + *
> + * Returns a valid pointer to a thermal cooling device, a PTR_ERR
> + * corresponding to the error detected in the underlying subsystems.
> + */
> +struct thermal_cooling_device *
> +__init cpuidle_of_cooling_register(struct device_node *np,
> +				   struct cpuidle_driver *drv)
> +{
> +	struct idle_inject_device *ii_dev;
> +	struct cpuidle_cooling_device *idle_cdev;
> +	struct thermal_cooling_device *cdev;
> +	char dev_name[THERMAL_NAME_LENGTH];
> +	int id, ret;
> +
> +	ii_dev = idle_inject_register(drv->cpumask);
> +	if (IS_ERR(ii_dev)) {
> +		ret = PTR_ERR(ii_dev);
> +		goto out;
> +	}
> +

I am not sure what's the best way of doing this, but I would have done this
after allocating idle_cdev and id.

> +	idle_cdev = kzalloc(sizeof(*idle_cdev), GFP_KERNEL);
> +	if (!idle_cdev) {
> +		ret = -ENOMEM;
> +		goto out_unregister;
> +	}
> +
> +	id = ida_simple_get(&cpuidle_ida, 0, 0, GFP_KERNEL);
> +	if (id < 0) {
> +		ret = id;
> +		goto out_kfree;
> +	}
> +
> +	idle_cdev->ii_dev = ii_dev;
> +	idle_cdev->idle_duration_us = TICK_USEC;
> +
> +	snprintf(dev_name, sizeof(dev_name), "thermal-idle-%d", id);
> +
> +	cdev = thermal_of_cooling_device_register(np, dev_name, idle_cdev,
> +						  &cpuidle_cooling_ops);
> +	if (IS_ERR(cdev)) {
> +		ret = PTR_ERR(cdev);
> +		goto out_id;
> +	}
> +
> +	return cdev;
> +out_id:
> +	ida_simple_remove(&cpuidle_ida, id);
> +out_kfree:
> +	kfree(idle_cdev);
> +out_unregister:
> +	idle_inject_unregister(ii_dev);
> +out:
> +	return ERR_PTR(ret);
> +}
> +
> +/**
> + * cpuidle_cooling_register - Idle cooling device initialization function
> + * @drv: a cpuidle driver structure pointer
> + *
> + * This function is in charge of creating a cooling device per cpuidle
> + * driver and register it to thermal framework.
> + *
> + * Returns a valid pointer to a thermal cooling device, a PTR_ERR
> + * corresponding to the error detected in the underlying subsystems.
> + */
> +struct thermal_cooling_device *
> +__init cpuidle_cooling_register(struct cpuidle_driver *drv)
> +{
> +	return cpuidle_of_cooling_register(NULL, drv);
> +}
> diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h
> index 3cdd85f987d7..7873ac2f740b 100644
> --- a/include/linux/cpu_cooling.h
> +++ b/include/linux/cpu_cooling.h
> @@ -60,4 +60,26 @@ of_cpufreq_cooling_register(struct cpufreq_policy *policy)
>  }
>  #endif /* CONFIG_CPU_FREQ_THERMAL */
>  
> +struct cpuidle_driver;
> +
> +#ifdef CONFIG_CPU_IDLE_THERMAL
> +extern struct thermal_cooling_device *
> +__init cpuidle_cooling_register(struct cpuidle_driver *drv);
> +extern struct thermal_cooling_device *
> +__init cpuidle_of_cooling_register(struct device_node *np,
> +				   struct cpuidle_driver *drv);
> +#else /* CONFIG_CPU_IDLE_THERMAL */
> +static inline struct thermal_cooling_device *
> +__init cpuidle_cooling_register(struct cpuidle_driver *drv)
> +{
> +	return ERR_PTR(-EINVAL);
> +}
> +extern struct thermal_cooling_device *
> +__init cpuidle_of_cooling_register(struct device_node *np,
> +				   struct cpuidle_driver *drv)
> +{
> +	return ERR_PTR(-EINVAL);
> +}
> +#endif /* CONFIG_CPU_IDLE_THERMAL */
> +
>  #endif /* __CPU_COOLING_H__ */
> -- 
> 2.17.1

-- 
viresh

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ