lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Tue, 10 Nov 2020 12:50:16 +0000
From:   Lukasz Luba <lukasz.luba@....com>
To:     Daniel Lezcano <daniel.lezcano@...aro.org>
Cc:     rafael@...nel.org, srinivas.pandruvada@...ux.intel.com,
        linux-kernel@...r.kernel.org, linux-pm@...r.kernel.org,
        rui.zhang@...el.com, "Rafael J. Wysocki" <rjw@...ysocki.net>,
        Palmer Dabbelt <palmerdabbelt@...gle.com>,
        Anup Patel <anup.patel@....com>,
        Atish Patra <atish.patra@....com>,
        Marc Zyngier <maz@...nel.org>,
        Andrew Jones <drjones@...hat.com>,
        Michael Kelley <mikelley@...rosoft.com>,
        Mike Leach <mike.leach@...aro.org>,
        Kajol Jain <kjain@...ux.ibm.com>,
        Daniel Jordan <daniel.m.jordan@...cle.com>,
        Steven Price <steven.price@....com>
Subject: Re: [PATCH 4/4] powercap/drivers/dtpm: Add CPU energy model based
 support



On 10/6/20 1:20 PM, Daniel Lezcano wrote:
> With the powercap dtpm controller, we are able to plug devices with
> power limitation features in the tree.
> 
> The following patch introduces the CPU power limitation based on the
> energy model and the performance states.
> 
> The power limitation is done at the performance domain level. If some
> CPUs are unplugged, the corresponding power will be substracted from

s/substracted/subtracted


> the performance domain total power.
> 
> It is up to the platform to initialize the dtpm tree and add the CPU.
> 
> Here is an example to create a simple tree with one root node called
> "pkg" and the cpu's performance domains.

s/cpu/CPU to be aligned with previous 'CPU'

> 
> int dtpm_register_pkg(struct dtpm_descr *descr)
> {
> 	struct dtpm *pkg;
> 	int ret;
> 
> 	pkg = dtpm_alloc();
> 	if (!pkg)
> 		return -ENOMEM;
> 
> 	ret = dtpm_register_parent(descr->name, pkg, descr->parent);
> 	if (ret)
> 		return ret;
> 
> 	return dtpm_register_cpu(pkg);
> }
> 
> struct dtpm_descr descr = {
> 	.name = "pkg",
> 	.init = dtpm_register_pkg,
> };
> DTPM_DECLARE(descr);
> 
> Signed-off-by: Daniel Lezcano <daniel.lezcano@...aro.org>
> ---
>   drivers/powercap/Kconfig    |   8 ++
>   drivers/powercap/Makefile   |   1 +
>   drivers/powercap/dtpm_cpu.c | 242 ++++++++++++++++++++++++++++++++++++
>   include/linux/cpuhotplug.h  |   1 +
>   include/linux/dtpm.h        |   3 +
>   5 files changed, 255 insertions(+)
>   create mode 100644 drivers/powercap/dtpm_cpu.c
> 
> diff --git a/drivers/powercap/Kconfig b/drivers/powercap/Kconfig
> index 777cf60300b8..240dc09e8dc2 100644
> --- a/drivers/powercap/Kconfig
> +++ b/drivers/powercap/Kconfig
> @@ -49,4 +49,12 @@ config DTPM
>   	help
>   	  This enables support for the power capping for the dynamic
>   	  thermal management userspace engine.
> +
> +config DTPM_CPU
> +	bool "Add CPU power capping based on the energy model"
> +	depends on DTPM && ENERGY_MODEL
> +	help
> +	  This enables support for CPU power limitation based on
> +	  energy model.
> +
>   endif
> diff --git a/drivers/powercap/Makefile b/drivers/powercap/Makefile
> index 6482ac52054d..fabcf388a8d3 100644
> --- a/drivers/powercap/Makefile
> +++ b/drivers/powercap/Makefile
> @@ -1,5 +1,6 @@
>   # SPDX-License-Identifier: GPL-2.0-only
>   obj-$(CONFIG_DTPM) += dtpm.o
> +obj-$(CONFIG_DTPM_CPU) += dtpm_cpu.o
>   obj-$(CONFIG_POWERCAP)	+= powercap_sys.o
>   obj-$(CONFIG_INTEL_RAPL_CORE) += intel_rapl_common.o
>   obj-$(CONFIG_INTEL_RAPL) += intel_rapl_msr.o
> diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c
> new file mode 100644
> index 000000000000..23ebf704c599
> --- /dev/null
> +++ b/drivers/powercap/dtpm_cpu.c
> @@ -0,0 +1,242 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright 2020 Linaro Limited
> + *
> + * Author: Daniel Lezcano <daniel.lezcano@...aro.org>
> + *
> + */
> +#include <linux/cpumask.h>
> +#include <linux/cpufreq.h>
> +#include <linux/cpuhotplug.h>
> +#include <linux/dtpm.h>
> +#include <linux/energy_model.h>
> +#include <linux/pm_qos.h>
> +#include <linux/slab.h>
> +#include <linux/units.h>
> +
> +static struct dtpm *__parent;
> +
> +static DEFINE_PER_CPU(struct dtpm *, dtpm_per_cpu);
> +
> +struct dtpm_cpu {
> +	struct freq_qos_request qos_req;
> +	int cpu;
> +};
> +
> +static int power_add(struct dtpm *dtpm, struct em_perf_domain *em)
> +{
> +	u64 power_min, power_max;
> +
> +	power_min = em->table[0].power;
> +	power_min *= MICROWATT_PER_MILLIWATT;
> +	power_min += dtpm->power_min;
> +
> +	power_max = em->table[em->nr_perf_states - 1].power;
> +	power_max *= MICROWATT_PER_MILLIWATT;
> +	power_max += dtpm->power_max;
> +
> +	return dtpm_update_power(dtpm, power_min, power_max);
> +}
> +
> +static int power_sub(struct dtpm *dtpm, struct em_perf_domain *em)
> +{
> +	u64 power_min, power_max;
> +
> +	power_min = em->table[0].power;
> +	power_min *= MICROWATT_PER_MILLIWATT;
> +	power_min = dtpm->power_min - power_min;
> +
> +	power_max = em->table[em->nr_perf_states - 1].power;
> +	power_max *= MICROWATT_PER_MILLIWATT;
> +	power_max = dtpm->power_max - power_max;
> +
> +	return dtpm_update_power(dtpm, power_min, power_max);
> +}
> +
> +static int set_pd_power_limit(struct powercap_zone *pcz, int cid,
> +			      u64 power_limit)
> +{
> +	struct dtpm *dtpm = to_dtpm(pcz);
> +	struct dtpm_cpu *dtpm_cpu = dtpm->private;
> +	struct em_perf_domain *pd;
> +	unsigned long freq;
> +	int i, nr_cpus;
> +
> +	spin_lock(&dtpm->lock);
> +
> +	power_limit = clamp_val(power_limit, dtpm->power_min, dtpm->power_max);
> +
> +	pd = em_cpu_get(dtpm_cpu->cpu);
> +
> +	nr_cpus = cpumask_weight(to_cpumask(pd->cpus));
> +
> +	for (i = 0; i < pd->nr_perf_states; i++) {
> +
> +		u64 power = pd->table[i].power * MICROWATT_PER_MILLIWATT;
> +
> +		if ((power * nr_cpus) > power_limit)
> +			break;
> +	}
> +
> +	freq = pd->table[i - 1].frequency;
> +
> +	freq_qos_update_request(&dtpm_cpu->qos_req, freq);
> +
> +	dtpm->power_limit = power_limit;
> +
> +	spin_unlock(&dtpm->lock);
> +
> +	return 0;
> +}
> +
> +static int get_pd_power_limit(struct powercap_zone *pcz, int cid, u64 *data)
> +{
> +	struct dtpm *dtpm = to_dtpm(pcz);
> +
> +	spin_lock(&dtpm->lock);
> +	*data = dtpm->power_max;
> +	spin_unlock(&dtpm->lock);
> +
> +	return 0;
> +}
> +
> +static int get_pd_power_uw(struct powercap_zone *pcz, u64 *power_uw)
> +{
> +	struct dtpm *dtpm = to_dtpm(pcz);
> +	struct dtpm_cpu *dtpm_cpu = dtpm->private;
> +	struct em_perf_domain *pd;
> +	unsigned long freq;
> +	int i, nr_cpus;
> +
> +	freq = cpufreq_quick_get(dtpm_cpu->cpu);
> +	pd = em_cpu_get(dtpm_cpu->cpu);
> +	nr_cpus = cpumask_weight(to_cpumask(pd->cpus));
> +
> +	for (i = 0; i < pd->nr_perf_states; i++) {
> +
> +		if (pd->table[i].frequency < freq)
> +			continue;
> +
> +		*power_uw = pd->table[i].power *
> +			MICROWATT_PER_MILLIWATT * nr_cpus;
> +
> +		return 0;
> +	}
> +
> +	return -EINVAL;
> +}
> +
> +static int cpu_release_zone(struct powercap_zone *pcz)
> +{
> +	struct dtpm *dtpm = to_dtpm(pcz);
> +	struct dtpm_cpu *dtpm_cpu = dtpm->private;
> +
> +	freq_qos_remove_request(&dtpm_cpu->qos_req);
> +
> +	return dtpm_release_zone(pcz);

The dtpm_cpu should be freed somewhere, maybe here or below.

> +}
> +
> +static struct powercap_zone_constraint_ops pd_constraint_ops = {
> +	.set_power_limit_uw = set_pd_power_limit,
> +	.get_power_limit_uw = get_pd_power_limit,
> +};
> +
> +static struct powercap_zone_ops pd_zone_ops = {
> +	.get_power_uw = get_pd_power_uw,
> +	.release = cpu_release_zone,
> +};
> +
> +static int cpuhp_dtpm_cpu_offline(unsigned int cpu)
> +{
> +	struct cpufreq_policy *policy;
> +	struct em_perf_domain *pd;
> +	struct dtpm *dtpm;
> +
> +	policy = cpufreq_cpu_get(cpu);
> +
> +	if (!policy)
> +		return 0;
> +
> +	pd = em_cpu_get(cpu);
> +	if (!pd)
> +		return -EINVAL;
> +
> +	dtpm = per_cpu(dtpm_per_cpu, cpu);
> +
> +	power_sub(dtpm, pd);
> +
> +	if (cpumask_weight(policy->cpus) != 1)
> +		return 0;
> +
> +	for_each_cpu(cpu, policy->related_cpus)
> +		per_cpu(dtpm_per_cpu, cpu) = NULL;
> +
> +	dtpm_unregister(dtpm);

Is it the right place to call kfree(dtpm_cpu)?

> +
> +	return 0;
> +}
> +
> +static int cpuhp_dtpm_cpu_online(unsigned int cpu)
> +{
> +        struct dtpm *dtpm;
> +	struct dtpm_cpu *dtpm_cpu;
> +	struct cpufreq_policy *policy;
> +	struct em_perf_domain *pd;
> +	char name[CPUFREQ_NAME_LEN];
> +	int ret;
> +
> +	policy = cpufreq_cpu_get(cpu);
> +
> +	if (!policy)
> +		return 0;
> +
> +	pd = em_cpu_get(cpu);
> +	if (!pd)
> +		return -EINVAL;
> +
> +	dtpm = per_cpu(dtpm_per_cpu, cpu);
> +	if (dtpm)
> +		return power_add(dtpm, pd);
> +
> +	dtpm = dtpm_alloc();
> +	if (!dtpm)
> +		return -EINVAL;
> +
> +	dtpm_cpu = kzalloc(sizeof(dtpm_cpu), GFP_KERNEL);

We have to free this dtpm_cpu somewhere.

> +	if (!dtpm_cpu)
> +		return -ENOMEM;
> +
> +	dtpm->private = dtpm_cpu;
> +	dtpm_cpu->cpu = cpu;
> +
> +	for_each_cpu(cpu, policy->related_cpus)
> +		per_cpu(dtpm_per_cpu, cpu) = dtpm;
> +
> +	ret = power_add(dtpm, pd);
> +	if (ret)
> +		return ret;
> +
> +	dtpm->power_limit = dtpm->power_max;
> +
> +	sprintf(name, "cpu%d", dtpm_cpu->cpu);
> +
> +	ret = dtpm_register(name, dtpm, __parent, &pd_zone_ops,
> +			    1, &pd_constraint_ops);
> +	if (ret)
> +		return ret;
> +
> +	ret = freq_qos_add_request(&policy->constraints,
> +				   &dtpm_cpu->qos_req, FREQ_QOS_MAX,
> +				   pd->table[pd->nr_perf_states - 1].frequency);
> +	return ret;
> +}
> +
> +int dtpm_register_cpu(struct dtpm *parent)
> +{
> +	__parent = parent;
> +
> +	return cpuhp_setup_state(CPUHP_AP_DTPM_CPU_ONLINE,
> +				 "dtpm_cpu:online",
> +				 cpuhp_dtpm_cpu_online,
> +				 cpuhp_dtpm_cpu_offline);

Shouldn't be the DTPM_CPU dependent on HOTPLUG_CPU in Kconfig?
Most of platforms enable it by default, though.

> +}
> diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
> index bf9181cef444..6792bad4a435 100644
> --- a/include/linux/cpuhotplug.h
> +++ b/include/linux/cpuhotplug.h
> @@ -190,6 +190,7 @@ enum cpuhp_state {
>   	CPUHP_AP_ONLINE_DYN_END		= CPUHP_AP_ONLINE_DYN + 30,
>   	CPUHP_AP_X86_HPET_ONLINE,
>   	CPUHP_AP_X86_KVM_CLK_ONLINE,
> +	CPUHP_AP_DTPM_CPU_ONLINE,
>   	CPUHP_AP_ACTIVE,
>   	CPUHP_ONLINE,
>   };
> diff --git a/include/linux/dtpm.h b/include/linux/dtpm.h
> index 6696bdcfdb87..b62215a13baa 100644
> --- a/include/linux/dtpm.h
> +++ b/include/linux/dtpm.h
> @@ -70,4 +70,7 @@ int dtpm_register_parent(const char *name, struct dtpm *dtpm,
>   int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent,
>   		  struct powercap_zone_ops *ops, int nr_constraints,
>   		  struct powercap_zone_constraint_ops *const_ops);
> +
> +int dtpm_register_cpu(struct dtpm *parent);
> +

This function needs a sibling under in #ifdef CONFIG_DTPM_CPU #else.

>   #endif
> 

The code might a few print debugs which helps with experimenting.

In general, it looks good.

Regards,
Lukasz

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ