lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20130402134255.GD5488@pd.tnic>
Date:	Tue, 2 Apr 2013 15:42:55 +0200
From:	Borislav Petkov <bp@...en8.de>
To:	Jacob Shin <jacob.shin@....com>
Cc:	"Rafael J. Wysocki" <rjw@...k.pl>, cpufreq@...r.kernel.org,
	linux-pm@...r.kernel.org, linux-kernel@...r.kernel.org,
	Viresh Kumar <viresh.kumar@...aro.org>
Subject: Re: [PATCH V2 2/2] cpufreq: AMD "frequency sensitivity feedback"
 powersave bias for ondemand governor

On Thu, Mar 28, 2013 at 01:24:17PM -0500, Jacob Shin wrote:
> Future AMD processors, starting with Family 16h, can provide software
> with feedback on how the workload may respond to frequency change --
> memory-bound workloads will not benefit from higher frequency, where
> as compute-bound workloads will. This patch enables this "frequency
> sensitivity feedback" to aid the ondemand governor to make better
> frequency change decisions by hooking into the powersave bias.
> 
> Signed-off-by: Jacob Shin <jacob.shin@....com>
> ---
>  arch/x86/include/uapi/asm/msr-index.h  |    1 +
>  drivers/cpufreq/Kconfig.x86            |   10 +++
>  drivers/cpufreq/Makefile               |    1 +
>  drivers/cpufreq/amd_freq_sensitivity.c |  147 ++++++++++++++++++++++++++++++++
>  4 files changed, 159 insertions(+)
>  create mode 100644 drivers/cpufreq/amd_freq_sensitivity.c
> 
> diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
> index 7a060f4..b2e6c49 100644
> --- a/arch/x86/include/uapi/asm/msr-index.h
> +++ b/arch/x86/include/uapi/asm/msr-index.h
> @@ -173,6 +173,7 @@
>  #define MSR_AMD64_TSC_RATIO		0xc0000104
>  #define MSR_AMD64_NB_CFG		0xc001001f
>  #define MSR_AMD64_PATCH_LOADER		0xc0010020
> +#define MSR_AMD64_FREQ_SENSITIVITY	0xc0010080
>  #define MSR_AMD64_OSVW_ID_LENGTH	0xc0010140
>  #define MSR_AMD64_OSVW_STATUS		0xc0010141
>  #define MSR_AMD64_DC_CFG		0xc0011022

My guess is, this MSR won't be used outside of cpufreq so you probably
want to define it there, in amd_freq_sensitivity.c

> diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86
> index d7dc0ed..6c714b0 100644
> --- a/drivers/cpufreq/Kconfig.x86
> +++ b/drivers/cpufreq/Kconfig.x86
> @@ -129,6 +129,16 @@ config X86_POWERNOW_K8
>  
>  	  For details, take a look at <file:Documentation/cpu-freq/>.
>  
> +config X86_AMD_FREQ_SENSITIVITY
> +	tristate "AMD 'frequency sensitivity feedback' powersave bias"

Why in ' '? Isn't that the final name?

> +	depends on CPU_FREQ_GOV_ONDEMAND && X86_ACPI_CPUFREQ

depends on CPU_SUP_AMD

> +	help
> +	  This adds support for 'frequency sensitivity feedback' feature on
> +	  supported AMD processors, which hooks into the ondemand governor's
> +	  powersave bias to influence frequency change decisions.

Your description about the feature in the 0/2 message is much better
than this one here. How about adding it here too?

> +
> +	  If in doubt, say N.
> +
>  config X86_GX_SUSPMOD
>  	tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation"
>  	depends on X86_32 && PCI
> diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
> index 863fd18..01dfdaf 100644
> --- a/drivers/cpufreq/Makefile
> +++ b/drivers/cpufreq/Makefile
> @@ -41,6 +41,7 @@ obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO)	+= speedstep-centrino.o
>  obj-$(CONFIG_X86_P4_CLOCKMOD)		+= p4-clockmod.o
>  obj-$(CONFIG_X86_CPUFREQ_NFORCE2)	+= cpufreq-nforce2.o
>  obj-$(CONFIG_X86_INTEL_PSTATE)		+= intel_pstate.o
> +obj-$(CONFIG_X86_AMD_FREQ_SENSITIVITY)	+= amd_freq_sensitivity.o
>  
>  ##################################################################################
>  # ARM SoC drivers
> diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c
> new file mode 100644
> index 0000000..997feb0
> --- /dev/null
> +++ b/drivers/cpufreq/amd_freq_sensitivity.c
> @@ -0,0 +1,147 @@
> +/*
> + * amd_freq_sensitivity.c: AMD "frequency sensitivity feedback" powersave bias
> + *                         for ondemand governor.
> + *
> + * Copyright (C) 2013 Advanced Micro Devices, Inc.

You probably want to leave an email address in here for contacting you
when it is b0rked. :-)

> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/module.h>
> +#include <linux/types.h>
> +#include <linux/percpu-defs.h>
> +#include <linux/init.h>
> +
> +#include "cpufreq_governor.h"
> +
> +#define PROC_FEEDBACK_INTERFACE_SHIFT		11

Yeah, that's a bit cumbersome. Just define a normal x86 feature bit in
cpufeature.h and then you can use static_cpu_has below:

	if (!static_cpu_has(AMD_PROC_FEEDBACK_INTERFACE))
		return -ENODEV;


> +#define CLASS_CODE_SHIFT			56
> +#define CLASS_CODE_MASK				0xff
> +#define CLASS_CODE_CORE_FREQUENCY_SENSITIVITY	0x01
> +
> +static u32 msr_addr;
> +
> +struct cpu_data_t {
> +	u64 actual;
> +	u64 reference;
> +	unsigned int freq_prev;
> +};
> +
> +static DEFINE_PER_CPU(struct cpu_data_t, cpu_data);
> +
> +static unsigned int amd_powersave_bias_target(struct cpufreq_policy *policy,
> +		unsigned int freq_next, unsigned int relation)

arg alignment.

> +{
> +	int sensitivity;
> +	long d_actual, d_reference;
> +	struct msr actual, reference;
> +	struct cpu_data_t *data = &per_cpu(cpu_data, policy->cpu);
> +	struct dbs_data *od_data = policy->governor_data;
> +	struct od_dbs_tuners *od_tuners = od_data->tuners;
> +	struct od_cpu_dbs_info_s *od_info =
> +		od_data->cdata->get_cpu_dbs_info_s(policy->cpu);
> +
> +	rdmsr_on_cpu(policy->cpu, msr_addr, &actual.l, &actual.h);
> +	rdmsr_on_cpu(policy->cpu, msr_addr + 1, &reference.l, &reference.h);
> +	actual.h &= 0x00ffffff;
> +	reference.h &= 0x00ffffff;
> +
> +	if (!od_info->freq_table)
> +		goto out;

Ok, this check is definitely misplaced. So if we don't have
->freq_table, we can save us the MSR accesses above and simply return
freq_next, right? So basically you want to push the check before the MSR
accesses and do:

	if (!od_info->freq_table)
		return freq_next;

Or am I missing something?

> +
> +	/* counter wrapped around, so push until next check */
> +	if (actual.q < data->actual || reference.q < data->reference) {
> +		freq_next = policy->cur;
> +		goto out;
> +	}
> +
> +	d_actual = actual.q - data->actual;
> +	d_reference = reference.q - data->reference;
> +
> +	/* divide by 0, so push as well */

What do you mean by "push as well"? No change, right?

> +	if (d_reference == 0) {
> +		freq_next = policy->cur;
> +		goto out;
> +	}
> +
> +	sensitivity = 1000 - (1000 * (d_reference - d_actual) / d_reference);

Ok, now this naked 1000 could very well use a define here like you do
for your other values you're using :-).

> +	if (sensitivity > 1000)
> +		sensitivity = 1000;
> +	else if (sensitivity < 0)
> +		sensitivity = 0;

	clamp(sensitivity, 0, 1000);

> +
> +	/* this workload is not CPU bound, so choose a lower freq */
> +	if (sensitivity < od_tuners->powersave_bias) {

Yeah, this ->powersave_bias usage needs more discussion, as Thomas said
earlier.

> +		if (data->freq_prev == policy->cur)
> +			freq_next = policy->cur;
> +
> +		if (freq_next > policy->cur)
> +			freq_next = policy->cur;
> +		else if (freq_next < policy->cur)
> +			freq_next = policy->min;
> +		else {
> +			unsigned int index = 0;
> +
> +			cpufreq_frequency_table_target(policy,
> +				od_info->freq_table, policy->cur - 1,
> +				CPUFREQ_RELATION_H, &index);
> +			freq_next = od_info->freq_table[index].frequency;
> +		}
> +
> +		data->freq_prev = freq_next;
> +	} else
> +		data->freq_prev = 0;
> +
> +out:
> +	data->actual = actual.q;
> +	data->reference = reference.q;
> +	return freq_next;
> +}
> +
> +static int __init amd_freq_sensitivity_init(void)
> +{
> +	int i;
> +	u32 eax, edx, dummy;
> +
> +	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
> +		return -ENODEV;
> +
> +	cpuid(0x80000007, &eax, &dummy, &dummy, &edx);
> +
> +	if (!(edx & (1 << PROC_FEEDBACK_INTERFACE_SHIFT)))
> +		return -ENODEV;
> +
> +	for (i = 0; i < (eax & 0xf); i++) {
> +		u64 val;
> +		u32 addr = MSR_AMD64_FREQ_SENSITIVITY + (i * 2);
> +
> +		rdmsrl(addr, val);

Pls use rdmsrl_safe variant for virtualization's sake and check its
retval befor using it below.

> +
> +		if (((val >> CLASS_CODE_SHIFT) & CLASS_CODE_MASK)
> +			== CLASS_CODE_CORE_FREQUENCY_SENSITIVITY) {
> +			msr_addr = addr;
> +			break;
> +		}
> +	}

What is this thing doing? There's a whole range of MSRs which can give
you freq feedback? Or only the two as it is done above for actual and
reference?

Also, you can simplify the check provided that bits [63:56] denote
support is present if they're not 0:

	if (val >> CLASS_CODE_SHIFT)
		msr_addr = addr;

> +
> +	if (!msr_addr)
> +		return -ENODEV;
> +
> +	od_register_powersave_bias_function(amd_powersave_bias_target);
> +	return 0;
> +}
> +module_init(amd_freq_sensitivity_init);
> +
> +static void __exit amd_freq_sensitivity_exit(void)
> +{
> +	od_unregister_powersave_bias_function();
> +}
> +module_exit(amd_freq_sensitivity_exit);
> +
> +MODULE_AUTHOR("Jacob Shin <jacob.shin@....com>");
> +MODULE_DESCRIPTION("AMD 'frequency sensitivity feedback' powersave bias for "
> +		"the ondemand governor.");
> +MODULE_LICENSE("GPL");

Thanks.

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ