lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Wed, 19 Jun 2024 03:23:07 +0000
From: "Yuan, Perry" <Perry.Yuan@....com>
To: "Limonciello, Mario" <Mario.Limonciello@....com>,
	"rafael.j.wysocki@...el.com" <rafael.j.wysocki@...el.com>,
	"viresh.kumar@...aro.org" <viresh.kumar@...aro.org>, "Huang, Ray"
	<Ray.Huang@....com>, "Shenoy, Gautham Ranjal" <gautham.shenoy@....com>,
	"Petkov, Borislav" <Borislav.Petkov@....com>
CC: "Deucher, Alexander" <Alexander.Deucher@....com>, "Huang, Shimmer"
	<Shimmer.Huang@....com>, "Du, Xiaojian" <Xiaojian.Du@....com>, "Meng, Li
 (Jassmine)" <Li.Meng@....com>, "linux-pm@...r.kernel.org"
	<linux-pm@...r.kernel.org>, "linux-kernel@...r.kernel.org"
	<linux-kernel@...r.kernel.org>
Subject: RE: [PATCH v4 09/11] cpufreq: amd-pstate: implement heterogeneous
 core topology for highest performance initialization

[AMD Official Use Only - AMD Internal Distribution Only]

> -----Original Message-----
> From: Limonciello, Mario <Mario.Limonciello@....com>
> Sent: Wednesday, June 19, 2024 3:23 AM
> To: Yuan, Perry <Perry.Yuan@....com>; rafael.j.wysocki@...el.com;
> viresh.kumar@...aro.org; Huang, Ray <Ray.Huang@....com>; Shenoy,
> Gautham Ranjal <gautham.shenoy@....com>; Petkov, Borislav
> <Borislav.Petkov@....com>
> Cc: Deucher, Alexander <Alexander.Deucher@....com>; Huang, Shimmer
> <Shimmer.Huang@....com>; Du, Xiaojian <Xiaojian.Du@....com>; Meng,
> Li (Jassmine) <Li.Meng@....com>; linux-pm@...r.kernel.org; linux-
> kernel@...r.kernel.org
> Subject: Re: [PATCH v4 09/11] cpufreq: amd-pstate: implement
> heterogeneous core topology for highest performance initialization
>
> On 6/17/2024 01:59, Perry Yuan wrote:
> > Introduces an optimization to the AMD-Pstate driver by implementing a
> > heterogeneous core topology for the initialization of the highest
> > performance value while driver loading.
> > The two core types supported are "performance" and "efficiency".
> > Each core type has different highest performance and frequency values
> > configured by the platform.  The `amd_pstate` driver needs to identify
> > the type of core to correctly set an appropriate highest perf value.
> >
> > X86_FEATURE_HETERO_CORE_TOPOLOGY is used to identify whether the
> > processor support heterogeneous core type by reading CPUID leaf
> > Fn_0x80000026_EAX and bit 30. if the bit is set as one, then
> > amd_pstate driver will check EBX 30:28 bits to get the core type.
> >
> > Reference:
> > See the page 119 of PPR for AMD Family 19h Model 61h B1, docID 56713
> >
> > Signed-off-by: Perry Yuan <perry.yuan@....com>
> > ---
> >   arch/x86/include/asm/processor.h |  2 ++
> >   arch/x86/kernel/cpu/amd.c        | 19 ++++++++++++
> >   drivers/cpufreq/amd-pstate.c     | 53 ++++++++++++++++++++++++++++++-
> -
> >   3 files changed, 71 insertions(+), 3 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/processor.h
> > b/arch/x86/include/asm/processor.h
> > index cb4f6c513c48..223aa58e2d5c 100644
> > --- a/arch/x86/include/asm/processor.h
> > +++ b/arch/x86/include/asm/processor.h
> > @@ -694,10 +694,12 @@ static inline u32 per_cpu_l2c_id(unsigned int
> cpu)
> >   extern u32 amd_get_highest_perf(void);
> >   extern void amd_clear_divider(void);
> >   extern void amd_check_microcode(void);
> > +extern int amd_get_this_core_type(void);
> >   #else
> >   static inline u32 amd_get_highest_perf(void)              { return 0; }
> >   static inline void amd_clear_divider(void)                { }
> >   static inline void amd_check_microcode(void)              { }
> > +static inline int amd_get_this_core_type(void)             { return -1; }
> >   #endif
> >
> >   extern unsigned long arch_align_stack(unsigned long sp); diff --git
> > a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index
> > 44df3f11e731..62a4ef21ef79 100644
> > --- a/arch/x86/kernel/cpu/amd.c
> > +++ b/arch/x86/kernel/cpu/amd.c
> > @@ -1231,3 +1231,22 @@ void noinstr amd_clear_divider(void)
> >                  :: "a" (0), "d" (0), "r" (1));
> >   }
> >   EXPORT_SYMBOL_GPL(amd_clear_divider);
> > +
> > +#define X86_CPU_TYPE_ID_SHIFT      28
> > +
> > +/**
> > + * amd_get_this_core_type - Get the type of this heterogeneous CPU
> > + *
> > + * Returns the CPU type [31:28] (i.e., performance or efficient) of
> > + * a CPU in the processor.
> > + * If the processor has no core type support, returns -1.
> > + */
> > +
> > +int amd_get_this_core_type(void)
>
>
> Did you miss my feedback from v3?  I don't see changes for the return type
> or for returning CPU_CORE_TYPE_NO_HETERO_SUP instead of -1.

CPU_CORE_TYPE_NO_HETERO_SUP is defined in amd-pstate.c; if we want to use it here, it would need to be defined in a shared header instead.
Boris also mentioned that another patchset is in progress to export core types in the future, so we can use "-1" in the short term.
Once the core type patches finalize the solution, we can rework the pstate driver.
First, let's provide a workable solution, and then improve the driver in follow-up patches.

Perry.


>
>
> > +{
> > +   if (!cpu_feature_enabled(X86_FEATURE_HETERO_CORE_TOPOLOGY))
> > +           return -1;
> > +
> > +   return cpuid_ebx(0x80000026) >> X86_CPU_TYPE_ID_SHIFT;
> > +}
> > +EXPORT_SYMBOL_GPL(amd_get_this_core_type);
> > diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
> > index cb750ef305fe..cf68343219d1 100644
> > --- a/drivers/cpufreq/amd-pstate.c
> > +++ b/drivers/cpufreq/amd-pstate.c
> > @@ -52,8 +52,10 @@
> >   #define AMD_PSTATE_TRANSITION_LATENCY     20000
> >   #define AMD_PSTATE_TRANSITION_DELAY       1000
> >   #define AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY 600
> > -#define CPPC_HIGHEST_PERF_PERFORMANCE      196
> > -#define CPPC_HIGHEST_PERF_DEFAULT  166
> > +
> > +#define CPPC_HIGHEST_PERF_EFFICIENT                132
> > +#define CPPC_HIGHEST_PERF_PERFORMANCE              196
> > +#define CPPC_HIGHEST_PERF_DEFAULT          166
> >
> >   #define AMD_CPPC_EPP_PERFORMANCE          0x00
> >   #define AMD_CPPC_EPP_BALANCE_PERFORMANCE  0x80
> > @@ -86,6 +88,14 @@ struct quirk_entry {
> >     u32 lowest_freq;
> >   };
> >
> > +/* defined by CPUID_Fn80000026_EBX BIT [31:28] */
> > +enum amd_core_type {
> > +   CPU_CORE_TYPE_NO_HETERO_SUP = -1,
> > +   CPU_CORE_TYPE_PERFORMANCE = 0,
> > +   CPU_CORE_TYPE_EFFICIENCY = 1,
> > +   CPU_CORE_TYPE_UNDEFINED = 2,
> > +};
> > +
> >   /*
> >    * TODO: We need more time to fine tune processors with shared memory
> solution
> >    * with community together.
> > @@ -358,9 +368,27 @@ static inline int amd_pstate_enable(bool enable)
> >     return static_call(amd_pstate_enable)(enable);
> >   }
> >
> > +static void get_this_core_type(void *data)
> > +{
> > +   enum amd_core_type *cpu_type = data;
> > +
> > +   *cpu_type = amd_get_this_core_type();
> > +}
> > +
> > +static enum amd_core_type  amd_pstate_get_cpu_type(int cpu)
> > +{
> > +   enum amd_core_type cpu_type;
> > +
> > +   smp_call_function_single(cpu, get_this_core_type, &cpu_type, 1);
> > +
> > +   return cpu_type;
> > +}
> > +
> >   static u32 amd_pstate_highest_perf_set(struct amd_cpudata *cpudata)
> >   {
> >     struct cpuinfo_x86 *c = &cpu_data(0);
> > +   u32 highest_perf;
> > +   enum amd_core_type core_type;
> >
> >     /*
> >      * For AMD CPUs with Family ID 19H and Model ID range 0x70 to 0x7f,
> > @@ -370,7 +398,26 @@ static u32 amd_pstate_highest_perf_set(struct
> amd_cpudata *cpudata)
> >     if (c->x86 == 0x19 && (c->x86_model >= 0x70 && c->x86_model <=
> 0x7f))
> >             return CPPC_HIGHEST_PERF_PERFORMANCE;
> >
> > -   return CPPC_HIGHEST_PERF_DEFAULT;
> > +   core_type = amd_pstate_get_cpu_type(cpudata->cpu);
> > +   pr_debug("core_type %d found\n", core_type);
> > +
> > +   switch (core_type) {
> > +   case CPU_CORE_TYPE_NO_HETERO_SUP:
> > +           highest_perf = CPPC_HIGHEST_PERF_DEFAULT;
> > +           break;
> > +   case CPU_CORE_TYPE_PERFORMANCE:
> > +           highest_perf = CPPC_HIGHEST_PERF_PERFORMANCE;
> > +           break;
> > +   case CPU_CORE_TYPE_EFFICIENCY:
> > +           highest_perf = CPPC_HIGHEST_PERF_EFFICIENT;
> > +           break;
> > +   default:
> > +           highest_perf = CPPC_HIGHEST_PERF_DEFAULT;
> > +           WARN_ONCE(true, "WARNING: Undefined core type found");
> > +           break;
> > +   }
> > +
> > +    return highest_perf;
> >   }
> >
> >   static int pstate_init_perf(struct amd_cpudata *cpudata)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ