From eaf30416cab758fefcc65cb089ec0796a6730c7d Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Wed, 5 Jul 2023 13:02:41 -0700 Subject: [PATCH 2/3] cpufreq: intel_pstate: Support highest performance change interrupt On some systems, HWP highest performance can change from the boot-up value. This leads to two issues: - cpufreq sysfs cpuinfo_max_freq will not show the highest performance of the CPU. - Even if the highest performance of the CPU is increased after boot, CPU will not reach the full expected performance. The change in the highest performance can be triggered by Intel Speed Select Technology-Performance profile feature. Each performance profile can have different base and max turbo (highest) frequency. When admin switches to a new performance profile, the firmware sends HWP interrupt for any change in the guaranteed or highest performance. Admin can also switch to a new performance profile via BMC (Baseboard Management Controller) from a remote management controller. For more details about the technology, refer to: https://docs.kernel.org/admin-guide/pm/intel-speed-select.html There are other triggers like over-clocking or dynamic adjustment of performance limits remotely via BMC to manage power, thermal and performance. The support of this feature depends on CPUID[6].EAX[15] = 1. When supported, MSR_HWP_INTERRUPT BIT(2) enables notification of the highest performance change. As part of enabling HWP interrupt, also set BIT(2) of MSR_HWP_INTERRUPT, when this feature is supported. On a highest performance change, a new HWP interrupt is generated with MSR_HWP_STATUS BIT(3) set and MSR_HWP_CAPABILITIES is updated with a new highest performance limit. The processing of the interrupt is the same as the guaranteed performance change. Notify the change to the cpufreq core and update MSR_HWP_REQUEST with new performance limits. 
The current driver implementation already takes care of the highest performance change as part of: commit dfeeedc1bf57 ("cpufreq: intel_pstate: Update cpuinfo.max_freq on HWP_CAP changes") For example: Before highest performance change interrupt: cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq 3700000 cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq 3700000 After highest performance change interrupt: cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq 3900000 cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq 3900000 Signed-off-by: Srinivas Pandruvada --- drivers/cpufreq/intel_pstate.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 4b986c044741..998b2e8fc2d7 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1620,17 +1620,24 @@ static void intel_pstate_notify_work(struct work_struct *work) static DEFINE_SPINLOCK(hwp_notify_lock); static cpumask_t hwp_intr_enable_mask; +#define HWP_GUARANTEED_PERF_CHANGE_STATUS BIT(0) +#define HWP_HIGHEST_PERF_CHANGE_STATUS BIT(3) + void notify_hwp_interrupt(void) { unsigned int this_cpu = smp_processor_id(); + u64 value, status_mask; unsigned long flags; - u64 value; if (!hwp_active || !boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) return; + status_mask = HWP_GUARANTEED_PERF_CHANGE_STATUS; + if (boot_cpu_has(X86_FEATURE_HWP_HIGHEST_PERF_CHANGE)) + status_mask |= HWP_HIGHEST_PERF_CHANGE_STATUS; + rdmsrl_safe(MSR_HWP_STATUS, &value); - if (!(value & 0x01)) + if (!(value & status_mask)) return; spin_lock_irqsave(&hwp_notify_lock, flags); @@ -1668,17 +1675,25 @@ static void intel_pstate_disable_hwp_interrupt(struct cpudata *cpudata) cancel_delayed_work_sync(&cpudata->hwp_notify_work); } +#define HWP_GUARANTEED_PERF_CHANGE_REQ BIT(0) +#define HWP_HIGHEST_PERF_CHANGE_REQ BIT(2) + static void intel_pstate_enable_hwp_interrupt(struct cpudata *cpudata) { - /* Enable HWP 
notification interrupt for guaranteed performance change */ + /* Enable HWP notification interrupt for performance change */ if (boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) { + u64 interrupt_mask = HWP_GUARANTEED_PERF_CHANGE_REQ; + spin_lock_irq(&hwp_notify_lock); INIT_DELAYED_WORK(&cpudata->hwp_notify_work, intel_pstate_notify_work); cpumask_set_cpu(cpudata->cpu, &hwp_intr_enable_mask); spin_unlock_irq(&hwp_notify_lock); + if (boot_cpu_has(X86_FEATURE_HWP_HIGHEST_PERF_CHANGE)) + interrupt_mask |= HWP_HIGHEST_PERF_CHANGE_REQ; + /* wrmsrl_on_cpu has to be outside spinlock as this can result in IPC */ - wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x01); + wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, interrupt_mask); wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0); } } -- 2.40.1