lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAJZ5v0jrEDkfZbMzdLHzvGwa3jK61vUBBqzUUM8BaQvLLcZnhg@mail.gmail.com>
Date:   Fri, 18 Nov 2022 18:57:24 +0100
From:   "Rafael J. Wysocki" <rafael@...nel.org>
To:     Srinivas Pandruvada <srinivas.pandruvada@...ux.intel.com>
Cc:     rafael@...nel.org, daniel.lezcano@...aro.org, amitk@...nel.org,
        rui.zhang@...el.com, linux-pm@...r.kernel.org,
        linux-kernel@...r.kernel.org,
        Ricardo Neri <ricardo.neri-calderon@...ux.intel.com>
Subject: Re: [PATCH RESEND 2/2] thermal: intel: Protect clearing of thermal
 status bits

On Wed, Nov 16, 2022 at 3:54 AM Srinivas Pandruvada
<srinivas.pandruvada@...ux.intel.com> wrote:
>
> The clearing of the package thermal status is done by Read-Modify-Write
> operation. This may result in clearing of some new status bits which are
> being or about to be processed.
>
> For example, while clearing of HFI status, after read of thermal status
> register, a new thermal status bit is set by the hardware. But during
> write back, the newly generated status bit will be set to 0 or cleared.
> So, it is not safe to do read-modify-write.
>
> Since thermal status Read-Write bits can be set to only 0 not 1, it is
> safe to set all other bits to 1 which are not getting cleared.
>
> Create a common interface for clearing package thermal status bits. Use
> this interface to replace existing code to clear thermal package status
> bits.
>
> It is safe to call from different CPUs without protection as there is no
> read-modify-write. Also wrmsrl results in just single instruction. For
> example while CPU 0 and CPU 3 are clearing bit 1 and 3 respectively. If
> CPU 3 wins the race, it will write 0x4000aa2, then CPU 1 will write
> 0x4000aa8. The bits which are not part of clear are set to 1. The default
> mask for bits, which can be written here is 0x4000aaa.
>
> Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@...ux.intel.com>
> Reviewed-by: Ricardo Neri <ricardo.neri-calderon@...ux.intel.com>

How urgent is this?  Would 6.2 be sufficient?

Also, do you want it to go into -stable?

> ---
> Email address was wrong, so sending again.
>
>  drivers/thermal/intel/intel_hfi.c            |  8 ++-----
>  drivers/thermal/intel/therm_throt.c          | 23 ++++++++++----------
>  drivers/thermal/intel/thermal_interrupt.h    |  6 +++++
>  drivers/thermal/intel/x86_pkg_temp_thermal.c |  9 ++------
>  4 files changed, 22 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
> index a0640f762dc5..c9e0827c9ebe 100644
> --- a/drivers/thermal/intel/intel_hfi.c
> +++ b/drivers/thermal/intel/intel_hfi.c
> @@ -42,9 +42,7 @@
>
>  #include "../thermal_core.h"
>  #include "intel_hfi.h"
> -
> -#define THERM_STATUS_CLEAR_PKG_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | \
> -                                    BIT(9) | BIT(11) | BIT(26))
> +#include "thermal_interrupt.h"
>
>  /* Hardware Feedback Interface MSR configuration bits */
>  #define HW_FEEDBACK_PTR_VALID_BIT              BIT(0)
> @@ -304,9 +302,7 @@ void intel_hfi_process_event(__u64 pkg_therm_status_msr_val)
>          * Let hardware know that we are done reading the HFI table and it is
>          * free to update it again.
>          */
> -       pkg_therm_status_msr_val &= THERM_STATUS_CLEAR_PKG_MASK &
> -                                   ~PACKAGE_THERM_STATUS_HFI_UPDATED;
> -       wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS, pkg_therm_status_msr_val);
> +       thermal_clear_package_intr_status(PACKAGE_LEVEL, PACKAGE_THERM_STATUS_HFI_UPDATED);
>
>         queue_delayed_work(hfi_updates_wq, &hfi_instance->update_work,
>                            HFI_UPDATE_INTERVAL);
> diff --git a/drivers/thermal/intel/therm_throt.c b/drivers/thermal/intel/therm_throt.c
> index 9e8ab31d756e..4bb7fddaa143 100644
> --- a/drivers/thermal/intel/therm_throt.c
> +++ b/drivers/thermal/intel/therm_throt.c
> @@ -190,32 +190,33 @@ static const struct attribute_group thermal_attr_group = {
>  };
>  #endif /* CONFIG_SYSFS */
>
> -#define CORE_LEVEL     0
> -#define PACKAGE_LEVEL  1
> -
>  #define THERM_THROT_POLL_INTERVAL      HZ
>  #define THERM_STATUS_PROCHOT_LOG       BIT(1)
>
>  #define THERM_STATUS_CLEAR_CORE_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11) | BIT(13) | BIT(15))
>  #define THERM_STATUS_CLEAR_PKG_MASK  (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11) | BIT(26))
>
> -static void clear_therm_status_log(int level)
> +/*
> + * Clear the bits in package thermal status register for bit = 1
> + * in bitmask
> + */
> +void thermal_clear_package_intr_status(int level, u64 bit_mask)
>  {
> +       u64 msr_val;
>         int msr;
> -       u64 mask, msr_val;
>
>         if (level == CORE_LEVEL) {
>                 msr  = MSR_IA32_THERM_STATUS;
> -               mask = THERM_STATUS_CLEAR_CORE_MASK;
> +               msr_val = THERM_STATUS_CLEAR_CORE_MASK;
>         } else {
>                 msr  = MSR_IA32_PACKAGE_THERM_STATUS;
> -               mask = THERM_STATUS_CLEAR_PKG_MASK;
> +               msr_val = THERM_STATUS_CLEAR_PKG_MASK;
>         }
>
> -       rdmsrl(msr, msr_val);
> -       msr_val &= mask;
> -       wrmsrl(msr, msr_val & ~THERM_STATUS_PROCHOT_LOG);
> +       msr_val &= ~bit_mask;
> +       wrmsrl(msr, msr_val);
>  }
> +EXPORT_SYMBOL_GPL(thermal_clear_package_intr_status);
>
>  static void get_therm_status(int level, bool *proc_hot, u8 *temp)
>  {
> @@ -295,7 +296,7 @@ static void __maybe_unused throttle_active_work(struct work_struct *work)
>         state->average = avg;
>
>  re_arm:
> -       clear_therm_status_log(state->level);
> +       thermal_clear_package_intr_status(state->level, THERM_STATUS_PROCHOT_LOG);
>         schedule_delayed_work_on(this_cpu, &state->therm_work, THERM_THROT_POLL_INTERVAL);
>  }
>
> diff --git a/drivers/thermal/intel/thermal_interrupt.h b/drivers/thermal/intel/thermal_interrupt.h
> index 01e7bed2ffc7..01dfd4cdb5df 100644
> --- a/drivers/thermal/intel/thermal_interrupt.h
> +++ b/drivers/thermal/intel/thermal_interrupt.h
> @@ -2,6 +2,9 @@
>  #ifndef _INTEL_THERMAL_INTERRUPT_H
>  #define _INTEL_THERMAL_INTERRUPT_H
>
> +#define CORE_LEVEL     0
> +#define PACKAGE_LEVEL  1
> +
>  /* Interrupt Handler for package thermal thresholds */
>  extern int (*platform_thermal_package_notify)(__u64 msr_val);
>
> @@ -15,4 +18,7 @@ extern bool (*platform_thermal_package_rate_control)(void);
>  /* Handle HWP interrupt */
>  extern void notify_hwp_interrupt(void);
>
> +/* Common function to clear Package thermal status register */
> +extern void thermal_clear_package_intr_status(int level, u64 bit_mask);
> +
>  #endif /* _INTEL_THERMAL_INTERRUPT_H */
> diff --git a/drivers/thermal/intel/x86_pkg_temp_thermal.c b/drivers/thermal/intel/x86_pkg_temp_thermal.c
> index a0e234fce71a..84c3a116ed04 100644
> --- a/drivers/thermal/intel/x86_pkg_temp_thermal.c
> +++ b/drivers/thermal/intel/x86_pkg_temp_thermal.c
> @@ -265,7 +265,6 @@ static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work)
>         struct thermal_zone_device *tzone = NULL;
>         int cpu = smp_processor_id();
>         struct zone_device *zonedev;
> -       u64 msr_val, wr_val;
>
>         mutex_lock(&thermal_zone_mutex);
>         raw_spin_lock_irq(&pkg_temp_lock);
> @@ -279,12 +278,8 @@ static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work)
>         }
>         zonedev->work_scheduled = false;
>
> -       rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
> -       wr_val = msr_val & ~(THERM_LOG_THRESHOLD0 | THERM_LOG_THRESHOLD1);
> -       if (wr_val != msr_val) {
> -               wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS, wr_val);
> -               tzone = zonedev->tzone;
> -       }
> +       thermal_clear_package_intr_status(PACKAGE_LEVEL, THERM_LOG_THRESHOLD0 | THERM_LOG_THRESHOLD1);
> +       tzone = zonedev->tzone;
>
>         enable_pkg_thres_interrupt();
>         raw_spin_unlock_irq(&pkg_temp_lock);
> --
> 2.31.1
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ