linux-kernel - Re: [PATCH 4/7] thermal: intel: hfi: Handle CPU hotplug events

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAJZ5v0gemmV1Lz3+9iKz1eiXtkyDc3+4+po4Eidchzk+J2=ceA@mail.gmail.com>
Date:   Wed, 24 Nov 2021 15:48:49 +0100
From:   "Rafael J. Wysocki" <rafael@...nel.org>
To:     Ricardo Neri <ricardo.neri-calderon@...ux.intel.com>
Cc:     "Rafael J. Wysocki" <rafael.j.wysocki@...el.com>,
        Daniel Lezcano <daniel.lezcano@...aro.org>,
        Linux PM <linux-pm@...r.kernel.org>,
        "the arch/x86 maintainers" <x86@...nel.org>,
        "open list:DOCUMENTATION" <linux-doc@...r.kernel.org>,
        Len Brown <len.brown@...el.com>,
        Srinivas Pandruvada <srinivas.pandruvada@...ux.intel.com>,
        Aubrey Li <aubrey.li@...ux.intel.com>,
        Amit Kucheria <amitk@...nel.org>,
        Andi Kleen <ak@...ux.intel.com>,
        Tim Chen <tim.c.chen@...ux.intel.com>,
        "Ravi V. Shankar" <ravi.v.shankar@...el.com>,
        Ricardo Neri <ricardo.neri@...el.com>,
        Linux Kernel Mailing List <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH 4/7] thermal: intel: hfi: Handle CPU hotplug events

On Sat, Nov 6, 2021 at 2:34 AM Ricardo Neri
<ricardo.neri-calderon@...ux.intel.com> wrote:
>
> All CPUs in a package are represented in an HFI table. There exists an
> HFI table per package. Thus, CPUs in a package need to coordinate to
> initialize and access the table. Do such coordination during CPU hotplug.
> Use the first CPU to come online in a package to initialize the HFI table
> and the data structure representing it. Other CPUs in the same package need
> only to register or unregister themselves in that data structure.
>
> The HFI depends on both the package-level thermal management and the local
> APIC thermal local vector. Thus, ensure that both are properly configured
> before calling intel_hfi_online(). The CPU hotplug callbacks of the thermal
> throttle events code already meet these conditions. Enable the HFI from
> thermal_throttle_online().
>
> Cc: Andi Kleen <ak@...ux.intel.com>
> Cc: Aubrey Li <aubrey.li@...ux.intel.com>
> Cc: Srinivas Pandruvada <srinivas.pandruvada@...ux.intel.com>
> Cc: Tim Chen <tim.c.chen@...ux.intel.com>
> Cc: "Ravi V. Shankar" <ravi.v.shankar@...el.com>
> Reviewed-by: Len Brown <len.brown@...el.com>
> Signed-off-by: Ricardo Neri <ricardo.neri-calderon@...ux.intel.com>
> ---
>  drivers/thermal/intel/intel_hfi.c   | 211 ++++++++++++++++++++++++++++
>  drivers/thermal/intel/intel_hfi.h   |   4 +
>  drivers/thermal/intel/therm_throt.c |   8 ++
>  3 files changed, 223 insertions(+)
>
> diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
> index edfe343507b3..6a3adfd57d72 100644
> --- a/drivers/thermal/intel/intel_hfi.c
> +++ b/drivers/thermal/intel/intel_hfi.c
> @@ -21,6 +21,7 @@
>
>  #define pr_fmt(fmt)  "intel-hfi: " fmt
>
> +#include <linux/io.h>
>  #include <linux/slab.h>
>
>  #include "intel_hfi.h"
> @@ -52,16 +53,26 @@ struct hfi_hdr {
>
>  /**
>   * struct hfi_instance - Representation of an HFI instance (i.e., a table)
> + * @table_base:                Base of the local copy of the HFI table
>   * @ts_counter:                Time stamp of the last update of the table
>   * @hdr:               Base address of the table header
>   * @data:              Base address of the table data
> + * @die_id:            Logical die ID of this HFI table instance
> + * @cpus:              CPUs represented in this HFI table instance
> + * @hw_table:          Pointer to the HFI table of this instance
> + * @initialized:       True if this HFI instance has been initialized
>   *
>   * A set of parameters to parse and navigate a specific HFI table.
>   */
>  struct hfi_instance {
> +       void                    *table_base;
>         u64                     *ts_counter;
>         void                    *hdr;
>         void                    *data;
> +       u16                     die_id;
> +       struct cpumask          *cpus;
> +       void                    *hw_table;
> +       bool                    initialized;
>  };
>
>  /**
> @@ -83,10 +94,210 @@ struct hfi_features {
>         bool            parsed;
>  };
>
> +/**
> + * struct hfi_cpu_info - Per-CPU attributes to consume HFI data
> + * @index:             Row of this CPU in its HFI table
> + * @hfi_instance:      Attributes of the HFI table to which this CPU belongs
> + *
> + * Parameters to link a logical processor to an HFI table and a row within it.
> + */
> +struct hfi_cpu_info {
> +       s16                     index;
> +       struct hfi_instance     *hfi_instance;
> +};
> +
> +static DEFINE_PER_CPU(struct hfi_cpu_info, hfi_cpu_info) = { .index = -1 };
> +
>  static int max_hfi_instances;
>  static struct hfi_instance *hfi_instances;
>
>  static struct hfi_features hfi_features;
> +static DEFINE_MUTEX(hfi_lock);
> +
> +static void init_hfi_cpu_index(unsigned int cpu)

I would make this function take a (struct hfi_cpu_info *) argument
instead of the CPU number.  It would be more concise then.

> +{
> +       s16 hfi_idx;
> +       u32 edx;
> +
> +       /* Do not re-read @cpu's index if it has already been initialized. */
> +       if (per_cpu(hfi_cpu_info, cpu).index > -1)
> +               return;
> +
> +       edx = cpuid_edx(CPUID_HFI_LEAF);
> +       hfi_idx = (edx & CPUID_HFI_CPU_INDEX_MASK) >> CPUID_HFI_CPU_INDEX_SHIFT;
> +
> +       per_cpu(hfi_cpu_info, cpu).index = hfi_idx;
> +}
> +
> +/*
> + * The format of the HFI table depends on the number of capabilities that the
> + * hardware supports. Keep a data structure to navigate the table.
> + */
> +static void init_hfi_instance(struct hfi_instance *hfi_instance)
> +{
> +       /* The HFI time-stamp is located at the base of the table. */
> +       hfi_instance->ts_counter = hfi_instance->table_base;
> +
> +       /* The HFI header is below the time-stamp. */
> +       hfi_instance->hdr = hfi_instance->table_base +
> +                           sizeof(*hfi_instance->ts_counter);
> +
> +       /* The HFI data starts below the header. */
> +       hfi_instance->data = hfi_instance->hdr + hfi_features.hdr_size;
> +}
> +
> +/**
> + * intel_hfi_online() - Enable HFI on @cpu
> + * @cpu:       CPU in which the HFI will be enabled
> + *
> + * Enable the HFI to be used in @cpu. The HFI is enabled at the die/package
> + * level. The first CPU in the die/package to come online does the full HFI
> + * initialization. Subsequent CPUs will just link themselves to the HFI
> + * instance of their die/package.
> + */
> +void intel_hfi_online(unsigned int cpu)
> +{
> +       struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, cpu);
> +       u16 die_id = topology_logical_die_id(cpu);
> +       struct hfi_instance *hfi_instance;
> +       phys_addr_t hw_table_pa;
> +       u64 msr_val;
> +
> +       if (!boot_cpu_has(X86_FEATURE_INTEL_HFI))
> +               return;

IMO it is not useful to do anything below in this function if
hfi_instances is NULL, so I would check it along with the above.

> +
> +       init_hfi_cpu_index(cpu);
> +
> +       /*
> +        * The HFI instance of this @cpu may exist already but it has not
> +        * been linked to @cpu.
> +        */
> +       hfi_instance = info->hfi_instance;
> +       if (!hfi_instance) {
> +               if (!hfi_instances)
> +                       return;
> +
> +               if (die_id >= 0 && die_id < max_hfi_instances)
> +                       hfi_instance = &hfi_instances[die_id];
> +
> +               if (!hfi_instance)
> +                       return;

And here I would do

if (die_id < 0 || die_id >= max_hfi_instances)
        return;

hfi_instance = &hfi_instances[die_id];

which is one branch less and fewer LOC.

> +       }
> +
> +       /*
> +        * Now check if the HFI instance of the package/die of this CPU has
> +        * been initialized. In such case, all we have to do is link @cpu's info
> +        * to the HFI instance of its die/package.
> +        */
> +       mutex_lock(&hfi_lock);
> +       if (hfi_instance->initialized) {
> +               info->hfi_instance = hfi_instance;
> +
> +               /*
> +                * @cpu is the first one in its die/package to come back online.
> +                * Use it to track the CPUs in the die/package.
> +                */
> +               if (!hfi_instance->cpus)
> +                       hfi_instance->cpus = topology_core_cpumask(cpu);
> +
> +               mutex_unlock(&hfi_lock);
> +               return;
> +       }
> +
> +       /*
> +        * Hardware is programmed with the physical address of the first page
> +        * frame of the table. Hence, the allocated memory must be page-aligned.
> +        */
> +       hfi_instance->hw_table = alloc_pages_exact(hfi_features.nr_table_pages,
> +                                                  GFP_KERNEL | __GFP_ZERO);
> +       if (!hfi_instance->hw_table)
> +               goto err_out;
> +
> +       hw_table_pa = virt_to_phys(hfi_instance->hw_table);
> +
> +       hfi_instance->table_base = kzalloc(hfi_features.nr_table_pages << PAGE_SHIFT,
> +                                          GFP_KERNEL);
> +       if (!hfi_instance->table_base)
> +               goto free_hw_table;
> +
> +       /*
> +        * Program the address of the feedback table of this die/package. On
> +        * some processors, hardware remembers the old address of the HFI table
> +        * even after having been reprogrammed and re-enabled. Thus, do not free
> +        * pages allocated for the table or reprogram the hardware with a new
> +        * base address. Namely, program the hardware only once.
> +        */
> +       msr_val = hw_table_pa | HFI_PTR_VALID_BIT;
> +       wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val);
> +
> +       init_hfi_instance(hfi_instance);
> +
> +       hfi_instance->die_id = die_id;
> +
> +       /*
> +        * We can use the core cpumask of any cpu in the die/package. Any of
> +        * them will reflect all the CPUs in the same package that are online.
> +        */
> +       hfi_instance->cpus = topology_core_cpumask(cpu);
> +       info->hfi_instance = hfi_instance;
> +       hfi_instance->initialized = true;
> +
> +       mutex_unlock(&hfi_lock);
> +
> +       return;
> +
> +free_hw_table:
> +       free_pages_exact(hfi_instance->hw_table, hfi_features.nr_table_pages);
> +err_out:
> +       mutex_unlock(&hfi_lock);
> +}
> +
> +/**
> + * intel_hfi_offline() - Disable HFI on @cpu
> + * @cpu:       CPU in which the HFI will be disabled
> + *
> + * Remove @cpu from those covered by its HFI instance.
> + *
> + * On some processors, hardware remembers previous programming settings even
> + * after being reprogrammed. Thus, keep HFI enabled even if all CPUs in the
> + * die/package of @cpu are offline. See note in intel_hfi_online().
> + */
> +void intel_hfi_offline(unsigned int cpu)
> +{
> +       struct cpumask *die_cpumask = topology_core_cpumask(cpu);
> +       struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, cpu);
> +       struct hfi_instance *hfi_instance;
> +
> +       if (!boot_cpu_has(X86_FEATURE_INTEL_HFI))
> +               return;
> +
> +       hfi_instance = info->hfi_instance;
> +       if (!hfi_instance)
> +               return;
> +
> +       if (!hfi_instance->initialized)
> +               return;
> +
> +       mutex_lock(&hfi_lock);
> +
> +       /*
> +        * We were using the core cpumask of @cpu to track CPUs in the same
> +        * die/package. Now it is going offline and we need to find another
> +        * CPU we can use.
> +        */
> +       if (die_cpumask == hfi_instance->cpus) {
> +               int new_cpu;
> +
> +               new_cpu = cpumask_any_but(hfi_instance->cpus, cpu);
> +               if (new_cpu >= nr_cpu_ids)
> +                       /* All other CPUs in the package are offline. */
> +                       hfi_instance->cpus = NULL;
> +               else
> +                       hfi_instance->cpus = topology_core_cpumask(new_cpu);

Hmmm.  Is topology_core_cpumask() updated when CPUs go offline and online?

> +       }
> +
> +       mutex_unlock(&hfi_lock);
> +}
>
>  static __init int hfi_parse_features(void)
>  {
> diff --git a/drivers/thermal/intel/intel_hfi.h b/drivers/thermal/intel/intel_hfi.h
> index 42529d3ac92d..d87c3823bb76 100644
> --- a/drivers/thermal/intel/intel_hfi.h
> +++ b/drivers/thermal/intel/intel_hfi.h
> @@ -27,8 +27,12 @@
>
>  #if defined(CONFIG_INTEL_HFI)
>  void __init intel_hfi_init(void);
> +void intel_hfi_online(unsigned int cpu);
> +void intel_hfi_offline(unsigned int cpu);
>  #else
>  static inline void intel_hfi_init(void) { }
> +static inline void intel_hfi_online(unsigned int cpu) { }
> +static inline void intel_hfi_offline(unsigned int cpu) { }
>  #endif
>
>  #endif /* _INTEL_HFI_H */
> diff --git a/drivers/thermal/intel/therm_throt.c b/drivers/thermal/intel/therm_throt.c
> index ac408714d52b..2a79598a7f7a 100644
> --- a/drivers/thermal/intel/therm_throt.c
> +++ b/drivers/thermal/intel/therm_throt.c
> @@ -480,6 +480,12 @@ static int thermal_throttle_online(unsigned int cpu)
>         l = apic_read(APIC_LVTTHMR);
>         apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
>
> +       /*
> +        * Enable the package-level HFI interrupt. By now the local APIC is
> +        * ready to get thermal interrupts.
> +        */
> +       intel_hfi_online(cpu);
> +
>         return thermal_throttle_add_dev(dev, cpu);
>  }
>
> @@ -489,6 +495,8 @@ static int thermal_throttle_offline(unsigned int cpu)
>         struct device *dev = get_cpu_device(cpu);
>         u32 l;
>
> +       intel_hfi_offline(cpu);
> +
>         /* Mask the thermal vector before draining evtl. pending work */
>         l = apic_read(APIC_LVTTHMR);
>         apic_write(APIC_LVTTHMR, l | APIC_LVT_MASKED);
> --
> 2.17.1
>