lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <50546456-8b65-4e42-97b1-01e3cc48beac@arm.com>
Date: Tue, 5 Nov 2024 09:08:12 +0000
From: Lukasz Luba <lukasz.luba@....com>
To: "Rafael J. Wysocki" <rafael@...nel.org>
Cc: linux-kernel@...r.kernel.org, linux-pm@...r.kernel.org,
 dietmar.eggemann@....com
Subject: Re: [PATCH v3 1/1] PM: EM: Add min/max available performance state
 limits



On 11/4/24 22:02, Rafael J. Wysocki wrote:
> On Wed, Oct 30, 2024 at 5:40 PM Lukasz Luba <lukasz.luba@....com> wrote:
>>
>> On some devices there are HW dependencies for shared frequency and voltage
>> between devices. It will impact Energy Aware Scheduler (EAS) decision,
>> where CPUs share the voltage & frequency domain with other CPUs or devices
>> e.g.
>> - Mid CPUs + Big CPU
>> - Little CPU + L3 cache in DSU
>> - some other device + Little CPUs
>>
>> Detailed explanation of one example:
>> When the L3 cache frequency is increased, the affected Little CPUs might
>> run at higher voltage and frequency. That higher voltage causes higher CPU
>> power and thus more energy is used for running the tasks. This is
>> important for background running tasks, which try to run on energy
>> efficient CPUs.
>>
>> Therefore, add performance state limits which are applied for the device
>> (in this case CPU). This is important on SoCs with HW dependencies
>> mentioned above so that the Energy Aware Scheduler (EAS) does not use
>> performance states outside the valid min-max range for energy calculation.
>>
>> Signed-off-by: Lukasz Luba <lukasz.luba@....com>
>> ---
>>   include/linux/energy_model.h | 29 ++++++++++++++------
>>   kernel/power/energy_model.c  | 52 ++++++++++++++++++++++++++++++++++++
>>   2 files changed, 73 insertions(+), 8 deletions(-)
>>
>> diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
>> index 1ff52020cf757..752e0b2975820 100644
>> --- a/include/linux/energy_model.h
>> +++ b/include/linux/energy_model.h
>> @@ -55,6 +55,8 @@ struct em_perf_table {
>>    * struct em_perf_domain - Performance domain
>>    * @em_table:          Pointer to the runtime modifiable em_perf_table
>>    * @nr_perf_states:    Number of performance states
>> + * @min_perf_state:    Minimum allowed Performance State index
>> + * @max_perf_state:    Maximum allowed Performance State index
>>    * @flags:             See "em_perf_domain flags"
>>    * @cpus:              Cpumask covering the CPUs of the domain. It's here
>>    *                     for performance reasons to avoid potential cache
>> @@ -70,6 +72,8 @@ struct em_perf_table {
>>   struct em_perf_domain {
>>          struct em_perf_table __rcu *em_table;
>>          int nr_perf_states;
>> +       int min_perf_state;
>> +       int max_perf_state;
>>          unsigned long flags;
>>          unsigned long cpus[];
>>   };
>> @@ -173,13 +177,14 @@ void em_table_free(struct em_perf_table __rcu *table);
>>   int em_dev_compute_costs(struct device *dev, struct em_perf_state *table,
>>                           int nr_states);
>>   int em_dev_update_chip_binning(struct device *dev);
>> +int em_update_performance_limits(struct em_perf_domain *pd,
>> +               unsigned long freq_min_khz, unsigned long freq_max_khz);
>>
>>   /**
>>    * em_pd_get_efficient_state() - Get an efficient performance state from the EM
>>    * @table:             List of performance states, in ascending order
>> - * @nr_perf_states:    Number of performance states
>> + * @pd:                        performance domain for which this must be done
>>    * @max_util:          Max utilization to map with the EM
>> - * @pd_flags:          Performance Domain flags
>>    *
>>    * It is called from the scheduler code quite frequently and as a consequence
>>    * doesn't implement any check.
>> @@ -188,13 +193,16 @@ int em_dev_update_chip_binning(struct device *dev);
>>    * requirement.
>>    */
>>   static inline int
>> -em_pd_get_efficient_state(struct em_perf_state *table, int nr_perf_states,
>> -                         unsigned long max_util, unsigned long pd_flags)
>> +em_pd_get_efficient_state(struct em_perf_state *table,
>> +                         struct em_perf_domain *pd, unsigned long max_util)
>>   {
>> +       unsigned long pd_flags = pd->flags;
>> +       int min_ps = pd->min_perf_state;
>> +       int max_ps = pd->max_perf_state;
>>          struct em_perf_state *ps;
>>          int i;
>>
>> -       for (i = 0; i < nr_perf_states; i++) {
>> +       for (i = min_ps; i <= max_ps; i++) {
>>                  ps = &table[i];
>>                  if (ps->performance >= max_util) {
>>                          if (pd_flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES &&
>> @@ -204,7 +212,7 @@ em_pd_get_efficient_state(struct em_perf_state *table, int nr_perf_states,
>>                  }
>>          }
>>
>> -       return nr_perf_states - 1;
>> +       return max_ps;
>>   }
>>
>>   /**
>> @@ -253,8 +261,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
>>           * requested performance.
>>           */
>>          em_table = rcu_dereference(pd->em_table);
>> -       i = em_pd_get_efficient_state(em_table->state, pd->nr_perf_states,
>> -                                     max_util, pd->flags);
>> +       i = em_pd_get_efficient_state(em_table->state, pd, max_util);
>>          ps = &em_table->state[i];
>>
>>          /*
>> @@ -391,6 +398,12 @@ static inline int em_dev_update_chip_binning(struct device *dev)
>>   {
>>          return -EINVAL;
>>   }
>> +static inline
>> +int em_update_performance_limits(struct em_perf_domain *pd,
>> +               unsigned long freq_min_khz, unsigned long freq_max_khz)
>> +{
>> +       return -EINVAL;
>> +}
>>   #endif
>>
>>   #endif
>> diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c
>> index 927cc55ba0b3d..d07faf42eace6 100644
>> --- a/kernel/power/energy_model.c
>> +++ b/kernel/power/energy_model.c
>> @@ -628,6 +628,8 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
>>                  goto unlock;
>>
>>          dev->em_pd->flags |= flags;
>> +       dev->em_pd->min_perf_state = 0;
>> +       dev->em_pd->max_perf_state = nr_states - 1;
>>
>>          em_cpufreq_update_efficiencies(dev, dev->em_pd->em_table->state);
>>
>> @@ -856,3 +858,53 @@ int em_dev_update_chip_binning(struct device *dev)
>>          return em_recalc_and_update(dev, pd, em_table);
>>   }
>>   EXPORT_SYMBOL_GPL(em_dev_update_chip_binning);
>> +
>> +
>> +/**
>> + * em_update_performance_limits() - Update Energy Model with performance
>> + *                             limits information.
>> + * @pd                 : Performance Domain with EM that has to be updated.
>> + * @freq_min_khz       : New minimum allowed frequency for this device.
>> + * @freq_max_khz       : New maximum allowed frequency for this device.
>> + *
>> + * This function allows to update the EM with information about available
>> + * performance levels. It takes the minimum and maximum frequency in kHz
>> + * and does internal translation to performance levels.
>> + * Returns 0 on success or -EINVAL when failed.
>> + */
>> +int em_update_performance_limits(struct em_perf_domain *pd,
>> +               unsigned long freq_min_khz, unsigned long freq_max_khz)
>> +{
>> +       struct em_perf_state *table;
>> +       int min_ps = -1;
>> +       int max_ps = -1;
>> +       int i;
>> +
>> +       if (!pd)
>> +               return -EINVAL;
>> +
>> +       rcu_read_lock();
>> +       table = em_perf_state_from_pd(pd);
>> +
>> +       for (i = 0; i < pd->nr_perf_states; i++) {
>> +               if (freq_min_khz == table[i].frequency)
>> +                       min_ps = i;
>> +               if (freq_max_khz == table[i].frequency)
>> +                       max_ps = i;
>> +       }
>> +       rcu_read_unlock();
>> +
>> +       /* Only update when both are found and sane */
>> +       if (min_ps < 0 || max_ps < 0 || max_ps < min_ps)
>> +               return -EINVAL;
>> +
>> +
>> +       /* Guard simultaneous updates and make them atomic */
>> +       mutex_lock(&em_pd_mutex);
>> +       pd->min_perf_state = min_ps;
>> +       pd->max_perf_state = max_ps;
>> +       mutex_unlock(&em_pd_mutex);
>> +
>> +       return 0;
>> +}
>> +EXPORT_SYMBOL_GPL(em_update_performance_limits);
>> --
> 
> Applied as 6.13 material, thanks!

Thank you Rafael!

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ