linux-kernel - Re: [Patch v4 5/6] thermal/cpu-cooling: Update thermal pressure in case of a maximum frequency capping

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAKfTPtCpZq61gQVpATtTdg5hDA+tP4bF6xPMsvHYUMoY+H-6FQ@mail.gmail.com>
Date:   Thu, 31 Oct 2019 17:38:25 +0100
From:   Vincent Guittot <vincent.guittot@...aro.org>
To:     Dietmar Eggemann <dietmar.eggemann@....com>
Cc:     Thara Gopinath <thara.gopinath@...aro.org>,
        Ingo Molnar <mingo@...hat.com>,
        Peter Zijlstra <peterz@...radead.org>,
        Ionela Voinescu <ionela.voinescu@....com>,
        Zhang Rui <rui.zhang@...el.com>,
        Eduardo Valentin <edubezval@...il.com>,
        Quentin Perret <qperret@...gle.com>,
        linux-kernel <linux-kernel@...r.kernel.org>,
        Amit Kachhap <amit.kachhap@...il.com>,
        Javi Merino <javi.merino@...nel.org>,
        Daniel Lezcano <daniel.lezcano@...aro.org>
Subject: Re: [Patch v4 5/6] thermal/cpu-cooling: Update thermal pressure in
 case of a maximum frequency capping

On Thu, 31 Oct 2019 at 17:29, Dietmar Eggemann <dietmar.eggemann@....com> wrote:
>
> On 22.10.19 22:34, Thara Gopinath wrote:
> > Thermal governors can request for a cpu's maximum supported frequency
> > to be capped in case of an overheat event. This in turn means that the
> > maximum capacity available for tasks to run on the particular cpu is
> > reduced. Delta between the original maximum capacity and capped
> > maximum capacity is known as thermal pressure. Enable cpufreq cooling
> > device to update the thermal pressure in event of a capped
> > maximum frequency.
> >
> > Signed-off-by: Thara Gopinath <thara.gopinath@...aro.org>
> > ---
> >  drivers/thermal/cpu_cooling.c | 31 +++++++++++++++++++++++++++++--
> >  1 file changed, 29 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
> > index 391f397..2e6a979 100644
> > --- a/drivers/thermal/cpu_cooling.c
> > +++ b/drivers/thermal/cpu_cooling.c
> > @@ -218,6 +218,23 @@ static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_cdev,
> >  }
> >
> >  /**
> > + * update_sched_max_capacity - update scheduler about change in cpu
> > + *                                   max frequency.
> > + * @policy - cpufreq policy whose max frequency is capped.
> > + */
> > +static void update_sched_max_capacity(struct cpumask *cpus,
> > +                                   unsigned int cur_max_freq,
> > +                                   unsigned int max_freq)
> > +{
> > +     int cpu;
> > +     unsigned long capacity = (cur_max_freq << SCHED_CAPACITY_SHIFT) /
> > +                               max_freq;
> > +
> > +     for_each_cpu(cpu, cpus)
> > +             update_thermal_pressure(cpu, capacity);
> > +}
> > +
> > +/**
> >   * get_load() - get load for a cpu since last updated
> >   * @cpufreq_cdev:    &struct cpufreq_cooling_device for this cpu
> >   * @cpu:     cpu number
> > @@ -320,6 +337,7 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
> >                                unsigned long state)
> >  {
> >       struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
> > +     int ret;
> >
> >       /* Request state should be less than max_level */
> >       if (WARN_ON(state > cpufreq_cdev->max_level))
> > @@ -331,8 +349,17 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
> >
> >       cpufreq_cdev->cpufreq_state = state;
> >
> > -     return dev_pm_qos_update_request(&cpufreq_cdev->qos_req,
> > -                             cpufreq_cdev->freq_table[state].frequency);
> > +     ret = dev_pm_qos_update_request
> > +                             (&cpufreq_cdev->qos_req,
> > +                              cpufreq_cdev->freq_table[state].frequency);
> > +
> > +     if (ret > 0)
> > +             update_sched_max_capacity
> > +                             (cpufreq_cdev->policy->cpus,
> > +                              cpufreq_cdev->freq_table[state].frequency,
> > +                              cpufreq_cdev->policy->cpuinfo.max_freq);
> > +
> > +     return ret;
> >  }
> >
> >  /**
> >
>
> Why not getting rid of update_sched_max_capacity() entirely and call
> update_thermal_pressure() in cpu_cooling.c directly? Saves one level in
> the call chain and would mean less code for this feature.

But you add complexity in update_thermal_pressure which now has to
deal with a cpumask and to compute some frequency ratio
IMHO, it's cleaner to keep update_thermal_pressure simple as it is now

>
> Just compile tested on arm64:
>
> diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
> index 3211b4d3a899..bf36995013b0 100644
> --- a/drivers/thermal/cpu_cooling.c
> +++ b/drivers/thermal/cpu_cooling.c
> @@ -217,23 +217,6 @@ static u32 cpu_power_to_freq(struct
> cpufreq_cooling_device *cpufreq_cdev,
>         return freq_table[i - 1].frequency;
>  }
>
> -/**
> - * update_sched_max_capacity - update scheduler about change in cpu
> - *                                     max frequency.
> - * @policy - cpufreq policy whose max frequency is capped.
> - */
> -static void update_sched_max_capacity(struct cpumask *cpus,
> -                                     unsigned int cur_max_freq,
> -                                     unsigned int max_freq)
> -{
> -       int cpu;
> -       unsigned long capacity = (cur_max_freq << SCHED_CAPACITY_SHIFT) /
> -                                 max_freq;
> -
> -       for_each_cpu(cpu, cpus)
> -               update_thermal_pressure(cpu, capacity);
> -}
> -
>  /**
>   * get_load() - get load for a cpu since last updated
>   * @cpufreq_cdev:      &struct cpufreq_cooling_device for this cpu
> @@ -353,7 +336,7 @@ static int cpufreq_set_cur_state(struct
> thermal_cooling_device *cdev,
>                                 cpufreq_cdev->freq_table[state].frequency);
>
>         if (ret > 0)
> -               update_sched_max_capacity
> +               update_thermal_pressure
>                                 (cpufreq_cdev->policy->cpus,
>                                  cpufreq_cdev->freq_table[state].frequency,
>                                  cpufreq_cdev->policy->cpuinfo.max_freq);
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 55dfe9634f67..5707813c7621 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1985,9 +1985,9 @@ static inline void rseq_syscall(struct pt_regs *regs)
>  #endif
>
>  #ifdef CONFIG_SMP
> -void update_thermal_pressure(int cpu, u64 capacity);
> +void update_thermal_pressure(struct cpumask *cpus, unsigned int cur,
> unsigned int max);
>  #else
> -static inline void update_thermal_pressure(int cpu, u64 capacity)
> +static inline void update_thermal_pressure(struct cpumask *cpus,
> unsigned int cur, unsigned int max);
>  {
>  }
>  #endif
> diff --git a/kernel/sched/thermal.c b/kernel/sched/thermal.c
> index 0da31e12a5ff..691bdd79597a 100644
> --- a/kernel/sched/thermal.c
> +++ b/kernel/sched/thermal.c
> @@ -43,17 +43,16 @@ static DEFINE_PER_CPU(unsigned long, delta_capacity);
>   * the arch_scale_cpu_capacity and capped capacity is stored in per cpu
>   * delta_capacity.
>   */
> -void update_thermal_pressure(int cpu, u64 capped_freq_ratio)
> +void update_thermal_pressure(struct cpumask *cpus, unsigned int cur,
> unsigned int max)
>  {
> -       unsigned long __capacity, delta;
> +       int cpu;
>
> -       /* Normalize the capped freq ratio */
> -       __capacity = (capped_freq_ratio * arch_scale_cpu_capacity(cpu)) >>
> -
> SCHED_CAPACITY_SHIFT;
> -       delta = arch_scale_cpu_capacity(cpu) -  __capacity;
> -       pr_debug("updating cpu%d thermal pressure to %lu\n", cpu, delta);
> +       for_each_cpu(cpu, cpus) {
> +               unsigned long scale_cap = arch_scale_cpu_capacity(cpu);
> +               unsigned long cur_cap = cur * scale_cap / max;
>
> -       per_cpu(delta_capacity, cpu) = delta;
> +               per_cpu(delta_capacity, cpu) = scale_cap - cur_cap;
> +       }
>  }