lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Thu, 3 Aug 2023 09:50:09 -0700
From:   David Dai <davidai@...gle.com>
To:     Viresh Kumar <viresh.kumar@...aro.org>
Cc:     "Rafael J. Wysocki" <rafael@...nel.org>,
        Rob Herring <robh+dt@...nel.org>,
        Krzysztof Kozlowski <krzysztof.kozlowski+dt@...aro.org>,
        Conor Dooley <conor+dt@...nel.org>,
        Sudeep Holla <sudeep.holla@....com>,
        Saravana Kannan <saravanak@...gle.com>,
        Quentin Perret <qperret@...gle.com>,
        Masami Hiramatsu <mhiramat@...gle.com>,
        Will Deacon <will@...nel.org>,
        Peter Zijlstra <peterz@...radead.org>,
        Vincent Guittot <vincent.guittot@...aro.org>,
        Marc Zyngier <maz@...nel.org>,
        Oliver Upton <oliver.upton@...ux.dev>,
        Dietmar Eggemann <dietmar.eggemann@....com>,
        Pavan Kondeti <quic_pkondeti@...cinc.com>,
        Gupta Pankaj <pankaj.gupta@....com>,
        Mel Gorman <mgorman@...e.de>, kernel-team@...roid.com,
        linux-pm@...r.kernel.org, devicetree@...r.kernel.org,
        linux-kernel@...r.kernel.org
Subject: Re: [PATCH v3 2/2] cpufreq: add virtual-cpufreq driver

Hi Viresh,

Thanks for reviewing!

On Tue, Aug 1, 2023 at 2:36 AM Viresh Kumar <viresh.kumar@...aro.org> wrote:
>
> On 31-07-23, 10:46, David Dai wrote:
> > diff --git a/drivers/cpufreq/virtual-cpufreq.c b/drivers/cpufreq/virtual-cpufreq.c
> > new file mode 100644
> > index 000000000000..66b0fd9b821c
> > --- /dev/null
> > +++ b/drivers/cpufreq/virtual-cpufreq.c
> > @@ -0,0 +1,237 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * Copyright (C) 2023 Google LLC
> > + */
> > +
> > +#include <linux/arch_topology.h>
> > +#include <linux/cpufreq.h>
> > +#include <linux/init.h>
> > +#include <linux/sched.h>
> > +#include <linux/kernel.h>
> > +#include <linux/module.h>
> > +#include <linux/of_address.h>
> > +#include <linux/of_platform.h>
> > +#include <linux/pm_opp.h>
> > +#include <linux/slab.h>
> > +
> > +#define REG_CUR_FREQ_OFFSET 0x0
> > +#define REG_SET_FREQ_OFFSET 0x4
> > +#define PER_CPU_OFFSET 0x8
> > +
> > +struct virt_cpufreq_ops {
> > +     void (*set_freq)(struct cpufreq_policy *policy, u32 freq);
> > +     u32 (*get_freq)(struct cpufreq_policy *policy);
> > +};
>
> Since you only have one implementation currently, this isn't really
> required. Keep the data as NULL in `virt_cpufreq_match` and use
> writel/readl directly.

Okay, I’ll remove the ops for now and bring it back in the future if required.

>
> This can be updated if we need more implementations later.
>
> > +struct virt_cpufreq_drv_data {
> > +     void __iomem *base;
> > +     const struct virt_cpufreq_ops *ops;
> > +};
> > +
> > +static void virt_cpufreq_set_freq(struct cpufreq_policy *policy, u32 freq)
> > +{
> > +     struct virt_cpufreq_drv_data *data = policy->driver_data;
> > +
> > +     writel_relaxed(freq, data->base + policy->cpu * PER_CPU_OFFSET
> > +                     + REG_SET_FREQ_OFFSET);
> > +}
> > +
> > +static u32 virt_cpufreq_get_freq(struct cpufreq_policy *policy)
> > +{
> > +     struct virt_cpufreq_drv_data *data = policy->driver_data;
> > +
> > +     return readl_relaxed(data->base + policy->cpu * PER_CPU_OFFSET
> > +                     + REG_CUR_FREQ_OFFSET);
>
> This doesn't look properly aligned. Please run checkpatch (--strict
> (optional)).

Ok.

>
> > +}
> > +
> > +static const struct virt_cpufreq_ops virt_freq_ops = {
> > +     .set_freq = virt_cpufreq_set_freq,
> > +     .get_freq = virt_cpufreq_get_freq,
> > +};
> > +
> > +static void virt_scale_freq_tick(void)
> > +{
> > +     struct cpufreq_policy *policy = cpufreq_cpu_get(smp_processor_id());
> > +     struct virt_cpufreq_drv_data *data = policy->driver_data;
> > +     u32 max_freq = (u32)policy->cpuinfo.max_freq;
> > +     u64 cur_freq;
> > +     u64 scale;
> > +
> > +     cpufreq_cpu_put(policy);
> > +
> > +     cur_freq = (u64)data->ops->get_freq(policy);
> > +     cur_freq <<= SCHED_CAPACITY_SHIFT;
> > +     scale = div_u64(cur_freq, max_freq);
> > +
> > +     this_cpu_write(arch_freq_scale, (unsigned long)scale);
> > +}
> > +
> > +static struct scale_freq_data virt_sfd = {
> > +     .source = SCALE_FREQ_SOURCE_VIRT,
> > +     .set_freq_scale = virt_scale_freq_tick,
> > +};
> > +
> > +static unsigned int virt_cpufreq_set_perf(struct cpufreq_policy *policy)
> > +{
> > +     struct virt_cpufreq_drv_data *data = policy->driver_data;
> > +     /*
> > +      * Use cached frequency to avoid rounding to freq table entries
> > +      * and undo 25% frequency boost applied by schedutil.
> > +      */
> > +     u32 freq = mult_frac(policy->cached_target_freq, 80, 100);
> > +
> > +     data->ops->set_freq(policy, freq);
> > +     return 0;
> > +}
> > +
> > +static unsigned int virt_cpufreq_fast_switch(struct cpufreq_policy *policy,
> > +             unsigned int target_freq)
> > +{
> > +     virt_cpufreq_set_perf(policy);
> > +     return target_freq;
> > +}
> > +
> > +static int virt_cpufreq_target_index(struct cpufreq_policy *policy,
> > +             unsigned int index)
> > +{
> > +     return virt_cpufreq_set_perf(policy);
> > +}
> > +
> > +static int virt_cpufreq_cpu_init(struct cpufreq_policy *policy)
> > +{
> > +     struct virt_cpufreq_drv_data *drv_data = cpufreq_get_driver_data();
> > +     struct cpufreq_frequency_table *table;
> > +     struct device *cpu_dev;
> > +     int ret;
> > +
> > +     cpu_dev = get_cpu_device(policy->cpu);
> > +     if (!cpu_dev)
> > +             return -ENODEV;
> > +
> > +     ret = dev_pm_opp_of_add_table(cpu_dev);
> > +     if (ret)
> > +             return ret;
> > +
> > +     ret = dev_pm_opp_get_opp_count(cpu_dev);
> > +     if (ret <= 0) {
> > +             dev_err(cpu_dev, "OPP table can't be empty\n");
> > +             return -ENODEV;
> > +     }
> > +
> > +     ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &table);
> > +     if (ret) {
> > +             dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret);
> > +             return ret;
> > +     }
> > +
> > +     policy->freq_table = table;
> > +     policy->dvfs_possible_from_any_cpu = false;
>
> Why can't we call virt_cpufreq_target_index() from any CPU ?
>
> > +     policy->fast_switch_possible = true;
> > +     policy->driver_data = drv_data;
> > +
> > +     /*
> > +      * Only takes effect if another FIE source such as AMUs
> > +      * have not been registered.
> > +      */
> > +     topology_set_scale_freq_source(&virt_sfd, policy->cpus);
> > +
> > +     return 0;
> > +
> > +}
> > +
> > +static int virt_cpufreq_cpu_exit(struct cpufreq_policy *policy)
> > +{
> > +     topology_clear_scale_freq_source(SCALE_FREQ_SOURCE_VIRT, policy->related_cpus);
> > +     kfree(policy->freq_table);
> > +     policy->freq_table = NULL;
> > +     return 0;
> > +}
> > +
> > +static int virt_cpufreq_online(struct cpufreq_policy *policy)
> > +{
> > +     /* Nothing to restore. */
> > +     return 0;
> > +}
> > +
> > +static int virt_cpufreq_offline(struct cpufreq_policy *policy)
> > +{
> > +     /* Dummy offline() to avoid exit() being called and freeing resources. */
> > +     return 0;
> > +}
> > +
> > +static struct cpufreq_driver cpufreq_virt_driver = {
> > +     .name           = "virt-cpufreq",
> > +     .init           = virt_cpufreq_cpu_init,
> > +     .exit           = virt_cpufreq_cpu_exit,
> > +     .online         = virt_cpufreq_online,
> > +     .offline        = virt_cpufreq_offline,
> > +     .verify         = cpufreq_generic_frequency_table_verify,
> > +     .target_index   = virt_cpufreq_target_index,
> > +     .fast_switch    = virt_cpufreq_fast_switch,
> > +     .attr           = cpufreq_generic_attr,
> > +};
> > +
> > +static int virt_cpufreq_driver_probe(struct platform_device *pdev)
> > +{
> > +     int ret;
> > +     struct virt_cpufreq_drv_data *drv_data;
> > +
> > +     drv_data = devm_kzalloc(&pdev->dev, sizeof(*drv_data), GFP_KERNEL);
> > +     if (!drv_data)
> > +             return -ENOMEM;
> > +
> > +     drv_data->ops = of_device_get_match_data(&pdev->dev);
> > +     if (!drv_data->ops)
> > +             return -EINVAL;
> > +
> > +     drv_data->base = devm_platform_ioremap_resource(pdev, 0);
> > +     if (IS_ERR(drv_data->base))
> > +             return PTR_ERR(drv_data->base);
> > +
> > +     cpufreq_virt_driver.driver_data = drv_data;
> > +
> > +     ret = cpufreq_register_driver(&cpufreq_virt_driver);
> > +     if (ret) {
> > +             dev_err(&pdev->dev, "Virtual CPUFreq driver failed to register: %d\n", ret);
> > +             return ret;
> > +     }
> > +
> > +     dev_dbg(&pdev->dev, "Virtual CPUFreq driver initialized\n");
> > +     return 0;
> > +}
> > +
> > +static int virt_cpufreq_driver_remove(struct platform_device *pdev)
> > +{
> > +     cpufreq_unregister_driver(&cpufreq_virt_driver);
> > +     return 0;
> > +}
> > +
> > +static const struct of_device_id virt_cpufreq_match[] = {
> > +     { .compatible = "virtual,cpufreq", .data = &virt_freq_ops},
> > +     {}
> > +};
> > +MODULE_DEVICE_TABLE(of, virt_cpufreq_match);
> > +
> > +static struct platform_driver virt_cpufreq_driver = {
> > +     .probe = virt_cpufreq_driver_probe,
> > +     .remove = virt_cpufreq_driver_remove,
> > +     .driver = {
> > +             .name = "virt-cpufreq",
> > +             .of_match_table = virt_cpufreq_match,
> > +     },
> > +};
> > +
> > +static int __init virt_cpufreq_init(void)
> > +{
> > +     return platform_driver_register(&virt_cpufreq_driver);
> > +}
> > +postcore_initcall(virt_cpufreq_init);
>
> Why do you want to use this and not module_init() ? Then you can
> simply use `module_platform_driver()`.

We found that using postcore_init over module_init results in a
small(2-3%) but measurable benefit during boot time for VMs, so this
is an optimization that I’d prefer to keep.

Thanks,
David

>
> > +
> > +static void __exit virt_cpufreq_exit(void)
> > +{
> > +     platform_driver_unregister(&virt_cpufreq_driver);
> > +}
> > +module_exit(virt_cpufreq_exit);
> > +
> > +MODULE_DESCRIPTION("Virtual cpufreq driver");
> > +MODULE_LICENSE("GPL");
>
> --
> viresh

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ