[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <05359260-336f-4047-bc3a-003ace5ad7c4@arm.com>
Date: Mon, 6 Oct 2025 09:17:43 +0100
From: Lukasz Luba <lukasz.luba@....com>
To: Changwoo Min <changwoo@...lia.com>
Cc: christian.loehle@....com, tj@...nel.org, pavel@...nel.org,
len.brown@...el.com, rafael@...nel.org, kernel-dev@...lia.com,
linux-pm@...r.kernel.org, sched-ext@...ts.linux.dev,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH RESEND v4 01/10] PM: EM: Assign a unique ID when creating
a performance domain
Hi Changwoo,
My apologies for the delay on this topic.
On 9/21/25 04:19, Changwoo Min wrote:
> It is necessary to refer to a specific performance domain from a
> userspace. For example, the energy model of a particular performance
> domain is updated.
>
> To this end, assign a unique ID to each performance domain to address it,
Is this related to the sched_ext view on the EM that we cannot re-use
the allocated ID for the given domain?
> and manage them in a global linked list to look up a specific one by
> matching ID. IDA is used for ID assignment, and the mutex is used to
> protect the global list from concurrent access.
>
> Note that the mutex (em_pd_list_mutex) is not supposed to hold while
> holding em_pd_mutex to avoid ABBA deadlock.
This might be a tricky design, but I have seen that in some other
patches you've added lockdep, so we might have some safety net.
>
> Signed-off-by: Changwoo Min <changwoo@...lia.com>
> ---
> include/linux/energy_model.h | 4 ++++
> kernel/power/energy_model.c | 33 ++++++++++++++++++++++++++++++++-
> 2 files changed, 36 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
> index 61d50571ad88..43aa6153dc57 100644
> --- a/include/linux/energy_model.h
> +++ b/include/linux/energy_model.h
> @@ -54,6 +54,8 @@ struct em_perf_table {
> /**
> * struct em_perf_domain - Performance domain
> * @em_table: Pointer to the runtime modifiable em_perf_table
> + * @node: node in em_pd_list (in energy_model.c)
> + * @id: A unique ID number for each performance domain
> * @nr_perf_states: Number of performance states
> * @min_perf_state: Minimum allowed Performance State index
> * @max_perf_state: Maximum allowed Performance State index
> @@ -71,6 +73,8 @@ struct em_perf_table {
> */
> struct em_perf_domain {
> struct em_perf_table __rcu *em_table;
> + struct list_head node;
> + int id;
> int nr_perf_states;
> int min_perf_state;
> int max_perf_state;
> diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c
> index 8df55397414a..3fe562b6230e 100644
> --- a/kernel/power/energy_model.c
> +++ b/kernel/power/energy_model.c
> @@ -23,6 +23,16 @@
> */
> static DEFINE_MUTEX(em_pd_mutex);
>
> +/*
> + * Manage performance domains with IDs. One can iterate the performance domains
> + * through the list and pick one with their associated ID. The mutex serializes
> + * the list access. When holding em_pd_list_mutex, em_pd_mutex should not be
> + * taken to avoid potential deadlock.
> + */
> +static DEFINE_IDA(em_pd_ida);
> +static LIST_HEAD(em_pd_list);
> +static DEFINE_MUTEX(em_pd_list_mutex);
> +
> static void em_cpufreq_update_efficiencies(struct device *dev,
> struct em_perf_state *table);
> static void em_check_capacity_update(void);
> @@ -396,7 +406,7 @@ static int em_create_pd(struct device *dev, int nr_states,
> struct em_perf_table *em_table;
> struct em_perf_domain *pd;
> struct device *cpu_dev;
> - int cpu, ret, num_cpus;
> + int cpu, ret, num_cpus, id;
>
> if (_is_cpu_device(dev)) {
> num_cpus = cpumask_weight(cpus);
> @@ -420,6 +430,13 @@ static int em_create_pd(struct device *dev, int nr_states,
>
> pd->nr_perf_states = nr_states;
>
> + INIT_LIST_HEAD(&pd->node);
> +
> + id = ida_alloc(&em_pd_ida, GFP_KERNEL);
> + if (id < 0)
> + return -ENOMEM;
> + pd->id = id;
> +
> em_table = em_table_alloc(pd);
> if (!em_table)
> goto free_pd;
> @@ -444,6 +461,7 @@ static int em_create_pd(struct device *dev, int nr_states,
> kfree(em_table);
> free_pd:
> kfree(pd);
> + ida_free(&em_pd_ida, id);
> return -EINVAL;
> }
>
> @@ -660,6 +678,13 @@ int em_dev_register_pd_no_update(struct device *dev, unsigned int nr_states,
> unlock:
> mutex_unlock(&em_pd_mutex);
>
> + if (_is_cpu_device(dev))
> + em_check_capacity_update();
> +
> + mutex_lock(&em_pd_list_mutex);
> + list_add_tail(&dev->em_pd->node, &em_pd_list);
> + mutex_unlock(&em_pd_list_mutex);
> +
> return ret;
> }
> EXPORT_SYMBOL_GPL(em_dev_register_pd_no_update);
> @@ -678,6 +703,10 @@ void em_dev_unregister_perf_domain(struct device *dev)
> if (_is_cpu_device(dev))
> return;
>
> + mutex_lock(&em_pd_list_mutex);
> + list_del_init(&dev->em_pd->node);
> + mutex_unlock(&em_pd_list_mutex);
> +
> /*
> * The mutex separates all register/unregister requests and protects
> * from potential clean-up/setup issues in the debugfs directories.
> @@ -689,6 +718,8 @@ void em_dev_unregister_perf_domain(struct device *dev)
> em_table_free(rcu_dereference_protected(dev->em_pd->em_table,
> lockdep_is_held(&em_pd_mutex)));
>
> + ida_free(&em_pd_ida, dev->em_pd->id);
> +
> kfree(dev->em_pd);
> dev->em_pd = NULL;
> mutex_unlock(&em_pd_mutex);
Apart from that, the code itself looks sane.
Regards,
Lukasz
Powered by blists - more mailing lists