Message-Id: <20241104135518.760214287@infradead.org>
Date: Mon, 04 Nov 2024 14:39:20 +0100
From: Peter Zijlstra <peterz@...radead.org>
To: mingo@...nel.org,
lucas.demarchi@...el.com
Cc: linux-kernel@...r.kernel.org,
peterz@...radead.org,
willy@...radead.org,
acme@...nel.org,
namhyung@...nel.org,
mark.rutland@....com,
alexander.shishkin@...ux.intel.com,
jolsa@...nel.org,
irogers@...gle.com,
adrian.hunter@...el.com,
kan.liang@...ux.intel.com
Subject: [PATCH 11/19] perf: Detach perf_cpu_pmu_context and pmu lifetimes
In preparation for being able to unregister a pmu with existing events,
it becomes important to detach the struct perf_cpu_pmu_context lifetime
from that of struct pmu.
Notably, perf_cpu_pmu_context embeds a perf_event_pmu_context that can
stay referenced until the last event goes away.
Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
---
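For anyone who wants to see the refcount dance in isolation, below is a
minimal userspace sketch of the lifetime split (plain C11, not kernel code;
names such as fake_pmu, fake_cpc, cpc_get and cpc_put are invented for
illustration -- the real patch additionally uses per-CPU allocations, an
extra reference on the embedded perf_event_pmu_context, and RCU-deferred
freeing):

/*
 * Standalone C11 model of the lifetime split -- NOT kernel code.
 * Each "CPU context" is a separately allocated, refcounted object.
 * The pmu holds one reference per context; events take their own.
 * Whichever side drops the last reference frees the context.
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdatomic.h>

#define NR_CPUS 4

struct fake_cpc {                         /* stands in for perf_cpu_pmu_context */
        atomic_int refcount;
        int cpu;
};

struct fake_pmu {                         /* stands in for struct pmu */
        struct fake_cpc *cpc[NR_CPUS];    /* pointers, no longer embedded storage */
};

static struct fake_cpc *cpc_get(struct fake_cpc *cpc)
{
        atomic_fetch_add(&cpc->refcount, 1);
        return cpc;
}

static void cpc_put(struct fake_cpc *cpc)
{
        /* Free only once the last user (pmu or event) has dropped its reference. */
        if (atomic_fetch_sub(&cpc->refcount, 1) == 1) {
                printf("cpu%d context freed\n", cpc->cpu);
                free(cpc);
        }
}

static int fake_pmu_register(struct fake_pmu *pmu)
{
        for (int cpu = 0; cpu < NR_CPUS; cpu++) {
                struct fake_cpc *cpc = calloc(1, sizeof(*cpc));

                if (!cpc)
                        return -1;
                cpc->cpu = cpu;
                atomic_init(&cpc->refcount, 1);   /* the pmu's own reference */
                pmu->cpc[cpu] = cpc;
        }
        return 0;
}

static void fake_pmu_unregister(struct fake_pmu *pmu)
{
        /* Drop only the pmu's references; contexts pinned by events survive. */
        for (int cpu = 0; cpu < NR_CPUS; cpu++) {
                if (pmu->cpc[cpu]) {
                        cpc_put(pmu->cpc[cpu]);
                        pmu->cpc[cpu] = NULL;
                }
        }
}

int main(void)
{
        struct fake_pmu pmu = { 0 };

        if (fake_pmu_register(&pmu)) {
                fake_pmu_unregister(&pmu);        /* free whatever was allocated */
                return 1;
        }

        /* An "event" takes its own reference on the cpu1 context. */
        struct fake_cpc *event_ref = cpc_get(pmu.cpc[1]);

        fake_pmu_unregister(&pmu);                /* cpu0, cpu2, cpu3 freed here */
        cpc_put(event_ref);                       /* last reference: cpu1 freed here */
        return 0;
}

The point of the sketch: unregistering the pmu only drops the pmu's own
references, so any per-CPU context still pinned by an event stays valid
until that event releases it.
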
include/linux/perf_event.h | 4 +--
kernel/events/core.c | 56 +++++++++++++++++++++++++++++++++++++--------
2 files changed, 49 insertions(+), 11 deletions(-)
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -336,7 +336,7 @@ struct pmu {
*/
unsigned int scope;
- struct perf_cpu_pmu_context __percpu *cpu_pmu_context;
+ struct perf_cpu_pmu_context __percpu **cpu_pmu_context;
atomic_t exclusive_cnt; /* < 0: cpu; > 0: tsk */
int task_ctx_nr;
int hrtimer_interval_ms;
@@ -901,7 +901,7 @@ struct perf_event_pmu_context {
struct list_head pinned_active;
struct list_head flexible_active;
- /* Used to avoid freeing per-cpu perf_event_pmu_context */
+ /* Used to identify the per-cpu perf_event_pmu_context */
unsigned int embedded : 1;
unsigned int nr_events;
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1178,7 +1178,7 @@ static int perf_mux_hrtimer_restart_ipi(
static __always_inline struct perf_cpu_pmu_context *this_cpc(struct pmu *pmu)
{
- return this_cpu_ptr(pmu->cpu_pmu_context);
+ return *this_cpu_ptr(pmu->cpu_pmu_context);
}
void perf_pmu_disable(struct pmu *pmu)
@@ -4971,11 +4971,14 @@ find_get_pmu_context(struct pmu *pmu, st
*/
struct perf_cpu_pmu_context *cpc;
- cpc = per_cpu_ptr(pmu->cpu_pmu_context, event->cpu);
+ cpc = *per_cpu_ptr(pmu->cpu_pmu_context, event->cpu);
epc = &cpc->epc;
raw_spin_lock_irq(&ctx->lock);
if (!epc->ctx) {
- atomic_set(&epc->refcount, 1);
+ /*
+ * One extra reference for the pmu; see perf_pmu_free().
+ */
+ atomic_set(&epc->refcount, 2);
epc->embedded = 1;
list_add(&epc->pmu_ctx_entry, &ctx->pmu_ctx_list);
epc->ctx = ctx;
@@ -5044,6 +5047,15 @@ static void get_pmu_ctx(struct perf_even
WARN_ON_ONCE(!atomic_inc_not_zero(&epc->refcount));
}
+static void free_cpc_rcu(struct rcu_head *head)
+{
+ struct perf_cpu_pmu_context *cpc =
+ container_of(head, typeof(*cpc), epc.rcu_head);
+
+ kfree(cpc->epc.task_ctx_data);
+ kfree(cpc);
+}
+
static void free_epc_rcu(struct rcu_head *head)
{
struct perf_event_pmu_context *epc = container_of(head, typeof(*epc), rcu_head);
@@ -5078,8 +5090,10 @@ static void put_pmu_ctx(struct perf_even
raw_spin_unlock_irqrestore(&ctx->lock, flags);
- if (epc->embedded)
+ if (epc->embedded) {
+ call_rcu(&epc->rcu_head, free_cpc_rcu);
return;
+ }
call_rcu(&epc->rcu_head, free_epc_rcu);
}
@@ -11595,7 +11609,7 @@ perf_event_mux_interval_ms_store(struct
cpus_read_lock();
for_each_online_cpu(cpu) {
struct perf_cpu_pmu_context *cpc;
- cpc = per_cpu_ptr(pmu->cpu_pmu_context, cpu);
+ cpc = *per_cpu_ptr(pmu->cpu_pmu_context, cpu);
cpc->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer);
cpu_function_call(cpu, perf_mux_hrtimer_restart_ipi, cpc);
@@ -11767,7 +11781,25 @@ static void perf_pmu_free(struct pmu *pm
device_del(pmu->dev);
put_device(pmu->dev);
}
- free_percpu(pmu->cpu_pmu_context);
+
+ if (pmu->cpu_pmu_context) {
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct perf_cpu_pmu_context *cpc;
+
+ cpc = *per_cpu_ptr(pmu->cpu_pmu_context, cpu);
+ if (!cpc)
+ continue;
+ if (cpc->epc.embedded) {
+ /* refcount managed */
+ put_pmu_ctx(&cpc->epc);
+ continue;
+ }
+ kfree(cpc);
+ }
+ free_percpu(pmu->cpu_pmu_context);
+ }
}
DEFINE_FREE(pmu_unregister, struct pmu *, if (_T) perf_pmu_free(_T))
@@ -11806,14 +11838,20 @@ int perf_pmu_register(struct pmu *_pmu,
return ret;
}
- pmu->cpu_pmu_context = alloc_percpu(struct perf_cpu_pmu_context);
+ pmu->cpu_pmu_context = alloc_percpu(struct perf_cpu_pmu_context *);
if (!pmu->cpu_pmu_context)
return -ENOMEM;
for_each_possible_cpu(cpu) {
- struct perf_cpu_pmu_context *cpc;
+ struct perf_cpu_pmu_context *cpc =
+ kmalloc_node(sizeof(struct perf_cpu_pmu_context),
+ GFP_KERNEL | __GFP_ZERO,
+ cpu_to_node(cpu));
+
+ if (!cpc)
+ return -ENOMEM;
- cpc = per_cpu_ptr(pmu->cpu_pmu_context, cpu);
+ *per_cpu_ptr(pmu->cpu_pmu_context, cpu) = cpc;
__perf_init_event_pmu_context(&cpc->epc, pmu);
__perf_mux_hrtimer_init(cpc, cpu);
}