[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20230927033124.1226509-9-dapeng1.mi@linux.intel.com>
Date: Wed, 27 Sep 2023 11:31:19 +0800
From: Dapeng Mi <dapeng1.mi@...ux.intel.com>
To: Sean Christopherson <seanjc@...gle.com>,
Paolo Bonzini <pbonzini@...hat.com>,
Peter Zijlstra <peterz@...radead.org>,
Arnaldo Carvalho de Melo <acme@...nel.org>,
Kan Liang <kan.liang@...ux.intel.com>,
Like Xu <likexu@...cent.com>,
Mark Rutland <mark.rutland@....com>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
Jiri Olsa <jolsa@...nel.org>,
Namhyung Kim <namhyung@...nel.org>,
Ian Rogers <irogers@...gle.com>,
Adrian Hunter <adrian.hunter@...el.com>
Cc: kvm@...r.kernel.org, linux-perf-users@...r.kernel.org,
linux-kernel@...r.kernel.org,
Zhenyu Wang <zhenyuw@...ux.intel.com>,
Zhang Xiong <xiong.y.zhang@...el.com>,
Lv Zhiyuan <zhiyuan.lv@...el.com>,
Yang Weijiang <weijiang.yang@...el.com>,
Dapeng Mi <dapeng1.mi@...el.com>,
Dapeng Mi <dapeng1.mi@...ux.intel.com>
Subject: [Patch v4 08/13] perf/core: Add new function perf_event_topdown_metrics()
Add a new function perf_event_topdown_metrics(). This new function is
quite similar to perf_event_period(), but it updates the slots count and
the raw metrics data in the perf subsystem instead of the sample period.
When the guest restores the FIXED_CTR3 and PERF_METRICS MSRs during
sched-in, KVM needs to trap the MSR writes and set the guest's MSR values
into the corresponding perf events, just as perf_event_period() does.
Initially we tried to reuse perf_event_period() to set the slots/metrics
values, but found this to be quite hard. perf_event_period() only works
on sampling events, but unfortunately the slots event and the metric
events in topdown mode are all non-sampling events. The
perf_event_period() call chain contains a sampling-event check as well as
many checks and settings related to the sampling period. To reuse
perf_event_period(), we would have to add many if-else branches
throughout the entire call chain and even rename the function. This
would make perf_event_period() considerably messier.
Thus, we chose to create a new function, perf_event_topdown_metrics(), to
set the slots/metrics values. This keeps both the logic and the code clearer.
Signed-off-by: Dapeng Mi <dapeng1.mi@...ux.intel.com>
---
include/linux/perf_event.h | 13 ++++++++
kernel/events/core.c | 62 ++++++++++++++++++++++++++++++++++++++
2 files changed, 75 insertions(+)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 04e12a8e6584..10d737aab7fa 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1057,6 +1057,11 @@ perf_cgroup_from_task(struct task_struct *task, struct perf_event_context *ctx)
}
#endif /* CONFIG_CGROUP_PERF */
+struct td_metrics {
+ u64 slots;
+ u64 metric;
+};
+
#ifdef CONFIG_PERF_EVENTS
extern struct perf_event_context *perf_cpu_task_ctx(void);
@@ -1707,6 +1712,8 @@ extern void perf_event_task_tick(void);
extern int perf_event_account_interrupt(struct perf_event *event);
extern int perf_event_period(struct perf_event *event, u64 value);
extern u64 perf_event_pause(struct perf_event *event, bool reset);
+extern int perf_event_topdown_metrics(struct perf_event *event,
+ struct td_metrics *value);
#else /* !CONFIG_PERF_EVENTS: */
static inline void *
perf_aux_output_begin(struct perf_output_handle *handle,
@@ -1793,6 +1800,12 @@ static inline u64 perf_event_pause(struct perf_event *event, bool reset)
{
return 0;
}
+
+static inline int perf_event_topdown_metrics(struct perf_event *event,
+ struct td_metrics *value)
+{
+ return 0;
+}
#endif
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3cc870d450c5..500ffcd2c621 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5776,6 +5776,68 @@ int perf_event_period(struct perf_event *event, u64 value)
}
EXPORT_SYMBOL_GPL(perf_event_period);
+static void __perf_event_topdown_metrics(struct perf_event *event,
+ struct perf_cpu_context *cpuctx,
+ struct perf_event_context *ctx,
+ void *info)
+{
+ struct td_metrics *td_metrics = (struct td_metrics *)info;
+ bool active;
+
+ active = (event->state == PERF_EVENT_STATE_ACTIVE);
+ if (active) {
+ perf_pmu_disable(event->pmu);
+ /*
+ * We could be throttled; unthrottle now to avoid the tick
+ * trying to unthrottle while we already re-started the event.
+ */
+ if (event->hw.interrupts == MAX_INTERRUPTS) {
+ event->hw.interrupts = 0;
+ perf_log_throttle(event, 1);
+ }
+ event->pmu->stop(event, PERF_EF_UPDATE);
+ }
+
+ event->hw.saved_slots = td_metrics->slots;
+ event->hw.saved_metric = td_metrics->metric;
+
+ if (active) {
+ event->pmu->start(event, PERF_EF_RELOAD);
+ perf_pmu_enable(event->pmu);
+ }
+}
+
+static int _perf_event_topdown_metrics(struct perf_event *event,
+ struct td_metrics *value)
+{
+ /*
+ * Slots event in topdown metrics scenario
+ * must be non-sampling event.
+ */
+ if (is_sampling_event(event))
+ return -EINVAL;
+
+ if (!value)
+ return -EINVAL;
+
+ event_function_call(event, __perf_event_topdown_metrics, value);
+
+ return 0;
+}
+
+int perf_event_topdown_metrics(struct perf_event *event, struct td_metrics *value)
+{
+ struct perf_event_context *ctx;
+ int ret;
+
+ ctx = perf_event_ctx_lock(event);
+ ret = _perf_event_topdown_metrics(event, value);
+ perf_event_ctx_unlock(event, ctx);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(perf_event_topdown_metrics);
+
static const struct file_operations perf_fops;
static inline int perf_fget_light(int fd, struct fd *p)
--
2.34.1
Powered by blists - more mailing lists