linux-kernel - [tip: perf/core] perf: Add APIs to load/put guest mediated PMU context

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives

Hash Suite: Windows password security audit tool. GUI, reports in PDF.

[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]

Message-ID: <176597509246.510.17927277797275133035.tip-bot2@tip-bot2>
Date: Wed, 17 Dec 2025 12:38:12 -0000
From: "tip-bot2 for Kan Liang" <tip-bot2@...utronix.de>
To: linux-tip-commits@...r.kernel.org
Cc: Sean Christopherson <seanjc@...gle.com>,
 Kan Liang <kan.liang@...ux.intel.com>, Mingwei Zhang <mizhang@...gle.com>,
 "Peter Zijlstra (Intel)" <peterz@...radead.org>,
 Xudong Hao <xudong.hao@...el.com>, x86@...nel.org,
 linux-kernel@...r.kernel.org
Subject:
 [tip: perf/core] perf: Add APIs to load/put guest mediated PMU context

The following commit has been merged into the perf/core branch of tip:

Commit-ID:     42457a7fb6cacca83be4deaf202ac3e45830daf2
Gitweb:        https://git.kernel.org/tip/42457a7fb6cacca83be4deaf202ac3e45830daf2
Author:        Kan Liang <kan.liang@...ux.intel.com>
AuthorDate:    Fri, 05 Dec 2025 16:16:43 -08:00
Committer:     Peter Zijlstra <peterz@...radead.org>
CommitterDate: Wed, 17 Dec 2025 13:31:05 +01:00

perf: Add APIs to load/put guest mediated PMU context

Add exported APIs to load/put a guest mediated PMU context.  KVM will
load the guest PMU shortly before VM-Enter, and put the guest PMU shortly
after VM-Exit.

On the perf side of things, schedule out all exclude_guest events when the
guest context is loaded, and schedule them back in when the guest context
is put.  I.e. yield the hardware PMU resources to the guest, by way of KVM.

Note, perf is only responsible for managing host context.  KVM is
responsible for loading/storing guest state to/from hardware.

[sean: shuffle patches around, write changelog]
Suggested-by: Sean Christopherson <seanjc@...gle.com>
Signed-off-by: Kan Liang <kan.liang@...ux.intel.com>
Signed-off-by: Mingwei Zhang <mizhang@...gle.com>
Signed-off-by: Sean Christopherson <seanjc@...gle.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
Tested-by: Xudong Hao <xudong.hao@...el.com>
Link: https://patch.msgid.link/20251206001720.468579-8-seanjc@google.com
---
 include/linux/perf_event.h |  2 +-
 kernel/events/core.c       | 61 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 63 insertions(+)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index d9988e3..322cfa9 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1925,6 +1925,8 @@ extern u64 perf_event_pause(struct perf_event *event, bool reset);
 #ifdef CONFIG_PERF_GUEST_MEDIATED_PMU
 int perf_create_mediated_pmu(void);
 void perf_release_mediated_pmu(void);
+void perf_load_guest_context(void);
+void perf_put_guest_context(void);
 #endif
 
 #else /* !CONFIG_PERF_EVENTS: */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6781d39..bbb81a4 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -470,10 +470,19 @@ static cpumask_var_t perf_online_pkg_mask;
 static cpumask_var_t perf_online_sys_mask;
 static struct kmem_cache *perf_event_cache;
 
+#ifdef CONFIG_PERF_GUEST_MEDIATED_PMU
+static DEFINE_PER_CPU(bool, guest_ctx_loaded);
+
+static __always_inline bool is_guest_mediated_pmu_loaded(void)
+{
+	return __this_cpu_read(guest_ctx_loaded);
+}
+#else
 static __always_inline bool is_guest_mediated_pmu_loaded(void)
 {
 	return false;
 }
+#endif
 
 /*
  * perf event paranoia level:
@@ -6384,6 +6393,58 @@ void perf_release_mediated_pmu(void)
 	atomic_dec(&nr_mediated_pmu_vms);
 }
 EXPORT_SYMBOL_GPL(perf_release_mediated_pmu);
+
+/* When loading a guest's mediated PMU, schedule out all exclude_guest events. */
+void perf_load_guest_context(void)
+{
+	struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
+
+	lockdep_assert_irqs_disabled();
+
+	guard(perf_ctx_lock)(cpuctx, cpuctx->task_ctx);
+
+	if (WARN_ON_ONCE(__this_cpu_read(guest_ctx_loaded)))
+		return;
+
+	perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
+	ctx_sched_out(&cpuctx->ctx, NULL, EVENT_GUEST);
+	if (cpuctx->task_ctx) {
+		perf_ctx_disable(cpuctx->task_ctx, EVENT_GUEST);
+		task_ctx_sched_out(cpuctx->task_ctx, NULL, EVENT_GUEST);
+	}
+
+	perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);
+	if (cpuctx->task_ctx)
+		perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);
+
+	__this_cpu_write(guest_ctx_loaded, true);
+}
+EXPORT_SYMBOL_GPL(perf_load_guest_context);
+
+void perf_put_guest_context(void)
+{
+	struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
+
+	lockdep_assert_irqs_disabled();
+
+	guard(perf_ctx_lock)(cpuctx, cpuctx->task_ctx);
+
+	if (WARN_ON_ONCE(!__this_cpu_read(guest_ctx_loaded)))
+		return;
+
+	perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
+	if (cpuctx->task_ctx)
+		perf_ctx_disable(cpuctx->task_ctx, EVENT_GUEST);
+
+	perf_event_sched_in(cpuctx, cpuctx->task_ctx, NULL, EVENT_GUEST);
+
+	if (cpuctx->task_ctx)
+		perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);
+	perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);
+
+	__this_cpu_write(guest_ctx_loaded, false);
+}
+EXPORT_SYMBOL_GPL(perf_put_guest_context);
 #else
 static int mediated_pmu_account_event(struct perf_event *event) { return 0; }
 static void mediated_pmu_unaccount_event(struct perf_event *event) {}