lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250324173121.1275209-10-mizhang@google.com>
Date: Mon, 24 Mar 2025 17:30:49 +0000
From: Mingwei Zhang <mizhang@...gle.com>
To: Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...hat.com>, 
	Arnaldo Carvalho de Melo <acme@...nel.org>, Namhyung Kim <namhyung@...nel.org>, 
	Sean Christopherson <seanjc@...gle.com>, Paolo Bonzini <pbonzini@...hat.com>
Cc: Mark Rutland <mark.rutland@....com>, 
	Alexander Shishkin <alexander.shishkin@...ux.intel.com>, Jiri Olsa <jolsa@...nel.org>, 
	Ian Rogers <irogers@...gle.com>, Adrian Hunter <adrian.hunter@...el.com>, Liang@...gle.com, 
	Kan <kan.liang@...ux.intel.com>, "H. Peter Anvin" <hpa@...or.com>, 
	linux-perf-users@...r.kernel.org, linux-kernel@...r.kernel.org, 
	kvm@...r.kernel.org, linux-kselftest@...r.kernel.org, 
	Mingwei Zhang <mizhang@...gle.com>, Yongwei Ma <yongwei.ma@...el.com>, 
	Xiong Zhang <xiong.y.zhang@...ux.intel.com>, Dapeng Mi <dapeng1.mi@...ux.intel.com>, 
	Jim Mattson <jmattson@...gle.com>, Sandipan Das <sandipan.das@....com>, 
	Zide Chen <zide.chen@...el.com>, Eranian Stephane <eranian@...gle.com>, 
	Das Sandipan <Sandipan.Das@....com>, Shukla Manali <Manali.Shukla@....com>, 
	Nikunj Dadhania <nikunj.dadhania@....com>
Subject: [PATCH v4 09/38] perf: Add switch_guest_ctx() interface

From: Kan Liang <kan.liang@...ux.intel.com>

When entering/exiting a guest, some contexts for the guest have to be
switched. For example, there is a dedicated interrupt vector for
guests on Intel platforms.

When the PMI is switched to the new guest vector, the guest_lvtpc value
needs to be reflected onto HW, e.g., when the guest clears the PMI mask
bit, the HW PMI mask bit should be cleared as well, so that PMIs can
continue to be generated for the guest. So a guest_lvtpc parameter is
added to perf_guest_enter() and passed on to switch_guest_ctx().

Add a dedicated per-CPU list to track all the PMUs with the
PERF_PMU_CAP_MEDIATED_VPMU cap, which may require switching the guest
context. This avoids walking the huge global pmus list.

Suggested-by: Peter Zijlstra (Intel) <peterz@...radead.org>
Signed-off-by: Kan Liang <kan.liang@...ux.intel.com>
Signed-off-by: Mingwei Zhang <mizhang@...gle.com>
---
 include/linux/perf_event.h | 17 +++++++++++--
 kernel/events/core.c       | 51 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 65 insertions(+), 3 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 37187ee8e226..58c1cf6939bf 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -584,6 +584,11 @@ struct pmu {
 	 * Check period value for PERF_EVENT_IOC_PERIOD ioctl.
 	 */
 	int (*check_period)		(struct perf_event *event, u64 value); /* optional */
+
+	/*
+	 * Switch guest context when a guest enter/exit, e.g., interrupt vectors.
+	 */
+	void (*switch_guest_ctx)	(bool enter, void *data); /* optional */
 };
 
 enum perf_addr_filter_action_t {
@@ -1030,6 +1035,11 @@ struct perf_event_context {
 	local_t				nr_no_switch_fast;
 };
 
+struct mediated_pmus_list {
+	raw_spinlock_t		lock;
+	struct list_head	list;
+};
+
 struct perf_cpu_pmu_context {
 	struct perf_event_pmu_context	epc;
 	struct perf_event_pmu_context	*task_epc;
@@ -1044,6 +1054,9 @@ struct perf_cpu_pmu_context {
 	struct hrtimer			hrtimer;
 	ktime_t				hrtimer_interval;
 	unsigned int			hrtimer_active;
+
+	/* Track the PMU with PERF_PMU_CAP_MEDIATED_VPMU cap */
+	struct list_head		mediated_entry;
 };
 
 /**
@@ -1822,7 +1835,7 @@ extern int perf_event_period(struct perf_event *event, u64 value);
 extern u64 perf_event_pause(struct perf_event *event, bool reset);
 int perf_get_mediated_pmu(void);
 void perf_put_mediated_pmu(void);
-void perf_guest_enter(void);
+void perf_guest_enter(u32 guest_lvtpc);
 void perf_guest_exit(void);
 #else /* !CONFIG_PERF_EVENTS: */
 static inline void *
@@ -1921,7 +1934,7 @@ static inline int perf_get_mediated_pmu(void)
 }
 
 static inline void perf_put_mediated_pmu(void)			{ }
-static inline void perf_guest_enter(void)			{ }
+static inline void perf_guest_enter(u32 guest_lvtpc)		{ }
 static inline void perf_guest_exit(void)			{ }
 #endif
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index d05487d465c9..406b86641f02 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -451,6 +451,7 @@ static inline bool is_include_guest_event(struct perf_event *event)
 static LIST_HEAD(pmus);
 static DEFINE_MUTEX(pmus_lock);
 static struct srcu_struct pmus_srcu;
+static DEFINE_PER_CPU(struct mediated_pmus_list, mediated_pmus);
 static cpumask_var_t perf_online_mask;
 static cpumask_var_t perf_online_core_mask;
 static cpumask_var_t perf_online_die_mask;
@@ -6053,8 +6054,26 @@ static inline void perf_host_exit(struct perf_cpu_context *cpuctx)
 	}
 }
 
+static void perf_switch_guest_ctx(bool enter, u32 guest_lvtpc)
+{
+	struct mediated_pmus_list *pmus = this_cpu_ptr(&mediated_pmus);
+	struct perf_cpu_pmu_context *cpc;
+	struct pmu *pmu;
+
+	lockdep_assert_irqs_disabled();
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(cpc, &pmus->list, mediated_entry) {
+		pmu = cpc->epc.pmu;
+
+		if (pmu->switch_guest_ctx)
+			pmu->switch_guest_ctx(enter, (void *)&guest_lvtpc);
+	}
+	rcu_read_unlock();
+}
+
 /* When entering a guest, schedule out all exclude_guest events. */
-void perf_guest_enter(void)
+void perf_guest_enter(u32 guest_lvtpc)
 {
 	struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
 
@@ -6067,6 +6086,8 @@ void perf_guest_enter(void)
 
 	perf_host_exit(cpuctx);
 
+	perf_switch_guest_ctx(true, guest_lvtpc);
+
 	__this_cpu_write(perf_in_guest, true);
 
 unlock:
@@ -6098,6 +6119,8 @@ void perf_guest_exit(void)
 	if (WARN_ON_ONCE(!__this_cpu_read(perf_in_guest)))
 		goto unlock;
 
+	perf_switch_guest_ctx(false, 0);
+
 	perf_host_enter(cpuctx);
 
 	__this_cpu_write(perf_in_guest, false);
@@ -12104,6 +12127,15 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type)
 		cpc = per_cpu_ptr(pmu->cpu_pmu_context, cpu);
 		__perf_init_event_pmu_context(&cpc->epc, pmu);
 		__perf_mux_hrtimer_init(cpc, cpu);
+
+		if (pmu->capabilities & PERF_PMU_CAP_MEDIATED_VPMU) {
+			struct mediated_pmus_list *pmus;
+
+			pmus = per_cpu_ptr(&mediated_pmus, cpu);
+			raw_spin_lock(&pmus->lock);
+			list_add_rcu(&cpc->mediated_entry, &pmus->list);
+			raw_spin_unlock(&pmus->lock);
+		}
 	}
 
 	if (!pmu->start_txn) {
@@ -12162,6 +12194,20 @@ void perf_pmu_unregister(struct pmu *pmu)
 	mutex_lock(&pmus_lock);
 	list_del_rcu(&pmu->entry);
 
+	if (pmu->capabilities & PERF_PMU_CAP_MEDIATED_VPMU) {
+		struct mediated_pmus_list *pmus;
+		struct perf_cpu_pmu_context *cpc;
+		int cpu;
+
+		for_each_possible_cpu(cpu) {
+			cpc = per_cpu_ptr(pmu->cpu_pmu_context, cpu);
+			pmus = per_cpu_ptr(&mediated_pmus, cpu);
+			raw_spin_lock(&pmus->lock);
+			list_del_rcu(&cpc->mediated_entry);
+			raw_spin_unlock(&pmus->lock);
+		}
+	}
+
 	/*
 	 * We dereference the pmu list under both SRCU and regular RCU, so
 	 * synchronize against both of those.
@@ -14252,6 +14298,9 @@ static void __init perf_event_init_all_cpus(void)
 
 		INIT_LIST_HEAD(&per_cpu(sched_cb_list, cpu));
 
+		INIT_LIST_HEAD(&per_cpu(mediated_pmus.list, cpu));
+		raw_spin_lock_init(&per_cpu(mediated_pmus.lock, cpu));
+
 		cpuctx = per_cpu_ptr(&perf_cpu_context, cpu);
 		__perf_event_init_context(&cpuctx->ctx);
 		lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex);
-- 
2.49.0.395.g12beb8f557-goog


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ