lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Thu, 26 Aug 2021 17:57:08 -0700
From:   Sean Christopherson <seanjc@...gle.com>
To:     Will Deacon <will@...nel.org>, Mark Rutland <mark.rutland@....com>,
        Peter Zijlstra <peterz@...radead.org>,
        Ingo Molnar <mingo@...hat.com>,
        Arnaldo Carvalho de Melo <acme@...nel.org>,
        Catalin Marinas <catalin.marinas@....com>,
        Marc Zyngier <maz@...nel.org>, Guo Ren <guoren@...nel.org>,
        Nick Hu <nickhu@...estech.com>,
        Greentime Hu <green.hu@...il.com>,
        Vincent Chen <deanbo422@...il.com>,
        Paul Walmsley <paul.walmsley@...ive.com>,
        Palmer Dabbelt <palmer@...belt.com>,
        Albert Ou <aou@...s.berkeley.edu>,
        Thomas Gleixner <tglx@...utronix.de>,
        Borislav Petkov <bp@...en8.de>, x86@...nel.org,
        Paolo Bonzini <pbonzini@...hat.com>,
        Boris Ostrovsky <boris.ostrovsky@...cle.com>,
        Juergen Gross <jgross@...e.com>
Cc:     Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
        Jiri Olsa <jolsa@...hat.com>,
        Namhyung Kim <namhyung@...nel.org>,
        James Morse <james.morse@....com>,
        Alexandru Elisei <alexandru.elisei@....com>,
        Suzuki K Poulose <suzuki.poulose@....com>,
        "H. Peter Anvin" <hpa@...or.com>,
        Sean Christopherson <seanjc@...gle.com>,
        Vitaly Kuznetsov <vkuznets@...hat.com>,
        Wanpeng Li <wanpengli@...cent.com>,
        Jim Mattson <jmattson@...gle.com>,
        Joerg Roedel <joro@...tes.org>,
        Stefano Stabellini <sstabellini@...nel.org>,
        linux-perf-users@...r.kernel.org, linux-kernel@...r.kernel.org,
        linux-arm-kernel@...ts.infradead.org, kvmarm@...ts.cs.columbia.edu,
        linux-csky@...r.kernel.org, linux-riscv@...ts.infradead.org,
        kvm@...r.kernel.org, xen-devel@...ts.xenproject.org,
        Artem Kashkanov <artem.kashkanov@...el.com>,
        Like Xu <like.xu.linux@...il.com>,
        Zhu Lingshan <lingshan.zhu@...el.com>
Subject: [PATCH 05/15] perf: Track guest callbacks on a per-CPU basis

Use a per-CPU pointer to track perf's guest callbacks so that KVM can set
the callbacks more precisely and avoid a lurking NULL pointer dereference.
On x86, KVM supports being built as a module and thus can be unloaded.
And because the shared callbacks are referenced from IRQ/NMI context,
unloading KVM can run concurrently with perf, and thus all of perf's
checks for a NULL perf_guest_cbs are flawed as perf_guest_cbs could be
nullified between the check and dereference.

In practice, this has not been problematic because the callbacks are
always guarded with a "perf_guest_cbs && perf_guest_cbs->is_in_guest()"
pattern, and it's extremely unlikely the compiler will choost to reload
perf_guest_cbs in that particular sequence.  Because is_in_guest() is
obviously true only when KVM is running a guest, perf always wins the
race to the guarded code (which does often reload perf_guest_cbs) as KVM
has to stop running all guests and do a heavy teardown before unloading.

Cc: Zhu Lingshan <lingshan.zhu@...el.com>
Signed-off-by: Sean Christopherson <seanjc@...gle.com>
---
 arch/arm64/kernel/perf_callchain.c | 18 ++++++++++++------
 arch/x86/events/core.c             | 17 +++++++++++------
 arch/x86/events/intel/core.c       |  8 +++++---
 include/linux/perf_event.h         |  2 +-
 kernel/events/core.c               | 12 +++++++++---
 5 files changed, 38 insertions(+), 19 deletions(-)

diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c
index 4a72c2727309..38555275c6a2 100644
--- a/arch/arm64/kernel/perf_callchain.c
+++ b/arch/arm64/kernel/perf_callchain.c
@@ -102,7 +102,9 @@ compat_user_backtrace(struct compat_frame_tail __user *tail,
 void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
 			 struct pt_regs *regs)
 {
-	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+	struct perf_guest_info_callbacks *guest_cbs = this_cpu_read(perf_guest_cbs);
+
+	if (guest_cbs && guest_cbs->is_in_guest()) {
 		/* We don't support guest os callchain now */
 		return;
 	}
@@ -147,9 +149,10 @@ static bool callchain_trace(void *data, unsigned long pc)
 void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
 			   struct pt_regs *regs)
 {
+	struct perf_guest_info_callbacks *guest_cbs = this_cpu_read(perf_guest_cbs);
 	struct stackframe frame;
 
-	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+	if (guest_cbs && guest_cbs->is_in_guest()) {
 		/* We don't support guest os callchain now */
 		return;
 	}
@@ -160,18 +163,21 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
 
 unsigned long perf_instruction_pointer(struct pt_regs *regs)
 {
-	if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
-		return perf_guest_cbs->get_guest_ip();
+	struct perf_guest_info_callbacks *guest_cbs = this_cpu_read(perf_guest_cbs);
+
+	if (guest_cbs && guest_cbs->is_in_guest())
+		return guest_cbs->get_guest_ip();
 
 	return instruction_pointer(regs);
 }
 
 unsigned long perf_misc_flags(struct pt_regs *regs)
 {
+	struct perf_guest_info_callbacks *guest_cbs = this_cpu_read(perf_guest_cbs);
 	int misc = 0;
 
-	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
-		if (perf_guest_cbs->is_user_mode())
+	if (guest_cbs && guest_cbs->is_in_guest()) {
+		if (guest_cbs->is_user_mode())
 			misc |= PERF_RECORD_MISC_GUEST_USER;
 		else
 			misc |= PERF_RECORD_MISC_GUEST_KERNEL;
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 1eb45139fcc6..34155a52e498 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2761,10 +2761,11 @@ static bool perf_hw_regs(struct pt_regs *regs)
 void
 perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
 {
+	struct perf_guest_info_callbacks *guest_cbs = this_cpu_read(perf_guest_cbs);
 	struct unwind_state state;
 	unsigned long addr;
 
-	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+	if (guest_cbs && guest_cbs->is_in_guest()) {
 		/* TODO: We don't support guest os callchain now */
 		return;
 	}
@@ -2864,10 +2865,11 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent
 void
 perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
 {
+	struct perf_guest_info_callbacks *guest_cbs = this_cpu_read(perf_guest_cbs);
 	struct stack_frame frame;
 	const struct stack_frame __user *fp;
 
-	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+	if (guest_cbs && guest_cbs->is_in_guest()) {
 		/* TODO: We don't support guest os callchain now */
 		return;
 	}
@@ -2944,18 +2946,21 @@ static unsigned long code_segment_base(struct pt_regs *regs)
 
 unsigned long perf_instruction_pointer(struct pt_regs *regs)
 {
-	if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
-		return perf_guest_cbs->get_guest_ip();
+	struct perf_guest_info_callbacks *guest_cbs = this_cpu_read(perf_guest_cbs);
+
+	if (guest_cbs && guest_cbs->is_in_guest())
+		return guest_cbs->get_guest_ip();
 
 	return regs->ip + code_segment_base(regs);
 }
 
 unsigned long perf_misc_flags(struct pt_regs *regs)
 {
+	struct perf_guest_info_callbacks *guest_cbs = this_cpu_read(perf_guest_cbs);
 	int misc = 0;
 
-	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
-		if (perf_guest_cbs->is_user_mode())
+	if (guest_cbs && guest_cbs->is_in_guest()) {
+		if (guest_cbs->is_user_mode())
 			misc |= PERF_RECORD_MISC_GUEST_USER;
 		else
 			misc |= PERF_RECORD_MISC_GUEST_KERNEL;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index fca7a6e2242f..96001962c24d 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2784,6 +2784,7 @@ static void intel_pmu_reset(void)
 
 static int handle_pmi_common(struct pt_regs *regs, u64 status)
 {
+	struct perf_guest_info_callbacks *guest_cbs;
 	struct perf_sample_data data;
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	int bit;
@@ -2852,9 +2853,10 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
 	 */
 	if (__test_and_clear_bit(GLOBAL_STATUS_TRACE_TOPAPMI_BIT, (unsigned long *)&status)) {
 		handled++;
-		if (unlikely(perf_guest_cbs && perf_guest_cbs->is_in_guest() &&
-			perf_guest_cbs->handle_intel_pt_intr))
-			perf_guest_cbs->handle_intel_pt_intr();
+		guest_cbs = this_cpu_read(perf_guest_cbs);
+		if (unlikely(guest_cbs && guest_cbs->is_in_guest() &&
+			     guest_cbs->handle_intel_pt_intr))
+			guest_cbs->handle_intel_pt_intr();
 		else
 			intel_pt_interrupt();
 	}
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 5eab690622ca..c98253dae037 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1237,7 +1237,7 @@ extern void perf_event_bpf_event(struct bpf_prog *prog,
 				 u16 flags);
 
 #ifdef CONFIG_HAVE_GUEST_PERF_EVENTS
-extern struct perf_guest_info_callbacks *perf_guest_cbs;
+DECLARE_PER_CPU(struct perf_guest_info_callbacks *, perf_guest_cbs);
 extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
 extern void perf_unregister_guest_info_callbacks(void);
 #endif /* CONFIG_HAVE_GUEST_PERF_EVENTS */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 9820df7ee455..9bc1375d6ed9 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6483,17 +6483,23 @@ static void perf_pending_event(struct irq_work *entry)
 }
 
 #ifdef CONFIG_HAVE_GUEST_PERF_EVENTS
-struct perf_guest_info_callbacks *perf_guest_cbs;
+DEFINE_PER_CPU(struct perf_guest_info_callbacks *, perf_guest_cbs);
 
 void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
 {
-	perf_guest_cbs = cbs;
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		per_cpu(perf_guest_cbs, cpu) = cbs;
 }
 EXPORT_SYMBOL_GPL(perf_register_guest_info_callbacks);
 
 void perf_unregister_guest_info_callbacks(void)
 {
-	perf_guest_cbs = NULL;
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		per_cpu(perf_guest_cbs, cpu) = NULL;
 }
 EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
 #endif
-- 
2.33.0.259.gc128427fd7-goog

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ