Message-Id: <20210728230230.1911468-4-robh@kernel.org>
Date:   Wed, 28 Jul 2021 17:02:30 -0600
From:   Rob Herring <robh@...nel.org>
To:     Peter Zijlstra <peterz@...radead.org>,
        Mark Rutland <mark.rutland@....com>,
        Will Deacon <will@...nel.org>,
        Kan Liang <kan.liang@...ux.intel.com>,
        Andy Lutomirski <luto@...nel.org>
Cc:     linux-kernel@...r.kernel.org, Ingo Molnar <mingo@...hat.com>,
        Arnaldo Carvalho de Melo <acme@...nel.org>,
        Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
        Jiri Olsa <jolsa@...hat.com>,
        Namhyung Kim <namhyung@...nel.org>,
        Thomas Gleixner <tglx@...utronix.de>,
        Borislav Petkov <bp@...en8.de>, x86@...nel.org,
        "H. Peter Anvin" <hpa@...or.com>,
        Dave Hansen <dave.hansen@...ux.intel.com>,
        linux-perf-users@...r.kernel.org
Subject: [RFC 3/3] perf/x86: Call mmap event callbacks on event's CPU

Mark suggested that mmapping of events should be treated like other
event changes, in that the PMU callbacks run on the CPU the event is on.
Given that the only implementation of .event_(un)mapped() (on x86) ends
up running on each CPU anyway, this makes sense.

Since the .event_(un)mapped() callbacks are now called on the event's
CPU rather than in the mapping task's context, the tracking of RDPMC
enablement is moved into the event context in the perf core. This
allows removing perf_rdpmc_allowed from the x86 mm context and lets
arm64 share the same user access tracking.
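
For reference only (not part of this change), the userspace read path that
this user access tracking gates looks roughly like the self-monitoring
example in the perf_event_open(2) man page; error handling and pmc_width
sign extension are omitted from this sketch:

#include <linux/perf_event.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdint.h>
#include <stdio.h>

/* x86-only: read a performance monitoring counter directly. */
static uint64_t rdpmc(uint32_t ecx)
{
	uint32_t lo, hi;

	asm volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (ecx));
	return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
	struct perf_event_attr attr = {
		.type		= PERF_TYPE_HARDWARE,
		.size		= sizeof(attr),
		.config		= PERF_COUNT_HW_INSTRUCTIONS,
		.exclude_kernel	= 1,
	};
	struct perf_event_mmap_page *pc;
	uint64_t count;
	uint32_t seq, idx;
	int fd;

	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		return 1;

	/* Mapping page 0 is what triggers ->event_mapped() in the kernel. */
	pc = mmap(NULL, sysconf(_SC_PAGESIZE), PROT_READ, MAP_SHARED, fd, 0);
	if (pc == MAP_FAILED)
		return 1;

	do {
		seq = pc->lock;
		asm volatile("" ::: "memory");
		idx = pc->index;
		count = pc->offset;
		if (pc->cap_user_rdpmc && idx)
			count += rdpmc(idx - 1);
		asm volatile("" ::: "memory");
	} while (pc->lock != seq);

	printf("instructions: %llu\n", (unsigned long long)count);
	return 0;
}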

Cc: Peter Zijlstra <peterz@...radead.org>
Cc: Ingo Molnar <mingo@...hat.com>
Cc: Arnaldo Carvalho de Melo <acme@...nel.org>
Cc: Mark Rutland <mark.rutland@....com>
Cc: Alexander Shishkin <alexander.shishkin@...ux.intel.com>
Cc: Jiri Olsa <jolsa@...hat.com>
Cc: Namhyung Kim <namhyung@...nel.org>
Cc: Kan Liang <kan.liang@...ux.intel.com>
Cc: Thomas Gleixner <tglx@...utronix.de>
Cc: Borislav Petkov <bp@...en8.de>
Cc: x86@...nel.org
Cc: "H. Peter Anvin" <hpa@...or.com>
Cc: linux-perf-users@...r.kernel.org
Signed-off-by: Rob Herring <robh@...nel.org>
---
Note that the intent here is to call event_mapped() only on the first mmap
and event_unmapped() only on the last unmap, using event->mmap_count. I'm
not sure whether there's some flaw with that idea?
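
To spell out that intent, here is a tiny self-contained model (plain C with
stdatomic; the names are illustrative stand-ins, not the kernel API) of
gating the callbacks on the 0 -> 1 and 1 -> 0 transitions of a mapping count:

#include <stdatomic.h>
#include <stdio.h>

/* Stand-ins for the PMU callbacks; names are illustrative only. */
static void event_mapped(void)   { puts("event_mapped: enable user access"); }
static void event_unmapped(void) { puts("event_unmapped: disable user access"); }

static atomic_int mmap_count;

static void do_mmap(void)
{
	/* Only the 0 -> 1 transition notifies the PMU. */
	if (atomic_fetch_add(&mmap_count, 1) == 0)
		event_mapped();
}

static void do_munmap(void)
{
	/* Only the 1 -> 0 transition notifies the PMU. */
	if (atomic_fetch_sub(&mmap_count, 1) == 1)
		event_unmapped();
}

int main(void)
{
	do_mmap();	/* first mapping: callback fires */
	do_mmap();	/* second mapping: no callback */
	do_munmap();	/* still mapped once: no callback */
	do_munmap();	/* last unmap: callback fires */
	return 0;
}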


 arch/x86/events/core.c     | 38 +++++++-------------------
 arch/x86/include/asm/mmu.h |  1 -
 include/linux/perf_event.h |  7 +++--
 kernel/events/core.c       | 56 ++++++++++++++++++++++++++++++++------
 4 files changed, 61 insertions(+), 41 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 5c1703206ef5..c755190c3970 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -753,9 +753,13 @@ static void perf_clear_dirty_counters(struct cpu_hw_events *cpuc)

 static void x86_pmu_set_user_access(struct cpu_hw_events *cpuc)
 {
+	struct perf_cpu_context *cpuctx = this_cpu_ptr(cpuc->pmu->pmu_cpu_context);
+	struct perf_event_context *task_ctx = cpuctx->task_ctx;
+
 	if (static_branch_unlikely(&rdpmc_always_available_key) ||
 	    (!static_branch_unlikely(&rdpmc_never_available_key) &&
-	     atomic_read(&(this_cpu_read(cpu_tlbstate.loaded_mm)->context.perf_rdpmc_allowed)))) {
+	     (atomic_read(&cpuctx->ctx.nr_user) ||
+	      (task_ctx && atomic_read(&task_ctx->nr_user))))) {
 		/*
 		 * Clear the existing dirty counters to
 		 * prevent the leak for an RDPMC task.
@@ -2522,34 +2526,12 @@ static void x86_pmu_set_user_access_ipi(void *unused)
 	x86_pmu_set_user_access(this_cpu_ptr(&cpu_hw_events));
 }

-static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
-{
-	if (!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT))
-		return;
-
-	/*
-	 * This function relies on not being called concurrently in two
-	 * tasks in the same mm.  Otherwise one task could observe
-	 * perf_rdpmc_allowed > 1 and return all the way back to
-	 * userspace with CR4.PCE clear while another task is still
-	 * doing on_each_cpu_mask() to propagate CR4.PCE.
-	 *
-	 * For now, this can't happen because all callers hold mmap_lock
-	 * for write.  If this changes, we'll need a different solution.
-	 */
-	mmap_assert_write_locked(mm);
-
-	if (atomic_inc_return(&mm->context.perf_rdpmc_allowed) == 1)
-		on_each_cpu_mask(mm_cpumask(mm), x86_pmu_set_user_access_ipi, NULL, 1);
-}
-
-static void x86_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm)
+static void x86_pmu_event_map_changed(struct perf_event *event)
 {
-	if (!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT))
+	if (atomic_read(&event->ctx->nr_user) != 1)
 		return;

-	if (atomic_dec_and_test(&mm->context.perf_rdpmc_allowed))
-		on_each_cpu_mask(mm_cpumask(mm), x86_pmu_set_user_access_ipi, NULL, 1);
+	x86_pmu_set_user_access_ipi(NULL);
 }

 static int x86_pmu_event_idx(struct perf_event *event)
@@ -2707,8 +2689,8 @@ static struct pmu pmu = {

 	.event_init		= x86_pmu_event_init,

-	.event_mapped		= x86_pmu_event_mapped,
-	.event_unmapped		= x86_pmu_event_unmapped,
+	.event_mapped		= x86_pmu_event_map_changed,
+	.event_unmapped		= x86_pmu_event_map_changed,

 	.add			= x86_pmu_add,
 	.del			= x86_pmu_del,
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 5d7494631ea9..bd27fc666024 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -46,7 +46,6 @@ typedef struct {
 	void __user *vdso;			/* vdso base address */
 	const struct vdso_image *vdso_image;	/* vdso image in use */

-	atomic_t perf_rdpmc_allowed;	/* nonzero if rdpmc is allowed */
 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
 	/*
 	 * One bit per protection key says whether userspace can
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index f5815448ca9b..23944f9386b3 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -329,10 +329,10 @@ struct pmu {

 	/*
 	 * Notification that the event was mapped or unmapped.  Called
-	 * in the context of the mapping task.
+	 * on each CPU the event is on.
 	 */
-	void (*event_mapped)		(struct perf_event *event, struct mm_struct *mm); /* optional */
-	void (*event_unmapped)		(struct perf_event *event, struct mm_struct *mm); /* optional */
+	void (*event_mapped)		(struct perf_event *event); /* optional */
+	void (*event_unmapped)		(struct perf_event *event); /* optional */

 	/*
 	 * Flags for ->add()/->del()/ ->start()/->stop(). There are
@@ -824,6 +824,7 @@ struct perf_event_context {
 	int				nr_stat;
 	int				nr_freq;
 	int				rotate_disable;
+	atomic_t			nr_user;
 	/*
 	 * Set when nr_events != nr_active, except tolerant to events not
 	 * necessary to be active due to scheduling constraints, such as cgroups.
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 464917096e73..26c3fb962e4a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5988,6 +5988,48 @@ void ring_buffer_put(struct perf_buffer *rb)
 	call_rcu(&rb->rcu_head, rb_free_rcu);
 }

+static void __perf_event_mapped(struct perf_event *event,
+				 struct perf_cpu_context *cpuctx,
+				 struct perf_event_context *ctx,
+				 void *info)
+{
+	event->pmu->event_mapped(event);
+}
+
+static void perf_event_mapped(struct perf_event *event)
+{
+	struct perf_event_context *ctx;
+
+	if (!event->pmu->event_mapped || !(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT))
+		return;
+
+	ctx = perf_event_ctx_lock(event);
+	atomic_inc(&ctx->nr_user);
+	event_function_call(event, __perf_event_mapped, NULL);
+	perf_event_ctx_unlock(event, ctx);
+}
+
+static void __perf_event_unmapped(struct perf_event *event,
+				 struct perf_cpu_context *cpuctx,
+				 struct perf_event_context *ctx,
+				 void *info)
+{
+	event->pmu->event_unmapped(event);
+}
+
+static void perf_event_unmapped(struct perf_event *event)
+{
+	struct perf_event_context *ctx;
+
+	if (!event->pmu->event_unmapped || !(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT))
+		return;
+
+	ctx = perf_event_ctx_lock(event);
+	event_function_call(event, __perf_event_unmapped, NULL);
+	atomic_dec(&ctx->nr_user);
+	perf_event_ctx_unlock(event, ctx);
+}
+
 static void perf_mmap_open(struct vm_area_struct *vma)
 {
 	struct perf_event *event = vma->vm_file->private_data;
@@ -5997,9 +6039,6 @@ static void perf_mmap_open(struct vm_area_struct *vma)

 	if (vma->vm_pgoff)
 		atomic_inc(&event->rb->aux_mmap_count);
-
-	if (event->pmu->event_mapped)
-		event->pmu->event_mapped(event, vma->vm_mm);
 }

 static void perf_pmu_output_stop(struct perf_event *event);
@@ -6021,9 +6060,6 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 	unsigned long size = perf_data_size(rb);
 	bool detach_rest = false;

-	if (event->pmu->event_unmapped)
-		event->pmu->event_unmapped(event, vma->vm_mm);
-
 	/*
 	 * rb->aux_mmap_count will always drop before rb->mmap_count and
 	 * event->mmap_count, so it is ok to use event->mmap_mutex to
@@ -6056,6 +6092,8 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 	if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
 		goto out_put;

+	perf_event_unmapped(event);
+
 	ring_buffer_attach(event, NULL);
 	mutex_unlock(&event->mmap_mutex);

@@ -6330,6 +6368,9 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 		atomic_dec(&rb->mmap_count);
 	}
 aux_unlock:
+	if (atomic_read(&event->mmap_count) == 1)
+		perf_event_mapped(event);
+
 	mutex_unlock(&event->mmap_mutex);

 	/*
@@ -6339,9 +6380,6 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_ops = &perf_mmap_vmops;

-	if (event->pmu->event_mapped)
-		event->pmu->event_mapped(event, vma->vm_mm);
-
 	return ret;
 }

--
2.27.0
