Message-Id: <20240115170120.662220-4-tvrtko.ursulin@linux.intel.com>
Date: Mon, 15 Jan 2024 17:01:20 +0000
From: Tvrtko Ursulin <tvrtko.ursulin@...ux.intel.com>
To: linux-kernel@...r.kernel.org,
	tvrtko.ursulin@...ux.intel.com
Cc: Tvrtko Ursulin <tvrtko.ursulin@...el.com>,
	Peter Zijlstra <peterz@...radead.org>,
	Umesh Nerlige Ramappa <umesh.nerlige.ramappa@...el.com>,
	Aravind Iddamsetty <aravind.iddamsetty@...ux.intel.com>
Subject: [RFC 3/3] perf: Reference count struct perf_cpu_pmu_context to fix driver unbind

From: Tvrtko Ursulin <tvrtko.ursulin@...el.com>

If a PMU driver is a PCI driver which can be unbound at runtime, there
is currently a use-after-free issue with CPU contexts when an active
perf fd is kept around.

Specifically, when perf_pmu_unregister() calls free_pmu_context() on the
PMU-provided per-CPU struct perf_cpu_pmu_context storage, any call path
which ends up in event_sched_out() (such as __perf_remove_from_context()
via perf_event_release_kernel()) will dereference a freed event->pmu_ctx.
Furthermore, if the same percpu area has in the meantime been
re-allocated, the use-after-free will corrupt someone else's per-CPU
storage area.
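
For illustration, a minimal userspace reproducer sketch of the failure
ordering (the PMU name, event config and PCI address below are
placeholders, not taken from any specific driver):

  #include <linux/perf_event.h>
  #include <sys/syscall.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include <unistd.h>

  static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
                              int cpu, int group_fd, unsigned long flags)
  {
          return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd,
                         flags);
  }

  int main(void)
  {
          struct perf_event_attr attr;
          FILE *f;
          int fd, type;

          /* Device PMU registered by the PCI driver (placeholder name). */
          f = fopen("/sys/bus/event_source/devices/my_pmu/type", "r");
          if (!f || fscanf(f, "%d", &type) != 1)
                  return 1;
          fclose(f);

          memset(&attr, 0, sizeof(attr));
          attr.size = sizeof(attr);
          attr.type = type;
          attr.config = 0; /* placeholder event */

          /* CPU-wide event pins a perf_cpu_pmu_context slot. */
          fd = perf_event_open(&attr, -1, 0, -1, 0);
          if (fd < 0)
                  return 1;

          /* Unbind the driver while the fd is still open; this ends up
           * in perf_pmu_unregister() and frees the percpu storage. */
          system("echo 0000:03:00.0 > "
                 "/sys/bus/pci/drivers/my_driver/unbind");

          /* Closing the fd now goes through perf_event_release_kernel()
           * -> __perf_remove_from_context() -> event_sched_out() and
           * dereferences the freed event->pmu_ctx. */
          close(fd);

          return 0;
  }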

To fix it we attempt to add reference counting to the struct
perf_cpu_pmu_context storage so that the object remains alive until the
last user is done with it.
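
The intended lifetime rule, sketched in plain C with <stdatomic.h>
standing in for the kernel's atomic_t (the names mirror the patch but
this is not kernel code):

  #include <stdatomic.h>
  #include <stdlib.h>

  struct pmu_sketch {
          atomic_int cpu_pmu_context_refcount;
          void *cpu_pmu_context; /* stands in for the __percpu storage */
  };

  static void register_sketch(struct pmu_sketch *pmu)
  {
          pmu->cpu_pmu_context = malloc(64);
          atomic_init(&pmu->cpu_pmu_context_refcount, 1); /* pmu's own ref */
  }

  static void get_sketch(struct pmu_sketch *pmu)
  {
          atomic_fetch_add(&pmu->cpu_pmu_context_refcount, 1);
  }

  static void put_sketch(struct pmu_sketch *pmu)
  {
          /* Free the storage only when the last reference is dropped. */
          if (atomic_fetch_sub(&pmu->cpu_pmu_context_refcount, 1) == 1)
                  free(pmu->cpu_pmu_context);
  }

  int main(void)
  {
          struct pmu_sketch pmu;

          register_sketch(&pmu); /* refcount = 1 */
          get_sketch(&pmu);      /* an event takes a reference */
          put_sketch(&pmu);      /* driver unbind drops the initial ref */
          put_sketch(&pmu);      /* last event put frees the storage */
          return 0;
  }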

TODO/FIXME/QQQ:

1)

I am really not sure about the locking here. I *think* a per struct pmu
counter is needed, and looking at what find_get_pmu_context() does when
it takes a slot from the driver-provided pmu->cpu_pmu_context under
ctx->lock, that looked like it should be sufficient. Maybe it does not
even need to be an atomic_t. Or maybe ctx->lock is not enough.

2)

I believe pmu->pmu_disable_count will need similar treatment, but as I
wasn't sure of the locking model, or even whether this all makes sense
at the high level, I left it out for now. That is, does the idea of
reference counting even fly, or will a completely different solution be
needed?

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@...el.com>
Cc: Peter Zijlstra <peterz@...radead.org>
Cc: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@...el.com>
Cc: Aravind Iddamsetty <aravind.iddamsetty@...ux.intel.com>
---
 include/linux/perf_event.h |  1 +
 kernel/events/core.c       | 21 ++++++++++++++-------
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index a567d2d98be1..bd1c8f3c1736 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -317,6 +317,7 @@ struct pmu {
 
 	int __percpu			*pmu_disable_count;
 	struct perf_cpu_pmu_context __percpu *cpu_pmu_context;
+	atomic_t 			cpu_pmu_context_refcount;
 	atomic_t			exclusive_cnt; /* < 0: cpu; > 0: tsk */
 	int				task_ctx_nr;
 	int				hrtimer_interval_ms;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 4b62d2201ca7..0c95aecf560a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4873,6 +4873,9 @@ find_get_pmu_context(struct pmu *pmu, struct perf_event_context *ctx,
 			atomic_inc(&epc->refcount);
 		}
 		raw_spin_unlock_irq(&ctx->lock);
+
+		atomic_inc(&pmu->cpu_pmu_context_refcount);
+
 		return epc;
 	}
 
@@ -4928,6 +4931,12 @@ find_get_pmu_context(struct pmu *pmu, struct perf_event_context *ctx,
 	return epc;
 }
 
+static void put_pmu_context(struct pmu *pmu)
+{
+	if (atomic_dec_and_test(&pmu->cpu_pmu_context_refcount))
+		free_percpu(pmu->cpu_pmu_context);
+}
+
 static void get_pmu_ctx(struct perf_event_pmu_context *epc)
 {
 	WARN_ON_ONCE(!atomic_inc_not_zero(&epc->refcount));
@@ -4967,8 +4976,10 @@ static void put_pmu_ctx(struct perf_event_pmu_context *epc)
 
 	raw_spin_unlock_irqrestore(&ctx->lock, flags);
 
-	if (epc->embedded)
+	if (epc->embedded) {
+		put_pmu_context(epc->pmu);
 		return;
+	}
 
 	call_rcu(&epc->rcu_head, free_epc_rcu);
 }
@@ -11347,11 +11358,6 @@ static int perf_event_idx_default(struct perf_event *event)
 	return 0;
 }
 
-static void free_pmu_context(struct pmu *pmu)
-{
-	free_percpu(pmu->cpu_pmu_context);
-}
-
 /*
  * Let userspace know that this PMU supports address range filtering:
  */
@@ -11573,6 +11579,7 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type)
 		pmu->event_idx = perf_event_idx_default;
 
 	list_add_rcu(&pmu->entry, &pmus);
+	atomic_set(&pmu->cpu_pmu_context_refcount, 1);
 	atomic_set(&pmu->exclusive_cnt, 0);
 	ret = 0;
 unlock:
@@ -11615,7 +11622,7 @@ void perf_pmu_unregister(struct pmu *pmu)
 		device_del(pmu->dev);
 		put_device(pmu->dev);
 	}
-	free_pmu_context(pmu);
+	put_pmu_context(pmu);
 	mutex_unlock(&pmus_lock);
 }
 EXPORT_SYMBOL_GPL(perf_pmu_unregister);
-- 
2.40.1

