lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250604141640.GL38114@noisy.programming.kicks-ass.net>
Date: Wed, 4 Jun 2025 16:16:40 +0200
From: Peter Zijlstra <peterz@...radead.org>
To: Leo Yan <leo.yan@....com>
Cc: Yeoreum Yun <yeoreum.yun@....com>, mingo@...hat.com, mingo@...nel.org,
	acme@...nel.org, namhyung@...nel.org, mark.rutland@....com,
	alexander.shishkin@...ux.intel.com, jolsa@...nel.org,
	irogers@...gle.com, adrian.hunter@...el.com,
	kan.liang@...ux.intel.com, linux-perf-users@...r.kernel.org,
	linux-kernel@...r.kernel.org, David Wang <00107082@....com>
Subject: Re: [PATCH 1/1] perf/core: fix dangling cgroup pointer in cpuctx

On Wed, Jun 04, 2025 at 11:18:21AM +0100, Leo Yan wrote:
> On Wed, Jun 04, 2025 at 10:03:39AM +0200, Peter Zijlstra wrote:

> > And now we have the situation that __perf_remove_from_context() can do:
> > 
> >   {ACTIVE,INACTIVE,OFF,ERROR} -> {OFF,EXIT,REVOKED,DEAD}
> 
> A detailed transition is:
> 
>   Case 1: {ACTIVE} -> {INACTIVE} -> {OFF,EXIT,REVOKED,DEAD}

It can also start with INACTIVE, but yeah..
 
>   Case 2: {ERROR} -> {ERROR,EXIT,REVOKED,DEAD}
>   Case 3: {OFF} -> {OFF,EXIT,REVOKED,DEAD}

> 
> > Where the {OFF,ERROR} -> * transitions already have
> > perf_cgroup_event_disable(), but the {ACTIVE,INACTIVE} -> * part does
> > not.
> 
> Just a minor concern.
> 
> I noticed perf_put_aux_event() sets the ERROR state for sibling events
> of an AUX event. 

There is also perf_remove_sibling_event(), which can cause ERROR.

> IIUC, the AUX event is the group leader, so we only
> need to clean up the cgroup pointer for the AUX event, and simply set
> the ERROR state for its sibling events, correct?

Not sure; I forever forget how the AUX events are supposed to work :-/

It might be prudent to do something like so:


diff --git a/kernel/events/core.c b/kernel/events/core.c
index f34c99f8ce8f..e6583747838a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2149,8 +2149,9 @@ perf_aux_output_match(struct perf_event *event, struct perf_event *aux_event)
 }
 
 static void put_event(struct perf_event *event);
-static void event_sched_out(struct perf_event *event,
-			    struct perf_event_context *ctx);
+static void __event_disable(struct perf_event *event,
+			    struct perf_event_context *ctx,
+			    enum perf_event_state state);
 
 static void perf_put_aux_event(struct perf_event *event)
 {
@@ -2183,8 +2184,7 @@ static void perf_put_aux_event(struct perf_event *event)
 		 * state so that we don't try to schedule it again. Note
 		 * that perf_event_enable() will clear the ERROR status.
 		 */
-		event_sched_out(iter, ctx);
-		perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
+		__event_disable(event, ctx, PERF_EVENT_STATE_ERROR);
 	}
 }
 
@@ -2242,18 +2242,6 @@ static inline struct list_head *get_event_list(struct perf_event *event)
 				    &event->pmu_ctx->flexible_active;
 }
 
-/*
- * Events that have PERF_EV_CAP_SIBLING require being part of a group and
- * cannot exist on their own, schedule them out and move them into the ERROR
- * state. Also see _perf_event_enable(), it will not be able to recover
- * this ERROR state.
- */
-static inline void perf_remove_sibling_event(struct perf_event *event)
-{
-	event_sched_out(event, event->ctx);
-	perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
-}
-
 static void perf_group_detach(struct perf_event *event)
 {
 	struct perf_event *leader = event->group_leader;
@@ -2289,8 +2277,15 @@ static void perf_group_detach(struct perf_event *event)
 	 */
 	list_for_each_entry_safe(sibling, tmp, &event->sibling_list, sibling_list) {
 
+		/*
+		 * Events that have PERF_EV_CAP_SIBLING require being part of
+		 * a group and cannot exist on their own, schedule them out
+		 * and move them into the ERROR state. Also see
+		 * _perf_event_enable(), it will not be able to recover this
+		 * ERROR state.
+		 */
 		if (sibling->event_caps & PERF_EV_CAP_SIBLING)
-			perf_remove_sibling_event(sibling);
+			__event_disable(sibling, ctx, PERF_EVENT_STATE_ERROR);
 
 		sibling->group_leader = sibling;
 		list_del_init(&sibling->sibling_list);
@@ -2562,6 +2557,19 @@ static void perf_remove_from_context(struct perf_event *event, unsigned long fla
 	event_function_call(event, __perf_remove_from_context, (void *)flags);
 }
 
+static void __event_disable(struct perf_event *event,
+			    struct perf_event_context *ctx,
+			    enum perf_event_state state)
+{
+	if (event == event->group_leader)
+		group_sched_out(event, ctx);
+	else
+		event_sched_out(event, ctx);
+
+	perf_event_set_state(event, state);
+	perf_cgroup_event_disable(event, ctx);
+}
+
 /*
  * Cross CPU call to disable a performance event
  */
@@ -2575,15 +2583,7 @@ static void __perf_event_disable(struct perf_event *event,
 
 	perf_pmu_disable(event->pmu_ctx->pmu);
 	ctx_time_update_event(ctx, event);
-
-	if (event == event->group_leader)
-		group_sched_out(event, ctx);
-	else
-		event_sched_out(event, ctx);
-
-	perf_event_set_state(event, PERF_EVENT_STATE_OFF);
-	perf_cgroup_event_disable(event, ctx);
-
+	__event_disable(event, ctx, PERF_EVENT_STATE_OFF);
 	perf_pmu_enable(event->pmu_ctx->pmu);
 }
 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ