[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240624152732.1231678-7-bigeasy@linutronix.de>
Date: Mon, 24 Jun 2024 17:15:19 +0200
From: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
To: linux-perf-users@...r.kernel.org,
linux-kernel@...r.kernel.org
Cc: Adrian Hunter <adrian.hunter@...el.com>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
Arnaldo Carvalho de Melo <acme@...nel.org>,
Daniel Bristot de Oliveira <bristot@...nel.org>,
Frederic Weisbecker <frederic@...nel.org>,
Ian Rogers <irogers@...gle.com>,
Ingo Molnar <mingo@...hat.com>,
Jiri Olsa <jolsa@...nel.org>,
Kan Liang <kan.liang@...ux.intel.com>,
Marco Elver <elver@...gle.com>,
Mark Rutland <mark.rutland@....com>,
Namhyung Kim <namhyung@...nel.org>,
Peter Zijlstra <peterz@...radead.org>,
Thomas Gleixner <tglx@...utronix.de>,
Sebastian Andrzej Siewior <bigeasy@...utronix.de>,
Arnaldo Carvalho de Melo <acme@...hat.com>
Subject: [PATCH v4 6/6] perf: Split __perf_pending_irq() out of perf_pending_irq()
perf_pending_irq() invokes perf_event_wakeup() and __perf_pending_irq().
The former is in charge of waking any tasks which wait to be woken up
while the latter disables perf-events.
The irq_work perf_pending_irq(), while this an irq_work, the callback
is invoked in thread context on PREEMPT_RT. This is needed because all
the waking functions (wake_up_all(), kill_fasync()) acquire sleep locks
which must not be used with disabled interrupts.
Disabling events, as done by __perf_pending_irq(), expects a hardirq
context and disabled interrupts. This requirement is not fulfilled on
PREEMPT_RT.
Split functionality based on perf_event::pending_disable into irq_work
named `pending_disable_irq' and invoke it in hardirq context on
PREEMPT_RT. Rename the split out callback to perf_pending_disable().
Tested-by: Marco Elver <elver@...gle.com>
Tested-by: Arnaldo Carvalho de Melo <acme@...hat.com>
Reported-by: Arnaldo Carvalho de Melo <acme@...hat.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
---
include/linux/perf_event.h | 1 +
kernel/events/core.c | 31 +++++++++++++++++++++++--------
2 files changed, 24 insertions(+), 8 deletions(-)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 99a7ea1d29ed5..65ece0d5b4b6d 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -783,6 +783,7 @@ struct perf_event {
unsigned int pending_disable;
unsigned long pending_addr; /* SIGTRAP */
struct irq_work pending_irq;
+ struct irq_work pending_disable_irq;
struct callback_head pending_task;
unsigned int pending_work;
struct rcuwait pending_work_wait;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f75aa9f14c979..8bba63ea9c686 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2451,7 +2451,7 @@ static void __perf_event_disable(struct perf_event *event,
* hold the top-level event's child_mutex, so any descendant that
* goes to exit will block in perf_event_exit_event().
*
- * When called from perf_pending_irq it's OK because event->ctx
+ * When called from perf_pending_disable it's OK because event->ctx
* is the current context on this CPU and preemption is disabled,
* hence we can't get into perf_event_task_sched_out for this context.
*/
@@ -2491,7 +2491,7 @@ EXPORT_SYMBOL_GPL(perf_event_disable);
void perf_event_disable_inatomic(struct perf_event *event)
{
event->pending_disable = 1;
- irq_work_queue(&event->pending_irq);
+ irq_work_queue(&event->pending_disable_irq);
}
#define MAX_INTERRUPTS (~0ULL)
@@ -5218,6 +5218,7 @@ static void perf_pending_task_sync(struct perf_event *event)
static void _free_event(struct perf_event *event)
{
irq_work_sync(&event->pending_irq);
+ irq_work_sync(&event->pending_disable_irq);
perf_pending_task_sync(event);
unaccount_event(event);
@@ -6760,7 +6761,7 @@ static void perf_sigtrap(struct perf_event *event)
/*
* Deliver the pending work in-event-context or follow the context.
*/
-static void __perf_pending_irq(struct perf_event *event)
+static void __perf_pending_disable(struct perf_event *event)
{
int cpu = READ_ONCE(event->oncpu);
@@ -6798,11 +6799,26 @@ static void __perf_pending_irq(struct perf_event *event)
* irq_work_queue(); // FAILS
*
* irq_work_run()
- * perf_pending_irq()
+ * perf_pending_disable()
*
* But the event runs on CPU-B and wants disabling there.
*/
- irq_work_queue_on(&event->pending_irq, cpu);
+ irq_work_queue_on(&event->pending_disable_irq, cpu);
+}
+
+static void perf_pending_disable(struct irq_work *entry)
+{
+ struct perf_event *event = container_of(entry, struct perf_event, pending_disable_irq);
+ int rctx;
+
+ /*
+ * If we 'fail' here, that's OK, it means recursion is already disabled
+ * and we won't recurse 'further'.
+ */
+ rctx = perf_swevent_get_recursion_context();
+ __perf_pending_disable(event);
+ if (rctx >= 0)
+ perf_swevent_put_recursion_context(rctx);
}
static void perf_pending_irq(struct irq_work *entry)
@@ -6825,8 +6841,6 @@ static void perf_pending_irq(struct irq_work *entry)
perf_event_wakeup(event);
}
- __perf_pending_irq(event);
-
if (rctx >= 0)
perf_swevent_put_recursion_context(rctx);
}
@@ -9734,7 +9748,7 @@ static int __perf_event_overflow(struct perf_event *event,
* is processed.
*/
if (in_nmi())
- irq_work_queue(&event->pending_irq);
+ irq_work_queue(&event->pending_disable_irq);
} else if (event->attr.exclude_kernel && valid_sample) {
/*
@@ -11972,6 +11986,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
init_waitqueue_head(&event->waitq);
init_irq_work(&event->pending_irq, perf_pending_irq);
+ event->pending_disable_irq = IRQ_WORK_INIT_HARD(perf_pending_disable);
init_task_work(&event->pending_task, perf_pending_task);
rcuwait_init(&event->pending_work_wait);
--
2.45.2
Powered by blists - more mailing lists