[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240704170424.1466941-4-bigeasy@linutronix.de>
Date: Thu, 4 Jul 2024 19:03:37 +0200
From: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
To: linux-perf-users@...r.kernel.org,
linux-kernel@...r.kernel.org
Cc: Adrian Hunter <adrian.hunter@...el.com>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
Arnaldo Carvalho de Melo <acme@...nel.org>,
Frederic Weisbecker <frederic@...nel.org>,
Ian Rogers <irogers@...gle.com>,
Ingo Molnar <mingo@...hat.com>,
Jiri Olsa <jolsa@...nel.org>,
Kan Liang <kan.liang@...ux.intel.com>,
Marco Elver <elver@...gle.com>,
Mark Rutland <mark.rutland@....com>,
Namhyung Kim <namhyung@...nel.org>,
Peter Zijlstra <peterz@...radead.org>,
Thomas Gleixner <tglx@...utronix.de>,
Sebastian Andrzej Siewior <bigeasy@...utronix.de>,
Arnaldo Carvalho de Melo <acme@...hat.com>
Subject: [PATCH v5 3/7] perf: Enqueue SIGTRAP always via task_work.
A signal is delivered by raising irq_work() which works from any context
including NMI. irq_work() can be delayed if the architecture does not
provide an interrupt vector. In order not to lose a signal, the signal
is injected via task_work during event_sched_out().
Instead of going via irq_work, the signal could be added directly via
task_work. The signal is sent to current and can be enqueued on its
return path to userland.
Queue signal via task_work and consider possible NMI context. Remove
perf_event::pending_sigtrap and use perf_event::pending_work
instead.
Tested-by: Marco Elver <elver@...gle.com>
Tested-by: Arnaldo Carvalho de Melo <acme@...hat.com>
Reported-by: Arnaldo Carvalho de Melo <acme@...hat.com>
Link: https://lore.kernel.org/all/ZMAtZ2t43GXoF6tM@kernel.org/
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
---
include/linux/perf_event.h | 3 +--
kernel/events/core.c | 31 ++++++++++---------------------
2 files changed, 11 insertions(+), 23 deletions(-)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 393fb13733b02..ea0d82418d854 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -781,7 +781,6 @@ struct perf_event {
unsigned int pending_wakeup;
unsigned int pending_kill;
unsigned int pending_disable;
- unsigned int pending_sigtrap;
unsigned long pending_addr; /* SIGTRAP */
struct irq_work pending_irq;
struct callback_head pending_task;
@@ -963,7 +962,7 @@ struct perf_event_context {
struct rcu_head rcu_head;
/*
- * Sum (event->pending_sigtrap + event->pending_work)
+ * Sum (event->pending_work)
*
* The SIGTRAP is targeted at ctx->task, as such it won't do changing
* that until the signal is delivered.
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 647abeeaeeb02..c278aefa94e76 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2283,17 +2283,6 @@ event_sched_out(struct perf_event *event, struct perf_event_context *ctx)
state = PERF_EVENT_STATE_OFF;
}
- if (event->pending_sigtrap) {
- event->pending_sigtrap = 0;
- if (state != PERF_EVENT_STATE_OFF &&
- !event->pending_work &&
- !task_work_add(current, &event->pending_task, TWA_RESUME)) {
- event->pending_work = 1;
- } else {
- local_dec(&event->ctx->nr_pending);
- }
- }
-
perf_event_set_state(event, state);
if (!is_software_event(event))
@@ -6787,11 +6776,6 @@ static void __perf_pending_irq(struct perf_event *event)
* Yay, we hit home and are in the context of the event.
*/
if (cpu == smp_processor_id()) {
- if (event->pending_sigtrap) {
- event->pending_sigtrap = 0;
- perf_sigtrap(event);
- local_dec(&event->ctx->nr_pending);
- }
if (event->pending_disable) {
event->pending_disable = 0;
perf_event_disable_local(event);
@@ -9732,21 +9716,26 @@ static int __perf_event_overflow(struct perf_event *event,
*/
bool valid_sample = sample_is_allowed(event, regs);
unsigned int pending_id = 1;
+ enum task_work_notify_mode notify_mode;
if (regs)
pending_id = hash32_ptr((void *)instruction_pointer(regs)) ?: 1;
- if (!event->pending_sigtrap) {
- event->pending_sigtrap = pending_id;
+
+ notify_mode = in_nmi() ? TWA_NMI_CURRENT : TWA_RESUME;
+
+ if (!event->pending_work &&
+ !task_work_add(current, &event->pending_task, notify_mode)) {
+ event->pending_work = pending_id;
local_inc(&event->ctx->nr_pending);
event->pending_addr = 0;
if (valid_sample && (data->sample_flags & PERF_SAMPLE_ADDR))
event->pending_addr = data->addr;
- irq_work_queue(&event->pending_irq);
+
} else if (event->attr.exclude_kernel && valid_sample) {
/*
* Should not be able to return to user space without
- * consuming pending_sigtrap; with exceptions:
+ * consuming pending_work; with exceptions:
*
* 1. Where !exclude_kernel, events can overflow again
* in the kernel without returning to user space.
@@ -9756,7 +9745,7 @@ static int __perf_event_overflow(struct perf_event *event,
* To approximate progress (with false negatives),
* check 32-bit hash of the current IP.
*/
- WARN_ON_ONCE(event->pending_sigtrap != pending_id);
+ WARN_ON_ONCE(event->pending_work != pending_id);
}
}
--
2.45.2
Powered by blists - more mailing lists