[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ad5fb696fbcd276c0902dbb94baa75fb79a6e1e2.1737511963.git.jpoimboe@kernel.org>
Date: Tue, 21 Jan 2025 18:30:54 -0800
From: Josh Poimboeuf <jpoimboe@...nel.org>
To: x86@...nel.org
Cc: Peter Zijlstra <peterz@...radead.org>,
Steven Rostedt <rostedt@...dmis.org>,
Ingo Molnar <mingo@...nel.org>,
Arnaldo Carvalho de Melo <acme@...nel.org>,
linux-kernel@...r.kernel.org,
Indu Bhagat <indu.bhagat@...cle.com>,
Mark Rutland <mark.rutland@....com>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
Jiri Olsa <jolsa@...nel.org>,
Namhyung Kim <namhyung@...nel.org>,
Ian Rogers <irogers@...gle.com>,
Adrian Hunter <adrian.hunter@...el.com>,
linux-perf-users@...r.kernel.org,
Mark Brown <broonie@...nel.org>,
linux-toolchains@...r.kernel.org,
Jordan Rome <jordalgo@...a.com>,
Sam James <sam@...too.org>,
linux-trace-kernel@...r.kernel.org,
Andrii Nakryiko <andrii.nakryiko@...il.com>,
Jens Remus <jremus@...ux.ibm.com>,
Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
Florian Weimer <fweimer@...hat.com>,
Andy Lutomirski <luto@...nel.org>,
Masami Hiramatsu <mhiramat@...nel.org>,
Weinan Liu <wnliu@...gle.com>
Subject: [PATCH v4 02/39] task_work: Fix TWA_NMI_CURRENT race with __schedule()
If TWA_NMI_CURRENT task work is queued from an NMI triggered while
running in __schedule() with IRQs disabled, task_work_set_notify_irq()
ends up inadvertently running in the context of the next scheduled
task, where 'current' no longer refers to the task that queued the
work. So the original task doesn't get its TIF_NOTIFY_RESUME flag set
and the task work may get delayed indefinitely, or may not get to run
at all.
  __schedule()
    // disable irqs
        <NMI>
          task_work_add(current, work, TWA_NMI_CURRENT);
        </NMI>
    // current = next;
    // enable irqs
        <IRQ>
          task_work_set_notify_irq()
          test_and_set_tsk_thread_flag(current, TIF_NOTIFY_RESUME); // wrong task!
        </IRQ>
    // original task skips task work on its next return to user (or exit!)
Fix it by storing the task pointer along with the irq_work struct and
passing that task to set_notify_resume().
Fixes: 466e4d801cd4 ("task_work: Add TWA_NMI_CURRENT as an additional notify mode.")
Signed-off-by: Josh Poimboeuf <jpoimboe@...nel.org>
---
kernel/task_work.c | 30 +++++++++++++++++++++---------
1 file changed, 21 insertions(+), 9 deletions(-)
diff --git a/kernel/task_work.c b/kernel/task_work.c
index 92024a8bfe12..f17447f69843 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -7,12 +7,23 @@
static struct callback_head work_exited; /* all we need is ->next == NULL */
#ifdef CONFIG_IRQ_WORK
+
+struct nmi_irq_work {
+ struct irq_work work;
+ struct task_struct *task;
+};
+
static void task_work_set_notify_irq(struct irq_work *entry)
{
- test_and_set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
+ struct nmi_irq_work *work = container_of(entry, struct nmi_irq_work, work);
+
+ set_notify_resume(work->task);
}
-static DEFINE_PER_CPU(struct irq_work, irq_work_NMI_resume) =
- IRQ_WORK_INIT_HARD(task_work_set_notify_irq);
+
+static DEFINE_PER_CPU(struct nmi_irq_work, nmi_irq_work) = {
+ .work = IRQ_WORK_INIT_HARD(task_work_set_notify_irq),
+};
+
#endif
/**
@@ -65,15 +76,21 @@ int task_work_add(struct task_struct *task, struct callback_head *work,
if (!IS_ENABLED(CONFIG_IRQ_WORK))
return -EINVAL;
#ifdef CONFIG_IRQ_WORK
+{
+ struct nmi_irq_work *irq_work = this_cpu_ptr(&nmi_irq_work);
+
head = task->task_works;
if (unlikely(head == &work_exited))
return -ESRCH;
- if (!irq_work_queue(this_cpu_ptr(&irq_work_NMI_resume)))
+ if (!irq_work_queue(&irq_work->work))
return -EBUSY;
+ irq_work->task = current;
+
work->next = head;
task->task_works = work;
+}
#endif
return 0;
}
@@ -109,11 +126,6 @@ int task_work_add(struct task_struct *task, struct callback_head *work,
case TWA_SIGNAL_NO_IPI:
__set_notify_signal(task);
break;
-#ifdef CONFIG_IRQ_WORK
- case TWA_NMI_CURRENT:
- irq_work_queue(this_cpu_ptr(&irq_work_NMI_resume));
- break;
-#endif
default:
WARN_ON_ONCE(1);
break;
--
2.48.1
Powered by blists - more mailing lists