[<prev] [next>] [day] [month] [year] [list]
Message-ID: <20240823102816.5e55753b@gandalf.local.home>
Date: Fri, 23 Aug 2024 10:28:16 -0400
From: Steven Rostedt <rostedt@...dmis.org>
To: LKML <linux-kernel@...r.kernel.org>, Linux Trace Kernel
<linux-trace-kernel@...r.kernel.org>
Cc: Masami Hiramatsu <mhiramat@...nel.org>, Mathieu Desnoyers
<mathieu.desnoyers@...icios.com>, Tomas Glozar <tglozar@...hat.com>, John
Kacur <jkacur@...hat.com>, "Luis Claudio R. Goncalves"
<lgoncalv@...hat.com>
Subject: [PATCH] tracing/osnoise: Protect the per CPU kthread variable with
mutex
From: Steven Rostedt <rostedt@...dmis.org>
The start_kthread() and stop_thread() code was not always called with the
interface_lock held. This means that the kthread variable could be
unexpectedly changed causing the kthread_stop() to be called on it when it
should not have been, leading to:
while true; do
rtla timerlat top -u -q & PID=$!;
sleep 5;
kill -INT $PID;
sleep 0.001;
kill -TERM $PID;
wait $PID;
done
Causing the following OOPS:
Oops: general protection fault, probably for non-canonical address 0xdffffc0000000002: 0000 [#1] PREEMPT SMP KASAN PTI
KASAN: null-ptr-deref in range [0x0000000000000010-0x0000000000000017]
CPU: 5 UID: 0 PID: 885 Comm: timerlatu/5 Not tainted 6.11.0-rc4-test-00002-gbc754cc76d1b-dirty #125 a533010b71dab205ad2f507188ce8c82203b0254
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
RIP: 0010:hrtimer_active+0x58/0x300
Code: 48 c1 ee 03 41 54 48 01 d1 48 01 d6 55 53 48 83 ec 20 80 39 00 0f 85 30 02 00 00 49 8b 6f 30 4c 8d 75 10 4c 89 f0 48 c1 e8 03 <0f> b6 3c 10 4c 89 f0 83 e0 07 83 c0 03 40 38 f8 7c 09 40 84 ff 0f
RSP: 0018:ffff88811d97f940 EFLAGS: 00010202
RAX: 0000000000000002 RBX: ffff88823c6b5b28 RCX: ffffed10478d6b6b
RDX: dffffc0000000000 RSI: ffffed10478d6b6c RDI: ffff88823c6b5b28
RBP: 0000000000000000 R08: ffff88823c6b5b58 R09: ffff88823c6b5b60
R10: ffff88811d97f957 R11: 0000000000000010 R12: 00000000000a801d
R13: ffff88810d8b35d8 R14: 0000000000000010 R15: ffff88823c6b5b28
FS: 0000000000000000(0000) GS:ffff88823c680000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000561858ad7258 CR3: 000000007729e001 CR4: 0000000000170ef0
Call Trace:
<TASK>
? die_addr+0x40/0xa0
? exc_general_protection+0x154/0x230
? asm_exc_general_protection+0x26/0x30
? hrtimer_active+0x58/0x300
? __pfx_mutex_lock+0x10/0x10
? __pfx_locks_remove_file+0x10/0x10
hrtimer_cancel+0x15/0x40
timerlat_fd_release+0x8e/0x1f0
? security_file_release+0x43/0x80
__fput+0x372/0xb10
task_work_run+0x11e/0x1f0
? _raw_spin_lock+0x85/0xe0
? __pfx_task_work_run+0x10/0x10
? poison_slab_object+0x109/0x170
? do_exit+0x7a0/0x24b0
do_exit+0x7bd/0x24b0
? __pfx_migrate_enable+0x10/0x10
? __pfx_do_exit+0x10/0x10
? __pfx_read_tsc+0x10/0x10
? ktime_get+0x64/0x140
? _raw_spin_lock_irq+0x86/0xe0
do_group_exit+0xb0/0x220
get_signal+0x17ba/0x1b50
? vfs_read+0x179/0xa40
? timerlat_fd_read+0x30b/0x9d0
? __pfx_get_signal+0x10/0x10
? __pfx_timerlat_fd_read+0x10/0x10
arch_do_signal_or_restart+0x8c/0x570
? __pfx_arch_do_signal_or_restart+0x10/0x10
? vfs_read+0x179/0xa40
? ksys_read+0xfe/0x1d0
? __pfx_ksys_read+0x10/0x10
syscall_exit_to_user_mode+0xbc/0x130
do_syscall_64+0x74/0x110
? __pfx___rseq_handle_notify_resume+0x10/0x10
? __pfx_ksys_read+0x10/0x10
? fpregs_restore_userregs+0xdb/0x1e0
? fpregs_restore_userregs+0xdb/0x1e0
? syscall_exit_to_user_mode+0x116/0x130
? do_syscall_64+0x74/0x110
? do_syscall_64+0x74/0x110
? do_syscall_64+0x74/0x110
entry_SYSCALL_64_after_hwframe+0x71/0x79
RIP: 0033:0x7ff0070eca9c
Code: Unable to access opcode bytes at 0x7ff0070eca72.
RSP: 002b:00007ff006dff8c0 EFLAGS: 00000246 ORIG_RAX: 0000000000000000
RAX: 0000000000000000 RBX: 0000000000000005 RCX: 00007ff0070eca9c
RDX: 0000000000000400 RSI: 00007ff006dff9a0 RDI: 0000000000000003
RBP: 00007ff006dffde0 R08: 0000000000000000 R09: 00007ff000000ba0
R10: 00007ff007004b08 R11: 0000000000000246 R12: 0000000000000003
R13: 00007ff006dff9a0 R14: 0000000000000007 R15: 0000000000000008
</TASK>
Modules linked in: snd_hda_intel snd_intel_dspcfg snd_intel_sdw_acpi snd_hda_codec snd_hwdep snd_hda_core
---[ end trace 0000000000000000 ]---
This is because it would mistakenly call kthread_stop() on a user space
thread making it "exit" before it actually exits.
Add the proper locking around the calls to start_kthread() and
stop_kthread().
Link: https://lore.kernel.org/all/CAP4=nvRTH5VxSO3VSDCospWcZagawTMs0L9J_kcKdGSkn7xT_Q@mail.gmail.com/
This was debugged by using the persistent ring buffer:
Link: https://lore.kernel.org/all/20240823013902.135036960@goodmis.org/
Cc: stable@...r.kernel.org
Fixes: c8895e271f799 ("trace/osnoise: Support hotplug operations")
Reported-by: Tomas Glozar <tglozar@...hat.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@...dmis.org>
---
kernel/trace/trace_osnoise.c | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index 66a871553d4a..97864f1bb227 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -2106,7 +2106,9 @@ static int osnoise_cpu_init(unsigned int cpu)
*/
static int osnoise_cpu_die(unsigned int cpu)
{
+ mutex_lock(&interface_lock);
stop_kthread(cpu);
+ mutex_unlock(&interface_lock);
return 0;
}
@@ -2239,8 +2241,11 @@ static ssize_t osnoise_options_write(struct file *filp, const char __user *ubuf,
*/
mutex_lock(&trace_types_lock);
running = osnoise_has_registered_instances();
- if (running)
+ if (running) {
+ mutex_lock(&interface_lock);
stop_per_cpu_kthreads();
+ mutex_unlock(&interface_lock);
+ }
mutex_lock(&interface_lock);
/*
@@ -2355,8 +2360,11 @@ osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count,
*/
mutex_lock(&trace_types_lock);
running = osnoise_has_registered_instances();
- if (running)
+ if (running) {
+ mutex_lock(&interface_lock);
stop_per_cpu_kthreads();
+ mutex_unlock(&interface_lock);
+ }
mutex_lock(&interface_lock);
/*
@@ -2951,7 +2959,9 @@ static void osnoise_workload_stop(void)
*/
barrier();
+ mutex_lock(&interface_lock);
stop_per_cpu_kthreads();
+ mutex_unlock(&interface_lock);
osnoise_unhook_events();
}
--
2.43.0
Powered by blists - more mailing lists