[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <0d877e6f-41a7-4724-875d-0b0a27b8a545@roeck-us.net>
Date: Wed, 12 Nov 2025 19:11:15 -0800
From: Guenter Roeck <linux@...ck-us.net>
To: Steven Rostedt <rostedt@...nel.org>
Cc: linux-kernel@...r.kernel.org, linux-trace-kernel@...r.kernel.org,
linux-perf-users@...r.kernel.org,
Masami Hiramatsu <mhiramat@...nel.org>,
Mark Rutland <mark.rutland@....com>,
Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
Andrew Morton <akpm@...ux-foundation.org>,
Peter Zijlstra <peterz@...radead.org>,
Ingo Molnar <mingo@...hat.com>,
Arnaldo Carvalho de Melo <acme@...nel.org>,
Namhyung Kim <namhyung@...nel.org>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
Jiri Olsa <jolsa@...nel.org>, Ian Rogers <irogers@...gle.com>,
Adrian Hunter <adrian.hunter@...el.com>,
"Liang, Kan" <kan.liang@...ux.intel.com>,
Thomas Gleixner <tglx@...utronix.de>
Subject: Re: [RESEND][PATCH 3/5] perf: Use current->flags &
PF_KTHREAD|PF_USER_WORKER instead of current->mm == NULL
Hi Steven,
On Wed, Aug 20, 2025 at 02:03:41PM -0400, Steven Rostedt wrote:
> From: Steven Rostedt <rostedt@...dmis.org>
>
> To determine if a task is a kernel thread or not, it is more reliable to
> use (current->flags & (PF_KTHREAD|PF_USER_WORKER)) than to rely on
> current->mm being NULL. That is because some kernel tasks (io_uring
> helpers) may have a mm field.
>
> Link: https://lore.kernel.org/linux-trace-kernel/20250424163607.GE18306@noisy.programming.kicks-ass.net/
> Link: https://lore.kernel.org/all/20250624130744.602c5b5f@batman.local.home/
>
> Signed-off-by: Steven Rostedt (Google) <rostedt@...dmis.org>
> ---
> kernel/events/callchain.c | 6 +++---
> kernel/events/core.c | 4 ++--
> 2 files changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
> index cd0e3fc7ed05..5982d18f169b 100644
> --- a/kernel/events/callchain.c
> +++ b/kernel/events/callchain.c
> @@ -246,10 +246,10 @@ get_perf_callchain(struct pt_regs *regs, bool kernel, bool user,
>
> if (user && !crosstask) {
> if (!user_mode(regs)) {
> - if (current->mm)
> - regs = task_pt_regs(current);
> - else
> + if (current->flags & (PF_KTHREAD | PF_USER_WORKER))
> regs = NULL;
> + else
> + regs = task_pt_regs(current);
> }
>
> if (regs) {
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index bade8e0fced7..f880cec0c980 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -7446,7 +7446,7 @@ static void perf_sample_regs_user(struct perf_regs *regs_user,
> if (user_mode(regs)) {
> regs_user->abi = perf_reg_abi(current);
> regs_user->regs = regs;
> - } else if (!(current->flags & PF_KTHREAD)) {
> + } else if (!(current->flags & (PF_KTHREAD | PF_USER_WORKER))) {
> perf_get_regs_user(regs_user, regs);
> } else {
> regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
> @@ -8086,7 +8086,7 @@ static u64 perf_virt_to_phys(u64 virt)
> * Try IRQ-safe get_user_page_fast_only first.
> * If failed, leave phys_addr as 0.
> */
> - if (current->mm != NULL) {
> + if (!(current->flags & (PF_KTHREAD | PF_USER_WORKER))) {
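The code following this check still dereferences current->mm, which can be
NULL even when neither PF_KTHREAD nor PF_USER_WORKER is set. In the trace
below, the sample is taken while a user task is in do_exit() (from the final
context switch in do_task_dead()), presumably after the task has already
dropped its mm. This triggers a NULL pointer dereference in
gup_fast_fallback() when running a page table stress test. See below for
details. I have seen the crash in 6.12.57 and 6.18-rc5.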
Guenter
---
[ 120.334908] BUG: kernel NULL pointer dereference, address: 0000000000000078
[ 120.341901] #PF: supervisor read access in kernel mode
[ 120.347055] #PF: error_code(0x0000) - not-present page
[ 120.352208] PGD 0 P4D 0
[ 120.354750] Oops: Oops: 0000 [#1] SMP NOPTI
[ 120.358946] CPU: 36 UID: 0 PID: 14127 Comm: page_table_stre Tainted: G S O 6.18.0-smp-DEV #2 NONE
[ 120.369242] Tainted: [S]=CPU_OUT_OF_SPEC, [O]=OOT_MODULE
[ 120.374568] Hardware name: Google LLC Indus/Indus_QC_03, BIOS 30.116.4 08/29/2025
[ 120.382075] RIP: 0010:gup_fast_fallback+0x150/0xb60
[ 120.386977] Code: d0 c9 8b 48 89 84 24 a0 00 00 00 48 8b 80 30 05 00 00 0f b6 0d 0d 6b 1a 01 49 89 f8 49 d3 e8 41 81 e0 ff 01 00 00 41 c1 e0 03 <4c> 03 40 78 4c 8d 5b ff 44 89 c8 83 e0 01 48 8d 04 45 05 00 00 00
[ 120.405809] RSP: 0018:ffffa32be5f9b7a0 EFLAGS: 00010006
[ 120.411051] RAX: 0000000000000000 RBX: 00007f0f57dfd000 RCX: 0000000000000027
[ 120.418210] RDX: 0000000000000046 RSI: 0000000000000001 RDI: 00007f0f57dfc000
[ 120.425368] RBP: 0000000000000000 R08: 00000000000007f0 R09: 0000000000100002
[ 120.432526] R10: ffffa32be5f9b8c8 R11: 0000000000000000 R12: 00007f0f57dfc6c0
[ 120.439683] R13: ffff99b44dd7c800 R14: 00000000fffffff2 R15: 00000000000800c3
[ 120.446842] FS: 0000000000000000(0000) GS:ffff9a127357b000(0000) knlGS:0000000000000000
[ 120.454956] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 120.460721] CR2: 0000000000000078 CR3: 000000512d03e006 CR4: 00000000007706f0
[ 120.467879] PKRU: 55555554
[ 120.470592] Call Trace:
[ 120.473045] <TASK>
[ 120.475152] perf_prepare_sample+0x77b/0x910
[ 120.479445] perf_event_output+0x35/0x100
[ 120.483467] intel_pmu_drain_pebs_nhm+0x570/0x750
[ 120.488198] intel_pmu_pebs_sched_task+0x74/0x80
[ 120.492839] ? __put_partials+0xd6/0x130
[ 120.496775] ? __mt_destroy+0x3f/0x80
[ 120.500451] ? put_cpu_partial+0x9b/0xc0
[ 120.504384] ? __slab_free+0x249/0x320
[ 120.508144] ? refill_obj_stock+0x120/0x1a0
[ 120.512341] ? __mt_destroy+0x3f/0x80
[ 120.516013] ? kfree+0x2ca/0x390
[ 120.519254] ? update_load_avg+0x1c8/0x7d0
[ 120.523364] ? update_entity_lag+0xf6/0x110
[ 120.527560] intel_pmu_sched_task+0x1d/0x30
[ 120.531755] perf_pmu_sched_task+0xf2/0x1a0
[ 120.535952] __perf_event_task_sched_out+0x3f/0x1f0
[ 120.540844] ? pick_next_task_fair+0x3e/0x2a0
[ 120.545214] __schedule+0xad0/0xb40
[ 120.548715] do_task_dead+0x48/0xa0
[ 120.552215] do_exit+0x734/0x920
[ 120.555463] ? do_exit+0x9/0x920
[ 120.558699] do_group_exit+0x85/0x90
[ 120.562284] __x64_sys_exit_group+0x17/0x20
[ 120.566478] x64_sys_call+0x21f7/0x2200
[ 120.570327] do_syscall_64+0x6f/0x940
[ 120.574001] ? clear_bhb_loop+0x50/0xa0
[ 120.577849] entry_SYSCALL_64_after_hwframe+0x76/0x7e
[ 120.582915] RIP: 0033:0x7f0f5a0d2c48
[ 120.586501] Code: Unable to access opcode bytes at 0x7f0f5a0d2c1e.
[ 120.592700] RSP: 002b:00007f0f57dfcec8 EFLAGS: 00000207 ORIG_RAX: 00000000000000e7
[ 120.600294] RAX: ffffffffffffffda RBX: 00007f0f57dfd700 RCX: 00007f0f5a0d2c48
[ 120.607452] RDX: 00007f0f57dfd660 RSI: 0000000000000000 RDI: 0000000000000000
[ 120.614607] RBP: 00007f0f57dfcef0 R08: 00007f0f57dfd700 R09: 00007f0f57dfd700
[ 120.621765] R10: 00007f0f5a17a6c0 R11: 0000000000000207 R12: 00007f0f57dfd9d0
[ 120.628923] R13: 00007ffc64840aa6 R14: 00007f0f57dfdd1c R15: 00007f0f57dfcfc0
[ 120.636081] </TASK>
[ 120.638272] Modules linked in: vfat fat i2c_mux_pca954x i2c_mux spidev cdc_acm xhci_pci xhci_hcd gq(O) sha3_generic
[ 120.649976] gsmi: Log Shutdown Reason 0x03
[ 120.654086] CR2: 0000000000000078
[ 120.657409] ---[ end trace 0000000000000000 ]---
Stack decode:
[ 120.334908] BUG: kernel NULL pointer dereference, address: 0000000000000078
[ 120.341901] #PF: supervisor read access in kernel mode
[ 120.347055] #PF: error_code(0x0000) - not-present page
[ 120.352208] PGD 0 P4D 0
[ 120.354750] Oops: Oops: 0000 [#1] SMP NOPTI
[ 120.358946] CPU: 36 UID: 0 PID: 14127 Comm: page_table_stre Tainted: G S O 6.18.0-smp-DEV #2 NONE
[ 120.369242] Tainted: [S]=CPU_OUT_OF_SPEC, [O]=OOT_MODULE
[ 120.374568] Hardware name: Google LLC Indus/Indus_QC_03, BIOS 30.116.4 08/29/2025
[ 120.382075] RIP: 0010:gup_fast_fallback (./include/linux/pgtable.h:140 mm/gup.c:3795 mm/gup.c:3899 mm/gup.c:3946)
[ 120.386977] Code: d0 c9 8b 48 89 84 24 a0 00 00 00 48 8b 80 30 05 00 00 0f b6 0d 0d 6b 1a 01 49 89 f8 49 d3 e8 41 81 e0 ff 01 00 00 41 c1 e0 03 <4c> 03 40 78 4c 8d 5b ff 44 89 c8 83 e0 01 48 8d 04 45 05 00 00 00
All code
========
0: d0 c9 ror $1,%cl
2: 8b 48 89 mov -0x77(%rax),%ecx
5: 84 24 a0 test %ah,(%rax,%riz,4)
8: 00 00 add %al,(%rax)
a: 00 48 8b add %cl,-0x75(%rax)
d: 80 30 05 xorb $0x5,(%rax)
10: 00 00 add %al,(%rax)
12: 0f b6 0d 0d 6b 1a 01 movzbl 0x11a6b0d(%rip),%ecx # 0x11a6b26
19: 49 89 f8 mov %rdi,%r8
1c: 49 d3 e8 shr %cl,%r8
1f: 41 81 e0 ff 01 00 00 and $0x1ff,%r8d
26: 41 c1 e0 03 shl $0x3,%r8d
2a:* 4c 03 40 78 add 0x78(%rax),%r8 <-- trapping instruction
2e: 4c 8d 5b ff lea -0x1(%rbx),%r11
32: 44 89 c8 mov %r9d,%eax
35: 83 e0 01 and $0x1,%eax
38: 48 8d 04 45 05 00 00 lea 0x5(,%rax,2),%rax
3f: 00
Code starting with the faulting instruction
===========================================
0: 4c 03 40 78 add 0x78(%rax),%r8
4: 4c 8d 5b ff lea -0x1(%rbx),%r11
8: 44 89 c8 mov %r9d,%eax
b: 83 e0 01 and $0x1,%eax
e: 48 8d 04 45 05 00 00 lea 0x5(,%rax,2),%rax
15: 00
[ 120.405809] RSP: 0018:ffffa32be5f9b7a0 EFLAGS: 00010006
[ 120.411051] RAX: 0000000000000000 RBX: 00007f0f57dfd000 RCX: 0000000000000027
[ 120.418210] RDX: 0000000000000046 RSI: 0000000000000001 RDI: 00007f0f57dfc000
[ 120.425368] RBP: 0000000000000000 R08: 00000000000007f0 R09: 0000000000100002
[ 120.432526] R10: ffffa32be5f9b8c8 R11: 0000000000000000 R12: 00007f0f57dfc6c0
[ 120.439683] R13: ffff99b44dd7c800 R14: 00000000fffffff2 R15: 00000000000800c3
[ 120.446842] FS: 0000000000000000(0000) GS:ffff9a127357b000(0000) knlGS:0000000000000000
[ 120.454956] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 120.460721] CR2: 0000000000000078 CR3: 000000512d03e006 CR4: 00000000007706f0
[ 120.467879] PKRU: 55555554
[ 120.470592] Call Trace:
[ 120.473045] <TASK>
[ 120.475152] perf_prepare_sample (kernel/events/core.c:7490 kernel/events/core.c:8302)
[ 120.479445] perf_event_output (kernel/events/core.c:8389 kernel/events/core.c:8426)
[ 120.483467] intel_pmu_drain_pebs_nhm (arch/x86/events/intel/ds.c:? arch/x86/events/intel/ds.c:2182 arch/x86/events/intel/ds.c:2372)
[ 120.488198] intel_pmu_pebs_sched_task (arch/x86/events/intel/ds.c:939 arch/x86/events/intel/ds.c:1248)
[ 120.492839] ? __put_partials (mm/slub.c:3195)
[ 120.496775] ? __mt_destroy (lib/maple_tree.c:? lib/maple_tree.c:6883)
[ 120.500451] ? put_cpu_partial (mm/slub.c:3278)
[ 120.504384] ? __slab_free (mm/slub.c:4521)
[ 120.508144] ? refill_obj_stock (./include/linux/percpu-refcount.h:335 ./include/linux/percpu-refcount.h:351 ./include/linux/memcontrol.h:988 mm/memcontrol.c:3732)
[ 120.512341] ? __mt_destroy (lib/maple_tree.c:? lib/maple_tree.c:6883)
[ 120.516013] ? kfree (mm/slab.h:681 mm/slub.c:4649 mm/slub.c:4797)
[ 120.519254] ? update_load_avg (kernel/sched/fair.c:5376 kernel/sched/fair.c:5601 kernel/sched/fair.c:5720)
[ 120.523364] ? update_entity_lag (kernel/sched/fair.c:?)
[ 120.527560] intel_pmu_sched_task (arch/x86/events/intel/core.c:5231)
[ 120.531755] perf_pmu_sched_task (kernel/events/core.c:1219 kernel/events/core.c:1231 kernel/events/core.c:3739 kernel/events/core.c:3755)
[ 120.535952] __perf_event_task_sched_out (kernel/events/core.c:3776)
[ 120.540844] ? pick_next_task_fair (kernel/sched/sched.h:4660 kernel/sched/sched.h:4666 kernel/sched/fair.c:9593 kernel/sched/fair.c:15504)
[ 120.545214] __schedule (kernel/sched/core.c:7405 kernel/sched/core.c:8080)
[ 120.548715] do_task_dead (??:?)
[ 120.552215] do_exit (./include/linux/list.h:364 kernel/exit.c:810 kernel/exit.c:1030)
[ 120.555463] ? do_exit (kernel/exit.c:934)
[ 120.558699] do_group_exit (kernel/exit.c:1161)
[ 120.562284] __x64_sys_exit_group (kernel/exit.c:1172)
[ 120.566478] x64_sys_call (arch/x86/entry/syscall_64.c:32)
[ 120.570327] do_syscall_64 (arch/x86/entry/common.c:57 arch/x86/entry/common.c:100)
[ 120.574001] ? clear_bhb_loop (arch/x86/entry/entry_64.S:1598)
Powered by blists - more mailing lists