linux-kernel - Re: [perf] unchecked MSR access error: WRMSR to 0x3f1

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ef72c8cd-5bed-44ce-b950-d64dc1c22baa@linux.intel.com>
Date: Fri, 20 Jun 2025 07:07:24 -0400
From: "Liang, Kan" <kan.liang@...ux.intel.com>
To: Vince Weaver <vincent.weaver@...ne.edu>
Cc: linux-kernel@...r.kernel.org, linux-perf-users@...r.kernel.org,
 Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...hat.com>,
 Arnaldo Carvalho de Melo <acme@...nel.org>,
 Namhyung Kim <namhyung@...nel.org>, Mark Rutland <mark.rutland@....com>,
 Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
 Jiri Olsa <jolsa@...nel.org>, Ian Rogers <irogers@...gle.com>,
 Adrian Hunter <adrian.hunter@...el.com>
Subject: Re: [perf] unchecked MSR access error: WRMSR to 0x3f1

Hi Vince,

On 2025-06-19 4:10 p.m., Vince Weaver wrote:
> On Thu, 19 Jun 2025, Liang, Kan wrote:
> 
>>
>>
>> On 2025-06-19 11:17 a.m., Vince Weaver wrote:
>>> On Wed, 18 Jun 2025, Vince Weaver wrote:
>>>
>>>> On Wed, 18 Jun 2025, Liang, Kan wrote:
>>>>
>>>>> No, the error message doesn't say it. I just wanted to check whether you
>>>>> have extra information, because the Topdown perf metrics are only supported
>>>>> on P-cores. I want to understand whether the code messes things up on E-cores.
>>>>
>>>> I can't easily tell from the fuzzer as it intentionally switches cores 
>>>> often.  I guess I could patch the kernel to report CPU when the WRMSR 
>>>> error triggers.
>>>
>>> I've patched the kernel to get rid of the warn_once() and added a printk
>>> for smp_processor_id()  (is that what I want to print?)  In any case that 
>>> reports the warning is happening on CPU1 which is actually a P core, not 
>>> an atom core.
>>
>> Thanks for the confirmation.
>> I've tried the fuzzer on some newer machines (later than Raptor Lake), but I
>> haven't reproduced it yet. I will try to find a Raptor Lake machine for more tests.
> 
> I've managed to use the perf_fuzzer tools to create a small reproducible 
> test case that can trigger the bug.  It's included below.

Thanks very much for the reproducer! The issue has been root-caused.
I've sent a patch to fix it. Please give it a try.
https://lore.kernel.org/lkml/20250620110406.3782402-1-kan.liang@linux.intel.com/

Thanks,
Kan
>
> Vince
> 
> ---
> 
> 
> /* WRMSR top-down reproducer */
> /* by Vince Weaver <vincent.weaver _at_ maine.edu> */
> 
> #define _GNU_SOURCE 1
> #include <stdio.h>
> #include <unistd.h>
> #include <fcntl.h>
> #include <string.h>
> #include <signal.h>
> #include <sys/mman.h>
> #include <sys/syscall.h>
> #include <sys/ioctl.h>
> #include <sys/prctl.h>
> #include <sys/wait.h>
> #include <poll.h>
> #include <linux/hw_breakpoint.h>
> #include <linux/perf_event.h>
> #include <sched.h>
> 
> static int fd[1024]; /* descriptors returned by perf_event_open(), indexed by the slot numbers used in main() */
> static struct perf_event_attr pe[1024]; /* attribute structs, same slot indexing as fd[] */
> 
> FILE *fff; /* not referenced anywhere in this reproducer */
> static int result; /* receives the return value of sched_setaffinity() in main() */
> 
> int perf_event_open(struct perf_event_attr *hw_event_uptr,
> 	pid_t pid, int cpu, int group_fd, unsigned long flags) {
> 	/* Thin wrapper: forwards all five arguments unchanged to the raw perf_event_open syscall and returns its result as an int. */
> 	return syscall(__NR_perf_event_open,hw_event_uptr, pid, cpu,
> 		group_fd, flags);
> }
> 
> int main(int argc, char **argv) {
> 	/* Replays the numbered steps 1-5 below in order; argc/argv are unused and no return value is checked. */
> 	int i;
> 	for(i=0;i<1024;i++) fd[i]=-1; /* start with every fd slot set to -1 */
> 
> /* 1: perf_event_open() of a PERF_TYPE_RAW event for the current thread on CPU 1 */
> /* fd = 72 (attributes built in pe[72], result stored in fd[72]) */
> 
> 	memset(&pe[72],0,sizeof(struct perf_event_attr));
> 	pe[72].type=PERF_TYPE_RAW;
> 	pe[72].config=0xffff880000008000ULL;
> 	pe[72].sample_freq=0x49ULL;
> 	pe[72].sample_type=PERF_SAMPLE_TID|PERF_SAMPLE_ADDR|PERF_SAMPLE_READ|PERF_SAMPLE_CPU; /* 9a */
> 	pe[72].read_format=PERF_FORMAT_ID|PERF_FORMAT_GROUP|0x10ULL; /* 1c */
> 	pe[72].exclude_user=1;
> 	pe[72].exclude_kernel=1;
> 	pe[72].mmap=1;
> 	pe[72].comm=1;
> 	pe[72].freq=1;
> 	pe[72].enable_on_exec=1;
> 	pe[72].watermark=1;
> 	pe[72].precise_ip=1; /* constant skid */
> 	pe[72].sample_id_all=1;
> 	pe[72].exclude_callchain_user=1;
> 	pe[72].comm_exec=1;
> 	pe[72].wakeup_watermark=-1970634752;
> 	pe[72].bp_type=HW_BREAKPOINT_R|HW_BREAKPOINT_W; /*3*/
> 	pe[72].bp_addr=0x0ULL;
> 	pe[72].bp_len=0x2ULL;
> 	pe[72].branch_sample_type=PERF_SAMPLE_BRANCH_HV|PERF_SAMPLE_BRANCH_ANY|PERF_SAMPLE_BRANCH_ANY_CALL|PERF_SAMPLE_BRANCH_ANY_RETURN|PERF_SAMPLE_BRANCH_IND_JUMP|PERF_SAMPLE_BRANCH_ABORT_TX|PERF_SAMPLE_BRANCH_COND|0xbcbcbca800ULL;
> 	pe[72].sample_regs_user=4294967253ULL;
> 	pe[72].sample_stack_user=0x23008000;
> 
> 	fd[72]=perf_event_open(&pe[72],
> 				0, /* current thread */
> 				1, /* Only cpu 1 */
> 				fd[114], /* 114 is group leader; fd[114] is never opened in this file, so -1 is passed here */
> 				PERF_FLAG_FD_NO_GROUP /*1*/ );
> 
> 
> /* 2: disable perf events for the calling task via prctl() */
> 	prctl(PR_TASK_PERF_EVENTS_DISABLE);
> /* 3: sched_setaffinity() with a mask in which only CPU 1 is set */
> // a 0 1 1
> // which=0,num=1,cpi=1 (presumably "cpu=1", matching the CPU set in the mask below; confirm against the fuzzer log format)
> 
> #define MAX_CPUS 1024
> 
> 	pid_t pid=0;    /* current thread */
>         static cpu_set_t *cpu_mask;
>         int max_cpus=MAX_CPUS;
>         size_t set_size;
> 
> 	cpu_mask=CPU_ALLOC(max_cpus); /* result is not checked for NULL and is never freed before exit */
> 	set_size=CPU_ALLOC_SIZE(max_cpus);
> 
> 
> 	CPU_ZERO_S(set_size,cpu_mask);
> 	CPU_SET_S(1,set_size,cpu_mask);
> 
> 	result=sched_setaffinity(pid,max_cpus,cpu_mask); /* NOTE(review): passes max_cpus (a CPU count), not set_size (the byte size computed above), as the size argument; confirm this is intended */
> 
> /* 4: re-enable perf events for the calling task via prctl() */
> 	prctl(PR_TASK_PERF_EVENTS_ENABLE);
> /* 5: perf_event_open() of a disabled PERF_COUNT_HW_BRANCH_MISSES event for this process on CPU 22 */
> /* fd = 38 (attributes built in pe[38], result stored in fd[38]) */
> 
> 	memset(&pe[38],0,sizeof(struct perf_event_attr));
> 	pe[38].type=PERF_TYPE_HARDWARE;
> 	pe[38].size=112;
> 	pe[38].config=PERF_COUNT_HW_BRANCH_MISSES;
> 	pe[38].sample_type=0; /* 0 */
> 	pe[38].read_format=PERF_FORMAT_ID|PERF_FORMAT_GROUP|0x10ULL; /* 1c */
> 	pe[38].disabled=1;
> 	pe[38].precise_ip=0; /* arbitrary skid */
> 	pe[38].wakeup_events=0;
> 	pe[38].bp_type=HW_BREAKPOINT_EMPTY;
> 
> 	fd[38]=perf_event_open(&pe[38],
> 				getpid(), /* current thread */
> 				22, /* Only cpu 22 */
> 				-1, /* New Group Leader */
> 				PERF_FLAG_FD_NO_GROUP /*1*/ );
> 
> 
> 
> 
> 	/* Replayed 4 syscalls */
> 	return 0;
> }
>