lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <0c546386-dde8-4aac-b499-9c18221ad981@linux.intel.com>
Date: Tue, 12 Aug 2025 10:33:23 +0800
From: "Mi, Dapeng" <dapeng1.mi@...ux.intel.com>
To: "Liang, Kan" <kan.liang@...ux.intel.com>,
 Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...hat.com>,
 Arnaldo Carvalho de Melo <acme@...nel.org>,
 Namhyung Kim <namhyung@...nel.org>, Ian Rogers <irogers@...gle.com>,
 Adrian Hunter <adrian.hunter@...el.com>,
 Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
 Andi Kleen <ak@...ux.intel.com>, Eranian Stephane <eranian@...gle.com>
Cc: linux-kernel@...r.kernel.org, linux-perf-users@...r.kernel.org,
 Dapeng Mi <dapeng1.mi@...el.com>, kernel test robot <oliver.sang@...el.com>
Subject: Re: [Patch v2 3/6] perf/x86: Check if cpuc->events[*] pointer exists
 before accessing it


On 8/12/2025 7:32 AM, Liang, Kan wrote:
>
> On 2025-08-11 2:00 a.m., Dapeng Mi wrote:
>> The PMI handler could disable some events as the interrupt throttling
>> and clear the corresponding items in cpuc->events[] array.
>>
>> perf_event_overflow()
>>   -> __perf_event_overflow()
>>     ->__perf_event_account_interrupt()
>>       -> perf_event_throttle_group()
>>         -> perf_event_throttle()
>>           -> event->pmu->stop()
>>             -> x86_pmu_stop()
>>
>> Moreover PMI is NMI on x86 platform and it could interrupt other perf
>> code like setup_pebs_adaptive_sample_data(). 
> The PMU is disabled when draining the PEBS records. I don't think a PMI
> can be triggered in the setup_pebs_adaptive_sample_data().

Besides in NMI handler, the drain_pebs helper intel_pmu_drain_pebs_buffer()
could be called in many places, like context switch and PEBS event
disabling. All these places could be interrupted by the NMI handler, and
then the trigger this NULL pointer access issue.


>
>> So once PMI handling
>> finishes and returns into setup_pebs_adaptive_sample_data() and it could
>> find the cpuc->events[*] becomes NULL and accessing this NULL pointer
>> triggers an invalid memory access and leads to kernel crashes eventually.
> The commit 9734e25fbf5a stops all events in a group when processing the
> last records of the leader event. For large PEBS, it's possible that
> there are still some records for member events left. It should be the
> root cause of the NULL pointer. If so, we should drain those records as
> well.

The left PEBS record would always be cleared by
intel_pmu_drain_large_pebs() when disabling PEBS event.


>
> Thanks,
> Kan>
>> Thus add NULL check before accessing cpuc->events[*] pointer.
>>
>> Reported-by: kernel test robot <oliver.sang@...el.com>
>> Closes: https://lore.kernel.org/oe-lkp/202507042103.a15d2923-lkp@intel.com
>> Fixes: 9734e25fbf5a ("perf: Fix the throttle logic for a group")
>> Signed-off-by: Dapeng Mi <dapeng1.mi@...ux.intel.com>
>> Tested-by: kernel test robot <oliver.sang@...el.com>
>> ---
>>  arch/x86/events/core.c       |  3 +++
>>  arch/x86/events/intel/core.c |  6 +++++-
>>  arch/x86/events/intel/ds.c   | 13 ++++++-------
>>  3 files changed, 14 insertions(+), 8 deletions(-)
>>
>> diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
>> index 7610f26dfbd9..f0a3bc57157d 100644
>> --- a/arch/x86/events/core.c
>> +++ b/arch/x86/events/core.c
>> @@ -1711,6 +1711,9 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
>>  			continue;
>>  
>>  		event = cpuc->events[idx];
>> +		if (!event)
>> +			continue;
>> +
>>  		last_period = event->hw.last_period;
>>  
>>  		val = static_call(x86_pmu_update)(event);
>> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
>> index 15da60cf69f2..386717b75a09 100644
>> --- a/arch/x86/events/intel/core.c
>> +++ b/arch/x86/events/intel/core.c
>> @@ -2718,6 +2718,8 @@ static void update_saved_topdown_regs(struct perf_event *event, u64 slots,
>>  		if (!is_topdown_idx(idx))
>>  			continue;
>>  		other = cpuc->events[idx];
>> +		if (!other)
>> +			continue;
>>  		other->hw.saved_slots = slots;
>>  		other->hw.saved_metric = metrics;
>>  	}
>> @@ -2761,6 +2763,8 @@ static u64 intel_update_topdown_event(struct perf_event *event, int metric_end,
>>  		if (!is_topdown_idx(idx))
>>  			continue;
>>  		other = cpuc->events[idx];
>> +		if (!other)
>> +			continue;
>>  		__icl_update_topdown_event(other, slots, metrics,
>>  					   event ? event->hw.saved_slots : 0,
>>  					   event ? event->hw.saved_metric : 0);
>> @@ -3138,7 +3142,7 @@ static void x86_pmu_handle_guest_pebs(struct pt_regs *regs,
>>  
>>  	for_each_set_bit(bit, (unsigned long *)&guest_pebs_idxs, X86_PMC_IDX_MAX) {
>>  		event = cpuc->events[bit];
>> -		if (!event->attr.precise_ip)
>> +		if (!event || !event->attr.precise_ip)
>>  			continue;
>>  
>>  		perf_sample_data_init(data, 0, event->hw.last_period);
>> diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
>> index c0b7ac1c7594..b23c49e2e06f 100644
>> --- a/arch/x86/events/intel/ds.c
>> +++ b/arch/x86/events/intel/ds.c
>> @@ -2480,6 +2480,8 @@ static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, u64
>>  	 */
>>  	for_each_set_bit(bit, (unsigned long *)&pebs_enabled, X86_PMC_IDX_MAX) {
>>  		event = cpuc->events[bit];
>> +		if (!event)
>> +			continue;
>>  		if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
>>  			intel_pmu_save_and_restart_reload(event, 0);
>>  	}
>> @@ -2579,10 +2581,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
>>  			continue;
>>  
>>  		event = cpuc->events[bit];
>> -		if (WARN_ON_ONCE(!event))
>> -			continue;
>> -
>> -		if (WARN_ON_ONCE(!event->attr.precise_ip))
>> +		if (!event || WARN_ON_ONCE(!event->attr.precise_ip))
>>  			continue;
>>  
>>  		/* log dropped samples number */
>> @@ -2645,9 +2644,7 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
>>  		pebs_status = basic->applicable_counters & cpuc->pebs_enabled & mask;
>>  		for_each_set_bit(bit, (unsigned long *)&pebs_status, X86_PMC_IDX_MAX) {
>>  			event = cpuc->events[bit];
>> -
>> -			if (WARN_ON_ONCE(!event) ||
>> -			    WARN_ON_ONCE(!event->attr.precise_ip))
>> +			if (!event || WARN_ON_ONCE(!event->attr.precise_ip))
>>  				continue;
>>  
>>  			if (counts[bit]++) {
>> @@ -2663,6 +2660,8 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
>>  			continue;
>>  
>>  		event = cpuc->events[bit];
>> +		if (!event)
>> +			continue;
>>  
>>  		__intel_pmu_pebs_last_event(event, iregs, regs, data, last[bit],
>>  					    counts[bit], setup_pebs_adaptive_sample_data);

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ