lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <431a5acb-4906-4095-8dec-b2d824adaac6@arm.com>
Date: Tue, 11 Mar 2025 17:22:16 +0000
From: Mark Barnett <mark.barnett@....com>
To: Peter Zijlstra <peterz@...radead.org>
Cc: mingo@...hat.com, acme@...nel.org, namhyung@...nel.org,
 irogers@...gle.com, ben.gainey@....com, deepak.surti@....com,
 ak@...ux.intel.com, will@...nel.org, james.clark@....com,
 mark.rutland@....com, alexander.shishkin@...ux.intel.com, jolsa@...nel.org,
 adrian.hunter@...el.com, linux-perf-users@...r.kernel.org,
 linux-kernel@...r.kernel.org, linux-arm-kernel@...ts.infradead.org
Subject: Re: [PATCH v3 3/5] perf: Allow adding fixed random jitter to the
 alternate sampling period

On 3/11/25 11:31, Peter Zijlstra wrote:
> On Fri, Mar 07, 2025 at 08:22:45PM +0000, mark.barnett@....com wrote:
>> From: Ben Gainey <ben.gainey@....com>
>>
>> This change modifies the core perf overflow handler, adding some small
>> random jitter to each sample period whenever an event switches between the
>> two alternate sample periods. A new flag is added to perf_event_attr to
>> opt into this behaviour.
>>
>> This change follows the discussion in [1], where it is recognized that it
>> may be possible for certain patterns of execution to end up with biased
>> results.
>>
>> [1] https://lore.kernel.org/linux-perf-users/Zc24eLqZycmIg3d2@tassilo/
>>
>> Signed-off-by: Ben Gainey <ben.gainey@....com>
>> Signed-off-by: Mark Barnett <mark.barnett@....com>
>> ---
>>   include/uapi/linux/perf_event.h | 7 ++++++-
>>   kernel/events/core.c            | 9 ++++++++-
>>   2 files changed, 14 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
>> index 499a8673df8e..c0076ce8f80a 100644
>> --- a/include/uapi/linux/perf_event.h
>> +++ b/include/uapi/linux/perf_event.h
>> @@ -461,7 +461,12 @@ struct perf_event_attr {
>>   				inherit_thread :  1, /* children only inherit if cloned with CLONE_THREAD */
>>   				remove_on_exec :  1, /* event is removed from task on exec */
>>   				sigtrap        :  1, /* send synchronous SIGTRAP on event */
>> -				__reserved_1   : 26;
>> +				/*
>> +				 * Add a limited amount of jitter on each alternate period, where
>> +				 * the jitter is between [0, (2<<jitter_alt_period) - 1]
>> +				 */
>> +				jitter_alt_period : 3,
>> +				__reserved_1   : 23;
> 
> So; I've been thinking about this interface.
> 
> I think I prefer you keep the existing sample_period/sample_freq working
> as is and simply modulate with random and high-freq components.
> 
> A very little like so..
> 
> I've made the hf_sample_period 32bit since I figured that ought to be
> enough -- you're aiming at very short periods after all. But there's
> enough unused bits left.
> 
> So this has sample_period or sample_freq compute hwc->sample_period_base
> which is first modified with random such that the average is exactly
> sample_period_base (assuming a flat distribution).
> 
> This means that sample_period_base is still the right number to use for
> computing freq based things. Additionally, have the 'extra' interrupt
> ignored for adaptive period crud.
> 
> Also, someone needs to consider the eBPF hook and what to do with it.
> I've kept the ordering as per this series, but I suspect it's wrong and
> we want this before the BPF hook. Please think about this and explicitly
> mention this in the next series.
> 
> Anyway, very much a sketch of things, incomplete and not been near a
> compiler.
> 
> 

Thanks, Peter!

OK, I see what you mean. Packing the fields into hf_sample makes sense. 
I'll have a look at the eBPF hook and see if we need to do anything. The 
sample period is always stored in perf_sample_data so it's technically 
possible for eBPF programs to identify the high-frequency ones, but it's 
not a great API. Maybe we should have an explicit flag.

I have one question about interrupt accounting, below...

> 
> ---
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 76f4265efee9..c5dd6442e96f 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -229,6 +229,10 @@ struct hw_perf_event {
>   #define PERF_HES_UPTODATE	0x02 /* event->count up-to-date */
>   #define PERF_HES_ARCH		0x04
>   
> +#define PERF_HES_HF_ON		0x10 /* using high-fred sampling */
> +#define PERF_HES_HF_SAMPLE	0x20
> +#define PERF_HES_HF_RAND	0x40
> +
>   	int				state;
>   
>   	/*
> @@ -241,6 +245,7 @@ struct hw_perf_event {
>   	 * The period to start the next sample with.
>   	 */
>   	u64				sample_period;
> +	u64				sample_period_base;
>   
>   	union {
>   		struct { /* Sampling */
> diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
> index 0524d541d4e3..8dbe027f93f1 100644
> --- a/include/uapi/linux/perf_event.h
> +++ b/include/uapi/linux/perf_event.h
> @@ -379,6 +379,7 @@ enum perf_event_read_format {
>   #define PERF_ATTR_SIZE_VER6	120	/* add: aux_sample_size */
>   #define PERF_ATTR_SIZE_VER7	128	/* add: sig_data */
>   #define PERF_ATTR_SIZE_VER8	136	/* add: config3 */
> +#define PERF_ATTR_SIZE_VER9	144	/* add: hf_sample */
>   
>   /*
>    * Hardware event_id to monitor via a performance monitoring event:
> @@ -531,6 +532,14 @@ struct perf_event_attr {
>   	__u64	sig_data;
>   
>   	__u64	config3; /* extension of config2 */
> +	union {
> +		__u64 hf_sample;
> +		struct {
> +			__u64 hf_sample_period : 32,
> +			      hf_sample_rand   :  4,
> +			      __reserved_4     : 28;
> +		};
> +	};
>   };
>   
>   /*
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index b87a5ac42ce2..e5a93edf3b5f 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -8,6 +8,7 @@
>    *  Copyright  ©  2009 Paul Mackerras, IBM Corp. <paulus@....ibm.com>
>    */
>   
> +#include "linux/random.h"
>   #include <linux/fs.h>
>   #include <linux/mm.h>
>   #include <linux/cpu.h>
> @@ -55,6 +56,7 @@
>   #include <linux/pgtable.h>
>   #include <linux/buildid.h>
>   #include <linux/task_work.h>
> +#include <linux/prandom.h>
>   
>   #include "internal.h"
>   
> @@ -443,6 +445,8 @@ static cpumask_var_t perf_online_pkg_mask;
>   static cpumask_var_t perf_online_sys_mask;
>   static struct kmem_cache *perf_event_cache;
>   
> +static struct rnd_state perf_rand;
> +
>   /*
>    * perf event paranoia level:
>    *  -1 - not paranoid at all
> @@ -4233,19 +4237,19 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count, bo
>   
>   	period = perf_calculate_period(event, nsec, count);
>   
> -	delta = (s64)(period - hwc->sample_period);
> +	delta = (s64)(period - hwc->sample_period_base);
>   	if (delta >= 0)
>   		delta += 7;
>   	else
>   		delta -= 7;
>   	delta /= 8; /* low pass filter */
>   
> -	sample_period = hwc->sample_period + delta;
> +	sample_period = hwc->sample_period_base + delta;
>   
>   	if (!sample_period)
>   		sample_period = 1;
>   
> -	hwc->sample_period = sample_period;
> +	hwc->sample_period_base = sample_period;
>   
>   	if (local64_read(&hwc->period_left) > 8*sample_period) {
>   		if (disable)
> @@ -4490,6 +4494,8 @@ void perf_event_task_tick(void)
>   	if (ctx)
>   		perf_adjust_freq_unthr_context(ctx, !!throttled);
>   	rcu_read_unlock();
> +
> +	prandom_seed_state(&perf_rand, get_random_u64());
>   }
>   
>   static int event_enable_on_exec(struct perf_event *event,
> @@ -9979,6 +9985,8 @@ static int __perf_event_overflow(struct perf_event *event,
>   				 int throttle, struct perf_sample_data *data,
>   				 struct pt_regs *regs)
>   {
> +	struct hw_perf_event *hwc = &event->hw;
> +	u64 sample_period;
>   	int events = atomic_read(&event->event_limit);
>   	int ret = 0;
>   
> @@ -9989,15 +9997,50 @@ static int __perf_event_overflow(struct perf_event *event,
>   	if (unlikely(!is_sampling_event(event)))
>   		return 0;
>   
> -	ret = __perf_event_account_interrupt(event, throttle);
> +	/*
> +	 * High Freq samples are injected inside the larger period:
> +	 *
> +	 *   |------------|-|------------|-|
> +	 *   P0          HF P1          HF
> +	 *
> +	 * By ignoring the HF samples, we measure the actual period.
> +	 */
> +	if (!(hwc->state & PERF_HES_HF_SAMPLE))
> +		ret = __perf_event_account_interrupt(event, throttle);
>   

The high-frequency samples should still contribute to interrupt 
accounting/throttling, right? We'd just need to put guards around the 
adaptive period stuff so that HF samples don't contribute to the 
frequency training.

>   	if (event->attr.aux_pause)
>   		perf_event_aux_pause(event->aux_event, true);
>   
> +	/* XXX interaction between HF samples and BPF */
>   	if (event->prog && event->prog->type == BPF_PROG_TYPE_PERF_EVENT &&
>   	    !bpf_overflow_handler(event, data, regs))
>   		goto out;
>   
> +	sample_period = hwc->sample_period_base;
> +	if (hwc->state & PERF_HES_HF_RAND) {
> +		u64 rand = 1 << event->attr.hf_sample_rand;
> +		sample_period -= rand / 2;
> +		sample_period += prandom_u32_state(&perf_rand) & (rand - 1);
> +	}
> +	if (hwc->state & PERF_HES_HF_ON) {
> +		u64 hf_sample_period = event->attr.hf_sample_period;
> +
> +		if (sample_period < hf_sample_period) {
> +			hwc->state &= ~PERF_HES_HF_ON;
> +			goto set_period;
> +		}
> +
> +		if (!(hwc->state & PERF_HES_HF_SAMPLE)) {
> +			hwc->sample_period -= hf_sample_period;
> +			hwc->state |= PERF_HES_HF_SAMPLE;
> +		} else {
> +			hwc->sample_period = hf_sample_period;
> +			hwc->state &= ~PERF_HES_HF_SAMPLE;
> +		}
> +	}
> +set_period:
> +	hwc->sample_period = sample_period;
> +
>   	/*
>   	 * XXX event_limit might not quite work as expected on inherited
>   	 * events
> @@ -12458,8 +12501,11 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
>   
>   	hwc = &event->hw;
>   	hwc->sample_period = attr->sample_period;
> -	if (attr->freq && attr->sample_freq)
> +	hwc->sample_period_base = attr->sample_period;
> +	if (attr->freq && attr->sample_freq) {
>   		hwc->sample_period = 1;
> +		hwc->sample_period_base = 1;
> +	}
>   	hwc->last_period = hwc->sample_period;
>   
>   	local64_set(&hwc->period_left, hwc->sample_period);
> @@ -13824,6 +13870,7 @@ inherit_event(struct perf_event *parent_event,
>   		struct hw_perf_event *hwc = &child_event->hw;
>   
>   		hwc->sample_period = sample_period;
> +		hwc->sample_period_base = sample_period;
>   		hwc->last_period   = sample_period;
>   
>   		local64_set(&hwc->period_left, sample_period);
> @@ -14279,6 +14326,8 @@ void __init perf_event_init(void)
>   {
>   	int ret;
>   
> +	prandom_seed_state(&perf_rand, get_random_u64());
> +
>   	idr_init(&pmu_idr);
>   
>   	perf_event_init_all_cpus();
> 


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ