linux-kernel - Re: [PATCH V10 3/3] irq: Compute the periodic interval for interrupts

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [day] [month] [year] [list]
Message-ID: <02cc8f28-a8ec-9ac0-2f08-fb34541649d0@linaro.org>
Date:   Fri, 2 Jun 2017 10:57:41 +0200
From:   Daniel Lezcano <daniel.lezcano@...aro.org>
To:     tglx@...utronix.de
Cc:     Peter Zijlstra <peterz@...radead.org>,
        "Rafael J . Wysocki" <rafael@...nel.org>,
        Vincent Guittot <vincent.guittot@...aro.org>,
        Nicolas Pitre <nicolas.pitre@...aro.org>,
        Christoph Hellwig <hch@....de>,
        Hannes Reinecke <hare@...e.com>, Jens Axboe <axboe@...nel.dk>,
        open list <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH V10 3/3] irq: Compute the periodic interval for interrupts

On 23/05/2017 14:55, Daniel Lezcano wrote:
> On 16/05/2017 21:44, Daniel Lezcano wrote:
>> An interrupt behaves with a burst of activity with periodic interval of time
>> followed by one or two peaks of longer interval.
>>
>> As the time intervals are periodic, statistically speaking they follow a normal
>> distribution and each interrupts can be tracked individually.
>>
>> This patch does statistics on all interrupts, except the timers which are
>> deterministic by essence. The goal is to extract the periodicity for each
>> interrupt, with the last timestamp and sum them, so we have the next event.
>>
>> Taking the earliest prediction gives the expected wakeup on the system (assuming
>> a timer won't expire before).
>>
>> As stated in the previous patch, this code is not enabled in the kernel by
>> default.
>>
>> Signed-off-by: Daniel Lezcano <daniel.lezcano@...aro.org>
>> Cc: Peter Zijlstra <peterz@...radead.org>
>> Cc: Rafael J. Wysocki <rafael@...nel.org>
>> Cc: Vincent Guittot <vincent.guittot@...aro.org>
>> Cc: Nicolas Pitre <nicolas.pitre@...aro.org>
> 
> Hi all,
> 
> I do believe I have taken into account all the previous comments. Is
> there any objections for these patches?


Hi Thomas,

there is no objection on this series.

Can you consider merging them?

Thanks in advance.

  -- Daniel

>> ---
>> Changelog:
>>    V10:
>> 	- Simplified index/count computation
>>    V9:
>> 	- Deal with 48+16 bits encoded values
>> 	- Changed irq_stat => irqt_stat to prevent name collision on s390
>> 	- Changed div64 by constant IRQ_TIMINGS_SHIFT bit shift for average
>> 	- Changed div64 by constant IRQ_TIMINGS_SHIFT bit shift for variance
>> ---
>>  include/linux/interrupt.h |   1 +
>>  kernel/irq/internals.h    |  21 ++-
>>  kernel/irq/timings.c      | 336 ++++++++++++++++++++++++++++++++++++++++++++++
>>  3 files changed, 357 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
>> index 4c9d3ca..1546ccd 100644
>> --- a/include/linux/interrupt.h
>> +++ b/include/linux/interrupt.h
>> @@ -715,6 +715,7 @@ static inline void init_irq_proc(void)
>>  #ifdef CONFIG_IRQ_TIMINGS
>>  void irq_timings_enable(void);
>>  void irq_timings_disable(void);
>> +u64 irq_timings_next_event(u64 now);
>>  #endif
>>  
>>  struct seq_file;
>> diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
>> index df51b5e0..bf3d827 100644
>> --- a/kernel/irq/internals.h
>> +++ b/kernel/irq/internals.h
>> @@ -237,18 +237,26 @@ irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action) { }
>>  
>>  struct irq_timings {
>>  	u64 values[IRQ_TIMINGS_SIZE]; /* our circular buffer */
>> -	unsigned int count; /* Number of interruptions since last inspection */
>> +	int count; /* Number of interruptions since last inspection */
>>  };
>>  
>>  DECLARE_PER_CPU(struct irq_timings, irq_timings);
>>  
>> +extern void irq_timings_free(int irq);
>> +extern int irq_timings_alloc(int irq);
>> +
>>  static inline void remove_timings(struct irq_desc *desc)
>>  {
>>  	desc->istate &= ~IRQS_TIMINGS;
>> +
>> +	irq_timings_free(irq_desc_get_irq(desc));
>>  }
>>  
>>  static inline void setup_timings(struct irq_desc *desc, struct irqaction *act)
>>  {
>> +	int irq = irq_desc_get_irq(desc);
>> +	int ret;
>> +
>>  	/*
>>  	 * We don't need the measurement because the idle code already
>>  	 * knows the next expiry event.
>> @@ -256,6 +264,17 @@ static inline void setup_timings(struct irq_desc *desc, struct irqaction *act)
>>  	if (act->flags & __IRQF_TIMER)
>>  		return;
>>  
>> +	/*
>> +	 * In case the timing allocation fails, we just want to warn,
>> +	 * not fail, so letting the system boot anyway.
>> +	 */
>> +	ret = irq_timings_alloc(irq);
>> +	if (ret) {
>> +		pr_warn("Failed to allocate irq timing stats for irq%d (%d)",
>> +			irq, ret);
>> +		return;
>> +	}
>> +
>>  	desc->istate |= IRQS_TIMINGS;
>>  }
>>  
>> diff --git a/kernel/irq/timings.c b/kernel/irq/timings.c
>> index 56cf687..4af7703 100644
>> --- a/kernel/irq/timings.c
>> +++ b/kernel/irq/timings.c
>> @@ -8,10 +8,16 @@
>>   * published by the Free Software Foundation.
>>   *
>>   */
>> +#include <linux/kernel.h>
>>  #include <linux/percpu.h>
>> +#include <linux/slab.h>
>>  #include <linux/static_key.h>
>>  #include <linux/interrupt.h>
>> +#include <linux/idr.h>
>>  #include <linux/irq.h>
>> +#include <linux/math64.h>
>> +
>> +#include <trace/events/irq.h>
>>  
>>  #include "internals.h"
>>  
>> @@ -19,6 +25,18 @@ DEFINE_STATIC_KEY_FALSE(irq_timing_enabled);
>>  
>>  DEFINE_PER_CPU(struct irq_timings, irq_timings);
>>  
>> +struct irqt_stat {
>> +	u64 ne;         /* next event                               */
>> +	u64 lts;	/* last timestamp                           */
>> +	u64 variance;	/* variance                                 */
>> +	u32 avg;	/* mean value                               */
>> +	u32 count;      /* number of samples                        */
>> +	int anomalies;  /* number of consecutives anomalies         */
>> +	int valid;      /* behaviour of the interrupt               */
>> +};
>> +
>> +static DEFINE_IDR(irqt_stats);
>> +
>>  void irq_timings_enable(void)
>>  {
>>  	static_branch_enable(&irq_timing_enabled);
>> @@ -28,3 +46,321 @@ void irq_timings_disable(void)
>>  {
>>  	static_branch_disable(&irq_timing_enabled);
>>  }
>> +
>> +/**
>> + * irqs_update - update the irq timing statistics with a new timestamp
>> + *
>> + * @irqs: an irqt_stat struct pointer
>> + * @ts: the new timestamp
>> + *
>> + * ** This function must be called with the local irq disabled **
>> + *
>> + * The statistics are computed online, in other words, the code is
>> + * designed to compute the statistics on a stream of values rather
>> + * than doing multiple passes on the values to compute the average,
>> + * then the variance. The integer division introduces a loss of
>> + * precision but with an acceptable error margin regarding the results
>> + * we would have with the double floating precision: we are dealing
>> + * with nanosec, so big numbers, consequently the mantisse is
>> + * negligeable, especially when converting the time in usec
>> + * afterwards.
>> + *
>> + * The computation happens at idle time. When the CPU is not idle, the
>> + * interrupts' timestamps are stored in the circular buffer, when the
>> + * CPU goes idle and this routine is called, all the buffer's values
>> + * are injected in the statistical model continuying to extend the
>> + * statistics from the previous busy-idle cycle.
>> + *
>> + * The observations showed a device will trigger a burst of periodic
>> + * interrupts followed by one or two peaks of longer time, for
>> + * instance when a SD card device flushes its cache, then the periodic
>> + * intervals occur again. A one second inactivity period resets the
>> + * stats, that gives us the certitude the statistical values won't
>> + * exceed 1x10^9, thus the computation won't overflow.
>> + *
>> + * Basically, the purpose of the algorithm is to watch the periodic
>> + * interrupts and eliminate the peaks.
>> + *
>> + * An interrupt is considered periodically stable if the interval of
>> + * its occurences follow the normal distribution, thus the values
>> + * comply with:
>> + *
>> + *      avg - 3 x stddev < value < avg + 3 x stddev
>> + *
>> + * Which can be simplified to:
>> + *
>> + *      -3 x stddev < value - avg < 3 x stddev
>> + *
>> + *      abs(value - avg) < 3 x stddev
>> + *
>> + * In order to save a costly square root computation, we use the
>> + * variance. For the record, stddev = sqrt(variance). The equation
>> + * above becomes:
>> + *
>> + *      abs(value - avg) < 3 x sqrt(variance)
>> + *
>> + * And finally we square it:
>> + *
>> + *      (value - avg) ^ 2 < (3 x sqrt(variance)) ^ 2
>> + *
>> + *      (value - avg) x (value - avg) < 9 x variance
>> + *
>> + * Statistically speaking, any values out of this interval is
>> + * considered as an anomaly and is discarded. However, a normal
>> + * distribution appears when the number of samples is 30 (it is the
>> + * rule of thumb in statistics, cf. "30 samples" on Internet). When
>> + * there are three consecutive anomalies, the statistics are resetted.
>> + *
>> + */
>> +static void irqs_update(struct irqt_stat *irqs, u64 ts)
>> +{
>> +	u64 old_ts = irqs->lts;
>> +	u64 variance = 0;
>> +	u64 interval;
>> +	s64 diff;
>> +
>> +	/*
>> +	 * The timestamps are absolute time values, we need to compute
>> +	 * the timing interval between two interrupts.
>> +	 */
>> +	irqs->lts = ts;
>> +
>> +	/*
>> +	 * The interval type is u64 in order to deal with the same
>> +	 * type in our computation, that prevent mindfuck issues with
>> +	 * overflow, sign and division.
>> +	 */
>> +	interval = ts - old_ts;
>> +
>> +	/*
>> +	 * The interrupt triggered more than one second apart, that
>> +	 * ends the sequence as predictible for our purpose. In this
>> +	 * case, assume we have the beginning of a sequence and the
>> +	 * timestamp is the first value. As it is impossible to
>> +	 * predict anything at this point, return.
>> +	 *
>> +	 * Note the first timestamp of the sequence will always fall
>> +	 * in this test because the old_ts is zero. That is what we
>> +	 * want as we need another timestamp to compute an interval.
>> +	 */
>> +	if (interval >= NSEC_PER_SEC) {
>> +		memset(irqs, 0, sizeof(*irqs));
>> +		irqs->lts = ts;
>> +		return;
>> +	}
>> +
>> +	/*
>> +	 * Pre-compute the delta with the average as the result is
>> +	 * used several times in this function.
>> +	 */
>> +	diff = interval - irqs->avg;
>> +
>> +	/*
>> +	 * Increment the number of samples.
>> +	 */
>> +	irqs->count++;
>> +
>> +	/*
>> +	 * Online variance divided by the number of elements if there
>> +	 * is more than one sample.  Normally the formula is division
>> +	 * by count - 1 but we assume the number of element will be
>> +	 * more than 32 and dividing by 32 instead of 31 is enough
>> +	 * precise.
>> +	 */
>> +	if (likely(irqs->count > 1))
>> +		variance = irqs->variance >> IRQ_TIMINGS_SHIFT;
>> +
>> +	/*
>> +	 * The rule of thumb in statistics for the normal distribution
>> +	 * is having at least 30 samples in order to have the model to
>> +	 * apply. Values outside the interval are considered as an
>> +	 * anomaly.
>> +	 */
>> +	if ((irqs->count >= 30) && ((diff * diff) > (9 * variance))) {
>> +		/*
>> +		 * After three consecutive anomalies, we reset the
>> +		 * stats as it is no longer stable enough.
>> +		 */
>> +		if (irqs->anomalies++ >= 3) {
>> +			memset(irqs, 0, sizeof(*irqs));
>> +			irqs->lts = ts;
>> +			return;
>> +		}
>> +	} else {
>> +		/*
>> +		 * The anomalies must be consecutives, so at this
>> +		 * point, we reset the anomalies counter.
>> +		 */
>> +		irqs->anomalies = 0;
>> +	}
>> +
>> +	/*
>> +	 * The interrupt is considered stable enough to try to predict
>> +	 * the next event on it.
>> +	 */
>> +	irqs->valid = 1;
>> +
>> +	/*
>> +	 * Online average algorithm:
>> +	 *
>> +	 *  new_average = average + ((value - average) / count)
>> +	 *
>> +	 * The variance computation depends on the new average
>> +	 * to be computed here first.
>> +	 *
>> +	 */
>> +	irqs->avg = irqs->avg + (diff >> IRQ_TIMINGS_SHIFT);
>> +
>> +	/*
>> +	 * Online variance algorithm:
>> +	 *
>> +	 *  new_variance = variance + (value - average) x (value - new_average)
>> +	 *
>> +	 * Warning: irqs->avg is updated with the line above, hence
>> +	 * 'interval - irqs->avg' is no longer equal to 'diff'
>> +	 */
>> +	irqs->variance = irqs->variance + (diff * (interval - irqs->avg));
>> +
>> +	/*
>> +	 * Update the next event
>> +	 */
>> +	irqs->ne = ts + irqs->avg;
>> +}
>> +
>> +/**
>> + * irq_timings_next_event - Return when the next event is supposed to arrive
>> + *
>> + * *** This function must be called with the local irq disabled ***
>> + *
>> + * During the last busy cycle, the number of interrupts is incremented
>> + * and stored in the irq_timings structure. This information is
>> + * necessary to:
>> + *
>> + * - know if the index in the table wrapped up:
>> + *
>> + *      If more than the array size interrupts happened during the
>> + *      last busy/idle cycle, the index wrapped up and we have to
>> + *      begin with the next element in the array which is the last one
>> + *      in the sequence, otherwise it is a the index 0.
>> + *
>> + * - have an indication of the interrupts activity on this CPU
>> + *   (eg. irq/sec)
>> + *
>> + * The values are 'consumed' after inserting in the statistical model,
>> + * thus the count is reinitialized.
>> + *
>> + * The array of values **must** be browsed in the time direction, the
>> + * timestamp must increase between an element and the next one.
>> + *
>> + * Returns a nanosec time based estimation of the earliest interrupt,
>> + * U64_MAX otherwise.
>> + */
>> +u64 irq_timings_next_event(u64 now)
>> +{
>> +	struct irq_timings *irqts = this_cpu_ptr(&irq_timings);
>> +	struct irqt_stat *irqs;
>> +	struct irqt_stat __percpu *s;
>> +	u64 ts, ne = U64_MAX;
>> +	int i, irq = 0;
>> +
>> +	/*
>> +	 * Number of elements in the circular buffer: If it happens it
>> +	 * was flushed before, then the number of elements could be
>> +	 * smaller than IRQ_TIMINGS_SIZE, so the count is used,
>> +	 * otherwise the array size is used as we wrapped. The index
>> +	 * begins from zero when we did not wrap. That could be done
>> +	 * in a nicer way with the proper circular array structure
>> +	 * type but with the cost of extra computation in the
>> +	 * interrupt handler hot path. We choose efficiency.
>> +	 *
>> +	 * Inject measured irq/timestamp to the statistical model
>> +	 * while decrementing the counter because we consume the data
>> +	 * from our circular buffer.
>> +	 */
>> +	for (i = irqts->count & IRQ_TIMINGS_MASK,
>> +		     irqts->count = min(IRQ_TIMINGS_SIZE, irqts->count);
>> +	     irqts->count > 0; irqts->count--, i = (i + 1) & IRQ_TIMINGS_MASK) {
>> +
>> +		irq_timing_decode(irqts->values[i], &ts, &irq);
>> +
>> +		s = idr_find(&irqt_stats, irq);
>> +		if (s) {
>> +			irqs = this_cpu_ptr(s);
>> +			irqs_update(irqs, ts);
>> +		}
>> +	}
>> +
>> +	/*
>> +	 * Look in the list of interrupts' statistics, the earliest
>> +	 * next event.
>> +	 */
>> +	idr_for_each_entry(&irqt_stats, s, i) {
>> +
>> +		irqs = this_cpu_ptr(s);
>> +
>> +		if (!irqs->valid)
>> +			continue;
>> +
>> +		if (irqs->ne <= now) {
>> +			irq = i;
>> +			ne = now;
>> +
>> +			/*
>> +			 * This interrupt mustn't use in the future
>> +			 * until new events occur and update the
>> +			 * statistics.
>> +			 */
>> +			irqs->valid = 0;
>> +			break;
>> +		}
>> +
>> +		if (irqs->ne < ne) {
>> +			irq = i;
>> +			ne = irqs->ne;
>> +		}
>> +	}
>> +
>> +	return ne;
>> +}
>> +
>> +void irq_timings_free(int irq)
>> +{
>> +	struct irqt_stat __percpu *s;
>> +
>> +	s = idr_find(&irqt_stats, irq);
>> +	if (s) {
>> +		free_percpu(s);
>> +		idr_remove(&irqt_stats, irq);
>> +	}
>> +}
>> +
>> +int irq_timings_alloc(int irq)
>> +{
>> +	int id;
>> +	struct irqt_stat __percpu *s;
>> +
>> +	/*
>> +	 * Some platforms can have the same private interrupt per cpu,
>> +	 * so this function may be be called several times with the
>> +	 * same interrupt number. Just bail out in case the per cpu
>> +	 * stat structure is already allocated.
>> +	 */
>> +	s = idr_find(&irqt_stats, irq);
>> +	if (s)
>> +		return 0;
>> +
>> +	s = alloc_percpu(*s);
>> +	if (!s)
>> +		return -ENOMEM;
>> +
>> +	idr_preload(GFP_KERNEL);
>> +	id = idr_alloc(&irqt_stats, s, irq, irq + 1, GFP_NOWAIT);
>> +	idr_preload_end();
>> +
>> +	if (id < 0) {
>> +		free_percpu(s);
>> +		return id;
>> +	}
>> +
>> +	return 0;
>> +}
>>
> 
> 


-- 
 <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog