Given a set of objects, floating proportions aims to efficiently give the proportional 'activity' of a single item as compared to the whole set. Where 'activity' is a measure of a temporal property of the items. It is efficient in that it need not inspect any other items of the set in order to provide the answer. It is not even needed to know how many other items there are. It has one parameter, and that is the period of 'time' over which the 'activity' is measured. Signed-off-by: Peter Zijlstra --- include/linux/proportions.h | 81 +++++++++++++ lib/Makefile | 3 lib/proportions.c | 258 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 341 insertions(+), 1 deletion(-) Index: linux-2.6/lib/proportions.c =================================================================== --- /dev/null +++ linux-2.6/lib/proportions.c @@ -0,0 +1,258 @@ +/* + * FLoating proportions + * + * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra + * + * Description: + * + * The floating proportion is a time derivative with an exponentially decaying + * history: + * + * p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i) + * + * Where j is an element from {prop_local}, x_{j} is j's number of events, + * and i the time period over which the differential is taken. So d/dt_{-i} is + * the differential over the i-th last period. + * + * The decaying history gives smooth transitions. The time differential carries + * the notion of speed. + * + * The denominator is 2^(1+i) because we want the series to be normalised, ie. + * + * \Sum_{i=0} 1/2^(1+i) = 1 + * + * Further more, if we measure time (t) in the same events as x; so that: + * + * t = \Sum_{j} x_{j} + * + * we get that: + * + * \Sum_{j} p_{j} = 1 + * + * Writing this in an iterative fashion we get (dropping the 'd's): + * + * if (++x_{j}, ++t > period) + * t /= 2; + * for_each (j) + * x_{j} /= 2; + * + * so that: + * + * p_{j} = x_{j} / t; + * + * We optimize away the '/= 2' for the global time delta by noting that: + * + * if (++t > period) t /= 2: + * + * Can be approximated by: + * + * period/2 + (++t % period/2) + * + * [ Furthermore, when we choose period to be 2^n it can be written in terms of + * binary operations and wraparound artefacts disappear. ] + * + * Also note that this yields a natural counter of the elapsed periods: + * + * c = t / (period/2) + * + * [ Its monotonic increasing property can be applied to mitigate the wrap- + * around issue. ] + * + * This allows us to do away with the loop over all prop_locals on each period + * expiration. By remembering the period count under which it was last accessed + * as c_{j}, we can obtain the number of 'missed' cycles from: + * + * c - c_{j} + * + * We can then lazily catch up to the global period count every time we are + * going to use x_{j}, by doing: + * + * x_{j} /= 2^(c - c_{j}), c_{j} = c + */ + +#include +#include + +void prop_descriptor_init(struct prop_descriptor *pd, int shift) +{ + pd->index = 0; + pd->pg[0].shift = shift; + percpu_counter_init_irq(&pd->pg[0].events, 0); + percpu_counter_init_irq(&pd->pg[1].events, 0); + mutex_init(&pd->mutex); +} + +/* + * We have two copies, and flip between them to make it seem like an atomic + * update. The update is not really atomic wrt the events counter, but + * it is internally consistent with the bit layout depending on shift. + * + * We copy the events count, move the bits around and flip the index. + */ +void prop_change_shift(struct prop_descriptor *pd, int shift) +{ + int index; + int offset; + u64 events; + unsigned long flags; + + mutex_lock(&pd->mutex); + + index = pd->index ^ 1; + offset = pd->pg[pd->index].shift - shift; + if (!offset) + goto out; + + pd->pg[index].shift = shift; + + local_irq_save(flags); + events = percpu_counter_sum_signed( + &pd->pg[pd->index].events); + if (offset < 0) + events <<= -offset; + else + events >>= offset; + percpu_counter_set(&pd->pg[index].events, events); + + /* + * ensure the new pg is fully written before the switch + */ + smp_wmb(); + pd->index = index; + local_irq_restore(flags); + + synchronize_rcu(); + +out: + mutex_unlock(&pd->mutex); +} + +/* + * wrap the access to the data in an rcu_read_lock() section; + * this is used to track the active references. + */ +struct prop_global *prop_get_global(struct prop_descriptor *pd) +{ + int index; + + rcu_read_lock(); + index = pd->index; + /* + * match the wmb from vcd_flip() + */ + smp_rmb(); + return &pd->pg[index]; +} + +void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg) +{ + rcu_read_unlock(); +} + +static void prop_adjust_shift(struct prop_local *pl, int new_shift) +{ + int offset = pl->shift - new_shift; + + if (!offset) + return; + + if (offset < 0) + pl->period <<= -offset; + else + pl->period >>= offset; + + pl->shift = new_shift; +} + +void prop_local_init(struct prop_local *pl) +{ + spin_lock_init(&pl->lock); + pl->shift = 0; + pl->period = 0; + percpu_counter_init_irq(&pl->events, 0); +} + +void prop_local_destroy(struct prop_local *pl) +{ + percpu_counter_destroy(&pl->events); +} + +/* + * Catch up with missed period expirations. + * + * until (c_{j} == c) + * x_{j} -= x_{j}/2; + * c_{j}++; + */ +void prop_norm(struct prop_global *pg, + struct prop_local *pl) +{ + unsigned long period = 1UL << (pg->shift - 1); + unsigned long period_mask = ~(period - 1); + unsigned long global_period; + unsigned long flags; + + global_period = percpu_counter_read(&pg->events); + global_period &= period_mask; + + /* + * Fast path - check if the local and global period count still match + * outside of the lock. + */ + if (pl->period == global_period) + return; + + spin_lock_irqsave(&pl->lock, flags); + prop_adjust_shift(pl, pg->shift); + /* + * For each missed period, we half the local counter. + * basically: + * pl->events >> (global_period - pl->period); + * + * but since the distributed nature of percpu counters make division + * rather hard, use a regular subtraction loop. This is safe, because + * the events will only every be incremented, hence the subtraction + * can never result in a negative number. + */ + while (pl->period != global_period) { + unsigned long val = percpu_counter_read(&pl->events); + unsigned long half = (val + 1) >> 1; + + /* + * Half of zero won't be much less, break out. + * This limits the loop to shift iterations, even + * if we missed a million. + */ + if (!val) + break; + + /* + * Iff shift >32 half might exceed the limits of + * the regular percpu_counter_mod. + */ + percpu_counter_mod64(&pl->events, -half); + pl->period += period; + } + pl->period = global_period; + spin_unlock_irqrestore(&pl->lock, flags); +} + +/* + * Obtain an fraction of this proportion + * + * p_{j} = x_{j} / (period/2 + t % period/2) + */ +void prop_fraction(struct prop_global *pg, struct prop_local *pl, + long *numerator, long *denominator) +{ + unsigned long period_2 = 1UL << (pg->shift - 1); + unsigned long counter_mask = period_2 - 1; + unsigned long global_count; + + prop_norm(pg, pl); + *numerator = percpu_counter_read_positive(&pl->events); + + global_count = percpu_counter_read(&pg->events); + *denominator = period_2 + (global_count & counter_mask); +} + Index: linux-2.6/include/linux/proportions.h =================================================================== --- /dev/null +++ linux-2.6/include/linux/proportions.h @@ -0,0 +1,81 @@ +/* + * FLoating proportions + * + * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra + * + * This file contains the public data structure and API definitions. + */ + +#ifndef _LINUX_PROPORTIONS_H +#define _LINUX_PROPORTIONS_H + +#include +#include +#include + +struct prop_global { + /* + * The period over which we differentiate + * + * period = 2^shift + */ + int shift; + /* + * The total event counter aka 'time'. + * + * Treated as an unsigned long; the lower 'shift - 1' bits are the + * counter bits, the remaining upper bits the period counter. + */ + struct percpu_counter events; +}; + +/* + * global proportion descriptor + * + * this is needed to consitently flip prop_global structures. + */ +struct prop_descriptor { + int index; + struct prop_global pg[2]; + struct mutex mutex; /* serialize the prop_global switch */ +}; + +void prop_descriptor_init(struct prop_descriptor *pd, int shift); +void prop_change_shift(struct prop_descriptor *pd, int new_shift); +struct prop_global *prop_get_global(struct prop_descriptor *pd); +void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg); + +struct prop_local { + /* + * the local events counter + */ + struct percpu_counter events; + + /* + * snapshot of the last seen global state + */ + int shift; + unsigned long period; + spinlock_t lock; /* protect the snapshot state */ +}; + +void prop_local_init(struct prop_local *pl); +void prop_local_destroy(struct prop_local *pl); + +void prop_norm(struct prop_global *pg, struct prop_local *pl); + +/* + * ++x_{j}, ++t + */ +static inline +void __prop_inc(struct prop_global *pg, struct prop_local *pl) +{ + prop_norm(pg, pl); + percpu_counter_mod(&pl->events, 1); + percpu_counter_mod(&pg->events, 1); +} + +void prop_fraction(struct prop_global *pg, struct prop_local *pl, + long *numerator, long *denominator); + +#endif /* _LINUX_PROPORTIONS_H */ Index: linux-2.6/lib/Makefile =================================================================== --- linux-2.6.orig/lib/Makefile +++ linux-2.6/lib/Makefile @@ -5,7 +5,8 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o \ idr.o int_sqrt.o bitmap.o extable.o prio_tree.o \ - sha1.o irq_regs.o reciprocal_div.o argv_split.o + sha1.o irq_regs.o reciprocal_div.o argv_split.o \ + proportions.o lib-$(CONFIG_MMU) += ioremap.o pagewalk.o lib-$(CONFIG_SMP) += cpumask.o -- - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/