Message-ID: <20160307230329.GT6344@twins.programming.kicks-ass.net>
Date: Tue, 8 Mar 2016 00:03:29 +0100
From: Peter Zijlstra <peterz@...radead.org>
To: Vikas Shivappa <vikas.shivappa@...ux.intel.com>
Cc: vikas.shivappa@...el.com, linux-kernel@...r.kernel.org,
x86@...nel.org, hpa@...or.com, tglx@...utronix.de,
mingo@...nel.org, ravi.v.shankar@...el.com, tony.luck@...el.com,
fenghua.yu@...el.com, h.peter.anvin@...el.com
Subject: Re: [PATCH 4/6] x86/mbm: Memory bandwidth monitoring event management
On Tue, Mar 01, 2016 at 03:48:26PM -0800, Vikas Shivappa wrote:
> A lot of the scheduling code was taken from Tony's patch, and 3-4 lines of
> change were added in intel_cqm_event_read. This change was made since the
> timer is no longer added on every context switch.
Is this here to confuse people or is there some actual information in
it?
> +/*
> + * MBM Counter is 24 bits wide. MBM_CNTR_MAX defines max counter
> + * value
> + */
> +#define MBM_CNTR_MAX 0xffffff
#define MBM_CNTR_WIDTH 24
#define MBM_CNTR_MAX ((1U << MBM_CNTR_WIDTH) - 1)
> #define QOS_L3_OCCUP_EVENT_ID (1 << 0)
> +/*
> + * MBM Event IDs as defined in SDM section 17.15.5
> + * Event IDs are used to program EVTSEL MSRs before reading mbm event counters
> + */
> +enum mbm_evt_type {
> + QOS_MBM_TOTAL_EVENT_ID = 0x02,
> + QOS_MBM_LOCAL_EVENT_ID,
> + QOS_MBM_TOTAL_BW_EVENT_ID,
> + QOS_MBM_LOCAL_BW_EVENT_ID,
> +};
QOS_L3_*_EVENT_ID is a define, these are an enum. Rather inconsistent.
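One consistent option would be to put them all in one enum; just a sketch
(the enum name is made up, and this assumes nothing needs
QOS_L3_OCCUP_EVENT_ID to stay a preprocessor define):

	enum cqm_evt_type {
		QOS_L3_OCCUP_EVENT_ID		= 0x01,
		QOS_MBM_TOTAL_EVENT_ID		= 0x02,
		QOS_MBM_LOCAL_EVENT_ID		= 0x03,
		QOS_MBM_TOTAL_BW_EVENT_ID	= 0x04,
		QOS_MBM_LOCAL_BW_EVENT_ID	= 0x05,
	};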
> struct rmid_read {
> u32 rmid;
Hole; you could've filled it with the enum (which ends up being an int I
think). A sketch follows the quoted struct.
> atomic64_t value;
> + enum mbm_evt_type evt_type;
> };
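FWIW, on a 64-bit build a sketch like the below fills that hole and shrinks
the struct from 24 to 16 bytes:

	struct rmid_read {
		u32 rmid;
		enum mbm_evt_type evt_type;	/* int-sized, fills the hole */
		atomic64_t value;		/* 8-byte aligned, no tail padding */
	};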
> +static bool is_mbm_event(int e)
You had an enum type, you might as well use it; something like the sketch
below the quoted hunk.
> +{
> + return (e >= QOS_MBM_TOTAL_EVENT_ID && e <= QOS_MBM_LOCAL_BW_EVENT_ID);
> +}
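That is, just a sketch:

	static bool is_mbm_event(enum mbm_evt_type e)
	{
		return e >= QOS_MBM_TOTAL_EVENT_ID && e <= QOS_MBM_LOCAL_BW_EVENT_ID;
	}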
>
> +static struct sample *update_sample(unsigned int rmid,
> + enum mbm_evt_type evt_type, int first)
> +{
> + ktime_t cur_time;
> + struct sample *mbm_current;
> + u32 vrmid = rmid_2_index(rmid);
> + u64 val, bytes, diff_time;
> + u32 eventid;
> +
> + if (evt_type & QOS_MBM_LOCAL_EVENT_MASK) {
> + mbm_current = &mbm_local[vrmid];
> + eventid = QOS_MBM_LOCAL_EVENT_ID;
> + } else {
> + mbm_current = &mbm_total[vrmid];
> + eventid = QOS_MBM_TOTAL_EVENT_ID;
> + }
> +
> + cur_time = ktime_get();
> + wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid);
> + rdmsrl(MSR_IA32_QM_CTR, val);
> + if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
> + return mbm_current;
> + val &= MBM_CNTR_MAX;
> + if (val < mbm_current->prev_msr)
> + bytes = MBM_CNTR_MAX - mbm_current->prev_msr + val + 1;
> + else
> + bytes = val - mbm_current->prev_msr;
Would not something like:
shift = 64 - MBM_CNTR_WIDTH;
bytes = (val << shift) - (prev << shift);
bytes >>= shift;
be less obtuse? (and consistent with how every other perf update
function does it).
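Roughly, as a sketch (helper name made up, MBM_CNTR_WIDTH as suggested
above); the shifts move the 24-bit counter into the top bits so an unsigned
subtraction handles a single wrap by itself:

	static u64 mbm_delta(u64 val, u64 prev)
	{
		int shift = 64 - MBM_CNTR_WIDTH;	/* 40 for a 24-bit counter */

		return ((val << shift) - (prev << shift)) >> shift;
	}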
What guarantee is there we didn't wrap multiple times? Doesn't that
deserve a comment?
> + bytes *= cqm_l3_scale;
> +
> + mbm_current->total_bytes += bytes;
> + mbm_current->interval_bytes += bytes;
> + mbm_current->prev_msr = val;
> + diff_time = ktime_ms_delta(cur_time, mbm_current->interval_start);
Here we do a / 1e6
> +
> + /*
> + * The b/w measured is really the most recent/current b/w.
> + * We wait till enough time has passed to avoid
> + * arithmetic rounding problems. Having it at >=100ms,
> + * such errors would be <=1%.
> + */
> + if (diff_time > 100) {
This could well be > 100e6 instead, avoiding the above division most of
the time.
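Something like the below sketch; keep the delta in ns and only convert to
ms once the threshold is crossed:

	diff_time = ktime_to_ns(ktime_sub(cur_time, mbm_current->interval_start));

	if (diff_time > 100 * NSEC_PER_MSEC) {
		diff_time = div_u64(diff_time, NSEC_PER_MSEC);	/* ms, only here */
		bytes = mbm_current->interval_bytes * MSEC_PER_SEC;
		do_div(bytes, diff_time);
		...
	}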
> + bytes = mbm_current->interval_bytes * MSEC_PER_SEC;
> + do_div(bytes, diff_time);
> + mbm_current->bandwidth = bytes;
> + mbm_current->interval_bytes = 0;
> + mbm_current->interval_start = cur_time;
> + }
> +
> + return mbm_current;
> +}
How does the above time tracking deal with the event not actually having
been scheduled the whole time?
> +static void init_mbm_sample(u32 rmid, enum mbm_evt_type evt_type)
> +{
> + struct rmid_read rr = {
> + .value = ATOMIC64_INIT(0),
> + };
> +
> + rr.rmid = rmid;
> + rr.evt_type = evt_type;
That's just sad.. put those two in the struct init as well.
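I.e., a sketch:

	struct rmid_read rr = {
		.rmid	  = rmid,
		.evt_type = evt_type,
		.value	  = ATOMIC64_INIT(0),
	};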
> + /* on each socket, init sample */
> + on_each_cpu_mask(&cqm_cpumask, __intel_mbm_event_init, &rr, 1);
> +}