[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20190319152712.GI6521@hirez.programming.kicks-ass.net>
Date: Tue, 19 Mar 2019 16:27:12 +0100
From: Peter Zijlstra <peterz@...radead.org>
To: kan.liang@...ux.intel.com
Cc: acme@...nel.org, mingo@...hat.com, linux-kernel@...r.kernel.org,
tglx@...utronix.de, jolsa@...nel.org, eranian@...gle.com,
alexander.shishkin@...ux.intel.com, ak@...ux.intel.com
Subject: Re: [PATCH 05/22] perf/x86: Support constraint ranges
On Tue, Mar 19, 2019 at 03:53:09PM +0100, Peter Zijlstra wrote:
> On Mon, Mar 18, 2019 at 02:41:27PM -0700, kan.liang@...ux.intel.com wrote:
> > The changes costs ~2k text size according to 0day report.
>
> Where?! there isn't much code here.
> > @@ -71,6 +72,12 @@ struct event_constraint {
> > #define PERF_X86_EVENT_AUTO_RELOAD 0x0400 /* use PEBS auto-reload */
> > #define PERF_X86_EVENT_LARGE_PEBS 0x0800 /* use large PEBS */
> >
> > +static inline bool constraint_match(struct event_constraint *c, u64 ecode)
> > +{
> > + ecode &= c->cmask;
> > + return ecode == c->code ||
> > + (c->range_end && ecode >= c->code && ecode <= c->range_end);
> > +}
> >
> > struct amd_nb {
> > int nb_id; /* NorthBridge id */
>
> That's all the code, how does that add up to 2k ?
By my counting it adds all of 108 bytes of text. What it does do is add
5784 bytes of data. But that too appears not to be required.
The below patch does the same, it adds 37 bytes of text and no
additional data.
It mostly works because of how Intel event codes are 'small'. We can
easily compress the constraint to allow a u64 size, but I don't think
that is needed. If we need to cover AMD64_EVENTSEL_EVENT the range
compare needs to get fixed anyway.
---
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2688,7 +2688,7 @@ x86_get_event_constraints(struct cpu_hw_
if (x86_pmu.event_constraints) {
for_each_event_constraint(c, x86_pmu.event_constraints) {
- if ((event->hw.config & c->cmask) == c->code) {
+ if (constraint_match(c, event->hw.config)) {
event->hw.flags |= c->flags;
return c;
}
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -858,7 +858,7 @@ struct event_constraint *intel_pebs_cons
if (x86_pmu.pebs_constraints) {
for_each_event_constraint(c, x86_pmu.pebs_constraints) {
- if ((event->hw.config & c->cmask) == c->code) {
+ if (constraint_match(c, event->hw.config)) {
event->hw.flags |= c->flags;
return c;
}
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -54,6 +54,7 @@ struct event_constraint {
int weight;
int overlap;
int flags;
+ unsigned int size;
};
/*
* struct hw_perf_event.flags flags
@@ -71,6 +72,10 @@ struct event_constraint {
#define PERF_X86_EVENT_AUTO_RELOAD 0x0400 /* use PEBS auto-reload */
#define PERF_X86_EVENT_LARGE_PEBS 0x0800 /* use large PEBS */
+static inline bool constraint_match(struct event_constraint *c, u64 ecode)
+{
+ return ((ecode & c->cmask) - c->code) <= (u64)c->size;
+}
struct amd_nb {
int nb_id; /* NorthBridge id */
@@ -257,18 +262,25 @@ struct cpu_hw_events {
void *kfree_on_online[X86_PERF_KFREE_MAX];
};
-#define __EVENT_CONSTRAINT(c, n, m, w, o, f) {\
+#define __EVENT_CONSTRAINT_RANGE(c, e, n, m, w, o, f) { \
{ .idxmsk64 = (n) }, \
.code = (c), \
+ .size = (e) - (c), \
.cmask = (m), \
.weight = (w), \
.overlap = (o), \
.flags = f, \
}
+#define __EVENT_CONSTRAINT(c, n, m, w, o, f) \
+ __EVENT_CONSTRAINT_RANGE(c, c, n, m, w, o, f)
+
#define EVENT_CONSTRAINT(c, n, m) \
__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0)
+#define EVENT_CONSTRAINT_RANGE(c, e, n, m) \
+ __EVENT_CONSTRAINT_RANGE(c, e, n, m, HWEIGHT(n), 0, 0)
+
#define INTEL_EXCLEVT_CONSTRAINT(c, n) \
__EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT, HWEIGHT(n),\
0, PERF_X86_EVENT_EXCL)
@@ -304,6 +316,12 @@ struct cpu_hw_events {
EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
/*
+ * Constraint on a range of Event codes
+ */
+#define INTEL_EVENT_CONSTRAINT_RANGE(c, e, n) \
+ EVENT_CONSTRAINT_RANGE(c, e, n, ARCH_PERFMON_EVENTSEL_EVENT)
+
+/*
* Constraint on the Event code + UMask + fixed-mask
*
* filter mask to validate fixed counter events.
@@ -350,6 +368,9 @@ struct cpu_hw_events {
#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
+#define INTEL_FLAGS_EVENT_CONSTRAINT_RANGE(c, e, n) \
+ EVENT_CONSTRAINT_RANGE(c, e, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
+
/* Check only flags, but allow all event/umask */
#define INTEL_ALL_EVENT_CONSTRAINT(code, n) \
EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS)
@@ -366,6 +387,11 @@ struct cpu_hw_events {
ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)
+#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(code, end, n) \
+ __EVENT_CONSTRAINT_RANGE(code, end, n, \
+ ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
+ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)
+
#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(code, n) \
__EVENT_CONSTRAINT(code, n, \
ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
Powered by blists - more mailing lists