[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1422613866-113186-1-git-send-email-alexander.shishkin@linux.intel.com>
Date: Fri, 30 Jan 2015 12:31:06 +0200
From: Alexander Shishkin <alexander.shishkin@...ux.intel.com>
To: Peter Zijlstra <a.p.zijlstra@...llo.nl>
Cc: Ingo Molnar <mingo@...hat.com>, linux-kernel@...r.kernel.org,
Robert Richter <rric@...nel.org>,
Frederic Weisbecker <fweisbec@...il.com>,
Mike Galbraith <efault@....de>,
Paul Mackerras <paulus@...ba.org>,
Stephane Eranian <eranian@...gle.com>,
Andi Kleen <ak@...ux.intel.com>, kan.liang@...el.com,
adrian.hunter@...el.com, markus.t.metzger@...el.com,
mathieu.poirier@...aro.org, Kaixu Xia <kaixu.xia@...aro.org>,
acme@...radead.org,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>
Subject: [PATCH] perf: Add a pmu capability for "exclusive" events
Usually, pmus that do, for example, instruction tracing can only ever
have one event per task per cpu (or per perf_event_context). For
such pmus it makes sense to disallow creating conflicting events early on,
so as to provide consistent behavior for the user.
This patch adds a pmu capability that indicates such a constraint on event
creation.
Signed-off-by: Alexander Shishkin <alexander.shishkin@...ux.intel.com>
---
include/linux/perf_event.h | 2 +
kernel/events/core.c | 119 ++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 119 insertions(+), 2 deletions(-)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 572e4fb508..60aa395a19 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -168,6 +168,7 @@ struct perf_event;
#define PERF_PMU_CAP_NO_INTERRUPT 0x01
#define PERF_PMU_CAP_AUX_NO_SG 0x02
#define PERF_PMU_CAP_AUX_SW_DOUBLEBUF 0x04
+#define PERF_PMU_CAP_EXCLUSIVE 0x08
/**
* struct pmu - generic performance monitoring unit
@@ -188,6 +189,7 @@ struct pmu {
int * __percpu pmu_disable_count;
struct perf_cpu_context * __percpu pmu_cpu_context;
+ atomic_t exclusive_cnt; /* < 0: cpu; > 0: tsk */
int task_ctx_nr;
int hrtimer_interval_ms;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 67643035de..0f2835f5c6 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3335,6 +3335,91 @@ static void unaccount_event(struct perf_event *event)
unaccount_event_cpu(event, event->cpu);
}
+/*
+ * The following functions implement mutual exclusion of events on "exclusive"
+ * pmus (PERF_PMU_CAP_EXCLUSIVE). Such pmus can only have one event scheduled
+ * at a time, so we disallow creating events that might conflict, namely:
+ *
+ * 1) cpu-wide events in the presence of per-task events,
+ * 2) per-task events in the presence of cpu-wide events,
+ * 3) two matching events on the same context.
+ *
+ * The first two cases are handled in the allocation path (perf_event_alloc(),
+ * __free_event()); the last one -- before the first perf_install_in_context().
+ */
+static int exclusive_event_init(struct perf_event *event)
+{
+ struct pmu *pmu = event->pmu;
+
+ if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE))
+ return 0;
+
+ /*
+ * Prevent co-existence of per-task and cpu-wide events on the
+ * same exclusive pmu.
+ *
+ * Negative pmu::exclusive_cnt means there are cpu-wide events on
+ * this "exclusive" pmu, positive means there are per-task events.
+ * If the counter already has the opposite sign, fail with -EBUSY.
+ *
+ * Since this is called in the perf_event_alloc() path, event::ctx
+ * doesn't exist yet; it is, however, safe to use PERF_ATTACH_TASK
+ * to mean "per-task event", because unlike other attach states it
+ * never gets cleared.
+ */
+ if (event->attach_state & PERF_ATTACH_TASK) {
+ if (!atomic_inc_unless_negative(&pmu->exclusive_cnt))
+ return -EBUSY;
+ } else {
+ if (!atomic_dec_unless_positive(&pmu->exclusive_cnt))
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+static void exclusive_event_destroy(struct perf_event *event)
+{
+ struct pmu *pmu = event->pmu;
+
+ if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE))
+ return;
+
+ /* undo exclusive_event_init(): per-task counted up, cpu-wide down */
+ if (event->attach_state & PERF_ATTACH_TASK)
+ atomic_dec(&pmu->exclusive_cnt);
+ else
+ atomic_inc(&pmu->exclusive_cnt);
+}
+
+/* Same pmu and overlapping cpus (-1 overlaps all) make two events conflict */
+static bool exclusive_event_match(struct perf_event *e1, struct perf_event *e2)
+{
+ /* distinct exclusive pmus sharing a context must not block each other */
+ if (e1->pmu != e2->pmu)
+ return false;
+
+ return e1->cpu == e2->cpu || e1->cpu == -1 || e2->cpu == -1;
+}
+
+/* Called under the same ctx::mutex as perf_install_in_context() */
+static bool exclusive_event_installable(struct perf_event *event,
+ struct perf_event_context *ctx)
+{
+ struct perf_event *iter_event;
+ struct pmu *pmu = event->pmu;
+
+ if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE))
+ return true;
+ /* exclusive pmu: refuse @event if anything already on @ctx matches it */
+ list_for_each_entry(iter_event, &ctx->event_list, event_entry) {
+ if (exclusive_event_match(iter_event, event))
+ return false;
+ }
+
+ return true;
+}
+
static void __free_event(struct perf_event *event)
{
if (!event->parent) {
@@ -3348,8 +3433,10 @@ static void __free_event(struct perf_event *event)
if (event->ctx)
put_ctx(event->ctx);
- if (event->pmu)
+ if (event->pmu) {
+ exclusive_event_destroy(event);
module_put(event->pmu->module);
+ }
call_rcu(&event->rcu_head, free_event_rcu);
}
@@ -6907,6 +6994,7 @@ got_cpu_context:
pmu->event_idx = perf_event_idx_default;
list_add_rcu(&pmu->entry, &pmus);
+ atomic_set(&pmu->exclusive_cnt, 0);
ret = 0;
unlock:
mutex_unlock(&pmus_lock);
@@ -7142,16 +7230,23 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
goto err_ns;
}
+ err = exclusive_event_init(event);
+ if (err)
+ goto err_pmu;
+
if (!event->parent) {
if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
err = get_callchain_buffers();
if (err)
- goto err_pmu;
+ goto err_per_task;
}
}
return event;
+err_per_task:
+ exclusive_event_destroy(event);
+
err_pmu:
if (event->destroy)
event->destroy(event);
@@ -7500,6 +7595,11 @@ SYSCALL_DEFINE5(perf_event_open,
goto err_alloc;
}
+ if ((pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE) && group_leader) {
+ err = -EBUSY;
+ goto err_context;
+ }
+
if (task) {
put_task_struct(task);
task = NULL;
@@ -7597,6 +7697,13 @@ SYSCALL_DEFINE5(perf_event_open,
}
}
+ if (!exclusive_event_installable(event, ctx)) {
+ err = -EBUSY;
+ mutex_unlock(&ctx->mutex);
+ fput(event_file);
+ goto err_context;
+ }
+
perf_install_in_context(ctx, event, event->cpu);
perf_unpin_context(ctx);
mutex_unlock(&ctx->mutex);
@@ -7683,6 +7790,14 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
WARN_ON_ONCE(ctx->parent_ctx);
mutex_lock(&ctx->mutex);
+ if (!exclusive_event_installable(event, ctx)) {
+ mutex_unlock(&ctx->mutex);
+ perf_unpin_context(ctx);
+ put_ctx(ctx);
+ err = -EBUSY;
+ goto err_free;
+ }
+
perf_install_in_context(ctx, event, cpu);
perf_unpin_context(ctx);
mutex_unlock(&ctx->mutex);
--
2.1.4
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists