linux-kernel - Re: [PATCH v9 12/14] x86: perf: intel

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20150129115918.GA26304@twins.programming.kicks-ass.net>
Date:	Thu, 29 Jan 2015 12:59:18 +0100
From:	Peter Zijlstra <peterz@...radead.org>
To:	Alexander Shishkin <alexander.shishkin@...ux.intel.com>
Cc:	Ingo Molnar <mingo@...hat.com>, linux-kernel@...r.kernel.org,
	Robert Richter <rric@...nel.org>,
	Frederic Weisbecker <fweisbec@...il.com>,
	Mike Galbraith <efault@....de>,
	Paul Mackerras <paulus@...ba.org>,
	Stephane Eranian <eranian@...gle.com>,
	Andi Kleen <ak@...ux.intel.com>, kan.liang@...el.com,
	adrian.hunter@...el.com, markus.t.metzger@...el.com,
	mathieu.poirier@...aro.org, Kaixu Xia <kaixu.xia@...aro.org>,
	acme@...radead.org
Subject: Re: [PATCH v9 12/14] x86: perf: intel_pt: Intel PT PMU driver

On Tue, Jan 27, 2015 at 08:03:47PM +0200, Alexander Shishkin wrote:
> +static int account_per_task_counter(struct perf_event *event)
> +{
> +	struct pmu *pmu = event->pmu;
> +
> +	if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE))
> +		return 0;
> +
> +	/*
> +	 * Prevent co-existence of per-task and cpu-wide events on the
> +	 * same exclusive pmu.
> +	 *
> +	 * Negative pmu::nr_per_task_events means there are cpu-wide
> +	 * events on this "exclusive" pmu, positive means there are
> +	 * per-task events.
> +	 */
> +	if (event->cpu == -1 &&
> +	    !atomic_inc_unless_negative(&pmu->nr_per_task_events))
> +		return -EBUSY;
> +	else if (!(event->attach_state & PERF_ATTACH_TASK) &&
> +		 !atomic_dec_unless_positive(&pmu->nr_per_task_events))
> +		return -EBUSY;
> +
> +	return 0;
> +}

I think that logic fails for per-task events that have a cpu set.


> +static bool exclusive_event_ok(struct perf_event *event,
> +			       struct perf_event_context *ctx)
> +{
> +	struct pmu *pmu = event->pmu;
> +	bool ret = true;
> +
> +	if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE))
> +		return true;
> +
> +	if (!__exclusive_event_ok(event, ctx))
> +		return false;
> +
> +	if (ctx->task) {
> +		if (event->cpu != -1) {
> +			struct perf_event_context *cpuctx;
> +
> +			cpuctx = &per_cpu_ptr(pmu->pmu_cpu_context, event->cpu)->ctx;
> +
> +			mutex_lock(&cpuctx->mutex);
> +			ret = __exclusive_event_ok(event, cpuctx);
> +			mutex_unlock(&cpuctx->mutex);

We're already holding ctx->mutex, this should have made lockdep scream.

> +		}
> +	}
> +
> +	return ret;
> +}


Would something like this work?

---
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -168,6 +168,7 @@ struct perf_event;
 #define PERF_PMU_CAP_NO_INTERRUPT		0x01
 #define PERF_PMU_CAP_AUX_NO_SG			0x02
 #define PERF_PMU_CAP_AUX_SW_DOUBLEBUF		0x04
+#define PERF_PMU_CAP_EXCLUSIVE			0x08
 
 /**
  * struct pmu - generic performance monitoring unit
@@ -188,6 +189,7 @@ struct pmu {
 
 	int * __percpu			pmu_disable_count;
 	struct perf_cpu_context * __percpu pmu_cpu_context;
+	atomic_t			exclusive_cnt; /* <0: cpu, >0: tsk */
 	int				task_ctx_nr;
 	int				hrtimer_interval_ms;
 
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3487,6 +3487,9 @@ static void __free_event(struct perf_eve
 	if (event->destroy)
 		event->destroy(event);
 
+	if (event->pmu && event->ctx)
+		exclusive_event_release(event);
+
 	if (event->ctx)
 		put_ctx(event->ctx);
 
@@ -7092,6 +7095,7 @@ int perf_pmu_register(struct pmu *pmu, c
 		pmu->event_idx = perf_event_idx_default;
 
 	list_add_rcu(&pmu->entry, &pmus);
+	atomic_set(&pmu->exclusive_cnt, 0);
 	ret = 0;
 unlock:
 	mutex_unlock(&pmus_lock);
@@ -7537,6 +7541,78 @@ perf_event_set_output(struct perf_event
 	return ret;
 }
 
+static bool exclusive_event_match(struct perf_event *e1, struct perf_event *e2)
+{
+	if ((e1->pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE) &&
+	    (e1->cpu == e2->cpu ||
+	     e1->cpu == -1 ||
+	     e2->cpu == -1))
+		return true;
+	return false;
+}
+
+static bool __exclusive_event_ok(struct perf_event *event,
+				 struct perf_event_context *ctx)
+{
+	struct perf_event *iter_event;
+
+	list_for_each_entry(iter_event, &ctx->event_list, event_entry) {
+		if (exclusive_event_match(iter_event, event))
+			return false;
+	}
+
+	return true;
+}
+
+static bool exclusive_event_ok(struct perf_event *event,
+			       struct perf_event_context *ctx)
+{
+	struct pmu *pmu = event->pmu;
+	bool ret;
+
+	if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE))
+		return true;
+
+	/*
+	 * exclusive_cnt <0: cpu
+	 *               >0: tsk
+	 */
+	if (ctx->task) {
+		if (!atomic_inc_unless_negative(&pmu->exclusive_cnt))
+			return false;
+	} else {
+		if (!atomic_dec_unless_positive(&pmu->exclusive_cnt))
+			return false;
+	}
+
+	mutex_lock(&ctx->lock);
+	ret = __exclusive_event_ok(event, ctx);
+	mutex_unlock(&ctx->lock);
+
+	if (!ret) {
+		if (ctx->task)
+			atomic_dec(&pmu->exclusive_cnt);
+		else
+			atomic_inc(&pmu->exclusive_cnt);
+	}
+
+	return ret;
+}
+
+static void exclusive_event_release(struct perf_event *event)
+{
+	struct perf_event_context *ctx = event->ctx;
+	struct pmu *pmu = event->pmu;
+
+	if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE))
+		return;
+
+	if (ctx->task)
+		atomic_dec(&pmu->exclusive_cnt);
+	else
+		atomic_inc(&pmu->exclusive_cnt);
+}
+
 static void mutex_lock_double(struct mutex *a, struct mutex *b)
 {
 	if (b < a)
@@ -7702,6 +7778,15 @@ SYSCALL_DEFINE5(perf_event_open,
 		task = NULL;
 	}
 
+	if (pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE) {
+		err = -EBUSY;
+		if (group_leader)
+			goto err_context;
+
+		if (!exclusive_event_ok(event, ctx))
+			goto err_context;
+	}
+
 	/*
 	 * Look up the group leader (we will attach this event to it):
 	 */
@@ -7903,6 +7988,11 @@ perf_event_create_kernel_counter(struct
 		goto err_free;
 	}
 
+	if (!exclusive_event_ok(event, ctx)) {
+		err = -EBUSY;
+		goto err_context;
+	}
+
 	WARN_ON_ONCE(ctx->parent_ctx);
 	mutex_lock(&ctx->mutex);
 	perf_install_in_context(ctx, event, cpu);
@@ -7911,6 +8001,9 @@ perf_event_create_kernel_counter(struct
 
 	return event;
 
+err_context:
+	perf_unpin_context(ctx);
+	put_ctx(ctx);
 err_free:
 	free_event(event);
 err:
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/