Message-Id: <1477787923-61185-33-git-send-email-davidcc@google.com>
Date:   Sat, 29 Oct 2016 17:38:29 -0700
From:   David Carrillo-Cisneros <davidcc@...gle.com>
To:     linux-kernel@...r.kernel.org
Cc:     "x86@...nel.org" <x86@...nel.org>, Ingo Molnar <mingo@...hat.com>,
        Thomas Gleixner <tglx@...utronix.de>,
        Andi Kleen <ak@...ux.intel.com>,
        Kan Liang <kan.liang@...el.com>,
        Peter Zijlstra <peterz@...radead.org>,
        Vegard Nossum <vegard.nossum@...il.com>,
        Marcelo Tosatti <mtosatti@...hat.com>,
        Nilay Vaish <nilayvaish@...il.com>,
        Borislav Petkov <bp@...e.de>,
        Vikas Shivappa <vikas.shivappa@...ux.intel.com>,
        Ravi V Shankar <ravi.v.shankar@...el.com>,
        Fenghua Yu <fenghua.yu@...el.com>,
        Paul Turner <pjt@...gle.com>,
        Stephane Eranian <eranian@...gle.com>,
        David Carrillo-Cisneros <davidcc@...gle.com>
Subject: [PATCH v3 32/46] perf/core: Add PERF_EV_CAP_READ_ANY_{CPU_,}PKG flags

Introduce two new PERF_EV_CAP_READ_* capabilities that save unnecessary
IPIs. Since the PMU hardware keeps track of RMIDs at all times, both
capabilities added in this patch allow an event to be read even while it
is Inactive.

These capabilities also remove the need to read an event's value on
pmu->stop (already handled by previous patches).

Signed-off-by: David Carrillo-Cisneros <davidcc@...gle.com>
---
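[ Note for reviewers, not part of the commit: a minimal sketch of how a
  PMU driver whose hardware keeps counting while an event is scheduled
  out (e.g. an RMID-based monitoring PMU) might advertise the new
  capability. The driver name and event_init callback below are
  hypothetical; only event->event_caps and the new flag come from this
  series. ]

static int my_pmu_event_init(struct perf_event *event)
{
	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/*
	 * Hardware tracks this event at all times, so its count can be
	 * read from any CPU in event->cpu's package even while the
	 * event is Inactive; no cross-package IPI is needed.
	 */
	event->event_caps |= PERF_EV_CAP_READ_ANY_CPU_PKG;

	return 0;
}
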
 include/linux/perf_event.h | 16 +++++++--
 kernel/events/core.c       | 84 ++++++++++++++++++++++++++++++++++------------
 2 files changed, 75 insertions(+), 25 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 9120640..72fe105 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -510,13 +510,23 @@ typedef void (*perf_overflow_handler_t)(struct perf_event *,
 
 /*
  * Event capabilities. For event_caps and groups caps.
+ * Only one of the PERF_EV_CAP_READ_* capabilities can be set at a time.
  *
- * PERF_EV_CAP_SOFTWARE: Is a software event.
- * PERF_EV_CAP_READ_ACTIVE_PKG: A CPU event (or cgroup event) that can be read
- * from any CPU in the package where it is active.
+ * PERF_EV_CAP_SOFTWARE: A software event.
+ *
+ * PERF_EV_CAP_READ_ACTIVE_PKG: An event readable from any CPU in the
+ * package where it is active.
+ *
+ * PERF_EV_CAP_READ_ANY_CPU_PKG: A CPU (or cgroup) event readable from any
+ * CPU in its event->cpu's package, even if inactive.
+ *
+ * PERF_EV_CAP_READ_ANY_PKG: An event readable from any CPU in any package,
+ * even if inactive.
  */
 #define PERF_EV_CAP_SOFTWARE		BIT(0)
 #define PERF_EV_CAP_READ_ACTIVE_PKG	BIT(1)
+#define PERF_EV_CAP_READ_ANY_CPU_PKG	BIT(2)
+#define PERF_EV_CAP_READ_ANY_PKG	BIT(3)
 
 #define SWEVENT_HLIST_BITS		8
 #define SWEVENT_HLIST_SIZE		(1 << SWEVENT_HLIST_BITS)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 059e5bb..77afd68 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3432,22 +3432,55 @@ static void perf_event_enable_on_exec(int ctxn)
 struct perf_read_data {
 	struct perf_event *event;
 	bool group;
+	bool read_inactive;
 	int ret;
 };
 
-static int find_cpu_to_read(struct perf_event *event, int local_cpu)
+static int find_cpu_to_read(struct perf_event *event, bool *read_inactive)
 {
-	int event_cpu = event->oncpu;
+	bool active = event->state == PERF_EVENT_STATE_ACTIVE;
+	int local_cpu, event_cpu = active ? event->oncpu : event->cpu;
 	u16 local_pkg, event_pkg;
 
+	/* Do not read if event is neither Active nor Inactive. */
+	if (event->state <= PERF_EVENT_STATE_OFF) {
+		*read_inactive = false;
+		return -1;
+	}
+
+	local_cpu = get_cpu();
+	if (event->group_caps & PERF_EV_CAP_READ_ANY_PKG) {
+		*read_inactive = true;
+		event_cpu = local_cpu;
+		goto exit;
+	}
+
+	/* Event is Inactive and is not a CPU or cgroup event. */
+	if (event_cpu < 0) {
+		*read_inactive = false;
+		goto exit;
+	}
+
+	*read_inactive = event->group_caps & PERF_EV_CAP_READ_ANY_CPU_PKG;
+	if (!active && !*read_inactive)
+		goto exit;
+
+	/* Could be Inactive and have PERF_EV_CAP_READ_ANY_CPU_PKG. */
 	if (event->group_caps & PERF_EV_CAP_READ_ACTIVE_PKG) {
 		event_pkg =  topology_physical_package_id(event_cpu);
 		local_pkg =  topology_physical_package_id(local_cpu);
 
 		if (event_pkg == local_pkg)
-			return local_cpu;
+			event_cpu = local_cpu;
 	}
 
+exit:
+	/*
+	 * __perf_event_read() tolerates a change of local CPU;
+	 * there is no need to keep the CPU pinned.
+	 */
+	put_cpu();
+
 	return event_cpu;
 }
 
@@ -3461,15 +3494,16 @@ static void __perf_event_read(void *info)
 	struct perf_event_context *ctx = event->ctx;
 	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
 	struct pmu *pmu = event->pmu;
+	bool active, read_inactive = data->read_inactive;
 
 	/*
-	 * If this is a task context, we need to check whether it is
-	 * the current task context of this cpu.  If not it has been
-	 * scheduled out before the smp call arrived.  In that case
-	 * event->count would have been updated to a recent sample
-	 * when the event was scheduled out.
+	 * If this is a task context and !read_inactive, we need to check
+	 * whether it is the current task context of this cpu.
+	 * If not, it has been scheduled out before the smp call arrived.
+	 * In that case event->count would have been updated to a recent
+	 * sample when the event was scheduled out.
 	 */
-	if (ctx->task && cpuctx->task_ctx != ctx)
+	if (ctx->task && cpuctx->task_ctx != ctx && !read_inactive)
 		return;
 
 	raw_spin_lock(&ctx->lock);
@@ -3480,7 +3514,13 @@ static void __perf_event_read(void *info)
 	}
 
 	update_event_times(event);
-	if (event->state != PERF_EVENT_STATE_ACTIVE)
+
+	if (event->state <= PERF_EVENT_STATE_OFF)
+		goto unlock;
+
+	/* If event->state > Off, then it's either Active or Inactive. */
+	active = event->state == PERF_EVENT_STATE_ACTIVE;
+	if (!active && !read_inactive)
 		goto unlock;
 
 	if (!data->group) {
@@ -3496,7 +3536,12 @@ static void __perf_event_read(void *info)
 
 	list_for_each_entry(sub, &event->sibling_list, group_entry) {
 		update_event_times(sub);
-		if (sub->state == PERF_EVENT_STATE_ACTIVE) {
+		/*
+		 * Since the leader is Active, siblings are either Active or
+		 * Inactive.
+		 */
+		active = sub->state == PERF_EVENT_STATE_ACTIVE;
+		if (active || read_inactive) {
 			/*
 			 * Use sibling's PMU rather than @event's since
 			 * sibling could be on different (eg: software) PMU.
@@ -3567,23 +3612,18 @@ u64 perf_event_read_local(struct perf_event *event)
 
 static int perf_event_read(struct perf_event *event, bool group)
 {
-	int ret = 0, cpu_to_read, local_cpu;
+	bool read_inactive;
+	int ret = 0, cpu_to_read;
 
-	/*
-	 * If event is enabled and currently active on a CPU, update the
-	 * value in the event structure:
-	 */
-	if (event->state == PERF_EVENT_STATE_ACTIVE) {
+	cpu_to_read = find_cpu_to_read(event, &read_inactive);
+
+	if (cpu_to_read >= 0) {
 		struct perf_read_data data = {
 			.event = event,
 			.group = group,
+			.read_inactive = read_inactive,
 			.ret = 0,
 		};
-
-		local_cpu = get_cpu();
-		cpu_to_read = find_cpu_to_read(event, local_cpu);
-		put_cpu();
-
 		/*
 		 * Purposely ignore the smp_call_function_single() return
 		 * value.
-- 
2.8.0.rc3.226.g39d4020
