linux-kernel - Re: [GIT PULL 0/8] perf/pt -> Intel PT/BTS

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <87pp49nwi7.fsf@ashishki-desk.ger.corp.intel.com>
Date:	Fri, 03 Jul 2015 17:31:12 +0300
From:	Alexander Shishkin <alexander.shishkin@...ux.intel.com>
To:	Ingo Molnar <mingo@...nel.org>,
	Adrian Hunter <adrian.hunter@...el.com>
Cc:	Arnaldo Carvalho de Melo <acme@...nel.org>,
	linux-kernel@...r.kernel.org, Jiri Olsa <jolsa@...hat.com>,
	David Ahern <dsahern@...il.com>,
	Namhyung Kim <namhyung@...nel.org>,
	Peter Zijlstra <a.p.zijlstra@...llo.nl>,
	Stephane Eranian <eranian@...gle.com>,
	Arnaldo Carvalho de Melo <acme@...hat.com>
Subject: Re: [GIT PULL 0/8] perf/pt -> Intel PT/BTS

Ingo Molnar <mingo@...nel.org> writes:

> So I really think we need an extended error reporting feature on the perf kernel 
> side, so that a 'natural' error (plus a string) is reported back to tooling, 
> instead of the current -EINVAL.
>
> No need to do it for everything, doing it for BTS and related functionality would 
> be a good first step to start this.
>
> If you are interested you could try this, or I can try to write something (after 
> the merge window).
>
> So the idea would be to convert such opaque error returns:
>
>         if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
>             !attr->freq && hwc->sample_period == 1) {
>                 /* BTS is not supported by this architecture. */
>                 if (!x86_pmu.bts_active)
>                         return -EOPNOTSUPP;
>
> into:
>
> 			return perf_err(event, -EOPNOTSUPP, "The BTS hardware feature is not available on this CPU.");

So I poked around this a bit and came up with the patch below to give
this topic some more momentum.

Your average error will then be like this:

#define PERF_MODNAME "perf/x86"

...

               /* BTS is currently only allowed for user-mode. */
               if (!attr->exclude_kernel)
                       return perf_err(event, -EOPNOTSUPP,
                                       "BTS sampling not allowed for kernel space");

which in userspace will translate into:

---cut---
$ perf record -e branches:uk -c1 ls
kernel says: {
        "code": -95,
        "module": "perf/x86",
        "message": "BTS sampling not allowed for kernel space"
}

Error:
No hardware sampling interrupt available.
No APIC? If so then you can boot the kernel with the "lapic" boot parameter to force-enable it.
---cut---

The way I hacked it into tools/perf, it still prints out the old error
message (which, to prove the point once again, is neither here nor
there).

The patch below attaches the error to struct perf_event or copies it
directly to user's buffer in case if we don't have an event at the point
of error. Maybe a slightly easier way of doing it would be to strap it
to task_struct instead, but let's not stir the pot too much this time
around.

Also, to add an extra spin, the report is formatted as a JSON object so
that we could include both human-readable and machine-readable bits.

>From a7de169ef2ca5425cd57286bfb61bfd5f8d15738 Mon Sep 17 00:00:00 2001
From: Alexander Shishkin <alexander.shishkin@...ux.intel.com>
Date: Fri, 3 Jul 2015 17:12:54 +0300
Subject: [PATCH] perf: Introduce extended syscall error reporting

It has been pointed several times out that perf syscall error reporting
leaves a lot to be desired [1].

This patch introduces a fairly simple extension that allows call sites
to annotate their error codes with arbitrary strings, which will then
be copied to userspace (if they asked for it) along with the module
name that produced the error message in JSON format. This way, we can
provide both human-readable and machine-parsable information to user and
leave room for extensions in the future.

[1] http://marc.info/?l=linux-kernel&m=141470811013082

Signed-off-by: Alexander Shishkin <alexander.shishkin@...ux.intel.com>
---
 include/linux/perf_event.h      | 38 ++++++++++++++++++++++++
 include/uapi/linux/perf_event.h |  8 ++++-
 kernel/events/core.c            | 66 +++++++++++++++++++++++++++++++++++++++--
 3 files changed, 108 insertions(+), 4 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 1b82d44b0a..15bbef478f 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -56,6 +56,37 @@ struct perf_guest_info_callbacks {
 #include <linux/cgroup.h>
 #include <asm/local.h>
 
+#ifndef PERF_MODNAME
+#define PERF_MODNAME KBUILD_MODNAME
+#endif
+
+/*
+ * Extended error reporting: annotate an error code with a string
+ * and a module name to help users diagnase problems with their
+ * attributes and whatnot.
+ */
+struct perf_err_site {
+	const char		*message;
+	const char		*owner;
+	const int		code;
+};
+
+#ifdef CONFIG_PERF_EVENTS
+
+#define __perf_err(__e, __c, __m) ({			\
+	static struct perf_err_site __err_site = {	\
+		.message	= (__m),		\
+		.owner		= PERF_MODNAME,		\
+		.code		= (__c),		\
+	};						\
+	(__e) = &__err_site;				\
+	(__c);						\
+})
+
+#define perf_err(__evt, __c, __m) ({ __perf_err((__evt)->error, (__c), (__m)); })
+
+#endif
+
 struct perf_callchain_entry {
 	__u64				nr;
 	__u64				ip[PERF_MAX_STACK_DEPTH];
@@ -447,6 +478,13 @@ struct perf_event {
 	struct list_head		owner_entry;
 	struct task_struct		*owner;
 
+	/*
+	 * Extended error reporting
+	 */
+	const struct perf_err_site	*error;
+	void __user			*error_buffer;
+	size_t				error_buffer_size;
+
 	/* mmap bits */
 	struct mutex			mmap_mutex;
 	atomic_t			mmap_count;
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index d97f84c080..d1ae1a079c 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -264,6 +264,7 @@ enum perf_event_read_format {
 					/* add: sample_stack_user */
 #define PERF_ATTR_SIZE_VER4	104	/* add: sample_regs_intr */
 #define PERF_ATTR_SIZE_VER5	112	/* add: aux_watermark */
+#define PERF_ATTR_SIZE_VER6	120	/* add: perf_err */
 
 /*
  * Hardware event_id to monitor via a performance monitoring event:
@@ -374,7 +375,12 @@ struct perf_event_attr {
 	 * Wakeup watermark for AUX area
 	 */
 	__u32	aux_watermark;
-	__u32	__reserved_2;	/* align to __u64 */
+
+	/*
+	 * Extended error reporting buffer
+	 */
+	__u32	perf_err_size;
+	__u64	perf_err;
 };
 
 #define perf_flags(attr)	(*(&(attr)->read_format + 1))
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 8e13f3e54e..fd345c96d1 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -49,6 +49,60 @@
 
 #include <asm/irq_regs.h>
 
+static bool extended_reporting_enabled(struct perf_event_attr *attr)
+{
+	if (attr->size >= PERF_ATTR_SIZE_VER6 &&
+	    attr->perf_err_size > 0)
+		return true;
+
+	return false;
+}
+
+/*
+ * Provide a JSON formatted error report to the user if they asked for it.
+ */
+static void __perf_error_report(struct perf_event_attr *attr,
+				const struct perf_err_site *err_site)
+{
+	void *buffer;
+
+	if (!err_site || !extended_reporting_enabled(attr))
+		return;
+
+	buffer = kasprintf(GFP_KERNEL,
+			   "{\n"
+			   "\t\"code\": %d,\n"
+			   "\t\"module\": \"%s\",\n"
+			   "\t\"message\": \"%s\"\n"
+			   "}\n",
+			   err_site->code, err_site->owner, err_site->message);
+	if (!buffer)
+		return;
+
+	(void)copy_to_user((void __user *)attr->perf_err, buffer,
+			   attr->perf_err_size);
+	kfree(buffer);
+}
+
+/*
+ * Synchronous version of perf_err(), for the paths where we don't have
+ * an event.
+ */
+#define perf_err_attr(__attr, __c, __m) ({		\
+	struct perf_err_site *__site;			\
+	__perf_err(__site, (__c), (__m));		\
+	__perf_error_report(__attr, __site);		\
+	(__c);						\
+})
+
+/*
+ * Report an error before returning from a syscall
+ */
+static void perf_error_report(struct perf_event *event)
+{
+	__perf_error_report(&event->attr, event->error);
+}
+
 static struct workqueue_struct *perf_wq;
 
 typedef int (*remote_function_f)(void *);
@@ -3594,6 +3648,8 @@ static void _free_event(struct perf_event *event)
  */
 static void free_event(struct perf_event *event)
 {
+	perf_error_report(event);
+
 	if (WARN(atomic_long_cmpxchg(&event->refcount, 1, 0) != 1,
 				"unexpected event refcount: %ld; ptr=%p\n",
 				atomic_long_read(&event->refcount), event)) {
@@ -3661,6 +3717,8 @@ static void put_event(struct perf_event *event)
 	if (!atomic_long_dec_and_test(&event->refcount))
 		return;
 
+	perf_error_report(event);
+
 	if (!is_kernel_event(event))
 		perf_remove_from_owner(event);
 
@@ -7634,6 +7692,7 @@ err_ns:
 		perf_detach_cgroup(event);
 	if (event->ns)
 		put_pid_ns(event->ns);
+	perf_error_report(event);
 	kfree(event);
 
 	return ERR_PTR(err);
@@ -7912,15 +7971,16 @@ SYSCALL_DEFINE5(perf_event_open,
 
 	if (!attr.exclude_kernel) {
 		if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
-			return -EACCES;
+			return perf_err_attr(&attr, -EACCES,
+					     "kernel tracing forbidden for the unprivileged");
 	}
 
 	if (attr.freq) {
 		if (attr.sample_freq > sysctl_perf_event_sample_rate)
-			return -EINVAL;
+			return perf_err_attr(&attr, -EINVAL, "sample_freq too high");
 	} else {
 		if (attr.sample_period & (1ULL << 63))
-			return -EINVAL;
+			return perf_err_attr(&attr, -EINVAL, "sample_period too high");
 	}
 
 	/*
-- 
2.1.4



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/