lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1437738359-23920-2-git-send-email-alexander.shishkin@linux.intel.com>
Date:	Fri, 24 Jul 2015 14:45:56 +0300
From:	Alexander Shishkin <alexander.shishkin@...ux.intel.com>
To:	Peter Zijlstra <a.p.zijlstra@...llo.nl>,
	Ingo Molnar <mingo@...hat.com>
Cc:	linux-kernel@...r.kernel.org, adrian.hunter@...el.com,
	Arnaldo Carvalho de Melo <acme@...radead.org>,
	Vince Weaver <vince@...ter.net>,
	Stephane Eranian <eranian@...gle.com>,
	Alexander Shishkin <alexander.shishkin@...ux.intel.com>
Subject: [PATCH RFC v1 1/4] perf: Introduce extended syscall error reporting

It has been pointed out several times that perf syscall error reporting
leaves a lot to be desired [1].

This patch introduces a fairly simple extension that allows call sites
to annotate their error codes with arbitrary strings. If userspace asked
for it, the string is then copied out in JSON format, along with the name
of the module that produced the error. This way, we can provide both
human-readable and machine-parsable information to the user and leave
room for future extensions, such as file name and line number if kernel
debugging is enabled.

Each error "site" is referred to by its index, which is folded into an
integer error value within the range of [-PERF_ERRNO, -MAX_ERRNO], where
PERF_ERRNO is chosen to be below any known error codes, but still leaving
enough room to enumerate error sites. This way, all the traditional macros
will still handle these as error codes and we'd only have to convert them
to their original values right before returning to userspace. This way we
also don't have to worry about keeping a separate pointer to the error
message inside a perf_event.

[1] http://marc.info/?l=linux-kernel&m=141470811013082

NYet-Signed-off-by: Alexander Shishkin <alexander.shishkin@...ux.intel.com>
---
 include/linux/perf_event.h      | 76 ++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/perf_event.h |  8 +++-
 kernel/events/core.c            | 81 ++++++++++++++++++++++++++++++++++++++---
 3 files changed, 159 insertions(+), 6 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2027809433..9e9af962f6 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -56,6 +56,82 @@ struct perf_guest_info_callbacks {
 #include <linux/cgroup.h>
 #include <asm/local.h>
 
+#ifndef PERF_MODNAME
+#define PERF_MODNAME "perf"
+#endif
+
+/*
+ * Extended error reporting: annotate an error code with a string
+ * and a module name to help users diagnose problems with their
+ * attributes and whatnot.
+ */
+struct perf_err_site {
+	const char		*message;	/* annotation string given at the call site */
+	const char		*owner;		/* PERF_MODNAME of the call site */
+	const int		code;		/* annotated errno; 0 if not a constant */
+};
+
+#ifdef CONFIG_PERF_EVENTS
+
+/*
+ * Place all occurrences of struct perf_err_site into a special section,
+ * so that we can find out their offsets, which we'll use to refer back
+ * to the error sites.
+ */
+extern const struct perf_err_site __start___perf_err[], __stop___perf_err[];
+
+#define __perf_err(__e, __c, __m) ({				\
+	static struct perf_err_site				\
+	__attribute__ ((unused,__section__("__perf_err")))	\
+	__err_site = {						\
+		.message	= (__m),			\
+		.owner		= PERF_MODNAME,			\
+		.code		= __builtin_constant_p((__c)) ?	\
+		(__c) : 0,					\
+	};							\
+	(__e) = &__err_site;					\
+})
+
+/*
+ * Use part of the [-1, -MAX_ERRNO] errno range for perf's extended error
+ * reporting. Anything within [-PERF_ERRNO, -MAX_ERRNO] is an index of a
+ * perf_err_site structure within __perf_err section. 3.5k should be enough
+ * for everybody, but let's add a boot-time warning just in case it overflows
+ * one day.
+ */
+#define PERF_ERRNO 512
+
+static inline int perf_errno(const struct perf_err_site *site)
+{
+	unsigned long err = site - __start___perf_err;
+
+	/* fold the section index into the errno range: 0 -> -PERF_ERRNO, 1 -> -PERF_ERRNO - 1, ... */
+	return -(int)err - PERF_ERRNO;
+}
+
+static inline const struct perf_err_site *perf_errno_to_site(int err)
+{
+	return __start___perf_err - err - PERF_ERRNO;
+}
+
+#ifdef MODULE
+/*
+ * Module support is a tad trickier, but far from rocket surgery. Let's
+ * bypass it for now.
+ */
+#define perf_err(__c, __m) (__c)
+#else
+#define perf_err(__c, __m) ({					\
+	struct perf_err_site *s;				\
+	__perf_err(s, (__c), (__m));				\
+	perf_errno(s);						\
+})
+#endif
+
+#define PERF_ERR_PTR(__e, __m)	(ERR_PTR(perf_err(__e, __m)))
+
+#endif
+
 struct perf_callchain_entry {
 	__u64				nr;
 	__u64				ip[PERF_MAX_STACK_DEPTH];
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index d97f84c080..d1ae1a079c 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -264,6 +264,7 @@ enum perf_event_read_format {
 					/* add: sample_stack_user */
 #define PERF_ATTR_SIZE_VER4	104	/* add: sample_regs_intr */
 #define PERF_ATTR_SIZE_VER5	112	/* add: aux_watermark */
+#define PERF_ATTR_SIZE_VER6	120	/* add: perf_err */
 
 /*
  * Hardware event_id to monitor via a performance monitoring event:
@@ -374,7 +375,12 @@ struct perf_event_attr {
 	 * Wakeup watermark for AUX area
 	 */
 	__u32	aux_watermark;
-	__u32	__reserved_2;	/* align to __u64 */
+
+	/*
+	 * Extended error reporting buffer
+	 */
+	__u32	perf_err_size;
+	__u64	perf_err;
 };
 
 #define perf_flags(attr)	(*(&(attr)->read_format + 1))
diff --git a/kernel/events/core.c b/kernel/events/core.c
index d3dae3419b..85bcf3a5f9 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -49,6 +49,74 @@
 
 #include <asm/irq_regs.h>
 
+/* True when @attr is large enough to carry perf_err and names a non-empty buffer. */
+static bool extended_reporting_enabled(struct perf_event_attr *attr)
+{
+	bool has_field = attr->size >= PERF_ATTR_SIZE_VER6;
+	bool has_buffer = attr->perf_err_size > 0;
+
+	return has_field && has_buffer;
+}
+
+/*
+ * Format @site as JSON and copy it to the user buffer named by @attr, if any.
+ */
+static void perf_error_report_site(struct perf_event_attr *attr,
+				   const struct perf_err_site *site)
+{
+	void *buffer;
+
+	if (!site || !extended_reporting_enabled(attr))
+		return;
+
+	/* in case of nested perf_err()s, which you shouldn't really do */
+	while (site->code <= -PERF_ERRNO)
+		site = perf_errno_to_site(site->code);
+
+	buffer = kasprintf(GFP_KERNEL,
+			   "{\n"
+			   "\t\"code\": %d,\n"
+			   "\t\"module\": \"%s\",\n"
+			   "\t\"message\": \"%s\"\n"
+			   "}\n",
+			   site->code, site->owner, site->message
+			   );
+	if (!buffer)
+		return;
+	/* copy no more than the formatted string: the user buffer may be larger than ours */
+	if (copy_to_user((void __user *)(unsigned long)attr->perf_err, buffer,
+			 min_t(size_t, strlen(buffer) + 1, attr->perf_err_size))) {
+		/* if we failed to copy once, don't bother later */
+		attr->perf_err_size = 0;
+	}
+
+	kfree(buffer);
+}
+
+/*
+ * Synchronous version of perf_err(), for the paths where we return immediately
+ * back to userspace.
+ */
+#define perf_err_sync(__attr, __c, __m) ({		\
+	struct perf_err_site *__site;			\
+	__perf_err(__site, (__c), (__m));		\
+	perf_error_report_site(__attr, __site);		\
+	(__c);						\
+})
+
+static int perf_error_report(struct perf_event_attr *attr, int err)
+{
+	const struct perf_err_site *site;
+
+	if (err > -PERF_ERRNO)
+		return err;
+
+	site = perf_errno_to_site(err);
+	perf_error_report_site(attr, site);
+
+	return site->code;
+}
+
 static struct workqueue_struct *perf_wq;
 
 typedef int (*remote_function_f)(void *);
@@ -3890,7 +3958,7 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 	ret = perf_read_hw(event, buf, count);
 	perf_event_ctx_unlock(event, ctx);
 
-	return ret;
+	return perf_error_report(&event->attr, ret);
 }
 
 static unsigned int perf_poll(struct file *file, poll_table *wait)
@@ -4103,7 +4171,7 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	ret = _perf_ioctl(event, cmd, arg);
 	perf_event_ctx_unlock(event, ctx);
 
-	return ret;
+	return perf_error_report(&event->attr, ret);
 }
 
 #ifdef CONFIG_COMPAT
@@ -4703,7 +4771,7 @@ aux_unlock:
 	if (event->pmu->event_mapped)
 		event->pmu->event_mapped(event);
 
-	return ret;
+	return perf_error_report(&event->attr, ret);
 }
 
 static int perf_fasync(int fd, struct file *filp, int on)
@@ -4717,7 +4785,7 @@ static int perf_fasync(int fd, struct file *filp, int on)
 	mutex_unlock(&inode->i_mutex);
 
 	if (retval < 0)
-		return retval;
+		return perf_error_report(&event->attr, retval);
 
 	return 0;
 }
@@ -8208,7 +8276,7 @@ err_group_fd:
 	fdput(group);
 err_fd:
 	put_unused_fd(event_fd);
-	return err;
+	return perf_error_report(&attr, err);
 }
 
 /**
@@ -8986,6 +9054,9 @@ void __init perf_event_init(void)
 	 */
 	BUILD_BUG_ON((offsetof(struct perf_event_mmap_page, data_head))
 		     != 1024);
+
+	/* Too many error sites; see the comment at PERF_ERRNO definition */
+	WARN_ON(__stop___perf_err - __start___perf_err > MAX_ERRNO - PERF_ERRNO);
 }
 
 ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr,
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ