lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20170905133026.13689-7-alexander.shishkin@linux.intel.com>
Date:   Tue,  5 Sep 2017 16:30:15 +0300
From:   Alexander Shishkin <alexander.shishkin@...ux.intel.com>
To:     Peter Zijlstra <a.p.zijlstra@...llo.nl>
Cc:     Ingo Molnar <mingo@...hat.com>, linux-kernel@...r.kernel.org,
        acme@...hat.com, kirill.shutemov@...ux.intel.com,
        Borislav Petkov <bp@...en8.de>, rric@...nel.org,
        Alexander Shishkin <alexander.shishkin@...ux.intel.com>
Subject: [RFC PATCH 06/17] perf: Add buffers to the detached events

Detached events make much more sense with ring buffers, which the user
can mmap and read a snapshot of. Unlike the normal perf events, these
ring buffers are allocated by the perf syscall; the sizes of the data and
aux areas are specified in the event attribute.

These ring buffers can only be mmapped read-only.

Signed-off-by: Alexander Shishkin <alexander.shishkin@...ux.intel.com>
---
 include/uapi/linux/perf_event.h |  3 +++
 kernel/events/core.c            | 19 ++++++++++++++++
 kernel/events/internal.h        |  2 ++
 kernel/events/ring_buffer.c     | 50 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 74 insertions(+)

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 89355584fa..3d64d9ea80 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -297,6 +297,7 @@ enum perf_event_read_format {
 					/* add: sample_stack_user */
 #define PERF_ATTR_SIZE_VER4	104	/* add: sample_regs_intr */
 #define PERF_ATTR_SIZE_VER5	112	/* add: aux_watermark */
+#define PERF_ATTR_SIZE_VER6	120	/* add: detached_* */
 
 /*
  * Hardware event_id to monitor via a performance monitoring event:
@@ -415,6 +416,8 @@ struct perf_event_attr {
 	__u32	aux_watermark;
 	__u16	sample_max_stack;
 	__u16	__reserved_2;	/* align to __u64 */
+	__u32	detached_nr_pages;
+	__u32	detached_aux_nr_pages;
 };
 
 #define perf_flags(attr)	(*(&(attr)->read_format + 1))
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 320070410d..fef1f97974 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4185,6 +4185,9 @@ static void _free_event(struct perf_event *event)
 		tracefs_remove(event->dent);
 
 		event->attach_state &= ~PERF_ATTACH_DETACHED;
+
+		ring_buffer_unaccount(event->rb, false);
+		rb_free_detached(event->rb, event);
 	}
 
 	if (event->rb) {
@@ -5012,6 +5015,10 @@ static int perf_mmap_fault(struct vm_fault *vmf)
 	int ret = VM_FAULT_SIGBUS;
 
 	if (vmf->flags & FAULT_FLAG_MKWRITE) {
+		/* detached events R/O only */
+		if (event->dent)
+			return ret;
+
 		if (vmf->pgoff == 0)
 			ret = 0;
 		return ret;
@@ -9420,6 +9427,7 @@ static int perf_event_detach(struct perf_event *event, struct task_struct *task,
 			     struct mm_struct *mm)
 {
 	char *filename;
+	int err;
 
 	filename = kasprintf(GFP_KERNEL, "%s:%x.event",
 			     task ? "task" : "cpu",
@@ -9435,6 +9443,13 @@ static int perf_event_detach(struct perf_event *event, struct task_struct *task,
 	if (!event->dent)
 		return -ENOMEM;
 
+	err = rb_alloc_detached(event);
+	if (err) {
+		tracefs_remove(event->dent);
+		event->dent = NULL;
+		return err;
+	}
+
 	return 0;
 }
 /*
@@ -10017,6 +10032,9 @@ SYSCALL_DEFINE5(perf_event_open,
 		if (output_event || (group_fd != -1))
 			goto err_task;
 
+		if (!attr.detached_nr_pages)
+			goto err_task;
+
 		detached = 1;
 	}
 
@@ -10174,6 +10192,7 @@ SYSCALL_DEFINE5(perf_event_open,
 			goto err_context;
 
 		atomic_long_inc(&event->refcount);
+		atomic_inc(&event->mmap_count);
 
 		event_file->private_data = event;
 	}
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 59136a0e98..8e267d8faa 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -82,6 +82,8 @@ extern void perf_event_wakeup(struct perf_event *event);
 extern int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
 			pgoff_t pgoff, int nr_pages, long watermark, int flags);
 extern void rb_free_aux(struct ring_buffer *rb);
+extern int rb_alloc_detached(struct perf_event *event);
+extern void rb_free_detached(struct ring_buffer *rb, struct perf_event *event);
 extern struct ring_buffer *ring_buffer_get(struct perf_event *event);
 extern void ring_buffer_put(struct ring_buffer *rb);
 
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index d36f169cae..b4d7841025 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -760,6 +760,56 @@ void rb_free_aux(struct ring_buffer *rb)
 	}
 }
 
+/*
+ * Allocate a ring_buffer for a detached event and publish it as event->rb;
+ * sizes come from attr.detached_{,aux_}nr_pages. Buffer and detached event
+ * are 1:1, so ring_buffer_attach() does not apply. Returns 0 or an error.
+ */
+int rb_alloc_detached(struct perf_event *event)
+{
+	int aux_nr_pages = event->attr.detached_aux_nr_pages;
+	int nr_pages = event->attr.detached_nr_pages;
+	struct ring_buffer *rb;
+	int ret, pgoff = nr_pages + 1;
+
+	/*
+	 * Both the data and the aux area are allocated with flags == 0, i.e.
+	 * in overwrite mode (!RING_BUFFER_WRITABLE), as we don't want wakeups
+	 * or interrupts.
+	 */
+	rb = rb_alloc(NULL, nr_pages, 0, event->cpu, 0);
+	if (IS_ERR(rb))
+		return PTR_ERR(rb);
+
+	ret = rb_alloc_aux(rb, event, pgoff, aux_nr_pages, 0, 0);
+	if (ret) {
+		rb_free(rb);
+		return ret;
+	}
+
+	atomic_set(&rb->mmap_count, 1);
+	if (aux_nr_pages)
+		atomic_set(&rb->aux_mmap_count, 1);
+
+	/*
+	 * Detached events don't need ring buffer wakeups, therefore we don't
+	 * use ring_buffer_attach() here and event->rb_entry stays empty.
+	 */
+	rcu_assign_pointer(event->rb, rb);
+
+	return 0;
+}
+
+void rb_free_detached(struct ring_buffer *rb, struct perf_event *event)
+{
+	/* The caller must hold the last remaining reference to rb */
+	WARN_ON_ONCE(atomic_read(&rb->refcount) != 1);
+
+	atomic_set(&rb->aux_mmap_count, 0);
+	rcu_assign_pointer(event->rb, NULL);
+	rb_free_aux(rb);
+	rb_free(rb);
+}
+
 #ifndef CONFIG_PERF_USE_VMALLOC
 
 /*
-- 
2.14.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ