lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Thu,  6 Feb 2014 12:50:25 +0200
From:	Alexander Shishkin <alexander.shishkin@...ux.intel.com>
To:	Peter Zijlstra <a.p.zijlstra@...llo.nl>
Cc:	Ingo Molnar <mingo@...hat.com>, linux-kernel@...r.kernel.org,
	Frederic Weisbecker <fweisbec@...il.com>,
	Mike Galbraith <efault@....de>,
	Paul Mackerras <paulus@...ba.org>,
	Stephane Eranian <eranian@...gle.com>,
	Andi Kleen <ak@...ux.intel.com>,
	Adrian Hunter <adrian.hunter@...el.com>,
	Matt Fleming <matt.fleming@...el.com>,
	Alexander Shishkin <alexander.shishkin@...ux.intel.com>
Subject: [PATCH v1 02/11] perf: Abstract ring_buffer backing store operations

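This patch extends perf's ring_buffer code so that buffers with different
backing stores can coexist, all allocated through rb_alloc(). The backing
store operations are collected in a new struct ring_buffer_ops; passing NULL
as the ops argument of rb_alloc() selects the default perf_rb_ops. This allows
the reuse of ring_buffer code for exporting hardware-written trace buffers
(such as those of Intel PT) to userspace.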

Signed-off-by: Alexander Shishkin <alexander.shishkin@...ux.intel.com>
---
 kernel/events/core.c        |   4 +-
 kernel/events/internal.h    |  32 +++++++-
 kernel/events/ring_buffer.c | 176 +++++++++++++++++++++++++++-----------------
 3 files changed, 143 insertions(+), 69 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 56003c6..6899741 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4105,9 +4105,9 @@ again:
 	if (vma->vm_flags & VM_WRITE)
 		flags |= RING_BUFFER_WRITABLE;
 
-	rb = rb_alloc(nr_pages, 
+	rb = rb_alloc(event, nr_pages,
 		event->attr.watermark ? event->attr.wakeup_watermark : 0,
-		event->cpu, flags);
+		event->cpu, flags, NULL);
 
 	if (!rb) {
 		ret = -ENOMEM;
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 569b2187..6cb208f 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -6,6 +6,33 @@
 
 /* Buffer handling */
 
+struct ring_buffer;
+
+struct ring_buffer_ops {
+	/*
+	 * Size in bytes to allocate for struct ring_buffer, including the
+	 * data_pages[] array, given the requested number of data pages.
+	 */
+	unsigned long	(*get_size)(int);
+	/*
+	 * Allocate user_page for this buffer. The callback may be NULL, in
+	 * which case user_page is allocated by alloc_data_page().
+	 */
+	int		(*alloc_user_page)(struct ring_buffer *, int, int);
+	/*
+	 * Allocate data_pages for this buffer.
+	 */
+	int		(*alloc_data_page)(struct ring_buffer *, int, int, int);
+	/*
+	 * Free the buffer.
+	 */
+	void		(*free_buffer)(struct ring_buffer *);
+	/*
+	 * Get a struct page for a given page index in the buffer.
+	 */
+	struct page	*(*mmap_to_page)(struct ring_buffer *, unsigned long);
+};
+
 #define RING_BUFFER_WRITABLE		0x01
 
 struct ring_buffer {
@@ -15,6 +42,8 @@ struct ring_buffer {
 	struct work_struct		work;
 	int				page_order;	/* allocation order  */
 #endif
+	struct ring_buffer_ops		*ops;
+	struct perf_event		*event;
 	int				nr_pages;	/* nr of data pages  */
 	int				overwrite;	/* can overwrite itself */
 
@@ -41,7 +70,8 @@ struct ring_buffer {
 
 extern void rb_free(struct ring_buffer *rb);
 extern struct ring_buffer *
-rb_alloc(int nr_pages, long watermark, int cpu, int flags);
+rb_alloc(struct perf_event *event, int nr_pages, long watermark, int cpu,
+	 int flags, struct ring_buffer_ops *rb_ops);
 extern void perf_event_wakeup(struct perf_event *event);
 
 extern void
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 146a579..161a676 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -248,18 +248,6 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
  * Back perf_mmap() with regular GFP_KERNEL-0 pages.
  */
 
-struct page *
-perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
-{
-	if (pgoff > rb->nr_pages)
-		return NULL;
-
-	if (pgoff == 0)
-		return virt_to_page(rb->user_page);
-
-	return virt_to_page(rb->data_pages[pgoff - 1]);
-}
-
 static void *perf_mmap_alloc_page(int cpu)
 {
 	struct page *page;
@@ -273,46 +261,31 @@ static void *perf_mmap_alloc_page(int cpu)
 	return page_address(page);
 }
 
-struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
+static int perf_mmap_alloc_user_page(struct ring_buffer *rb, int cpu,
+				     int flags)
 {
-	struct ring_buffer *rb;
-	unsigned long size;
-	int i;
-
-	size = sizeof(struct ring_buffer);
-	size += nr_pages * sizeof(void *);
-
-	rb = kzalloc(size, GFP_KERNEL);
-	if (!rb)
-		goto fail;
-
 	rb->user_page = perf_mmap_alloc_page(cpu);
 	if (!rb->user_page)
-		goto fail_user_page;
-
-	for (i = 0; i < nr_pages; i++) {
-		rb->data_pages[i] = perf_mmap_alloc_page(cpu);
-		if (!rb->data_pages[i])
-			goto fail_data_pages;
-	}
+		return -ENOMEM;
 
-	rb->nr_pages = nr_pages;
-
-	ring_buffer_init(rb, watermark, flags);
+	return 0;
+}
 
-	return rb;
+static int perf_mmap_alloc_data_page(struct ring_buffer *rb, int cpu,
+				     int nr_pages, int flags)
+{
+	void *data;
 
-fail_data_pages:
-	for (i--; i >= 0; i--)
-		free_page((unsigned long)rb->data_pages[i]);
+	if (nr_pages != 1)
+		return -EINVAL;
 
-	free_page((unsigned long)rb->user_page);
+	data = perf_mmap_alloc_page(cpu);
+	if (!data)
+		return -ENOMEM;
 
-fail_user_page:
-	kfree(rb);
+	rb->data_pages[rb->nr_pages] = data;
 
-fail:
-	return NULL;
+	return 0;
 }
 
 static void perf_mmap_free_page(unsigned long addr)
@@ -323,24 +296,51 @@ static void perf_mmap_free_page(unsigned long addr)
 	__free_page(page);
 }
 
-void rb_free(struct ring_buffer *rb)
+static void perf_mmap_gfp0_free(struct ring_buffer *rb)
 {
 	int i;
 
-	perf_mmap_free_page((unsigned long)rb->user_page);
+	if (rb->user_page)
+		perf_mmap_free_page((unsigned long)rb->user_page);
 	for (i = 0; i < rb->nr_pages; i++)
 		perf_mmap_free_page((unsigned long)rb->data_pages[i]);
 	kfree(rb);
 }
 
+struct page *
+perf_mmap_gfp0_to_page(struct ring_buffer *rb, unsigned long pgoff)
+{
+	if (pgoff > rb->nr_pages)
+		return NULL;
+
+	if (pgoff == 0)
+		return virt_to_page(rb->user_page);
+
+	return virt_to_page(rb->data_pages[pgoff - 1]);
+}
+
+static unsigned long perf_mmap_gfp0_get_size(int nr_pages)
+{
+	return sizeof(struct ring_buffer) + sizeof(void *) * nr_pages;
+}
+
+struct ring_buffer_ops perf_rb_ops = {
+	.get_size		= perf_mmap_gfp0_get_size,
+	.alloc_user_page	= perf_mmap_alloc_user_page,
+	.alloc_data_page	= perf_mmap_alloc_data_page,
+	.free_buffer		= perf_mmap_gfp0_free,
+	.mmap_to_page		= perf_mmap_gfp0_to_page,
+};
+
 #else
+
 static int data_page_nr(struct ring_buffer *rb)
 {
 	return rb->nr_pages << page_order(rb);
 }
 
 struct page *
-perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
+perf_mmap_vmalloc_to_page(struct ring_buffer *rb, unsigned long pgoff)
 {
 	/* The '>' counts in the user page. */
 	if (pgoff > data_page_nr(rb))
@@ -349,14 +349,14 @@ perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
 	return vmalloc_to_page((void *)rb->user_page + pgoff * PAGE_SIZE);
 }
 
-static void perf_mmap_unmark_page(void *addr)
+static void perf_mmap_vmalloc_unmark_page(void *addr)
 {
 	struct page *page = vmalloc_to_page(addr);
 
 	page->mapping = NULL;
 }
 
-static void rb_free_work(struct work_struct *work)
+static void perf_mmap_vmalloc_free_work(struct work_struct *work)
 {
 	struct ring_buffer *rb;
 	void *base;
@@ -368,50 +368,94 @@ static void rb_free_work(struct work_struct *work)
 	base = rb->user_page;
 	/* The '<=' counts in the user page. */
 	for (i = 0; i <= nr; i++)
-		perf_mmap_unmark_page(base + (i * PAGE_SIZE));
+		perf_mmap_vmalloc_unmark_page(base + (i * PAGE_SIZE));
 
 	vfree(base);
 	kfree(rb);
 }
 
-void rb_free(struct ring_buffer *rb)
+static void perf_mmap_vmalloc_free(struct ring_buffer *rb)
 {
 	schedule_work(&rb->work);
 }
 
-struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
+static int perf_mmap_vmalloc_data_pages(struct ring_buffer *rb, int cpu,
+					int nr_pages, int flags)
 {
-	struct ring_buffer *rb;
-	unsigned long size;
 	void *all_buf;
 
-	size = sizeof(struct ring_buffer);
-	size += sizeof(void *);
-
-	rb = kzalloc(size, GFP_KERNEL);
-	if (!rb)
-		goto fail;
-
-	INIT_WORK(&rb->work, rb_free_work);
+	INIT_WORK(&rb->work, perf_mmap_vmalloc_free_work);
 
 	all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE);
 	if (!all_buf)
-		goto fail_all_buf;
+		return -ENOMEM;
 
 	rb->user_page = all_buf;
 	rb->data_pages[0] = all_buf + PAGE_SIZE;
 	rb->page_order = ilog2(nr_pages);
 	rb->nr_pages = !!nr_pages;
 
+	return 0;
+}
+
+static unsigned long perf_mmap_vmalloc_get_size(int nr_pages)
+{
+	return sizeof(struct ring_buffer) + sizeof(void *);
+}
+
+struct ring_buffer_ops perf_rb_ops = {
+	.get_size		= perf_mmap_vmalloc_get_size,
+	.alloc_data_page	= perf_mmap_vmalloc_data_pages,
+	.free_buffer		= perf_mmap_vmalloc_free,
+	.mmap_to_page		= perf_mmap_vmalloc_to_page,
+};
+
+#endif
+
+struct ring_buffer *rb_alloc(struct perf_event *event, int nr_pages,
+			     long watermark, int cpu, int flags,
+			     struct ring_buffer_ops *rb_ops)
+{
+	struct ring_buffer *rb;
+	int i;
+
+	if (!rb_ops)
+		rb_ops = &perf_rb_ops;
+
+	rb = kzalloc(rb_ops->get_size(nr_pages), GFP_KERNEL);
+	if (!rb)
+		return NULL;
+
+	rb->event = event;
+	rb->ops = rb_ops;
+	if (rb->ops->alloc_user_page) {
+		if (rb->ops->alloc_user_page(rb, cpu, flags))
+			goto fail;
+
+		for (i = 0; i < nr_pages; i++, rb->nr_pages++)
+			if (rb->ops->alloc_data_page(rb, cpu, 1, flags))
+				goto fail;
+	} else {
+		if (rb->ops->alloc_data_page(rb, cpu, nr_pages, flags))
+			goto fail;
+	}
+
 	ring_buffer_init(rb, watermark, flags);
 
 	return rb;
 
-fail_all_buf:
-	kfree(rb);
-
 fail:
+	rb->ops->free_buffer(rb);
 	return NULL;
 }
 
-#endif
+void rb_free(struct ring_buffer *rb)
+{
+	rb->ops->free_buffer(rb);
+}
+
+struct page *
+perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
+{
+	return rb->ops->mmap_to_page(rb, pgoff);
+}
-- 
1.8.5.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists