[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1224398496.5303.7.camel@koto.keithp.com>
Date:	Sat, 18 Oct 2008 23:41:36 -0700
From:	Keith Packard <keithp@...thp.com>
To:	Ingo Molnar <mingo@...e.hu>
Cc:	keithp@...thp.com, Linus Torvalds <torvalds@...ux-foundation.org>,
	Nick Piggin <nickpiggin@...oo.com.au>,
	Dave Airlie <airlied@...ux.ie>,
	Linux Kernel Mailing List <linux-kernel@...r.kernel.org>,
	dri-devel@...ts.sf.net, Andrew Morton <akpm@...ux-foundation.org>,
	Yinghai Lu <yinghai@...nel.org>
Subject: Re: [git pull] drm patches for 2.6.27-rc1
On Sat, 2008-10-18 at 21:14 -0700, Keith Packard wrote:
> On Sun, 2008-10-19 at 00:32 +0200, Ingo Molnar wrote:
> 
> > Mind sending patches for this? :-)
Here's a patch for the i915 driver that includes the new API. Tested on
x86_32+HIGHMEM and x86_64. I stuck a new 'io_reserve.h' header in the
i915 directory for this patch, but it should go elsewhere.
The new APIs are:
io_reserve_create_wc
io_reserve_free
io_reserve_map_atomic_wc
io_reserve_unmap_atomic
io_reserve_map_wc
io_reserve_unmap
I added the non-atomic variants at Eric's suggestion so that we can use
the direct map on x86_64, avoiding any use of ioremap at run-time. I
think the resulting code looks quite a bit cleaner now. Also, one
benchmark I tried ran about 18 times faster in 64-bit mode than the
original code.
From 2f6b125cb586a0671a2b9c22aadb03fcafdf99ab Mon Sep 17 00:00:00 2001
From: Keith Packard <keithp@...a.keithp.com>
Date: Sat, 18 Oct 2008 22:59:58 -0700
Subject: [PATCH] [drm/i915] Create new 'io_reserve' API for mapping GTT pages
The io_reserve API abstracts away the operations necessary to construct
mappings for our GTT aperture, providing atomic and non-atomic mappings for
GTT pages that work efficiently on x86_64 and x86_32+HIGHMEM configurations.
This eliminates the in-driver abuse of the kmap_atomic_pfn function as well
as improving GEM performance on the x86_64 architecture.
Signed-off-by: Keith Packard <keithp@...a.keithp.com>
---
 drivers/gpu/drm/i915/i915_drv.h   |    3 +
 drivers/gpu/drm/i915/i915_gem.c   |   71 ++++++++++++----------
 drivers/gpu/drm/i915/i915_irq.c   |    4 +-
 drivers/gpu/drm/i915/io_reserve.h |  122 +++++++++++++++++++++++++++++++++++++
 4 files changed, 165 insertions(+), 35 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/io_reserve.h
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f20ffe1..c99b9ea 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -31,6 +31,7 @@
 #define _I915_DRV_H_
 
 #include "i915_reg.h"
+#include "io_reserve.h"
 
 /* General customization:
  */
@@ -246,6 +247,8 @@ typedef struct drm_i915_private {
 	struct {
 		struct drm_mm gtt_space;
 
+		struct io_reserve *io_reserve;
+
 		/**
 		 * List of objects currently involved in rendering from the
 		 * ringbuffer.
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9255088..cd9e489 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -177,14 +177,14 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 		    struct drm_file *file_priv)
 {
 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
+	drm_i915_private_t *dev_priv = dev->dev_private;
 	ssize_t remain;
-	loff_t offset;
+	loff_t offset, base;
 	char __user *user_data;
 	char __iomem *vaddr;
 	char *vaddr_atomic;
-	int i, o, l;
+	int o, l;
 	int ret = 0;
-	unsigned long pfn;
 	unsigned long unwritten;
 
 	user_data = (char __user *) (uintptr_t) args->data_ptr;
@@ -211,42 +211,41 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 	while (remain > 0) {
 		/* Operation in this page
 		 *
-		 * i = page number
+		 * base = page offset within aperture
 		 * o = offset within page
 		 * l = bytes to copy
 		 */
-		i = offset >> PAGE_SHIFT;
+		base = (offset & ~(PAGE_SIZE-1));
 		o = offset & (PAGE_SIZE-1);
 		l = remain;
 		if ((o + l) > PAGE_SIZE)
 			l = PAGE_SIZE - o;
 
-		pfn = (dev->agp->base >> PAGE_SHIFT) + i;
-
-#ifdef CONFIG_HIGHMEM
 		/* This is a workaround for the low performance of iounmap
 		 * (approximate 10% cpu cost on normal 3D workloads).
-		 * kmap_atomic on HIGHMEM kernels happens to let us map card
-		 * memory without taking IPIs.  When the vmap rework lands
-		 * we should be able to dump this hack.
+		 * io_reserve_map_atomic_wc maps card memory
+		 * without taking IPIs.
+		 */
+		vaddr_atomic = io_reserve_map_atomic_wc(dev_priv->mm.io_reserve,
+							base);
+		if (vaddr_atomic) {
+			unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + o,
+								      user_data, l);
+			io_reserve_unmap_atomic(vaddr_atomic);
+		} else
+			unwritten = l;
+			
+		/* If we get a fault while copying data, then (presumably) our
+		 * source page isn't available. In this case, use the
+		 * non-atomic __copy_from_user function
 		 */
-		vaddr_atomic = kmap_atomic_pfn(pfn, KM_USER0);
-#if WATCH_PWRITE
-		DRM_INFO("pwrite i %d o %d l %d pfn %ld vaddr %p\n",
-			 i, o, l, pfn, vaddr_atomic);
-#endif
-		unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + o,
-							      user_data, l);
-		kunmap_atomic(vaddr_atomic, KM_USER0);
-
 		if (unwritten)
-#endif /* CONFIG_HIGHMEM */
 		{
-			vaddr = ioremap_wc(pfn << PAGE_SHIFT, PAGE_SIZE);
+			vaddr = io_reserve_map_wc(dev_priv->mm.io_reserve, base);
 #if WATCH_PWRITE
-			DRM_INFO("pwrite slow i %d o %d l %d "
-				 "pfn %ld vaddr %p\n",
-				 i, o, l, pfn, vaddr);
+			DRM_INFO("pwrite slow base %ld o %d l %d "
+				 "vaddr %p\n",
+				 base, o, l, vaddr);
 #endif
 			if (vaddr == NULL) {
 				ret = -EFAULT;
@@ -256,7 +255,7 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 #if WATCH_PWRITE
 			DRM_INFO("unwritten %ld\n", unwritten);
 #endif
-			iounmap(vaddr);
+			io_reserve_unmap(vaddr);
 			if (unwritten) {
 				ret = -EFAULT;
 				goto fail;
@@ -1489,6 +1488,7 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
 				 struct drm_i915_gem_exec_object *entry)
 {
 	struct drm_device *dev = obj->dev;
+	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_i915_gem_relocation_entry reloc;
 	struct drm_i915_gem_relocation_entry __user *relocs;
 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
@@ -1621,12 +1621,11 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
 		    (last_reloc_offset & ~(PAGE_SIZE - 1)) !=
 		    (reloc_offset & ~(PAGE_SIZE - 1))) {
 			if (reloc_page != NULL)
-				iounmap(reloc_page);
+				io_reserve_unmap(reloc_page);
 
-			reloc_page = ioremap_wc(dev->agp->base +
-						(reloc_offset &
-						 ~(PAGE_SIZE - 1)),
-						PAGE_SIZE);
+			reloc_page = io_reserve_map_wc(dev_priv->mm.io_reserve,
+						       (reloc_offset & 
+							~(PAGE_SIZE - 1)));
 			last_reloc_offset = reloc_offset;
 			if (reloc_page == NULL) {
 				drm_gem_object_unreference(target_obj);
@@ -1636,7 +1635,7 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
 		}
 
 		reloc_entry = (uint32_t __iomem *)(reloc_page +
-					   (reloc_offset & (PAGE_SIZE - 1)));
+						   (reloc_offset & (PAGE_SIZE - 1)));
 		reloc_val = target_obj_priv->gtt_offset + reloc.delta;
 
 #if WATCH_BUF
@@ -1661,7 +1660,7 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
 	}
 
 	if (reloc_page != NULL)
-		iounmap(reloc_page);
+		io_reserve_unmap(reloc_page);
 
 #if WATCH_BUF
 	if (0)
@@ -2504,6 +2503,10 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
 	if (ret != 0)
 		return ret;
 
+	dev_priv->mm.io_reserve = io_reserve_create_wc(dev->agp->base,
+						       dev->agp->agp_info.aper_size
+						       * 1024 * 1024);
+
 	mutex_lock(&dev->struct_mutex);
 	BUG_ON(!list_empty(&dev_priv->mm.active_list));
 	BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
@@ -2521,11 +2524,13 @@ int
 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
 		       struct drm_file *file_priv)
 {
+	drm_i915_private_t *dev_priv = dev->dev_private;
 	int ret;
 
 	ret = i915_gem_idle(dev);
 	drm_irq_uninstall(dev);
 
+	io_reserve_free(dev_priv->mm.io_reserve);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index ce866ac..de8e084 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -784,8 +784,8 @@ int i915_vblank_swap(struct drm_device *dev, void *data,
 	if (dev_priv->swaps_pending >= 10) {
 		DRM_DEBUG("Too many swaps queued\n");
 		DRM_DEBUG(" pipe 0: %d pipe 1: %d\n",
-			  drm_vblank_count(dev, 0);
-			  drm_vblank_count(dev, 1);
+			  drm_vblank_count(dev, 0),
+			  drm_vblank_count(dev, 1));
 
 		list_for_each(list, &dev_priv->vbl_swaps.head) {
 			vbl_old = list_entry(list, drm_i915_vbl_swap_t, head);
diff --git a/drivers/gpu/drm/i915/io_reserve.h b/drivers/gpu/drm/i915/io_reserve.h
new file mode 100644
index 0000000..4e90a36
--- /dev/null
+++ b/drivers/gpu/drm/i915/io_reserve.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright © 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Packard <keithp@...thp.com>
+ *
+ */
+/* x86_64 style */
+
+#ifndef _IO_RESERVE_H_
+#define _IO_RESERVE_H_
+
+/* this struct isn't actually defined anywhere */
+struct io_reserve;
+
+#ifdef CONFIG_X86_64
+
+/* Create the io_reserve object*/
+static inline struct io_reserve *
+io_reserve_create_wc(unsigned long base, unsigned long size)
+{
+	return (struct io_reserve *) ioremap_wc(base, size);
+}
+
+static inline void
+io_reserve_free(struct io_reserve *reserve)
+{
+	iounmap(reserve);
+}
+
+/* Atomic map/unmap */
+static inline void *
+io_reserve_map_atomic_wc(struct io_reserve *reserve, unsigned long offset)
+{
+	return ((char *) reserve) + offset;
+}
+
+static inline void
+io_reserve_unmap_atomic(void *vaddr)
+{
+}
+
+/* Non-atomic map/unmap */
+static inline void *
+io_reserve_map_wc(struct io_reserve *reserve, unsigned long offset)
+{
+	return ((char *) reserve) + offset;
+}
+
+static inline void
+io_reserve_unmap(void *vaddr)
+{
+}
+
+#endif /* CONFIG_X86_64 */
+
+#ifdef CONFIG_X86_32
+static inline struct io_reserve *
+io_reserve_create_wc(unsigned long base, unsigned long size)
+{
+	return (struct io_reserve *) base;
+}
+
+static inline void
+io_reserve_free(struct io_reserve *reserve)
+{
+}
+
+/* Atomic map/unmap */
+static inline void *
+io_reserve_map_atomic_wc(struct io_reserve *reserve, unsigned long offset)
+{
+#ifdef CONFIG_HIGHMEM
+	offset += (unsigned long) reserve;
+	return kmap_atomic_pfn(offset >> PAGE_SHIFT, KM_USER0);
+#else
+	return NULL;
+#endif
+}
+
+static inline void
+io_reserve_unmap_atomic(void *vaddr)
+{
+#ifdef CONFIG_HIGHMEM
+	kunmap_atomic(vaddr, KM_USER0);
+#endif
+}
+
+static inline void *
+io_reserve_map_wc(struct io_reserve *reserve, unsigned long offset)
+{
+	offset += (unsigned long) reserve;
+	return ioremap_wc(offset, PAGE_SIZE);
+}
+
+static inline void
+io_reserve_unmap(void *vaddr)
+{
+	iounmap(vaddr);
+}
+#endif /* CONFIG_X86_32 */
+
+#endif /* _IO_RESERVE_H_ */
-- 
1.5.6.5
-- 
keith.packard@...el.com
Download attachment "signature.asc" of type "application/pgp-signature" (190 bytes)
Powered by blists - more mailing lists
 
