lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Thu, 19 May 2016 18:27:52 +0100
From:	Chris Wilson <chris@...is-wilson.co.uk>
To:	Dave Gordon <david.s.gordon@...el.com>
Cc:	Tvrtko Ursulin <tvrtko.ursulin@...ux.intel.com>,
	linux-kernel@...r.kernel.org
Subject: Re: [Intel-gfx] [PATCH 3/3] Introduce & use new lightweight SGL
 iterators

On Tue, May 17, 2016 at 01:05:48PM +0100, Dave Gordon wrote:
> On 17/05/16 11:34, Tvrtko Ursulin wrote:
> >
> >On 16/05/16 16:19, Dave Gordon wrote:
> >>The existing for_each_sg_page() iterator is somewhat heavyweight, and is
> >>limiting i915 driver performance in a few benchmarks. So here we
> >>introduce somewhat lighter weight iterators, primarily for use with GEM
> >>objects or other cases where we need only deal with whole aligned pages.
> >
> >Interesting idea, if for nothing else then for eliminating the dreaded
> >st->nents of for_each_sg_page. :)
> >
> >Do you know which benchmarks it improves, and by how much?
> 
> I know nothing :)
> 
> But last time I posted some easy-to-use iterators, Chris Wilson said
> they didn't address his complaint, which was that the existing ones
> were too slow.

These aren't very good either... Compared to the sg iters I have:

         gem:exec:fault:1MiB:    -4.32%
  gem:exec:fault:1MiB:forked:    -5.66%
        gem:exec:fault:16MiB:   -13.33%
 gem:exec:fault:16MiB:forked:   -12.03%
       gem:exec:fault:256MiB:   -15.28%
gem:exec:fault:256MiB:forked:   -16.98%

(I was really hoping to be able to drop a patch!)

Patch used for reference:

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 03b7c2e..d7c1431 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3787,4 +3787,56 @@ int remap_io_mapping(struct vm_area_struct *vma,
 #define i915_gem_object_for_each_vma(vma, obj) \
        list_for_each_entry_check(vma, &(obj)->vma_list, obj_link, &(obj)->base.dev->struct_mutex)
 
+struct sgt_iter {
+       struct scatterlist *sgp;
+       union {
+               unsigned long pfn;
+               unsigned long dma;
+       } ix;
+       unsigned int curr;
+       unsigned int max;
+};
+
+static inline struct sgt_iter
+__sgt_iter(struct scatterlist *sgl, bool dma)
+{
+       struct sgt_iter s = { .sgp = sgl };
+
+       if (sgl) {
+               s.max = s.curr = sgl->offset;
+               s.max += sgl->length;
+               if (dma)
+                       s.ix.dma = sg_dma_address(sgl);
+               else
+                       s.ix.pfn = page_to_pfn(sg_page(sgl));
+       }
+
+       return s;
+}
+
+/**
+ * for_each_sgt_dma - iterate over the DMA addresses of the given sg_table
+ * @__dmap:    DMA address (output)
+ * @__iter:    'struct sgt_iter' (iterator state, internal)
+ * @__sgt:     sg_table to iterate over (input)
+ */
+#define for_each_sgt_dma(__dmap, __iter, __sgt)                                \
+       for ((__iter) = __sgt_iter((__sgt)->sgl, true);                 \
+            ((__dmap) = (__iter).ix.dma + (__iter).curr);              \
+            (((__iter).curr += PAGE_SIZE) < (__iter).max) ||           \
+            ((__iter) = __sgt_iter(sg_next((__iter).sgp), true), 0))
+
+/**
+ * for_each_sgt_page - iterate over the pages of the given sg_table
+ * @__pp:      page pointer (output)
+ * @__iter:    'struct sgt_iter' (iterator state, internal)
+ * @__sgt:     sg_table to iterate over (input)
+ */
+#define for_each_sgt_page(__pp, __iter, __sgt)                         \
+       for ((__iter) = __sgt_iter((__sgt)->sgl, false);                \
+            ((__pp) = (__iter).ix.pfn == 0 ? NULL :                    \
+             pfn_to_page((__iter).ix.pfn + ((__iter).curr >> PAGE_SHIFT)));\
+            (((__iter).curr += PAGE_SIZE) < (__iter).max) ||           \
+            ((__iter) = __sgt_iter(sg_next((__iter).sgp), false), 0))
+
 #endif
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 603895a..3fcb540 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1571,18 +1571,19 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
        unsigned act_pt = first_entry / GEN6_PTES;
        unsigned act_pte = first_entry % GEN6_PTES - 1;
        u32 pte_encode = vm->pte_encode(0, cache_level, true, flags);
-       struct st_iter iter;
+       struct sgt_iter iter;
        gen6_pte_t *pt_vaddr;
+       dma_addr_t addr;
 
        pt_vaddr = kmap_px(ppgtt, &ppgtt->pd.page_table[act_pt]);
-       st_for_each_address(&iter, pages) {
+       for_each_sgt_dma(addr, iter, pages) {
                if (++act_pte == GEN6_PTES) {
                        kunmap_px(pt_vaddr);
                        pt_vaddr = kmap_px(ppgtt,
                                           &ppgtt->pd.page_table[++act_pt]);
                        act_pte = 0;
                }
-               pt_vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);
+               pt_vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(addr);
        }
        kunmap_px(pt_vaddr);
 }

-- 
Chris Wilson, Intel Open Source Technology Centre

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ