Message-Id: <1238242929.4039.706.camel@laptop>
Date: Sat, 28 Mar 2009 13:22:09 +0100
From: Peter Zijlstra <peterz@...radead.org>
To: Brice Goglin <Brice.Goglin@...ia.fr>
Cc: Eric Anholt <eric@...olt.net>, Andi Kleen <andi@...stfloor.org>,
linux-kernel@...r.kernel.org, dri-devel@...ts.sourceforge.net,
Nick Piggin <nickpiggin@...oo.com.au>
Subject: [RFC] x86: gup_fast() batch limit (was: DRM lock ordering fix series)

On Sat, 2009-03-28 at 11:48 +0100, Peter Zijlstra wrote:
> On Sat, 2009-03-28 at 09:46 +0100, Brice Goglin wrote:
> > Peter Zijlstra wrote:
> > > Also note that doing a large gup() with gup_fast() will be undesirable
> > > because it disables IRQs. So iterating over, say, several MB worth of pages
> > > will hurt like crazy. Currently all gup_fast() users do single-page lookups.
> > >
> >
> > In 2.6.29, fs/bio.c:955, fs/direct-io.c:153 and fs/splice.c:1222 do
> > multiple-page lookups at once. The latter might be limited to 16 pages
> > because of the pipe depth; I don't know about the former two.
> >
> > Is there some sort of reasonable limit? A couple dozen pages at once, maybe?
>
> It depends on your latency requirements; looking at the code, I'd say
> we'd have to add the batch limit the comment talks about. I'd expect
> preempt-rt to want to lower it significantly.
>
> Regular mainline could do with 32-64, I guess; max IRQ latency is well
> over 10ms on mainline anyway.
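(Aside, for illustration only and not from the original exchange: until
gup_fast() itself has a batch limit, a caller can bound each IRQs-off
section by capping how many pages it asks for per call. The sketch below
is minimal; pin_user_range() and CHUNK are made-up names, and only
get_user_pages_fast() is the real 2.6.29 interface.)

/*
 * Hypothetical caller-side batching: pin a large user range in chunks
 * so that each get_user_pages_fast() call, and hence each IRQs-off
 * section, covers at most CHUNK pages.  Returns the number of pages
 * pinned, or a negative error if nothing could be pinned.
 */
#include <linux/kernel.h>
#include <linux/mm.h>

#define CHUNK	32

static int pin_user_range(unsigned long start, int nr_pages, int write,
			  struct page **pages)
{
	int done = 0;

	while (done < nr_pages) {
		int want = min(nr_pages - done, CHUNK);
		int got;

		got = get_user_pages_fast(start +
					  ((unsigned long)done << PAGE_SHIFT),
					  want, write, pages + done);
		if (got < 0)
			return done ? done : got;

		done += got;
		if (got < want)		/* partial pin; stop and report */
			break;
	}
	return done;
}
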
I'm not really trusting my brain today, but something like the below
should work, I think.
Nick, any thoughts?
Not-Signed-off-by: Peter Zijlstra <a.p.zijlstra@...llo.nl>
---
arch/x86/mm/gup.c | 24 +++++++++++++++++++++---
1 files changed, 21 insertions(+), 3 deletions(-)
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index be54176..4ded5c3 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -11,6 +11,8 @@
 
 #include <asm/pgtable.h>
 
+#define GUP_BATCH 32
+
 static inline pte_t gup_get_pte(pte_t *ptep)
 {
 #ifndef CONFIG_X86_PAE
@@ -91,7 +93,8 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
 		get_page(page);
 		pages[*nr] = page;
 		(*nr)++;
-
+		if (*nr > GUP_BATCH)
+			break;
 	} while (ptep++, addr += PAGE_SIZE, addr != end);
 	pte_unmap(ptep - 1);
 
@@ -157,6 +160,8 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
 			if (!gup_pte_range(pmd, addr, next, write, pages, nr))
 				return 0;
 		}
+		if (*nr > GUP_BATCH)
+			break;
 	} while (pmdp++, addr = next, addr != end);
 
 	return 1;
@@ -214,6 +219,8 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
 			if (!gup_pmd_range(pud, addr, next, write, pages, nr))
 				return 0;
 		}
+		if (*nr > GUP_BATCH)
+			break;
 	} while (pudp++, addr = next, addr != end);
 
 	return 1;
@@ -226,7 +233,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 	unsigned long addr, len, end;
 	unsigned long next;
 	pgd_t *pgdp;
-	int nr = 0;
+	int batch = 0, nr = 0;
 
 	start &= PAGE_MASK;
 	addr = start;
@@ -254,6 +261,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 	 * (which we do on x86, with the above PAE exception), we can follow the
 	 * address down to the the page and take a ref on it.
 	 */
+again:
 	local_irq_disable();
 	pgdp = pgd_offset(mm, addr);
 	do {
@@ -262,11 +270,21 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 		next = pgd_addr_end(addr, end);
 		if (pgd_none(pgd))
 			goto slow;
-		if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+		if (!gup_pud_range(pgd, addr, next, write, pages, &batch))
 			goto slow;
+		if (batch > GUP_BATCH) {
+			local_irq_enable();
+			addr += batch << PAGE_SHIFT;
+			nr += batch;
+			batch = 0;
+			if (addr != end)
+				goto again;
+		}
 	} while (pgdp++, addr = next, addr != end);
 	local_irq_enable();
+	nr += batch;
+
 
 	VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
 	return nr;
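
(Again for illustration only, not part of the original mail: stripped of
the page-table walk, the pattern the patch introduces is simply to never
do more than a fixed amount of work per IRQs-off section. A generic,
self-contained sketch of that pattern follows; process_range_batched()
and process_one() are invented names with no counterpart in the gup code.)

/*
 * Bounded-latency batching: handle at most BATCH items per IRQs-off
 * section, re-enabling IRQs between sections so pending interrupts
 * get a chance to run.  Returns the number of items processed.
 */
#include <linux/irqflags.h>

#define BATCH	32

static int process_range_batched(unsigned long *items, int count,
				 int (*process_one)(unsigned long item))
{
	int done = 0;

	while (done < count) {
		int this_batch = 0;

		local_irq_disable();
		while (done < count && this_batch < BATCH) {
			if (process_one(items[done]) < 0) {
				local_irq_enable();
				return done;
			}
			done++;
			this_batch++;
		}
		local_irq_enable();
	}
	return done;
}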