[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20191216151419.GL2871@hirez.programming.kicks-ass.net>
Date: Mon, 16 Dec 2019 16:14:19 +0100
From: Peter Zijlstra <peterz@...radead.org>
To: "Aneesh Kumar K.V" <aneesh.kumar@...ux.ibm.com>
Cc: Michael Ellerman <mpe@...erman.id.au>,
Will Deacon <will@...nel.org>,
Andrew Morton <akpm@...ux-foundation.org>,
Nick Piggin <npiggin@...il.com>, linux-arch@...r.kernel.org,
linux-mm@...ck.org, linux-kernel@...r.kernel.org,
Yoshinori Sato <ysato@...rs.sourceforge.jp>,
Rich Felker <dalias@...c.org>,
"David S. Miller" <davem@...emloft.net>,
Helge Deller <deller@....de>,
Geert Uytterhoeven <geert@...ux-m68k.org>,
Paul Burton <paulburton@...nel.org>,
Tony Luck <tony.luck@...el.com>,
Richard Henderson <rth@...ddle.net>,
Nick Hu <nickhu@...estech.com>,
Paul Walmsley <paul.walmsley@...ive.com>
Subject: Re: [PATCH 05/17] asm-generic/tlb: Rename
HAVE_RCU_TABLE_NO_INVALIDATE
On Mon, Dec 16, 2019 at 03:50:41PM +0100, Peter Zijlstra wrote:
> On Mon, Dec 16, 2019 at 07:24:24PM +0530, Aneesh Kumar K.V wrote:
>
> > So __p*_free_tlb() routines on ppc64 just mark that we need a page walk
> > cache flush and the actual flush in done in tlb_flush_mmu.
>
> Not quite, your __p*_free_tlb() goes to pgtable_free_tlb() which call
> tlb_remove_table().
>
> > As per
> >
> > d86564a2f085b79ec046a5cba90188e61235280 (mm/tlb, x86/mm: Support
> > invalidating TLB caches for RCU_TABLE_FREE ) that is not sufficient?
>
> 96bc9567cbe1 ("asm-generic/tlb, arch: Invert CONFIG_HAVE_RCU_TABLE_INVALIDATE")
>
> And no. Since you have TABLE_NO_INVALIDATE set, tlb_remove_table() will
> not TLBI when it fails to allocate a batch page, which is an error for
> PPC-Radix.
>
> There is also no TLBI when the batch page is full and the RCU callback
> happens, which is also a bug on PPC-Radix.
It seems to me you need something like this here patch, all you need to
add is a suitable definition of tlb_needs_table_invalidate() for Power.
---
diff --git a/arch/Kconfig b/arch/Kconfig
index c44ef15866a3..98de654b79b3 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -400,10 +400,6 @@ config MMU_GATHER_RCU_TABLE_FREE
bool
select MMU_GATHER_TABLE_FREE
-config MMU_GATHER_NO_TABLE_INVALIDATE
- bool
- depends on MMU_GATHER_RCU_TABLE_FREE
-
config MMU_GATHER_PAGE_SIZE
bool
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 3dea4c8d39f2..2ddf24822d5b 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -223,7 +223,6 @@ config PPC
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select MMU_GATHER_RCU_TABLE_FREE if SMP
- select MMU_GATHER_NO_TABLE_INVALIDATE if MMU_GATHER_RCU_TABLE_FREE
select MMU_GATHER_PAGE_SIZE
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE if PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index a76e915ab207..acf20b6c0a54 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -66,7 +66,6 @@ config SPARC64
select HAVE_KRETPROBES
select HAVE_KPROBES
select MMU_GATHER_RCU_TABLE_FREE if SMP
- select MMU_GATHER_NO_TABLE_INVALIDATE if MMU_GATHER_RCU_TABLE_FREE
select HAVE_MEMBLOCK_NODE_MAP
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select HAVE_DYNAMIC_FTRACE
diff --git a/arch/sparc/include/asm/tlb_64.h b/arch/sparc/include/asm/tlb_64.h
index a2f3fa61ee36..ac8e74a96122 100644
--- a/arch/sparc/include/asm/tlb_64.h
+++ b/arch/sparc/include/asm/tlb_64.h
@@ -28,6 +28,12 @@ void flush_tlb_pending(void);
#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
#define tlb_flush(tlb) flush_tlb_pending()
+/*
+ * SPARC64's hardware TLB fill does not use the Linux page-tables
+ * and therefore we don't need a TLBI when freeing page-table pages.
+ */
+#define tlb_needs_table_invalidate() (false)
+
#include <asm-generic/tlb.h>
#endif /* _SPARC64_TLB_H */
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index fe0ea6ff3636..4108d6d18ca5 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -156,13 +156,6 @@
* Useful if your architecture doesn't use IPIs for remote TLB invalidates
* and therefore doesn't naturally serialize with software page-table walkers.
*
- * MMU_GATHER_NO_TABLE_INVALIDATE
- *
- * This makes MMU_GATHER_RCU_TABLE_FREE avoid calling tlb_flush_mmu_tlbonly()
- * before freeing the page-table pages. This can be avoided if you use
- * MMU_GATHER_RCU_TABLE_FREE and your architecture does _NOT_ use the Linux
- * page-tables natively.
- *
* MMU_GATHER_NO_RANGE
*
* Use this if your architecture lacks an efficient flush_tlb_range().
@@ -203,6 +196,24 @@ extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
#endif /* CONFIG_MMU_GATHER_TABLE_FREE */
+#ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE
+
+/*
+ * This allows an architecture that does not use the linux page-tables for
+ * hardware to skip the TLBI when freeing page tables.
+ */
+#ifndef tlb_needs_table_invalidate
+#define tlb_needs_table_invalidate() (true)
+#endif
+
+#else
+
+#ifdef tlb_needs_table_invalidate
+#error tlb_needs_table_invalidate() requires MMU_GATHER_RCU_TABLE_FREE
+#endif
+
+#endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */
+
#ifndef CONFIG_MMU_GATHER_NO_GATHER
/*
* If we can't allocate a page to make a big batch of page pointers
diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
index 9d103031568d..a3538cb2bcbe 100644
--- a/mm/mmu_gather.c
+++ b/mm/mmu_gather.c
@@ -177,14 +177,14 @@ static void tlb_remove_table_free(struct mmu_table_batch *batch)
*/
static inline void tlb_table_invalidate(struct mmu_gather *tlb)
{
-#ifndef CONFIG_MMU_GATHER_NO_TABLE_INVALIDATE
- /*
- * Invalidate page-table caches used by hardware walkers. Then we still
- * need to RCU-sched wait while freeing the pages because software
- * walkers can still be in-flight.
- */
- tlb_flush_mmu_tlbonly(tlb);
-#endif
+ if (tlb_needs_table_invalidate()) {
+ /*
+ * Invalidate page-table caches used by hardware walkers. Then
+ * we still need to RCU-sched wait while freeing the pages
+ * because software walkers can still be in-flight.
+ */
+ tlb_flush_mmu_tlbonly(tlb);
+ }
}
static void tlb_remove_table_one(void *table)
Powered by blists - more mailing lists