Date:   Sat, 18 Dec 2021 01:37:42 +0100
From:   Peter Zijlstra <peterz@...radead.org>
To:     Nikita Yushchenko <nikita.yushchenko@...tuozzo.com>
Cc:     Will Deacon <will@...nel.org>,
        "Aneesh Kumar K.V" <aneesh.kumar@...ux.ibm.com>,
        Andrew Morton <akpm@...ux-foundation.org>,
        Nick Piggin <npiggin@...il.com>,
        Catalin Marinas <catalin.marinas@....com>,
        Heiko Carstens <hca@...ux.ibm.com>,
        Vasily Gorbik <gor@...ux.ibm.com>,
        Christian Borntraeger <borntraeger@...ux.ibm.com>,
        "David S. Miller" <davem@...emloft.net>,
        Thomas Gleixner <tglx@...utronix.de>,
        Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>,
        Dave Hansen <dave.hansen@...ux.intel.com>,
        Arnd Bergmann <arnd@...db.de>, x86@...nel.org,
        linux-kernel@...r.kernel.org, linux-arch@...r.kernel.org,
        linux-mm@...ck.org, linuxppc-dev@...ts.ozlabs.org,
        linux-s390@...r.kernel.org, sparclinux@...r.kernel.org,
        kernel@...nvz.org
Subject: Re: [PATCH/RFC] mm: add and use batched version of
 __tlb_remove_table()

On Fri, Dec 17, 2021 at 11:19:10AM +0300, Nikita Yushchenko wrote:
> When batched page table freeing via struct mmu_table_batch is used, the
> final freeing in __tlb_remove_table_free() executes a loop, calling the
> arch hook __tlb_remove_table() to free each table individually.
> 
> Shift that loop down to the archs. This allows each arch to optimize it
> by freeing multiple tables in a single release_pages() call, which is
> faster than individual put_page() calls, especially with memcg
> accounting enabled.
> 
> Signed-off-by: Andrey Ryabinin <aryabinin@...tuozzo.com>
> Signed-off-by: Nikita Yushchenko <nikita.yushchenko@...tuozzo.com>
> ---
>  arch/arm/include/asm/tlb.h                   |  5 ++++
>  arch/arm64/include/asm/tlb.h                 |  5 ++++
>  arch/powerpc/include/asm/book3s/32/pgalloc.h |  8 +++++++
>  arch/powerpc/include/asm/book3s/64/pgalloc.h |  1 +
>  arch/powerpc/include/asm/nohash/pgalloc.h    |  8 +++++++
>  arch/powerpc/mm/book3s64/pgtable.c           |  8 +++++++
>  arch/s390/include/asm/tlb.h                  |  1 +
>  arch/s390/mm/pgalloc.c                       |  8 +++++++
>  arch/sparc/include/asm/pgalloc_64.h          |  8 +++++++
>  arch/x86/include/asm/tlb.h                   |  5 ++++
>  include/asm-generic/tlb.h                    |  2 +-
>  include/linux/swap.h                         |  5 +++-
>  mm/mmu_gather.c                              |  6 +----
>  mm/swap_state.c                              | 24 +++++++++++++++-----
>  14 files changed, 81 insertions(+), 13 deletions(-)

Oh gawd, that's terrible. Never, ever duplicate code like that.

I'm thinking the diff below does the same? But yes, please do as Dave
said: give us actual numbers that show this is worth it.
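
For reference, the contrast the changelog draws, reduced to its
essentials. This is an illustrative sketch, not code from either patch:
free_page_and_swap_cache() and release_pages() are the existing mm/
helpers, the two wrapper functions are made up for illustration, and
the per-page swap-cache cleanup a real batched helper also needs is
elided here.

/* Today: each table page is freed individually, entering the page
 * allocator and the memcg uncharge path once per table.
 */
static void remove_tables_one_by_one(struct page **tables, int nr)
{
	int i;

	for (i = 0; i < nr; i++)
		free_page_and_swap_cache(tables[i]);
}

/* Proposed: a single release_pages() call covers the whole array,
 * amortizing the locking and memcg work across the batch.
 */
static void remove_tables_batched(struct page **tables, int nr)
{
	release_pages(tables, nr);
}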

---
 arch/Kconfig                 |  4 ++++
 arch/arm/Kconfig             |  1 +
 arch/arm/include/asm/tlb.h   |  5 -----
 arch/arm64/Kconfig           |  1 +
 arch/arm64/include/asm/tlb.h |  5 -----
 arch/x86/Kconfig             |  1 +
 arch/x86/include/asm/tlb.h   |  4 ----
 mm/mmu_gather.c              | 22 +++++++++++++++++++---
 8 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 26b8ed11639d..f2bd3f5af2b1 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -415,6 +415,10 @@ config HAVE_ARCH_JUMP_LABEL_RELATIVE
 config MMU_GATHER_TABLE_FREE
 	bool
 
+config MMU_GATHER_TABLE_PAGE
+	bool
+	depends on MMU_GATHER_TABLE_FREE
+
 config MMU_GATHER_RCU_TABLE_FREE
 	bool
 	select MMU_GATHER_TABLE_FREE
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index f0f9e8bec83a..11baaa5719c2 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -110,6 +110,7 @@ config ARM
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
 	select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE
+	select MMU_GATHER_TABLE_PAGE if MMU
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_RSEQ
 	select HAVE_STACKPROTECTOR
diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index b8cbe03ad260..9d9b21649ca0 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -29,11 +29,6 @@
 #include <linux/swap.h>
 #include <asm/tlbflush.h>
 
-static inline void __tlb_remove_table(void *_table)
-{
-	free_page_and_swap_cache((struct page *)_table);
-}
-
 #include <asm-generic/tlb.h>
 
 static inline void
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index c4207cf9bb17..4aa28fb03f4f 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -196,6 +196,7 @@ config ARM64
 	select HAVE_FUNCTION_ARG_ACCESS_API
 	select HAVE_FUTEX_CMPXCHG if FUTEX
 	select MMU_GATHER_RCU_TABLE_FREE
+	select MMU_GATHER_TABLE_PAGE
 	select HAVE_RSEQ
 	select HAVE_STACKPROTECTOR
 	select HAVE_SYSCALL_TRACEPOINTS
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index c995d1f4594f..401826260a5c 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -11,11 +11,6 @@
 #include <linux/pagemap.h>
 #include <linux/swap.h>
 
-static inline void __tlb_remove_table(void *_table)
-{
-	free_page_and_swap_cache((struct page *)_table);
-}
-
 #define tlb_flush tlb_flush
 static void tlb_flush(struct mmu_gather *tlb);
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b9281fab4e3e..a22e653f4d0e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -235,6 +235,7 @@ config X86
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
 	select MMU_GATHER_RCU_TABLE_FREE		if PARAVIRT
+	select MMU_GATHER_TABLE_PAGE
 	select HAVE_POSIX_CPU_TIMERS_TASK_WORK
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_RELIABLE_STACKTRACE		if X86_64 && (UNWINDER_FRAME_POINTER || UNWINDER_ORC) && STACK_VALIDATION
diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index 1bfe979bb9bc..dec5ffa3042a 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -32,9 +32,5 @@ static inline void tlb_flush(struct mmu_gather *tlb)
  * below 'ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE' in include/asm-generic/tlb.h
  * for more details.
  */
-static inline void __tlb_remove_table(void *table)
-{
-	free_page_and_swap_cache(table);
-}
 
 #endif /* _ASM_X86_TLB_H */
diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
index 1b9837419bf9..0195d0f13ed3 100644
--- a/mm/mmu_gather.c
+++ b/mm/mmu_gather.c
@@ -93,13 +93,29 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_
 
 #ifdef CONFIG_MMU_GATHER_TABLE_FREE
 
-static void __tlb_remove_table_free(struct mmu_table_batch *batch)
+#ifdef CONFIG_MMU_GATHER_TABLE_PAGE
+static inline void __tlb_remove_table(void *table)
+{
+	free_page_and_swap_cache(table);
+}
+
+static inline void __tlb_remove_tables(void **tables, int nr)
+{
+	free_pages_and_swap_cache_nolru((struct page **)tables, nr);
+}
+#else
+static inline void __tlb_remove_tables(void **tables, int nr)
 {
 	int i;
 
-	for (i = 0; i < batch->nr; i++)
-		__tlb_remove_table(batch->tables[i]);
+	for (i = 0; i < nr; i++)
+		__tlb_remove_table(tables[i]);
+}
+#endif
 
+static void __tlb_remove_table_free(struct mmu_table_batch *batch)
+{
+	__tlb_remove_tables(batch->tables, batch->nr);
 	free_page((unsigned long)batch);
 }
 

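A loose end worth spelling out: free_pages_and_swap_cache_nolru() above
is not defined in this diff; it is the helper Nikita's patch adds in
mm/swap_state.c. Roughly, one would expect it to do the per-page
swap-cache cleanup and then one batched free, along these lines (a
sketch of the intended shape, not the actual hunk; free_swap_cache()
and release_pages() already exist in mm/):

void free_pages_and_swap_cache_nolru(struct page **pages, int nr)
{
	int i;

	/*
	 * Unlike free_pages_and_swap_cache(), no lru_add_drain() here:
	 * page table pages are never on the LRU.
	 */
	for (i = 0; i < nr; i++)
		free_swap_cache(pages[i]);

	release_pages(pages, nr);
}

The #else branch keeps the old per-table loop for architectures such as
s390, sparc and powerpc, whose __tlb_remove_table() does more than free
a plain page, so only archs that select MMU_GATHER_TABLE_PAGE pick up
the batched path.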