[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20181005151203.17473.25723.stgit@localhost.localdomain>
Date: Fri, 05 Oct 2018 08:12:03 -0700
From: Alexander Duyck <alexander.h.duyck@...ux.intel.com>
To: linux-mm@...ck.org, akpm@...ux-foundation.org
Cc: pavel.tatashin@...rosoft.com, mhocko@...e.com,
dave.jiang@...el.com, alexander.h.duyck@...ux.intel.com,
linux-kernel@...r.kernel.org, willy@...radead.org,
davem@...emloft.net, khalid.aziz@...cle.com,
rppt@...ux.vnet.ibm.com, vbabka@...e.cz,
sparclinux@...r.kernel.org, dan.j.williams@...el.com,
ldufour@...ux.vnet.ibm.com, mgorman@...hsingularity.net,
mingo@...nel.org, kirill.shutemov@...ux.intel.com
Subject: [mm PATCH 1/5] mm: Use mm_zero_struct_page from SPARC on all 64b
architectures
This change makes it so that we use the same approach that was already in
use on Sparc on all the architectures that support a 64b long.
This is mostly motivated by the fact that 8 to 10 store/move instructions
are likely always going to be faster than having to call into a function
that is not specialized for handling page init.
An added advantage to doing it this way is that the compiler can get away
with combining writes in the __init_single_page call. As a result the
memset call will be reduced to only about 4 write operations, or at least
that is what I am seeing with GCC 6.2 as the flags, LRU pointers, and
count/mapcount seem to be cancelling out at least 4 of the 8 assignments on
my system.
Signed-off-by: Alexander Duyck <alexander.h.duyck@...ux.intel.com>
---
arch/sparc/include/asm/pgtable_64.h | 30 ------------------------------
include/linux/mm.h | 33 +++++++++++++++++++++++++++++++++
2 files changed, 33 insertions(+), 30 deletions(-)
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 1393a8ac596b..22500c3be7a9 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -231,36 +231,6 @@
extern struct page *mem_map_zero;
#define ZERO_PAGE(vaddr) (mem_map_zero)
-/* This macro must be updated when the size of struct page grows above 80
- * or reduces below 64.
- * The idea that compiler optimizes out switch() statement, and only
- * leaves clrx instructions
- */
-#define mm_zero_struct_page(pp) do { \
- unsigned long *_pp = (void *)(pp); \
- \
- /* Check that struct page is either 64, 72, or 80 bytes */ \
- BUILD_BUG_ON(sizeof(struct page) & 7); \
- BUILD_BUG_ON(sizeof(struct page) < 64); \
- BUILD_BUG_ON(sizeof(struct page) > 80); \
- \
- switch (sizeof(struct page)) { \
- case 80: \
- _pp[9] = 0; /* fallthrough */ \
- case 72: \
- _pp[8] = 0; /* fallthrough */ \
- default: \
- _pp[7] = 0; \
- _pp[6] = 0; \
- _pp[5] = 0; \
- _pp[4] = 0; \
- _pp[3] = 0; \
- _pp[2] = 0; \
- _pp[1] = 0; \
- _pp[0] = 0; \
- } \
-} while (0)
-
/* PFNs are real physical page numbers. However, mem_map only begins to record
* per-page information starting at pfn_base. This is to handle systems where
* the first physical page in the machine is at some huge physical address,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 273d4dbd3883..a0440a6ef5cf 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -102,8 +102,41 @@ static inline void set_max_mapnr(unsigned long limit) { }
* zeroing by defining this macro in <asm/pgtable.h>.
*/
#ifndef mm_zero_struct_page
+#if BITS_PER_LONG == 64
+/* This function must be updated when the size of struct page grows above 80
+ * or reduces below 64. The idea is that the compiler optimizes out the
+ * switch() statement, and only leaves move/store instructions
+ */
+#define mm_zero_struct_page(pp) __mm_zero_struct_page(pp)
+static inline void __mm_zero_struct_page(struct page *page)
+{
+ unsigned long *_pp = (void *)page;
+
+ /* Check that struct page is either 64, 72, or 80 bytes */
+ BUILD_BUG_ON(sizeof(struct page) & 7);
+ BUILD_BUG_ON(sizeof(struct page) < 64);
+ BUILD_BUG_ON(sizeof(struct page) > 80);
+
+ switch (sizeof(struct page)) {
+ case 80:
+ _pp[9] = 0; /* fallthrough */
+ case 72:
+ _pp[8] = 0; /* fallthrough */
+ default:
+ _pp[7] = 0;
+ _pp[6] = 0;
+ _pp[5] = 0;
+ _pp[4] = 0;
+ _pp[3] = 0;
+ _pp[2] = 0;
+ _pp[1] = 0;
+ _pp[0] = 0;
+ }
+}
+#else
#define mm_zero_struct_page(pp) ((void)memset((pp), 0, sizeof(struct page)))
#endif
+#endif
/*
* Default maximum number of active map areas, this limits the number of vmas
Powered by blists - more mailing lists