[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1-v3-1893cd8b9369+1925-mlx5_arm_wc_jgg@nvidia.com>
Date: Thu, 11 Apr 2024 13:46:14 -0300
From: Jason Gunthorpe <jgg@...dia.com>
To: Alexander Gordeev <agordeev@...ux.ibm.com>,
Andrew Morton <akpm@...ux-foundation.org>,
Christian Borntraeger <borntraeger@...ux.ibm.com>,
Borislav Petkov <bp@...en8.de>,
Dave Hansen <dave.hansen@...ux.intel.com>,
"David S. Miller" <davem@...emloft.net>,
Eric Dumazet <edumazet@...gle.com>,
Gerald Schaefer <gerald.schaefer@...ux.ibm.com>,
Vasily Gorbik <gor@...ux.ibm.com>,
Heiko Carstens <hca@...ux.ibm.com>,
"H. Peter Anvin" <hpa@...or.com>,
Justin Stitt <justinstitt@...gle.com>,
Jakub Kicinski <kuba@...nel.org>,
Leon Romanovsky <leon@...nel.org>,
linux-rdma@...r.kernel.org,
linux-s390@...r.kernel.org,
llvm@...ts.linux.dev,
Ingo Molnar <mingo@...hat.com>,
Bill Wendling <morbo@...gle.com>,
Nathan Chancellor <nathan@...nel.org>,
Nick Desaulniers <ndesaulniers@...gle.com>,
netdev@...r.kernel.org,
Paolo Abeni <pabeni@...hat.com>,
Salil Mehta <salil.mehta@...wei.com>,
Sven Schnelle <svens@...ux.ibm.com>,
Thomas Gleixner <tglx@...utronix.de>,
x86@...nel.org,
Yisen Zhuang <yisen.zhuang@...wei.com>
Cc: Arnd Bergmann <arnd@...db.de>,
Catalin Marinas <catalin.marinas@....com>,
Leon Romanovsky <leonro@...lanox.com>,
linux-arch@...r.kernel.org,
linux-arm-kernel@...ts.infradead.org,
Mark Rutland <mark.rutland@....com>,
Michael Guralnik <michaelgur@...lanox.com>,
patches@...ts.linux.dev,
Niklas Schnelle <schnelle@...ux.ibm.com>,
Jijie Shao <shaojijie@...wei.com>,
Will Deacon <will@...nel.org>
Subject: [PATCH v3 1/6] x86: Stop using weak symbols for __iowrite32_copy()
Start switching iomap_copy routines over to use #define and arch provided
inline/macro functions instead of weak symbols.
Inline functions allow more compiler optimization and this is often a
driver hot path.
x86 has the only weak implementation for __iowrite32_copy(), so replace it
with a static inline containing the same single instruction inline
assembly. The compiler will generate the "mov edx,ecx" in a more optimal
way.
Remove iomap_copy_64.S
Signed-off-by: Jason Gunthorpe <jgg@...dia.com>
---
arch/x86/include/asm/io.h | 17 +++++++++++++++++
arch/x86/lib/Makefile | 1 -
arch/x86/lib/iomap_copy_64.S | 15 ---------------
include/linux/io.h | 5 ++++-
lib/iomap_copy.c | 6 +++---
5 files changed, 24 insertions(+), 20 deletions(-)
delete mode 100644 arch/x86/lib/iomap_copy_64.S
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 294cd2a4081812..4b99ed326b1748 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -209,6 +209,23 @@ void memset_io(volatile void __iomem *, int, size_t);
#define memcpy_toio memcpy_toio
#define memset_io memset_io
+#ifdef CONFIG_X86_64
+/*
+ * Commit 0f07496144c2 ("[PATCH] Add faster __iowrite32_copy routine for
+ * x86_64") says that circa 2006 rep movsl is noticeably faster than a copy
+ * loop.
+ */
+static inline void __iowrite32_copy(void __iomem *to, const void *from,
+ size_t count)
+{
+ asm volatile("rep ; movsl"
+ : "=&c"(count), "=&D"(to), "=&S"(from)
+ : "0"(count), "1"(to), "2"(from)
+ : "memory");
+}
+#define __iowrite32_copy __iowrite32_copy
+#endif
+
/*
* ISA space is 'always mapped' on a typical x86 system, no need to
* explicitly ioremap() it. The fact that the ISA IO space is mapped
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 6da73513f02668..98583a9dbab337 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -53,7 +53,6 @@ ifneq ($(CONFIG_X86_CMPXCHG64),y)
lib-y += atomic64_386_32.o
endif
else
- obj-y += iomap_copy_64.o
ifneq ($(CONFIG_GENERIC_CSUM),y)
lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o
endif
diff --git a/arch/x86/lib/iomap_copy_64.S b/arch/x86/lib/iomap_copy_64.S
deleted file mode 100644
index 6ff2f56cb0f71a..00000000000000
--- a/arch/x86/lib/iomap_copy_64.S
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright 2006 PathScale, Inc. All Rights Reserved.
- */
-
-#include <linux/linkage.h>
-
-/*
- * override generic version in lib/iomap_copy.c
- */
-SYM_FUNC_START(__iowrite32_copy)
- movl %edx,%ecx
- rep movsl
- RET
-SYM_FUNC_END(__iowrite32_copy)
diff --git a/include/linux/io.h b/include/linux/io.h
index 235ba7d80a8f0d..ce86120ce9d526 100644
--- a/include/linux/io.h
+++ b/include/linux/io.h
@@ -16,7 +16,10 @@
struct device;
struct resource;
-__visible void __iowrite32_copy(void __iomem *to, const void *from, size_t count);
+#ifndef __iowrite32_copy
+void __iowrite32_copy(void __iomem *to, const void *from, size_t count);
+#endif
+
void __ioread32_copy(void *to, const void __iomem *from, size_t count);
void __iowrite64_copy(void __iomem *to, const void *from, size_t count);
diff --git a/lib/iomap_copy.c b/lib/iomap_copy.c
index 5de7c04e05ef56..8ddcbb53507dfe 100644
--- a/lib/iomap_copy.c
+++ b/lib/iomap_copy.c
@@ -16,9 +16,8 @@
* time. Order of access is not guaranteed, nor is a memory barrier
* performed afterwards.
*/
-void __attribute__((weak)) __iowrite32_copy(void __iomem *to,
- const void *from,
- size_t count)
+#ifndef __iowrite32_copy
+void __iowrite32_copy(void __iomem *to, const void *from, size_t count)
{
u32 __iomem *dst = to;
const u32 *src = from;
@@ -28,6 +27,7 @@ void __attribute__((weak)) __iowrite32_copy(void __iomem *to,
__raw_writel(*src++, dst++);
}
EXPORT_SYMBOL_GPL(__iowrite32_copy);
+#endif
/**
* __ioread32_copy - copy data from MMIO space, in 32-bit units
--
2.43.2
Powered by blists - more mailing lists