[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250506165227.158932-3-ubizjak@gmail.com>
Date: Tue, 6 May 2025 18:52:08 +0200
From: Uros Bizjak <ubizjak@...il.com>
To: x86@...nel.org,
linux-kernel@...r.kernel.org
Cc: Uros Bizjak <ubizjak@...il.com>,
Thomas Gleixner <tglx@...utronix.de>,
Ingo Molnar <mingo@...nel.org>,
Borislav Petkov <bp@...en8.de>,
Dave Hansen <dave.hansen@...ux.intel.com>,
"H. Peter Anvin" <hpa@...or.com>
Subject: [PATCH -tip 3/3] x86/asm/32: Modernize __memcpy()
Use the inout "+" constraint modifier where appropriate, declare the
temporary variables as unsigned long, and rewrite parts of the assembly
in plain C. The memcpy() function shrinks by 10 bytes, from:
00e778d0 <memcpy>:
e778d0: 55 push %ebp
e778d1: 89 e5 mov %esp,%ebp
e778d3: 83 ec 0c sub $0xc,%esp
e778d6: 89 5d f4 mov %ebx,-0xc(%ebp)
e778d9: 89 c3 mov %eax,%ebx
e778db: 89 c8 mov %ecx,%eax
e778dd: 89 75 f8 mov %esi,-0x8(%ebp)
e778e0: c1 e9 02 shr $0x2,%ecx
e778e3: 89 d6 mov %edx,%esi
e778e5: 89 7d fc mov %edi,-0x4(%ebp)
e778e8: 89 df mov %ebx,%edi
e778ea: f3 a5 rep movsl %ds:(%esi),%es:(%edi)
e778ec: 89 c1 mov %eax,%ecx
e778ee: 83 e1 03 and $0x3,%ecx
e778f1: 74 02 je e778f5 <memcpy+0x25>
e778f3: f3 a4 rep movsb %ds:(%esi),%es:(%edi)
e778f5: 8b 75 f8 mov -0x8(%ebp),%esi
e778f8: 89 d8 mov %ebx,%eax
e778fa: 8b 5d f4 mov -0xc(%ebp),%ebx
e778fd: 8b 7d fc mov -0x4(%ebp),%edi
e77900: 89 ec mov %ebp,%esp
e77902: 5d pop %ebp
e77903: c3 ret
to:
00e778b0 <memcpy>:
e778b0: 55 push %ebp
e778b1: 89 e5 mov %esp,%ebp
e778b3: 83 ec 08 sub $0x8,%esp
e778b6: 89 75 f8 mov %esi,-0x8(%ebp)
e778b9: 89 d6 mov %edx,%esi
e778bb: 89 ca mov %ecx,%edx
e778bd: 89 7d fc mov %edi,-0x4(%ebp)
e778c0: c1 e9 02 shr $0x2,%ecx
e778c3: 89 c7 mov %eax,%edi
e778c5: f3 a5 rep movsl %ds:(%esi),%es:(%edi)
e778c7: 83 e2 03 and $0x3,%edx
e778ca: 74 04 je e778d0 <memcpy+0x20>
e778cc: 89 d1 mov %edx,%ecx
e778ce: f3 a4 rep movsb %ds:(%esi),%es:(%edi)
e778d0: 8b 75 f8 mov -0x8(%ebp),%esi
e778d3: 8b 7d fc mov -0x4(%ebp),%edi
e778d6: 89 ec mov %ebp,%esp
e778d8: 5d pop %ebp
e778d9: c3 ret
due to better register allocation, which avoids using the call-saved
%ebx register.
No functional changes intended.
Signed-off-by: Uros Bizjak <ubizjak@...il.com>
Cc: Thomas Gleixner <tglx@...utronix.de>
Cc: Ingo Molnar <mingo@...nel.org>
Cc: Borislav Petkov <bp@...en8.de>
Cc: Dave Hansen <dave.hansen@...ux.intel.com>
Cc: "H. Peter Anvin" <hpa@...or.com>
---
arch/x86/include/asm/string_32.h | 22 ++++++++++++----------
1 file changed, 12 insertions(+), 10 deletions(-)
diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h
index 00d497837571..6a4062414495 100644
--- a/arch/x86/include/asm/string_32.h
+++ b/arch/x86/include/asm/string_32.h
@@ -32,16 +32,18 @@ extern size_t strlen(const char *s);
static __always_inline void *__memcpy(void *to, const void *from, size_t n)
{
- int d0, d1, d2;
- asm volatile("rep movsl\n\t"
- "movl %4,%%ecx\n\t"
- "andl $3,%%ecx\n\t"
- "jz 1f\n\t"
- "rep movsb\n\t"
- "1:"
- : "=&c" (d0), "=&D" (d1), "=&S" (d2)
- : "0" (n / 4), "g" (n), "1" ((long)to), "2" ((long)from)
- : "memory");
+ unsigned long esi = (unsigned long)from;
+ unsigned long edi = (unsigned long)to;
+ unsigned long ecx = n >> 2;
+
+ asm volatile("rep movsl"
+ : "+D" (edi), "+S" (esi), "+c" (ecx)
+ : : "memory");
+ ecx = n & 3;
+ if (ecx)
+ asm volatile("rep movsb"
+ : "+D" (edi), "+S" (esi), "+c" (ecx)
+ : : "memory");
return to;
}
--
2.49.0
Powered by blists - more mailing lists