[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20231122092855.4440-4-shijie@os.amperecomputing.com>
Date: Wed, 22 Nov 2023 17:28:54 +0800
From: Huang Shijie <shijie@...amperecomputing.com>
To: catalin.marinas@....com
Cc: will@...nel.org, mark.rutland@....com, suzuki.poulose@....com,
broonie@...nel.org, linux-arm-kernel@...ts.infradead.org,
linux-kernel@...r.kernel.org, anshuman.khandual@....com,
robh@...nel.org, oliver.upton@...ux.dev, maz@...nel.org,
patches@...erecomputing.com,
Huang Shijie <shijie@...amperecomputing.com>
Subject: [PATCH 3/4] arm64: copy_template.S: add loop_for_copy_128_bytes macro
Add the loop_for_copy_128_bytes macro to make the code cleaner,
and to prepare for the next patch.
Signed-off-by: Huang Shijie <shijie@...amperecomputing.com>
---
arch/arm64/lib/copy_template.S | 58 ++++++++++++++++++----------------
1 file changed, 31 insertions(+), 27 deletions(-)
diff --git a/arch/arm64/lib/copy_template.S b/arch/arm64/lib/copy_template.S
index 488df234c49a..79b32569260c 100644
--- a/arch/arm64/lib/copy_template.S
+++ b/arch/arm64/lib/copy_template.S
@@ -10,6 +10,36 @@
* files/head:/src/aarch64/
*/
+.macro loop_for_copy_128_bytes extra_ops
+ /* Prime the loop: load the first 64 bytes from src into A..D. */
+ ldp1 A_l, A_h, src, #16
+ ldp1 B_l, B_h, src, #16
+ ldp1 C_l, C_h, src, #16
+ ldp1 D_l, D_h, src, #16
+1: /* loop top: the macro argument on the next line is expanded on every pass */
+ \extra_ops
+ /*
+ * Interlace each store of the 64 bytes already held in A..D with the load
+ * of the next 64 bytes, so A..D always hold data that is not yet stored.
+ */
+ stp1 A_l, A_h, dst, #16
+ ldp1 A_l, A_h, src, #16
+ stp1 B_l, B_h, dst, #16
+ ldp1 B_l, B_h, src, #16
+ stp1 C_l, C_h, dst, #16
+ ldp1 C_l, C_h, src, #16
+ stp1 D_l, D_h, dst, #16
+ ldp1 D_l, D_h, src, #16
+ subs count, count, #64 /* count -= 64; sets the flags tested by b.ge */
+ b.ge 1b /* keep looping while the result is >= 0 (signed) */
+ stp1 A_l, A_h, dst, #16 /* loop done: store the last 64 bytes held in A..D */
+ stp1 B_l, B_h, dst, #16
+ stp1 C_l, C_h, dst, #16
+ stp1 D_l, D_h, dst, #16
+
+ tst count, #0x3f /* are any of the low 6 bits of count set? */
+ b.ne .Ltail63 /* yes: branch to .Ltail63; no: fall out of the macro */
+.endm
/*
* Copy a buffer from src to dest (alignment handled by the hardware)
@@ -151,31 +181,5 @@ D_h .req x14
*/
.p2align L1_CACHE_SHIFT
.Lcpy_body_large:
- /* pre-get 64 bytes data. */
- ldp1 A_l, A_h, src, #16
- ldp1 B_l, B_h, src, #16
- ldp1 C_l, C_h, src, #16
- ldp1 D_l, D_h, src, #16
-1:
- /*
- * interlace the load of next 64 bytes data block with store of the last
- * loaded 64 bytes data.
- */
- stp1 A_l, A_h, dst, #16
- ldp1 A_l, A_h, src, #16
- stp1 B_l, B_h, dst, #16
- ldp1 B_l, B_h, src, #16
- stp1 C_l, C_h, dst, #16
- ldp1 C_l, C_h, src, #16
- stp1 D_l, D_h, dst, #16
- ldp1 D_l, D_h, src, #16
- subs count, count, #64
- b.ge 1b
- stp1 A_l, A_h, dst, #16
- stp1 B_l, B_h, dst, #16
- stp1 C_l, C_h, dst, #16
- stp1 D_l, D_h, dst, #16
-
- tst count, #0x3f
- b.ne .Ltail63
+ loop_for_copy_128_bytes
.Lexitfunc:
--
2.40.1
Powered by blists - more mailing lists