linux-kernel - [PATCH 4.19 16/64] lib/raid6: use vdupq_n

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives

Hash Suite: Windows password security audit tool. GUI, reports in PDF.

[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]

Message-Id: <20200422095015.766450493@linuxfoundation.org>
Date:   Wed, 22 Apr 2020 11:57:00 +0200
From:   Greg Kroah-Hartman <gregkh@...uxfoundation.org>
To:     linux-kernel@...r.kernel.org
Cc:     Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
        stable@...r.kernel.org,
        Nathan Chancellor <natechancellor@...il.com>,
        Ard Biesheuvel <ard.biesheuvel@...aro.org>,
        Nick Desaulniers <ndesaulniers@...gle.com>,
        Catalin Marinas <catalin.marinas@....com>
Subject: [PATCH 4.19 16/64] lib/raid6: use vdupq_n_u8 to avoid endianness warnings

From: ndesaulniers@...gle.com <ndesaulniers@...gle.com>

commit 1ad3935b39da78a403e7df7a3813f866c731bc64 upstream.

Clang warns: vector initializers are not compatible with NEON intrinsics
in big endian mode [-Wnonportable-vector-initialization]

While this is usually the case, it's not an issue for this case since
we're initializing the uint8x16_t (16x uint8_t's) with the same value.

Instead, use vdupq_n_u8 which both compilers lower into a single movi
instruction: https://godbolt.org/z/vBrgzt

This avoids the static storage for a constant value.

Link: https://github.com/ClangBuiltLinux/linux/issues/214
Suggested-by: Nathan Chancellor <natechancellor@...il.com>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@...aro.org>
Signed-off-by: Nick Desaulniers <ndesaulniers@...gle.com>
Signed-off-by: Catalin Marinas <catalin.marinas@....com>
Signed-off-by: Greg Kroah-Hartman <gregkh@...uxfoundation.org>

---
 lib/raid6/neon.uc            |    5 ++---
 lib/raid6/recov_neon_inner.c |    7 ++-----
 2 files changed, 4 insertions(+), 8 deletions(-)

--- a/lib/raid6/neon.uc
+++ b/lib/raid6/neon.uc
@@ -28,7 +28,6 @@
 
 typedef uint8x16_t unative_t;
 
-#define NBYTES(x) ((unative_t){x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x})
 #define NSIZE	sizeof(unative_t)
 
 /*
@@ -61,7 +60,7 @@ void raid6_neon$#_gen_syndrome_real(int
 	int d, z, z0;
 
 	register unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
-	const unative_t x1d = NBYTES(0x1d);
+	const unative_t x1d = vdupq_n_u8(0x1d);
 
 	z0 = disks - 3;		/* Highest data disk */
 	p = dptr[z0+1];		/* XOR parity */
@@ -92,7 +91,7 @@ void raid6_neon$#_xor_syndrome_real(int
 	int d, z, z0;
 
 	register unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
-	const unative_t x1d = NBYTES(0x1d);
+	const unative_t x1d = vdupq_n_u8(0x1d);
 
 	z0 = stop;		/* P/Q right side optimization */
 	p = dptr[disks-2];	/* XOR parity */
--- a/lib/raid6/recov_neon_inner.c
+++ b/lib/raid6/recov_neon_inner.c
@@ -10,11 +10,6 @@
 
 #include <arm_neon.h>
 
-static const uint8x16_t x0f = {
-	0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
-	0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
-};
-
 #ifdef CONFIG_ARM
 /*
  * AArch32 does not provide this intrinsic natively because it does not
@@ -41,6 +36,7 @@ void __raid6_2data_recov_neon(int bytes,
 	uint8x16_t pm1 = vld1q_u8(pbmul + 16);
 	uint8x16_t qm0 = vld1q_u8(qmul);
 	uint8x16_t qm1 = vld1q_u8(qmul + 16);
+	uint8x16_t x0f = vdupq_n_u8(0x0f);
 
 	/*
 	 * while ( bytes-- ) {
@@ -87,6 +83,7 @@ void __raid6_datap_recov_neon(int bytes,
 {
 	uint8x16_t qm0 = vld1q_u8(qmul);
 	uint8x16_t qm1 = vld1q_u8(qmul + 16);
+	uint8x16_t x0f = vdupq_n_u8(0x0f);
 
 	/*
 	 * while (bytes--) {