From: ndesaulniers@google.com ndesaulniers@google.com
commit 1ad3935b39da78a403e7df7a3813f866c731bc64 upstream.
Clang warns: vector initializers are not compatible with NEON intrinsics in big endian mode [-Wnonportable-vector-initialization]
While this is usually the case, it's not an issue for this case since we're initializing the uint8x16_t (16x uint8_t's) with the same value.
Instead, use vdupq_n_u8 which both compilers lower into a single movi instruction: https://godbolt.org/z/vBrgzt
This avoids the static storage for a constant value.
Link: https://github.com/ClangBuiltLinux/linux/issues/214 Suggested-by: Nathan Chancellor natechancellor@gmail.com Reviewed-by: Ard Biesheuvel ard.biesheuvel@linaro.org Signed-off-by: Nick Desaulniers ndesaulniers@google.com Signed-off-by: Catalin Marinas catalin.marinas@arm.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org
--- lib/raid6/neon.uc | 5 ++--- lib/raid6/recov_neon_inner.c | 7 ++----- 2 files changed, 4 insertions(+), 8 deletions(-)
--- a/lib/raid6/neon.uc +++ b/lib/raid6/neon.uc @@ -28,7 +28,6 @@
typedef uint8x16_t unative_t;
-#define NBYTES(x) ((unative_t){x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x}) #define NSIZE sizeof(unative_t)
/* @@ -61,7 +60,7 @@ void raid6_neon$#_gen_syndrome_real(int int d, z, z0;
register unative_t wd$$, wq$$, wp$$, w1$$, w2$$; - const unative_t x1d = NBYTES(0x1d); + const unative_t x1d = vdupq_n_u8(0x1d);
z0 = disks - 3; /* Highest data disk */ p = dptr[z0+1]; /* XOR parity */ @@ -92,7 +91,7 @@ void raid6_neon$#_xor_syndrome_real(int int d, z, z0;
register unative_t wd$$, wq$$, wp$$, w1$$, w2$$; - const unative_t x1d = NBYTES(0x1d); + const unative_t x1d = vdupq_n_u8(0x1d);
z0 = stop; /* P/Q right side optimization */ p = dptr[disks-2]; /* XOR parity */ --- a/lib/raid6/recov_neon_inner.c +++ b/lib/raid6/recov_neon_inner.c @@ -10,11 +10,6 @@
#include <arm_neon.h>
-static const uint8x16_t x0f = { - 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, - 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, -}; - #ifdef CONFIG_ARM /* * AArch32 does not provide this intrinsic natively because it does not @@ -41,6 +36,7 @@ void __raid6_2data_recov_neon(int bytes, uint8x16_t pm1 = vld1q_u8(pbmul + 16); uint8x16_t qm0 = vld1q_u8(qmul); uint8x16_t qm1 = vld1q_u8(qmul + 16); + uint8x16_t x0f = vdupq_n_u8(0x0f);
/* * while ( bytes-- ) { @@ -87,6 +83,7 @@ void __raid6_datap_recov_neon(int bytes, { uint8x16_t qm0 = vld1q_u8(qmul); uint8x16_t qm1 = vld1q_u8(qmul + 16); + uint8x16_t x0f = vdupq_n_u8(0x0f);
/* * while (bytes--) {