Hi,
This patch series is a collection of clean cherry picks into the 5.4 kernel allowing us to use the Clang integrated assembler to build the ARM 32-bit kernel.
This is useful in order to have proper build and runtime coverage of the stable kernel(s).
Ard Biesheuvel (3): crypto: arm/sha256-neon - avoid ADRL pseudo instruction crypto: arm/sha512-neon - avoid ADRL pseudo instruction crypto: arm - use Kconfig based compiler checks for crypto opcodes
Jian Cai (2): ARM: 8971/1: replace the sole use of a symbol with its definition ARM: 9029/1: Make iwmmxt.S support Clang's integrated assembler
Nick Desaulniers (1): ARM: 8933/1: replace Sun/Solaris style flag on section directive
Stefan Agner (5): ARM: 8989/1: use .fpu assembler directives instead of assembler arguments ARM: 8990/1: use VFP assembler mnemonics in register load/store macros ARM: 8929/1: use APSR_nzcv instead of r15 as mrc operand ARM: OMAP2+: drop unnecessary adrl crypto: arm/ghash-ce - define fpu before fpu registers are referenced
arch/arm/boot/bootp/init.S | 2 +- arch/arm/boot/compressed/big-endian.S | 2 +- arch/arm/boot/compressed/head.S | 4 +- arch/arm/boot/compressed/piggy.S | 2 +- arch/arm/crypto/Kconfig | 14 +++-- arch/arm/crypto/Makefile | 32 ++-------- arch/arm/crypto/crct10dif-ce-core.S | 2 +- arch/arm/crypto/ghash-ce-core.S | 4 +- arch/arm/crypto/sha1-ce-core.S | 1 + arch/arm/crypto/sha2-ce-core.S | 1 + arch/arm/crypto/sha256-armv4.pl | 4 +- arch/arm/crypto/sha256-core.S_shipped | 4 +- arch/arm/crypto/sha512-armv4.pl | 4 +- arch/arm/crypto/sha512-core.S_shipped | 4 +- arch/arm/include/asm/assembler.h | 3 +- arch/arm/include/asm/vfpmacros.h | 19 +++--- arch/arm/kernel/iwmmxt.S | 89 ++++++++++++++------------- arch/arm/kernel/iwmmxt.h | 47 ++++++++++++++ arch/arm/mach-omap2/sleep34xx.S | 2 +- arch/arm/mm/proc-arm1020.S | 2 +- arch/arm/mm/proc-arm1020e.S | 2 +- arch/arm/mm/proc-arm1022.S | 2 +- arch/arm/mm/proc-arm1026.S | 6 +- arch/arm/mm/proc-arm720.S | 2 +- arch/arm/mm/proc-arm740.S | 2 +- arch/arm/mm/proc-arm7tdmi.S | 2 +- arch/arm/mm/proc-arm920.S | 2 +- arch/arm/mm/proc-arm922.S | 2 +- arch/arm/mm/proc-arm925.S | 2 +- arch/arm/mm/proc-arm926.S | 6 +- arch/arm/mm/proc-arm940.S | 2 +- arch/arm/mm/proc-arm946.S | 2 +- arch/arm/mm/proc-arm9tdmi.S | 2 +- arch/arm/mm/proc-fa526.S | 2 +- arch/arm/mm/proc-feroceon.S | 2 +- arch/arm/mm/proc-mohawk.S | 2 +- arch/arm/mm/proc-sa110.S | 2 +- arch/arm/mm/proc-sa1100.S | 2 +- arch/arm/mm/proc-v6.S | 2 +- arch/arm/mm/proc-v7.S | 2 +- arch/arm/mm/proc-v7m.S | 4 +- arch/arm/mm/proc-xsc3.S | 2 +- arch/arm/mm/proc-xscale.S | 2 +- arch/arm/vfp/Makefile | 2 - arch/arm/vfp/vfphw.S | 30 ++++++--- 45 files changed, 187 insertions(+), 143 deletions(-) create mode 100644 arch/arm/kernel/iwmmxt.h
From: Stefan Agner stefan@agner.ch
commit a6c30873ee4a5cc0549c1973668156381ab2c1c4 upstream
Explicit FPU selection has been introduced in commit 1a6be26d5b1a ("[ARM] Enable VFP to be built when non-VFP capable CPUs are selected") to make use of assembler mnemonics for VFP instructions.
However, clang currently does not support passing assembler flags like this and errors out with: clang-10: error: the clang compiler does not support '-Wa,-mfpu=softvfp+vfp'
Make use of the .fpu assembler directives to select the floating point hardware selectively. Also use the new unified assembler language mnemonics. This allows these procedures to be built with Clang.
Link: https://github.com/ClangBuiltLinux/linux/issues/762
Signed-off-by: Stefan Agner stefan@agner.ch Signed-off-by: Russell King rmk+kernel@armlinux.org.uk Signed-off-by: Florian Fainelli f.fainelli@gmail.com --- arch/arm/vfp/Makefile | 2 -- arch/arm/vfp/vfphw.S | 30 ++++++++++++++++++++---------- 2 files changed, 20 insertions(+), 12 deletions(-)
diff --git a/arch/arm/vfp/Makefile b/arch/arm/vfp/Makefile index 9975b63ac3b0..749901a72d6d 100644 --- a/arch/arm/vfp/Makefile +++ b/arch/arm/vfp/Makefile @@ -8,6 +8,4 @@ # ccflags-y := -DDEBUG # asflags-y := -DDEBUG
-KBUILD_AFLAGS :=$(KBUILD_AFLAGS:-msoft-float=-Wa,-mfpu=softvfp+vfp -mfloat-abi=soft) - obj-y += vfpmodule.o entry.o vfphw.o vfpsingle.o vfpdouble.o diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S index b530db8f2c6c..772c6a3b1f72 100644 --- a/arch/arm/vfp/vfphw.S +++ b/arch/arm/vfp/vfphw.S @@ -253,11 +253,14 @@ vfp_current_hw_state_address:
ENTRY(vfp_get_float) tbl_branch r0, r3, #3 + .fpu vfpv2 .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -1: mrc p10, 0, r0, c\dr, c0, 0 @ fmrs r0, s0 +1: vmov r0, s\dr ret lr .org 1b + 8 -1: mrc p10, 0, r0, c\dr, c0, 4 @ fmrs r0, s1 + .endr + .irp dr,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 +1: vmov r0, s\dr ret lr .org 1b + 8 .endr @@ -265,11 +268,14 @@ ENDPROC(vfp_get_float)
ENTRY(vfp_put_float) tbl_branch r1, r3, #3 + .fpu vfpv2 .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -1: mcr p10, 0, r0, c\dr, c0, 0 @ fmsr r0, s0 +1: vmov s\dr, r0 ret lr .org 1b + 8 -1: mcr p10, 0, r0, c\dr, c0, 4 @ fmsr r0, s1 + .endr + .irp dr,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 +1: vmov s\dr, r0 ret lr .org 1b + 8 .endr @@ -277,15 +283,17 @@ ENDPROC(vfp_put_float)
ENTRY(vfp_get_double) tbl_branch r0, r3, #3 + .fpu vfpv2 .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -1: fmrrd r0, r1, d\dr +1: vmov r0, r1, d\dr ret lr .org 1b + 8 .endr #ifdef CONFIG_VFPv3 @ d16 - d31 registers - .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -1: mrrc p11, 3, r0, r1, c\dr @ fmrrd r0, r1, d\dr + .fpu vfpv3 + .irp dr,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 +1: vmov r0, r1, d\dr ret lr .org 1b + 8 .endr @@ -299,15 +307,17 @@ ENDPROC(vfp_get_double)
ENTRY(vfp_put_double) tbl_branch r2, r3, #3 + .fpu vfpv2 .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -1: fmdrr d\dr, r0, r1 +1: vmov d\dr, r0, r1 ret lr .org 1b + 8 .endr #ifdef CONFIG_VFPv3 + .fpu vfpv3 @ d16 - d31 registers - .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -1: mcrr p11, 3, r0, r1, c\dr @ fmdrr r0, r1, d\dr + .irp dr,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 +1: vmov d\dr, r0, r1 ret lr .org 1b + 8 .endr
From: Stefan Agner stefan@agner.ch
commit ee440336e5ef977c397afdb72cbf9c6b8effc8ea upstream
The integrated assembler of Clang 10 and earlier does not allow access to the VFP registers through the coprocessor load/store instructions: <instantiation>:4:6: error: invalid operand for instruction LDC p11, cr0, [r10],#32*4 @ FLDMIAD r10!, {d0-d15} ^
This has been addressed with Clang 11 [0]. However, to support earlier versions of Clang and for better readability, use of VFP assembler mnemonics is still preferred.
Replace the coprocessor load/store instructions with explicit assembler mnemonics to access the floating point coprocessor registers. Use assembler directives to select the appropriate FPU version.
This allows these macros to be built with the GNU assembler as well as with Clang's built-in assembler.
[0] https://reviews.llvm.org/D59733
Link: https://github.com/ClangBuiltLinux/linux/issues/905
Signed-off-by: Stefan Agner stefan@agner.ch Signed-off-by: Russell King rmk+kernel@armlinux.org.uk Signed-off-by: Florian Fainelli f.fainelli@gmail.com --- arch/arm/include/asm/vfpmacros.h | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-)
diff --git a/arch/arm/include/asm/vfpmacros.h b/arch/arm/include/asm/vfpmacros.h index 628c336e8e3b..947ee5395e1f 100644 --- a/arch/arm/include/asm/vfpmacros.h +++ b/arch/arm/include/asm/vfpmacros.h @@ -19,23 +19,25 @@
@ read all the working registers back into the VFP .macro VFPFLDMIA, base, tmp + .fpu vfpv2 #if __LINUX_ARM_ARCH__ < 6 - LDC p11, cr0, [\base],#33*4 @ FLDMIAX \base!, {d0-d15} + fldmiax \base!, {d0-d15} #else - LDC p11, cr0, [\base],#32*4 @ FLDMIAD \base!, {d0-d15} + vldmia \base!, {d0-d15} #endif #ifdef CONFIG_VFPv3 + .fpu vfpv3 #if __LINUX_ARM_ARCH__ <= 6 ldr \tmp, =elf_hwcap @ may not have MVFR regs ldr \tmp, [\tmp, #0] tst \tmp, #HWCAP_VFPD32 - ldclne p11, cr0, [\base],#32*4 @ FLDMIAD \base!, {d16-d31} + vldmiane \base!, {d16-d31} addeq \base, \base, #32*4 @ step over unused register space #else VFPFMRX \tmp, MVFR0 @ Media and VFP Feature Register 0 and \tmp, \tmp, #MVFR0_A_SIMD_MASK @ A_SIMD field cmp \tmp, #2 @ 32 x 64bit registers? - ldcleq p11, cr0, [\base],#32*4 @ FLDMIAD \base!, {d16-d31} + vldmiaeq \base!, {d16-d31} addne \base, \base, #32*4 @ step over unused register space #endif #endif @@ -44,22 +46,23 @@ @ write all the working registers out of the VFP .macro VFPFSTMIA, base, tmp #if __LINUX_ARM_ARCH__ < 6 - STC p11, cr0, [\base],#33*4 @ FSTMIAX \base!, {d0-d15} + fstmiax \base!, {d0-d15} #else - STC p11, cr0, [\base],#32*4 @ FSTMIAD \base!, {d0-d15} + vstmia \base!, {d0-d15} #endif #ifdef CONFIG_VFPv3 + .fpu vfpv3 #if __LINUX_ARM_ARCH__ <= 6 ldr \tmp, =elf_hwcap @ may not have MVFR regs ldr \tmp, [\tmp, #0] tst \tmp, #HWCAP_VFPD32 - stclne p11, cr0, [\base],#32*4 @ FSTMIAD \base!, {d16-d31} + vstmiane \base!, {d16-d31} addeq \base, \base, #32*4 @ step over unused register space #else VFPFMRX \tmp, MVFR0 @ Media and VFP Feature Register 0 and \tmp, \tmp, #MVFR0_A_SIMD_MASK @ A_SIMD field cmp \tmp, #2 @ 32 x 64bit registers? - stcleq p11, cr0, [\base],#32*4 @ FSTMIAD \base!, {d16-d31} + vstmiaeq \base!, {d16-d31} addne \base, \base, #32*4 @ step over unused register space #endif #endif
From: Jian Cai caij2003@gmail.com
commit a780e485b5768e78aef087502499714901b68cc4 upstream
ALT_UP_B macro sets symbol up_b_offset via .equ to an expression involving another symbol. The macro gets expanded twice when arch/arm/kernel/sleep.S is assembled, creating a scenario where up_b_offset is set to another expression involving symbols while its current value is based on symbols. LLVM integrated assembler does not allow such cases, and based on the documentation of binutils, "Values that are based on expressions involving other symbols are allowed, but some targets may restrict this to only being done once per assembly", so it may be better to avoid such cases as it is not clearly stated which targets should support or disallow them. The fix in this case is simple, as up_b_offset has only one use, so we can replace the use with the definition and get rid of up_b_offset.
Link: https://github.com/ClangBuiltLinux/linux/issues/920
Reviewed-by: Stefan Agner stefan@agner.ch
Reviewed-by: Nick Desaulniers ndesaulniers@google.com Signed-off-by: Jian Cai caij2003@gmail.com Signed-off-by: Russell King rmk+kernel@armlinux.org.uk Signed-off-by: Florian Fainelli f.fainelli@gmail.com --- arch/arm/include/asm/assembler.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 6b3e64e19fb6..70e1c23feedb 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -279,10 +279,9 @@ .endif ;\ .popsection #define ALT_UP_B(label) \ - .equ up_b_offset, label - 9998b ;\ .pushsection ".alt.smp.init", "a" ;\ .long 9998b ;\ - W(b) . + up_b_offset ;\ + W(b) . + (label - 9998b) ;\ .popsection #else #define ALT_SMP(instr...)
From: Ard Biesheuvel ardb@kernel.org
commit 54781938ec342cadbe2d76669ef8d3294d909974 upstream
The ADRL pseudo instruction is not an architectural construct, but a convenience macro that was supported by the ARM proprietary assembler and adopted by binutils GAS as well, but only when assembling in 32-bit ARM mode. Therefore, it can only be used in assembler code that is known to assemble in ARM mode only, but as it turns out, the Clang assembler does not implement ADRL at all, and so it is better to get rid of it entirely.
So replace the ADRL instruction with an ADR instruction that refers to a nearer symbol, and apply the delta explicitly using an additional instruction.
Signed-off-by: Ard Biesheuvel ardb@kernel.org Tested-by: Nick Desaulniers ndesaulniers@google.com Signed-off-by: Herbert Xu herbert@gondor.apana.org.au Signed-off-by: Florian Fainelli f.fainelli@gmail.com --- arch/arm/crypto/sha256-armv4.pl | 4 ++-- arch/arm/crypto/sha256-core.S_shipped | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/arch/arm/crypto/sha256-armv4.pl b/arch/arm/crypto/sha256-armv4.pl index a03cf4dfb781..d927483985c2 100644 --- a/arch/arm/crypto/sha256-armv4.pl +++ b/arch/arm/crypto/sha256-armv4.pl @@ -175,7 +175,6 @@ $code=<<___; #else .syntax unified # ifdef __thumb2__ -# define adrl adr .thumb # else .code 32 @@ -471,7 +470,8 @@ sha256_block_data_order_neon: stmdb sp!,{r4-r12,lr}
sub $H,sp,#16*4+16 - adrl $Ktbl,K256 + adr $Ktbl,.Lsha256_block_data_order + sub $Ktbl,$Ktbl,#.Lsha256_block_data_order-K256 bic $H,$H,#15 @ align for 128-bit stores mov $t2,sp mov sp,$H @ alloca diff --git a/arch/arm/crypto/sha256-core.S_shipped b/arch/arm/crypto/sha256-core.S_shipped index 054aae0edfce..9deb515f3c9f 100644 --- a/arch/arm/crypto/sha256-core.S_shipped +++ b/arch/arm/crypto/sha256-core.S_shipped @@ -56,7 +56,6 @@ #else .syntax unified # ifdef __thumb2__ -# define adrl adr .thumb # else .code 32 @@ -1885,7 +1884,8 @@ sha256_block_data_order_neon: stmdb sp!,{r4-r12,lr}
sub r11,sp,#16*4+16 - adrl r14,K256 + adr r14,.Lsha256_block_data_order + sub r14,r14,#.Lsha256_block_data_order-K256 bic r11,r11,#15 @ align for 128-bit stores mov r12,sp mov sp,r11 @ alloca
From: Ard Biesheuvel ardb@kernel.org
commit 0f5e8323777bfc1c1d2cba71242db6a361de03b6 upstream
The ADRL pseudo instruction is not an architectural construct, but a convenience macro that was supported by the ARM proprietary assembler and adopted by binutils GAS as well, but only when assembling in 32-bit ARM mode. Therefore, it can only be used in assembler code that is known to assemble in ARM mode only, but as it turns out, the Clang assembler does not implement ADRL at all, and so it is better to get rid of it entirely.
So replace the ADRL instruction with an ADR instruction that refers to a nearer symbol, and apply the delta explicitly using an additional instruction.
Signed-off-by: Ard Biesheuvel ardb@kernel.org Tested-by: Nick Desaulniers ndesaulniers@google.com Signed-off-by: Herbert Xu herbert@gondor.apana.org.au Signed-off-by: Florian Fainelli f.fainelli@gmail.com --- arch/arm/crypto/sha512-armv4.pl | 4 ++-- arch/arm/crypto/sha512-core.S_shipped | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/arch/arm/crypto/sha512-armv4.pl b/arch/arm/crypto/sha512-armv4.pl index 788c17b56ecc..2a0bdf7dd87c 100644 --- a/arch/arm/crypto/sha512-armv4.pl +++ b/arch/arm/crypto/sha512-armv4.pl @@ -212,7 +212,6 @@ $code=<<___; #else .syntax unified # ifdef __thumb2__ -# define adrl adr .thumb # else .code 32 @@ -602,7 +601,8 @@ sha512_block_data_order_neon: dmb @ errata #451034 on early Cortex A8 add $len,$inp,$len,lsl#7 @ len to point at the end of inp VFP_ABI_PUSH - adrl $Ktbl,K512 + adr $Ktbl,.Lsha512_block_data_order + sub $Ktbl,$Ktbl,.Lsha512_block_data_order-K512 vldmia $ctx,{$A-$H} @ load context .Loop_neon: ___ diff --git a/arch/arm/crypto/sha512-core.S_shipped b/arch/arm/crypto/sha512-core.S_shipped index 710ea309769e..cf5a7a70ff00 100644 --- a/arch/arm/crypto/sha512-core.S_shipped +++ b/arch/arm/crypto/sha512-core.S_shipped @@ -79,7 +79,6 @@ #else .syntax unified # ifdef __thumb2__ -# define adrl adr .thumb # else .code 32 @@ -543,7 +542,8 @@ sha512_block_data_order_neon: dmb @ errata #451034 on early Cortex A8 add r2,r1,r2,lsl#7 @ len to point at the end of inp VFP_ABI_PUSH - adrl r3,K512 + adr r3,.Lsha512_block_data_order + sub r3,r3,.Lsha512_block_data_order-K512 vldmia r0,{d16-d23} @ load context .Loop_neon: vshr.u64 d24,d20,#14 @ 0
From: Nick Desaulniers ndesaulniers@google.com
commit 790756c7e0229dedc83bf058ac69633045b1000e upstream
It looks like a section directive was using "Solaris style" to declare the section flags. Replace this with the GNU style so that Clang's integrated assembler can assemble this directive.
The modified instances were identified via: $ ag .section | grep #
Link: https://ftp.gnu.org/old-gnu/Manuals/gas-2.9.1/html_chapter/as_7.html#SEC119 Link: https://github.com/ClangBuiltLinux/linux/issues/744 Link: https://bugs.llvm.org/show_bug.cgi?id=43759 Link: https://reviews.llvm.org/D69296
Acked-by: Nicolas Pitre nico@fluxnic.net Reviewed-by: Ard Biesheuvel ardb@kernel.org Reviewed-by: Stefan Agner stefan@agner.ch Signed-off-by: Nick Desaulniers ndesaulniers@google.com Suggested-by: Fangrui Song maskray@google.com Suggested-by: Jian Cai jiancai@google.com Suggested-by: Peter Smith peter.smith@linaro.org Signed-off-by: Russell King rmk+kernel@armlinux.org.uk Signed-off-by: Florian Fainelli f.fainelli@gmail.com --- arch/arm/boot/bootp/init.S | 2 +- arch/arm/boot/compressed/big-endian.S | 2 +- arch/arm/boot/compressed/head.S | 2 +- arch/arm/boot/compressed/piggy.S | 2 +- arch/arm/mm/proc-arm1020.S | 2 +- arch/arm/mm/proc-arm1020e.S | 2 +- arch/arm/mm/proc-arm1022.S | 2 +- arch/arm/mm/proc-arm1026.S | 2 +- arch/arm/mm/proc-arm720.S | 2 +- arch/arm/mm/proc-arm740.S | 2 +- arch/arm/mm/proc-arm7tdmi.S | 2 +- arch/arm/mm/proc-arm920.S | 2 +- arch/arm/mm/proc-arm922.S | 2 +- arch/arm/mm/proc-arm925.S | 2 +- arch/arm/mm/proc-arm926.S | 2 +- arch/arm/mm/proc-arm940.S | 2 +- arch/arm/mm/proc-arm946.S | 2 +- arch/arm/mm/proc-arm9tdmi.S | 2 +- arch/arm/mm/proc-fa526.S | 2 +- arch/arm/mm/proc-feroceon.S | 2 +- arch/arm/mm/proc-mohawk.S | 2 +- arch/arm/mm/proc-sa110.S | 2 +- arch/arm/mm/proc-sa1100.S | 2 +- arch/arm/mm/proc-v6.S | 2 +- arch/arm/mm/proc-v7.S | 2 +- arch/arm/mm/proc-v7m.S | 4 ++-- arch/arm/mm/proc-xsc3.S | 2 +- arch/arm/mm/proc-xscale.S | 2 +- 28 files changed, 29 insertions(+), 29 deletions(-)
diff --git a/arch/arm/boot/bootp/init.S b/arch/arm/boot/bootp/init.S index 5c476bd2b4ce..b562da2f7040 100644 --- a/arch/arm/boot/bootp/init.S +++ b/arch/arm/boot/bootp/init.S @@ -13,7 +13,7 @@ * size immediately following the kernel, we could build this into * a binary blob, and concatenate the zImage using the cat command. */ - .section .start,#alloc,#execinstr + .section .start, "ax" .type _start, #function .globl _start
diff --git a/arch/arm/boot/compressed/big-endian.S b/arch/arm/boot/compressed/big-endian.S index 88e2a88d324b..0e092c36da2f 100644 --- a/arch/arm/boot/compressed/big-endian.S +++ b/arch/arm/boot/compressed/big-endian.S @@ -6,7 +6,7 @@ * Author: Nicolas Pitre */
- .section ".start", #alloc, #execinstr + .section ".start", "ax"
mrc p15, 0, r0, c1, c0, 0 @ read control reg orr r0, r0, #(1 << 7) @ enable big endian mode diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 0a2410adc25b..cdaf94027d3b 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -140,7 +140,7 @@ #endif .endm
- .section ".start", #alloc, #execinstr + .section ".start", "ax" /* * sort out different calling conventions */ diff --git a/arch/arm/boot/compressed/piggy.S b/arch/arm/boot/compressed/piggy.S index 0284f84dcf38..27577644ee72 100644 --- a/arch/arm/boot/compressed/piggy.S +++ b/arch/arm/boot/compressed/piggy.S @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ - .section .piggydata,#alloc + .section .piggydata, "a" .globl input_data input_data: .incbin "arch/arm/boot/compressed/piggy_data" diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S index 4fa5371bc662..2785da387c91 100644 --- a/arch/arm/mm/proc-arm1020.S +++ b/arch/arm/mm/proc-arm1020.S @@ -491,7 +491,7 @@ cpu_arm1020_name:
.align
- .section ".proc.info.init", #alloc + .section ".proc.info.init", "a"
.type __arm1020_proc_info,#object __arm1020_proc_info: diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S index 5d8a8339e09a..e9ea237ed785 100644 --- a/arch/arm/mm/proc-arm1020e.S +++ b/arch/arm/mm/proc-arm1020e.S @@ -449,7 +449,7 @@ arm1020e_crval:
.align
- .section ".proc.info.init", #alloc + .section ".proc.info.init", "a"
.type __arm1020e_proc_info,#object __arm1020e_proc_info: diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S index b3dd95c345e4..920c279e7879 100644 --- a/arch/arm/mm/proc-arm1022.S +++ b/arch/arm/mm/proc-arm1022.S @@ -443,7 +443,7 @@ arm1022_crval:
.align
- .section ".proc.info.init", #alloc + .section ".proc.info.init", "a"
.type __arm1022_proc_info,#object __arm1022_proc_info: diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S index ac5afde12f35..10e21012380b 100644 --- a/arch/arm/mm/proc-arm1026.S +++ b/arch/arm/mm/proc-arm1026.S @@ -437,7 +437,7 @@ arm1026_crval: string cpu_arm1026_name, "ARM1026EJ-S" .align
- .section ".proc.info.init", #alloc + .section ".proc.info.init", "a"
.type __arm1026_proc_info,#object __arm1026_proc_info: diff --git a/arch/arm/mm/proc-arm720.S b/arch/arm/mm/proc-arm720.S index c99d24363f32..39361e196d61 100644 --- a/arch/arm/mm/proc-arm720.S +++ b/arch/arm/mm/proc-arm720.S @@ -172,7 +172,7 @@ arm720_crval: * See <asm/procinfo.h> for a definition of this structure. */ - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a"
.macro arm720_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cpu_flush:req .type __\name()_proc_info,#object diff --git a/arch/arm/mm/proc-arm740.S b/arch/arm/mm/proc-arm740.S index 1b4a3838393f..1a94bbf6e53f 100644 --- a/arch/arm/mm/proc-arm740.S +++ b/arch/arm/mm/proc-arm740.S @@ -128,7 +128,7 @@ __arm740_setup:
.align
- .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __arm740_proc_info,#object __arm740_proc_info: .long 0x41807400 diff --git a/arch/arm/mm/proc-arm7tdmi.S b/arch/arm/mm/proc-arm7tdmi.S index 17a4687065c7..52b66cf0259e 100644 --- a/arch/arm/mm/proc-arm7tdmi.S +++ b/arch/arm/mm/proc-arm7tdmi.S @@ -72,7 +72,7 @@ __arm7tdmi_setup:
.align
- .section ".proc.info.init", #alloc + .section ".proc.info.init", "a"
.macro arm7tdmi_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, \ extra_hwcaps=0 diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S index 298c76b47749..31ac8acc34dc 100644 --- a/arch/arm/mm/proc-arm920.S +++ b/arch/arm/mm/proc-arm920.S @@ -434,7 +434,7 @@ arm920_crval:
.align
- .section ".proc.info.init", #alloc + .section ".proc.info.init", "a"
.type __arm920_proc_info,#object __arm920_proc_info: diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S index 824be3a0bc23..ca2c7ca8af21 100644 --- a/arch/arm/mm/proc-arm922.S +++ b/arch/arm/mm/proc-arm922.S @@ -412,7 +412,7 @@ arm922_crval:
.align
- .section ".proc.info.init", #alloc + .section ".proc.info.init", "a"
.type __arm922_proc_info,#object __arm922_proc_info: diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S index d40cff8f102c..a381a0c9f109 100644 --- a/arch/arm/mm/proc-arm925.S +++ b/arch/arm/mm/proc-arm925.S @@ -477,7 +477,7 @@ arm925_crval:
.align
- .section ".proc.info.init", #alloc + .section ".proc.info.init", "a"
.macro arm925_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache .type __\name()_proc_info,#object diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S index f3cd08f353f0..3188ab2bac61 100644 --- a/arch/arm/mm/proc-arm926.S +++ b/arch/arm/mm/proc-arm926.S @@ -460,7 +460,7 @@ arm926_crval:
.align
- .section ".proc.info.init", #alloc + .section ".proc.info.init", "a"
.type __arm926_proc_info,#object __arm926_proc_info: diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S index 1c26d991386d..4b8a00220cc9 100644 --- a/arch/arm/mm/proc-arm940.S +++ b/arch/arm/mm/proc-arm940.S @@ -340,7 +340,7 @@ __arm940_setup:
.align
- .section ".proc.info.init", #alloc + .section ".proc.info.init", "a"
.type __arm940_proc_info,#object __arm940_proc_info: diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S index 2dc1c75a4fd4..555becf9c758 100644 --- a/arch/arm/mm/proc-arm946.S +++ b/arch/arm/mm/proc-arm946.S @@ -395,7 +395,7 @@ __arm946_setup:
.align
- .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __arm946_proc_info,#object __arm946_proc_info: .long 0x41009460 diff --git a/arch/arm/mm/proc-arm9tdmi.S b/arch/arm/mm/proc-arm9tdmi.S index 913c06e590af..ef517530130b 100644 --- a/arch/arm/mm/proc-arm9tdmi.S +++ b/arch/arm/mm/proc-arm9tdmi.S @@ -66,7 +66,7 @@ __arm9tdmi_setup:
.align
- .section ".proc.info.init", #alloc + .section ".proc.info.init", "a"
.macro arm9tdmi_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req .type __\name()_proc_info, #object diff --git a/arch/arm/mm/proc-fa526.S b/arch/arm/mm/proc-fa526.S index 8120b6f4dbb8..dddf833fe000 100644 --- a/arch/arm/mm/proc-fa526.S +++ b/arch/arm/mm/proc-fa526.S @@ -185,7 +185,7 @@ fa526_cr1_set:
.align
- .section ".proc.info.init", #alloc + .section ".proc.info.init", "a"
.type __fa526_proc_info,#object __fa526_proc_info: diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S index bb6dc34d42a3..b12b76bc8d30 100644 --- a/arch/arm/mm/proc-feroceon.S +++ b/arch/arm/mm/proc-feroceon.S @@ -571,7 +571,7 @@ feroceon_crval:
.align
- .section ".proc.info.init", #alloc + .section ".proc.info.init", "a"
.macro feroceon_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache:req .type __\name()_proc_info,#object diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S index f08308578885..d47d6c5cee63 100644 --- a/arch/arm/mm/proc-mohawk.S +++ b/arch/arm/mm/proc-mohawk.S @@ -416,7 +416,7 @@ mohawk_crval:
.align
- .section ".proc.info.init", #alloc + .section ".proc.info.init", "a"
.type __88sv331x_proc_info,#object __88sv331x_proc_info: diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S index d5bc5d702563..baba503ba816 100644 --- a/arch/arm/mm/proc-sa110.S +++ b/arch/arm/mm/proc-sa110.S @@ -196,7 +196,7 @@ sa110_crval:
.align
- .section ".proc.info.init", #alloc + .section ".proc.info.init", "a"
.type __sa110_proc_info,#object __sa110_proc_info: diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S index be7b611c76c7..75ebacc8e4e5 100644 --- a/arch/arm/mm/proc-sa1100.S +++ b/arch/arm/mm/proc-sa1100.S @@ -239,7 +239,7 @@ sa1100_crval:
.align
- .section ".proc.info.init", #alloc + .section ".proc.info.init", "a"
.macro sa1100_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req .type __\name()_proc_info,#object diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S index c1c85eb3484f..1dd0d5ca27da 100644 --- a/arch/arm/mm/proc-v6.S +++ b/arch/arm/mm/proc-v6.S @@ -261,7 +261,7 @@ v6_crval: string cpu_elf_name, "v6" .align
- .section ".proc.info.init", #alloc + .section ".proc.info.init", "a"
/* * Match any ARMv6 processor core. diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index c4e8006a1a8c..48e0ef6f0dcc 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -644,7 +644,7 @@ __v7_setup_stack: string cpu_elf_name, "v7" .align
- .section ".proc.info.init", #alloc + .section ".proc.info.init", "a"
/* * Standard v7 proc info content diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S index 1a49d503eafc..84459c1d31b8 100644 --- a/arch/arm/mm/proc-v7m.S +++ b/arch/arm/mm/proc-v7m.S @@ -93,7 +93,7 @@ ENTRY(cpu_cm7_proc_fin) ret lr ENDPROC(cpu_cm7_proc_fin)
- .section ".init.text", #alloc, #execinstr + .section ".init.text", "ax"
__v7m_cm7_setup: mov r8, #(V7M_SCB_CCR_DC | V7M_SCB_CCR_IC| V7M_SCB_CCR_BP) @@ -177,7 +177,7 @@ ENDPROC(__v7m_setup) string cpu_elf_name "v7m" string cpu_v7m_name "ARMv7-M"
- .section ".proc.info.init", #alloc + .section ".proc.info.init", "a"
.macro __v7m_proc name, initfunc, cache_fns = nop_cache_fns, hwcaps = 0, proc_fns = v7m_processor_functions .long 0 /* proc_info_list.__cpu_mm_mmu_flags */ diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S index 1ac0fbbe9f12..42eaecc43cfe 100644 --- a/arch/arm/mm/proc-xsc3.S +++ b/arch/arm/mm/proc-xsc3.S @@ -496,7 +496,7 @@ xsc3_crval:
.align
- .section ".proc.info.init", #alloc + .section ".proc.info.init", "a"
.macro xsc3_proc_info name:req, cpu_val:req, cpu_mask:req .type __\name()_proc_info,#object diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S index bdb2b7749b03..18ac5a1f8922 100644 --- a/arch/arm/mm/proc-xscale.S +++ b/arch/arm/mm/proc-xscale.S @@ -610,7 +610,7 @@ xscale_crval:
.align
- .section ".proc.info.init", #alloc + .section ".proc.info.init", "a"
.macro xscale_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache .type __\name()_proc_info,#object
From: Stefan Agner stefan@agner.ch
commit 9f1984c6ae30e2a379751339ce3375a21099b5d4 upstream
LLVM's integrated assembler does not accept r15 as mrc operand. arch/arm/boot/compressed/head.S:1267:16: error: operand must be a register in range [r0, r14] or apsr_nzcv 1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate D cache ^
Use APSR_nzcv instead of r15. The GNU assembler supports this syntax since binutils 2.21 [0].
[0] https://sourceware.org/git/gitweb.cgi?p=binutils-gdb.git%3Ba=commit%3Bh=db47...
Signed-off-by: Stefan Agner stefan@agner.ch Signed-off-by: Russell King rmk+kernel@armlinux.org.uk Signed-off-by: Florian Fainelli f.fainelli@gmail.com --- arch/arm/boot/compressed/head.S | 2 +- arch/arm/mm/proc-arm1026.S | 4 ++-- arch/arm/mm/proc-arm926.S | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index cdaf94027d3b..17f87f4c74f5 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -1274,7 +1274,7 @@ iflush: __armv5tej_mmu_cache_flush: tst r4, #1 movne pc, lr -1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate D cache +1: mrc p15, 0, APSR_nzcv, c7, c14, 3 @ test,clean,invalidate D cache bne 1b mcr p15, 0, r0, c7, c5, 0 @ flush I cache mcr p15, 0, r0, c7, c10, 4 @ drain WB diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S index 10e21012380b..0bdf25a95b10 100644 --- a/arch/arm/mm/proc-arm1026.S +++ b/arch/arm/mm/proc-arm1026.S @@ -138,7 +138,7 @@ ENTRY(arm1026_flush_kern_cache_all) mov ip, #0 __flush_whole_cache: #ifndef CONFIG_CPU_DCACHE_DISABLE -1: mrc p15, 0, r15, c7, c14, 3 @ test, clean, invalidate +1: mrc p15, 0, APSR_nzcv, c7, c14, 3 @ test, clean, invalidate bne 1b #endif tst r2, #VM_EXEC @@ -363,7 +363,7 @@ ENTRY(cpu_arm1026_switch_mm) #ifdef CONFIG_MMU mov r1, #0 #ifndef CONFIG_CPU_DCACHE_DISABLE -1: mrc p15, 0, r15, c7, c14, 3 @ test, clean, invalidate +1: mrc p15, 0, APSR_nzcv, c7, c14, 3 @ test, clean, invalidate bne 1b #endif #ifndef CONFIG_CPU_ICACHE_DISABLE diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S index 3188ab2bac61..1ba253c2bce1 100644 --- a/arch/arm/mm/proc-arm926.S +++ b/arch/arm/mm/proc-arm926.S @@ -131,7 +131,7 @@ __flush_whole_cache: #ifdef CONFIG_CPU_DCACHE_WRITETHROUGH mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache #else -1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate +1: mrc p15, 0, APSR_nzcv, c7, c14, 3 @ test,clean,invalidate bne 1b #endif tst r2, #VM_EXEC @@ -358,7 +358,7 @@ ENTRY(cpu_arm926_switch_mm) mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache #else @ && 'Clean & Invalidate whole DCache' -1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate +1: mrc p15, 0, APSR_nzcv, c7, c14, 3 @ test,clean,invalidate bne 1b #endif mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache
From: Stefan Agner stefan@agner.ch
commit d85d5247885ef2e8192287b895c2e381fa931b0b upstream
The adrl instruction has been introduced with commit dd31394779aa ("ARM: omap3: Thumb-2 compatibility for sleep34xx.S"), back when this assembly file was considerably longer. Today adr seems to have enough reach, even when inserting about 60 instructions between the use site and the label. Replace adrl with conventional adr instruction.
This allows this file to be built using Clang's integrated assembler (which does not support the adrl pseudo instruction).
Link: https://github.com/ClangBuiltLinux/linux/issues/430 Signed-off-by: Stefan Agner stefan@agner.ch Signed-off-by: Tony Lindgren tony@atomide.com Signed-off-by: Florian Fainelli f.fainelli@gmail.com --- arch/arm/mach-omap2/sleep34xx.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm/mach-omap2/sleep34xx.S b/arch/arm/mach-omap2/sleep34xx.S index ac1324c6453b..c4e97d35c310 100644 --- a/arch/arm/mach-omap2/sleep34xx.S +++ b/arch/arm/mach-omap2/sleep34xx.S @@ -72,7 +72,7 @@ ENTRY(enable_omap3630_toggle_l2_on_restore) stmfd sp!, {lr} @ save registers on stack /* Setup so that we will disable and enable l2 */ mov r1, #0x1 - adrl r3, l2dis_3630_offset @ may be too distant for plain adr + adr r3, l2dis_3630_offset ldr r2, [r3] @ value for offset str r1, [r2, r3] @ write to l2dis_3630 ldmfd sp!, {pc} @ restore regs and return
From: Jian Cai jiancai@google.com
commit 3c9f5708b7aed6a963e2aefccbd1854802de163e upstream
This patch replaces 6 IWMMXT instructions in iwmmxt.S that Clang's integrated assembler does not support with macros, while making sure the GNU assembler still emits the same instructions. This should be easier than providing full IWMMXT support in Clang. This is one of the last bits of kernel code that could be compiled but not assembled with clang. Once all of it works with IAS, we no longer need to special-case 32-bit Arm in Kbuild, or turn off CONFIG_IWMMXT when build-testing.
"Intel Wireless MMX Technology - Developer Guide - August, 2002" should be referenced for the encoding schemes of these extensions.
Link: https://github.com/ClangBuiltLinux/linux/issues/975
Suggested-by: Nick Desaulniers ndesaulniers@google.com Suggested-by: Ard Biesheuvel ardb@kernel.org Acked-by: Ard Biesheuvel ardb@kernel.org Reviewed-by: Nick Desaulniers ndesaulniers@google.com Tested-by: Nick Desaulniers ndesaulniers@google.com Signed-off-by: Jian Cai jiancai@google.com Signed-off-by: Russell King rmk+kernel@armlinux.org.uk Signed-off-by: Florian Fainelli f.fainelli@gmail.com --- arch/arm/kernel/iwmmxt.S | 89 ++++++++++++++++++++-------------------- arch/arm/kernel/iwmmxt.h | 47 +++++++++++++++++++++ 2 files changed, 92 insertions(+), 44 deletions(-) create mode 100644 arch/arm/kernel/iwmmxt.h
diff --git a/arch/arm/kernel/iwmmxt.S b/arch/arm/kernel/iwmmxt.S index 0dcae787b004..d2b4ac06e4ed 100644 --- a/arch/arm/kernel/iwmmxt.S +++ b/arch/arm/kernel/iwmmxt.S @@ -16,6 +16,7 @@ #include <asm/thread_info.h> #include <asm/asm-offsets.h> #include <asm/assembler.h> +#include "iwmmxt.h"
#if defined(CONFIG_CPU_PJ4) || defined(CONFIG_CPU_PJ4B) #define PJ4(code...) code @@ -113,33 +114,33 @@ concan_save:
concan_dump:
- wstrw wCSSF, [r1, #MMX_WCSSF] - wstrw wCASF, [r1, #MMX_WCASF] - wstrw wCGR0, [r1, #MMX_WCGR0] - wstrw wCGR1, [r1, #MMX_WCGR1] - wstrw wCGR2, [r1, #MMX_WCGR2] - wstrw wCGR3, [r1, #MMX_WCGR3] + wstrw wCSSF, r1, MMX_WCSSF + wstrw wCASF, r1, MMX_WCASF + wstrw wCGR0, r1, MMX_WCGR0 + wstrw wCGR1, r1, MMX_WCGR1 + wstrw wCGR2, r1, MMX_WCGR2 + wstrw wCGR3, r1, MMX_WCGR3
1: @ MUP? wRn tst r2, #0x2 beq 2f
- wstrd wR0, [r1, #MMX_WR0] - wstrd wR1, [r1, #MMX_WR1] - wstrd wR2, [r1, #MMX_WR2] - wstrd wR3, [r1, #MMX_WR3] - wstrd wR4, [r1, #MMX_WR4] - wstrd wR5, [r1, #MMX_WR5] - wstrd wR6, [r1, #MMX_WR6] - wstrd wR7, [r1, #MMX_WR7] - wstrd wR8, [r1, #MMX_WR8] - wstrd wR9, [r1, #MMX_WR9] - wstrd wR10, [r1, #MMX_WR10] - wstrd wR11, [r1, #MMX_WR11] - wstrd wR12, [r1, #MMX_WR12] - wstrd wR13, [r1, #MMX_WR13] - wstrd wR14, [r1, #MMX_WR14] - wstrd wR15, [r1, #MMX_WR15] + wstrd wR0, r1, MMX_WR0 + wstrd wR1, r1, MMX_WR1 + wstrd wR2, r1, MMX_WR2 + wstrd wR3, r1, MMX_WR3 + wstrd wR4, r1, MMX_WR4 + wstrd wR5, r1, MMX_WR5 + wstrd wR6, r1, MMX_WR6 + wstrd wR7, r1, MMX_WR7 + wstrd wR8, r1, MMX_WR8 + wstrd wR9, r1, MMX_WR9 + wstrd wR10, r1, MMX_WR10 + wstrd wR11, r1, MMX_WR11 + wstrd wR12, r1, MMX_WR12 + wstrd wR13, r1, MMX_WR13 + wstrd wR14, r1, MMX_WR14 + wstrd wR15, r1, MMX_WR15
2: teq r0, #0 @ anything to load? reteq lr @ if not, return @@ -147,30 +148,30 @@ concan_dump: concan_load:
@ Load wRn - wldrd wR0, [r0, #MMX_WR0] - wldrd wR1, [r0, #MMX_WR1] - wldrd wR2, [r0, #MMX_WR2] - wldrd wR3, [r0, #MMX_WR3] - wldrd wR4, [r0, #MMX_WR4] - wldrd wR5, [r0, #MMX_WR5] - wldrd wR6, [r0, #MMX_WR6] - wldrd wR7, [r0, #MMX_WR7] - wldrd wR8, [r0, #MMX_WR8] - wldrd wR9, [r0, #MMX_WR9] - wldrd wR10, [r0, #MMX_WR10] - wldrd wR11, [r0, #MMX_WR11] - wldrd wR12, [r0, #MMX_WR12] - wldrd wR13, [r0, #MMX_WR13] - wldrd wR14, [r0, #MMX_WR14] - wldrd wR15, [r0, #MMX_WR15] + wldrd wR0, r0, MMX_WR0 + wldrd wR1, r0, MMX_WR1 + wldrd wR2, r0, MMX_WR2 + wldrd wR3, r0, MMX_WR3 + wldrd wR4, r0, MMX_WR4 + wldrd wR5, r0, MMX_WR5 + wldrd wR6, r0, MMX_WR6 + wldrd wR7, r0, MMX_WR7 + wldrd wR8, r0, MMX_WR8 + wldrd wR9, r0, MMX_WR9 + wldrd wR10, r0, MMX_WR10 + wldrd wR11, r0, MMX_WR11 + wldrd wR12, r0, MMX_WR12 + wldrd wR13, r0, MMX_WR13 + wldrd wR14, r0, MMX_WR14 + wldrd wR15, r0, MMX_WR15
@ Load wCx - wldrw wCSSF, [r0, #MMX_WCSSF] - wldrw wCASF, [r0, #MMX_WCASF] - wldrw wCGR0, [r0, #MMX_WCGR0] - wldrw wCGR1, [r0, #MMX_WCGR1] - wldrw wCGR2, [r0, #MMX_WCGR2] - wldrw wCGR3, [r0, #MMX_WCGR3] + wldrw wCSSF, r0, MMX_WCSSF + wldrw wCASF, r0, MMX_WCASF + wldrw wCGR0, r0, MMX_WCGR0 + wldrw wCGR1, r0, MMX_WCGR1 + wldrw wCGR2, r0, MMX_WCGR2 + wldrw wCGR3, r0, MMX_WCGR3
@ clear CUP/MUP (only if r1 != 0) teq r1, #0 diff --git a/arch/arm/kernel/iwmmxt.h b/arch/arm/kernel/iwmmxt.h new file mode 100644 index 000000000000..fb627286f5bb --- /dev/null +++ b/arch/arm/kernel/iwmmxt.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __IWMMXT_H__ +#define __IWMMXT_H__ + +.irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +.set .LwR\b, \b +.set .Lr\b, \b +.endr + +.set .LwCSSF, 0x2 +.set .LwCASF, 0x3 +.set .LwCGR0, 0x8 +.set .LwCGR1, 0x9 +.set .LwCGR2, 0xa +.set .LwCGR3, 0xb + +.macro wldrd, reg:req, base:req, offset:req +.inst 0xedd00100 | (.L\reg << 12) | (.L\base << 16) | (\offset >> 2) +.endm + +.macro wldrw, reg:req, base:req, offset:req +.inst 0xfd900100 | (.L\reg << 12) | (.L\base << 16) | (\offset >> 2) +.endm + +.macro wstrd, reg:req, base:req, offset:req +.inst 0xedc00100 | (.L\reg << 12) | (.L\base << 16) | (\offset >> 2) +.endm + +.macro wstrw, reg:req, base:req, offset:req +.inst 0xfd800100 | (.L\reg << 12) | (.L\base << 16) | (\offset >> 2) +.endm + +#ifdef __clang__ + +#define wCon c1 + +.macro tmrc, dest:req, control:req +mrc p1, 0, \dest, \control, c0, 0 +.endm + +.macro tmcr, control:req, src:req +mcr p1, 0, \src, \control, c0, 0 +.endm +#endif + +#endif
From: Ard Biesheuvel ard.biesheuvel@linaro.org
commit b4d0c0aad57ac3bd1b5141bac5ab1ab1d5e442b3 upstream
Instead of allowing the Crypto Extensions algorithms to be selected when using a toolchain that does not support them, and complaining about it at build time, use the information we have about the compiler to prevent them from being selected in the first place. Users that are stuck with a GCC version <4.8 are unlikely to care about these routines anyway, and it cleans up the Makefile considerably.
While at it, add explicit 'armv8-a' CPU specifiers to the code that uses the 'crypto-neon-fp-armv8' FPU specifier so we don't regress Clang, which will complain about this in version 10 and later.
Signed-off-by: Ard Biesheuvel ard.biesheuvel@linaro.org Signed-off-by: Herbert Xu herbert@gondor.apana.org.au Signed-off-by: Florian Fainelli f.fainelli@gmail.com --- arch/arm/crypto/Kconfig | 14 +++++++------ arch/arm/crypto/Makefile | 32 ++++++----------------------- arch/arm/crypto/crct10dif-ce-core.S | 2 +- arch/arm/crypto/ghash-ce-core.S | 1 + arch/arm/crypto/sha1-ce-core.S | 1 + arch/arm/crypto/sha2-ce-core.S | 1 + 6 files changed, 18 insertions(+), 33 deletions(-)
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 043b0b18bf7e..f747caea10ff 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -30,7 +30,7 @@ config CRYPTO_SHA1_ARM_NEON
config CRYPTO_SHA1_ARM_CE tristate "SHA1 digest algorithm (ARM v8 Crypto Extensions)" - depends on KERNEL_MODE_NEON + depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) select CRYPTO_SHA1_ARM select CRYPTO_HASH help @@ -39,7 +39,7 @@ config CRYPTO_SHA1_ARM_CE
config CRYPTO_SHA2_ARM_CE tristate "SHA-224/256 digest algorithm (ARM v8 Crypto Extensions)" - depends on KERNEL_MODE_NEON + depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) select CRYPTO_SHA256_ARM select CRYPTO_HASH help @@ -96,7 +96,7 @@ config CRYPTO_AES_ARM_BS
config CRYPTO_AES_ARM_CE tristate "Accelerated AES using ARMv8 Crypto Extensions" - depends on KERNEL_MODE_NEON + depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) select CRYPTO_BLKCIPHER select CRYPTO_LIB_AES select CRYPTO_SIMD @@ -106,7 +106,7 @@ config CRYPTO_AES_ARM_CE
config CRYPTO_GHASH_ARM_CE tristate "PMULL-accelerated GHASH using NEON/ARMv8 Crypto Extensions" - depends on KERNEL_MODE_NEON + depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) select CRYPTO_HASH select CRYPTO_CRYPTD select CRYPTO_GF128MUL @@ -118,12 +118,14 @@ config CRYPTO_GHASH_ARM_CE
config CRYPTO_CRCT10DIF_ARM_CE tristate "CRCT10DIF digest algorithm using PMULL instructions" - depends on KERNEL_MODE_NEON && CRC_T10DIF + depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) + depends on CRC_T10DIF select CRYPTO_HASH
config CRYPTO_CRC32_ARM_CE tristate "CRC32(C) digest algorithm using CRC and/or PMULL instructions" - depends on KERNEL_MODE_NEON && CRC32 + depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) + depends on CRC32 select CRYPTO_HASH
config CRYPTO_CHACHA20_NEON diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 4180f3a13512..c0d36771a693 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -12,32 +12,12 @@ obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o
-ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o -ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o -ce-obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o -ce-obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o -ce-obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM_CE) += crct10dif-arm-ce.o -crc-obj-$(CONFIG_CRYPTO_CRC32_ARM_CE) += crc32-arm-ce.o - -ifneq ($(crc-obj-y)$(crc-obj-m),) -ifeq ($(call as-instr,.arch armv8-a\n.arch_extension crc,y,n),y) -ce-obj-y += $(crc-obj-y) -ce-obj-m += $(crc-obj-m) -else -$(warning These CRC Extensions modules need binutils 2.23 or higher) -$(warning $(crc-obj-y) $(crc-obj-m)) -endif -endif - -ifneq ($(ce-obj-y)$(ce-obj-m),) -ifeq ($(call as-instr,.fpu crypto-neon-fp-armv8,y,n),y) -obj-y += $(ce-obj-y) -obj-m += $(ce-obj-m) -else -$(warning These ARMv8 Crypto Extensions modules need binutils 2.23 or higher) -$(warning $(ce-obj-y) $(ce-obj-m)) -endif -endif +obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o +obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o +obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o +obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o +obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM_CE) += crct10dif-arm-ce.o +obj-$(CONFIG_CRYPTO_CRC32_ARM_CE) += crc32-arm-ce.o
aes-arm-y := aes-cipher-core.o aes-cipher-glue.o aes-arm-bs-y := aes-neonbs-core.o aes-neonbs-glue.o diff --git a/arch/arm/crypto/crct10dif-ce-core.S b/arch/arm/crypto/crct10dif-ce-core.S index 86be258a803f..46c02c518a30 100644 --- a/arch/arm/crypto/crct10dif-ce-core.S +++ b/arch/arm/crypto/crct10dif-ce-core.S @@ -72,7 +72,7 @@ #endif
.text - .arch armv7-a + .arch armv8-a .fpu crypto-neon-fp-armv8
init_crc .req r0 diff --git a/arch/arm/crypto/ghash-ce-core.S b/arch/arm/crypto/ghash-ce-core.S index c47fe81abcb0..534c9647726d 100644 --- a/arch/arm/crypto/ghash-ce-core.S +++ b/arch/arm/crypto/ghash-ce-core.S @@ -88,6 +88,7 @@ T3_H .req d17
.text + .arch armv8-a .fpu crypto-neon-fp-armv8
.macro __pmull_p64, rd, rn, rm, b1, b2, b3, b4 diff --git a/arch/arm/crypto/sha1-ce-core.S b/arch/arm/crypto/sha1-ce-core.S index 49a74a441aec..8a702e051738 100644 --- a/arch/arm/crypto/sha1-ce-core.S +++ b/arch/arm/crypto/sha1-ce-core.S @@ -10,6 +10,7 @@ #include <asm/assembler.h>
.text + .arch armv8-a .fpu crypto-neon-fp-armv8
k0 .req q0 diff --git a/arch/arm/crypto/sha2-ce-core.S b/arch/arm/crypto/sha2-ce-core.S index 4ad517577e23..b6369d2440a1 100644 --- a/arch/arm/crypto/sha2-ce-core.S +++ b/arch/arm/crypto/sha2-ce-core.S @@ -10,6 +10,7 @@ #include <asm/assembler.h>
.text + .arch armv8-a .fpu crypto-neon-fp-armv8
k0 .req q7
From: Stefan Agner stefan@agner.ch
commit 7548bf8c17d84607c106bd45d81834afd95a2edb upstream
Building ARMv7 with Clang's integrated assembler leads to errors such as: arch/arm/crypto/ghash-ce-core.S:34:11: error: register name expected t3l .req d16 ^
Since no FPU has been selected yet, Clang does not consider d16 a valid register. Moving the FPU directive to the top allows Clang to parse the registers and to successfully build this file with its integrated assembler.
Signed-off-by: Stefan Agner stefan@agner.ch Reviewed-by: Nick Desaulniers ndesaulniers@google.com Tested-by: Nick Desaulniers ndesaulniers@google.com Acked-by: Ard Biesheuvel ardb@kernel.org Signed-off-by: Herbert Xu herbert@gondor.apana.org.au Signed-off-by: Florian Fainelli f.fainelli@gmail.com --- arch/arm/crypto/ghash-ce-core.S | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/arch/arm/crypto/ghash-ce-core.S b/arch/arm/crypto/ghash-ce-core.S index 534c9647726d..9f51e3fa4526 100644 --- a/arch/arm/crypto/ghash-ce-core.S +++ b/arch/arm/crypto/ghash-ce-core.S @@ -8,6 +8,9 @@ #include <linux/linkage.h> #include <asm/assembler.h>
+ .arch armv8-a + .fpu crypto-neon-fp-armv8 + SHASH .req q0 T1 .req q1 XL .req q2 @@ -88,8 +91,6 @@ T3_H .req d17
.text - .arch armv8-a - .fpu crypto-neon-fp-armv8
.macro __pmull_p64, rd, rn, rm, b1, b2, b3, b4 vmull.p64 \rd, \rn, \rm
On Wed, Jun 29, 2022 at 11:02:16AM -0700, Florian Fainelli wrote:
Hi,
This patch series is a collection of clean cherry picks into the 5.4 kernel allowing us to use the Clang integrated assembler to build the ARM 32-bit kernel.
This is useful in order to have proper build and runtime coverage of the stable kernel(s).
Odd, but ok, if this helps you out. Now queued up.
greg k-h
linux-stable-mirror@lists.linaro.org