Successfully identified regression in *llvm* in CI configuration tcwg_bmk_llvm_tk1/llvm-master-arm-spec2k6-Oz. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_tk1/llvm-master-arm-spec2k6-Oz
Culprit: <cut> commit cd76f43b4995cf01bae9f97a54ca0e79c2355032 Author: David Green <david.green@arm.com> Date: Wed Jun 30 19:19:03 2021 +0100
[ARM] Set the immediate cost of GEP operands to 0
This prevents constant gep operands from being hoisted by the Constant Hoisting pass, leaving them to CodegenPrepare which can usually do a better job at splitting large offsets. This can, in general, improve performance and decrease codesize, especially for v6m where many constants have a high cost.
Differential Revision: https://reviews.llvm.org/D104877 </cut>
Results regressed to (for first_bad == cd76f43b4995cf01bae9f97a54ca0e79c2355032) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--with-mode=thumb --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--with-mode=thumb --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -Oz_mthumb -- artifacts/build-cd76f43b4995cf01bae9f97a54ca0e79c2355032/results_id: 1 # 470.lbm,lbm_base.default regressed by 107
from (for last_good == 4339d3bd84a9bc1b5ecc58ddfc935d53e9de4fd4) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--with-mode=thumb --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--with-mode=thumb --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -Oz_mthumb -- artifacts/build-4339d3bd84a9bc1b5ecc58ddfc935d53e9de4fd4/results_id: 1
Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-master-a... Results ID of last_good: tk1_32/tcwg_bmk_llvm_tk1/bisect-llvm-master-arm-spec2k6-Oz/1400 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-master-a... Results ID of first_bad: tk1_32/tcwg_bmk_llvm_tk1/bisect-llvm-master-arm-spec2k6-Oz/1406 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-master-a...
Configuration details:
Reproduce builds:
<cut>
mkdir investigate-llvm-cd76f43b4995cf01bae9f97a54ca0e79c2355032
cd investigate-llvm-cd76f43b4995cf01bae9f97a54ca0e79c2355032
git clone https://git.linaro.org/toolchain/jenkins-scripts
mkdir -p artifacts/manifests
curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-master-a... --fail
curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-master-a... --fail
curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-master-a... --fail
chmod +x artifacts/test.sh
# Reproduce the baseline build (build all pre-requisites)
./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh
cd llvm
# Reproduce first_bad build
git checkout --detach cd76f43b4995cf01bae9f97a54ca0e79c2355032
../artifacts/test.sh
# Reproduce last_good build
git checkout --detach 4339d3bd84a9bc1b5ecc58ddfc935d53e9de4fd4
../artifacts/test.sh
cd .. </cut>
History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/c...
Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-master-a... Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-master-a...
Full commit (up to 1000 lines): <cut> commit cd76f43b4995cf01bae9f97a54ca0e79c2355032 Author: David Green <david.green@arm.com> Date: Wed Jun 30 19:19:03 2021 +0100
[ARM] Set the immediate cost of GEP operands to 0
This prevents constant gep operands from being hoisted by the Constant Hoisting pass, leaving them to CodegenPrepare which can usually do a better job at splitting large offsets. This can, in general, improve performance and decrease codesize, especially for v6m where many constants have a high cost.
Differential Revision: https://reviews.llvm.org/D104877 --- llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 5 ++ llvm/test/CodeGen/ARM/gep-imm.ll | 74 ++++++++++------------ .../ConstantHoisting/ARM/gep-struct-index.ll | 9 +-- 3 files changed, 42 insertions(+), 46 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 6c393405a185..7410d8d1eabe 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -337,6 +337,11 @@ InstructionCost ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, Idx == 1) return 0;
+ // Leave any gep offsets for the CodeGenPrepare, which will do a better job at + // splitting any large offsets. + if (Opcode == Instruction::GetElementPtr && Idx != 0) + return 0; + if (Opcode == Instruction::And) { // UXTB/UXTH if (Imm == 255 || Imm == 65535) diff --git a/llvm/test/CodeGen/ARM/gep-imm.ll b/llvm/test/CodeGen/ARM/gep-imm.ll index 5358261426b4..20218725f8a4 100644 --- a/llvm/test/CodeGen/ARM/gep-imm.ll +++ b/llvm/test/CodeGen/ARM/gep-imm.ll @@ -37,16 +37,15 @@ entry: define void @large(i32 %a, i32 %b, i32 *%c, i32* %d) { ; CHECKV6M-LABEL: large: ; CHECKV6M: @ %bb.0: @ %entry -; CHECKV6M-NEXT: .save {r4, r5, r7, lr} -; CHECKV6M-NEXT: push {r4, r5, r7, lr} -; CHECKV6M-NEXT: movs r4, #125 -; CHECKV6M-NEXT: lsls r4, r4, #4 -; CHECKV6M-NEXT: lsls r4, r4, #2 -; CHECKV6M-NEXT: str r0, [r3, r4] -; CHECKV6M-NEXT: ldr r5, .LCPI1_0 -; CHECKV6M-NEXT: str r1, [r3, r5] -; CHECKV6M-NEXT: str r0, [r2, r4] -; CHECKV6M-NEXT: pop {r4, r5, r7, pc} +; CHECKV6M-NEXT: .save {r4, lr} +; CHECKV6M-NEXT: push {r4, lr} +; CHECKV6M-NEXT: ldr r4, .LCPI1_0 +; CHECKV6M-NEXT: str r1, [r3, r4] +; CHECKV6M-NEXT: movs r1, #125 +; CHECKV6M-NEXT: lsls r1, r1, #6 +; CHECKV6M-NEXT: str r0, [r3, r1] +; CHECKV6M-NEXT: str r0, [r2, r1] +; CHECKV6M-NEXT: pop {r4, pc} ; CHECKV6M-NEXT: .p2align 2 ; CHECKV6M-NEXT: @ %bb.1: ; CHECKV6M-NEXT: .LCPI1_0: @@ -82,47 +81,42 @@ entry: define void @huge(i32 %a, i32 %b, i32 *%c, i32* %d) { ; CHECKV6M-LABEL: huge: ; CHECKV6M: @ %bb.0: @ %entry -; CHECKV6M-NEXT: .save {r4, r5, r7, lr} -; CHECKV6M-NEXT: push {r4, r5, r7, lr} +; CHECKV6M-NEXT: .save {r4, lr} +; CHECKV6M-NEXT: push {r4, lr} ; CHECKV6M-NEXT: ldr r4, .LCPI2_0 -; CHECKV6M-NEXT: lsls r4, r4, #2 -; CHECKV6M-NEXT: str r0, [r3, r4] -; CHECKV6M-NEXT: ldr r5, .LCPI2_1 -; CHECKV6M-NEXT: str r1, [r3, r5] -; CHECKV6M-NEXT: str r0, [r2, r4] -; CHECKV6M-NEXT: pop {r4, r5, r7, pc} +; CHECKV6M-NEXT: str r1, [r3, r4] +; CHECKV6M-NEXT: ldr r1, .LCPI2_1 +; CHECKV6M-NEXT: str r0, [r3, r1] +; CHECKV6M-NEXT: str 
r0, [r2, r1] +; CHECKV6M-NEXT: pop {r4, pc} ; CHECKV6M-NEXT: .p2align 2 ; CHECKV6M-NEXT: @ %bb.1: ; CHECKV6M-NEXT: .LCPI2_0: -; CHECKV6M-NEXT: .long 200000 @ 0x30d40 -; CHECKV6M-NEXT: .LCPI2_1: ; CHECKV6M-NEXT: .long 1200000 @ 0x124f80 +; CHECKV6M-NEXT: .LCPI2_1: +; CHECKV6M-NEXT: .long 800000 @ 0xc3500 ; ; CHECKV7M-LABEL: huge: ; CHECKV7M: @ %bb.0: @ %entry -; CHECKV7M-NEXT: .save {r7, lr} -; CHECKV7M-NEXT: push {r7, lr} -; CHECKV7M-NEXT: movw r12, #3392 -; CHECKV7M-NEXT: movw lr, #20352 -; CHECKV7M-NEXT: movt r12, #3 -; CHECKV7M-NEXT: movt lr, #18 -; CHECKV7M-NEXT: str.w r0, [r3, r12, lsl #2] -; CHECKV7M-NEXT: str.w r1, [r3, lr] -; CHECKV7M-NEXT: str.w r0, [r2, r12, lsl #2] -; CHECKV7M-NEXT: pop {r7, pc} +; CHECKV7M-NEXT: movw r12, #20352 +; CHECKV7M-NEXT: movt r12, #18 +; CHECKV7M-NEXT: str.w r1, [r3, r12] +; CHECKV7M-NEXT: movw r1, #13568 +; CHECKV7M-NEXT: movt r1, #12 +; CHECKV7M-NEXT: str r0, [r3, r1] +; CHECKV7M-NEXT: str r0, [r2, r1] +; CHECKV7M-NEXT: bx lr ; ; CHECKV7A-LABEL: huge: ; CHECKV7A: @ %bb.0: @ %entry -; CHECKV7A-NEXT: .save {r7, lr} -; CHECKV7A-NEXT: push {r7, lr} -; CHECKV7A-NEXT: movw r12, #3392 -; CHECKV7A-NEXT: movw lr, #20352 -; CHECKV7A-NEXT: movt r12, #3 -; CHECKV7A-NEXT: movt lr, #18 -; CHECKV7A-NEXT: str.w r0, [r3, r12, lsl #2] -; CHECKV7A-NEXT: str.w r1, [r3, lr] -; CHECKV7A-NEXT: str.w r0, [r2, r12, lsl #2] -; CHECKV7A-NEXT: pop {r7, pc} +; CHECKV7A-NEXT: movw r12, #20352 +; CHECKV7A-NEXT: movt r12, #18 +; CHECKV7A-NEXT: str.w r1, [r3, r12] +; CHECKV7A-NEXT: movw r1, #13568 +; CHECKV7A-NEXT: movt r1, #12 +; CHECKV7A-NEXT: str r0, [r3, r1] +; CHECKV7A-NEXT: str r0, [r2, r1] +; CHECKV7A-NEXT: bx lr entry: %arrayidx = getelementptr inbounds i32, i32* %d, i32 200000 store i32 %a, i32* %arrayidx, align 4 diff --git a/llvm/test/Transforms/ConstantHoisting/ARM/gep-struct-index.ll b/llvm/test/Transforms/ConstantHoisting/ARM/gep-struct-index.ll index 45f4500b37c1..70a01a660b62 100644 --- 
a/llvm/test/Transforms/ConstantHoisting/ARM/gep-struct-index.ll +++ b/llvm/test/Transforms/ConstantHoisting/ARM/gep-struct-index.ll @@ -19,14 +19,11 @@ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
-; Indices for GEPs that index into a struct type should not be hoisted. +; Indices for GEPs should not be hoisted. define i32 @test1(%T* %P) nounwind { ; CHECK-LABEL: @test1 -; CHECK: %const = bitcast i32 256 to i32 -; CHECK: %addr1 = getelementptr %T, %T* %P, i32 %const, i32 256 -; CHECK: %addr2 = getelementptr %T, %T* %P, i32 %const, i32 256 -; The first index into the pointer is hoisted, but the second one into the -; struct isn't. +; CHECK: %addr1 = getelementptr %T, %T* %P, i32 256, i32 256 +; CHECK: %addr2 = getelementptr %T, %T* %P, i32 256, i32 256 %addr1 = getelementptr %T, %T* %P, i32 256, i32 256 %tmp1 = load i32, i32* %addr1 %addr2 = getelementptr %T, %T* %P, i32 256, i32 256 </cut>
linaro-toolchain@lists.linaro.org