Successfully identified regression in *llvm* in CI configuration tcwg_bmk_llvm_tx1/llvm-master-aarch64-spec2k6-O3. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_tx1/llvm-master-aarch64-spec2k6-O3
Culprit: <cut> commit 5c315bee8c9db27d12cead928eea5a3fef97f34f Author: Dawid Jurczak dawid_jurek@vp.pl Date: Mon Jul 5 11:42:17 2021 +0200
[DSE] Transform memset + malloc --> calloc (PR25892)
After this change DSE can eliminate malloc + memset and emit calloc. It's https://reviews.llvm.org/D101440 follow-up.
Differential Revision: https://reviews.llvm.org/D103009 </cut>
Results regressed to (for first_bad == 5c315bee8c9db27d12cead928eea5a3fef97f34f) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O3 artifacts/build-5c315bee8c9db27d12cead928eea5a3fef97f34f/results_id: 1 # 464.h264ref,h264ref_base.default regressed by 105 # 464.h264ref,[.] FastFullPelBlockMotionSearch regressed by 146
from (for last_good == bc5b5ea037dbadd281c59248ae9d2742b51c69ed) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O3 artifacts/build-bc5b5ea037dbadd281c59248ae9d2742b51c69ed/results_id: 1
Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-a... Results ID of last_good: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-master-aarch64-spec2k6-O3/3221 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-a... Results ID of first_bad: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-master-aarch64-spec2k6-O3/3210 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-a...
Configuration details:
Reproduce builds: <cut> mkdir investigate-llvm-5c315bee8c9db27d12cead928eea5a3fef97f34f cd investigate-llvm-5c315bee8c9db27d12cead928eea5a3fef97f34f
git clone https://git.linaro.org/toolchain/jenkins-scripts
mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-a... --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-a... --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-a... --fail chmod +x artifacts/test.sh
# Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh
# Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /llvm/ ./ ./bisect/baseline/
cd llvm
# Reproduce first_bad build git checkout --detach 5c315bee8c9db27d12cead928eea5a3fef97f34f ../artifacts/test.sh
# Reproduce last_good build git checkout --detach bc5b5ea037dbadd281c59248ae9d2742b51c69ed ../artifacts/test.sh
cd .. </cut>
History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/c...
Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-a... Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-a...
Full commit (up to 1000 lines): <cut> commit 5c315bee8c9db27d12cead928eea5a3fef97f34f Author: Dawid Jurczak dawid_jurek@vp.pl Date: Mon Jul 5 11:42:17 2021 +0200
[DSE] Transform memset + malloc --> calloc (PR25892)
After this change DSE can eliminate malloc + memset and emit calloc. It's https://reviews.llvm.org/D101440 follow-up.
Differential Revision: https://reviews.llvm.org/D103009 --- .../lib/Transforms/Scalar/DeadStoreElimination.cpp | 81 +++++++++-- .../Transforms/DeadStoreElimination/noop-stores.ll | 153 ++++++++++++++++++++- 2 files changed, 219 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index d22b3f409585..0ada5c6e72c9 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -56,6 +56,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" @@ -78,6 +79,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" +#include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/Local.h" #include <algorithm> #include <cassert> @@ -505,7 +507,12 @@ memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI, BasicBlock::iterator SecondBBI(SecondI); BasicBlock *FirstBB = FirstI->getParent(); BasicBlock *SecondBB = SecondI->getParent(); - MemoryLocation MemLoc = MemoryLocation::get(SecondI); + MemoryLocation MemLoc; + if (auto *MemSet = dyn_cast<MemSetInst>(SecondI)) + MemLoc = MemoryLocation::getForDest(MemSet); + else + MemLoc = MemoryLocation::get(SecondI); + auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);
// Start checking the SecondBB. @@ -819,14 +826,17 @@ bool isNoopIntrinsic(Instruction *I) { }
// Check if we can ignore \p D for DSE. -bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) { +bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller, + const TargetLibraryInfo &TLI) { Instruction *DI = D->getMemoryInst(); // Calls that only access inaccessible memory cannot read or write any memory // locations we consider for elimination. if (auto *CB = dyn_cast<CallBase>(DI)) - if (CB->onlyAccessesInaccessibleMemory()) + if (CB->onlyAccessesInaccessibleMemory()) { + if (isAllocLikeFn(DI, &TLI)) + return false; return true; - + } // We can eliminate stores to locations not visible to the caller across // throwing instructions. if (DI->mayThrow() && !DefVisibleToCaller) @@ -841,7 +851,7 @@ bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) { return true;
// Skip intrinsics that do not really read or modify memory. - if (isNoopIntrinsic(D->getMemoryInst())) + if (isNoopIntrinsic(DI)) return true;
return false; @@ -1389,7 +1399,7 @@ struct DSEState { MemoryDef *CurrentDef = cast<MemoryDef>(Current); Instruction *CurrentI = CurrentDef->getMemoryInst();
- if (canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(DefUO))) + if (canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(DefUO), TLI)) continue;
// Before we try to remove anything, check for any extra throwing @@ -1816,13 +1826,58 @@ struct DSEState {
if (StoredConstant && StoredConstant->isNullValue()) { auto *DefUOInst = dyn_cast<Instruction>(DefUO); - if (DefUOInst && isCallocLikeFn(DefUOInst, &TLI)) { - auto *UnderlyingDef = cast<MemoryDef>(MSSA.getMemoryAccess(DefUOInst)); - // If UnderlyingDef is the clobbering access of Def, no instructions - // between them can modify the memory location. - auto *ClobberDef = - MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def); - return UnderlyingDef == ClobberDef; + if (DefUOInst) { + if (isCallocLikeFn(DefUOInst, &TLI)) { + auto *UnderlyingDef = + cast<MemoryDef>(MSSA.getMemoryAccess(DefUOInst)); + // If UnderlyingDef is the clobbering access of Def, no instructions + // between them can modify the memory location. + auto *ClobberDef = + MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def); + return UnderlyingDef == ClobberDef; + } + + if (MemSet) { + if (F.hasFnAttribute(Attribute::SanitizeMemory) || + F.hasFnAttribute(Attribute::SanitizeAddress) || + F.hasFnAttribute(Attribute::SanitizeHWAddress) || + F.getName() == "calloc") + return false; + auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(DefUOInst)); + if (!Malloc) + return false; + auto *InnerCallee = Malloc->getCalledFunction(); + if (!InnerCallee) + return false; + LibFunc Func; + if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) || + Func != LibFunc_malloc) + return false; + if (Malloc->getOperand(0) == MemSet->getLength()) { + if (DT.dominates(Malloc, MemSet) && + memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT)) { + IRBuilder<> IRB(Malloc); + const auto &DL = Malloc->getModule()->getDataLayout(); + AttributeList EmptyList; + if (auto *Calloc = emitCalloc( + ConstantInt::get(IRB.getIntPtrTy(DL), 1), + Malloc->getArgOperand(0), EmptyList, IRB, TLI)) { + MemorySSAUpdater Updater(&MSSA); + auto *LastDef = cast<MemoryDef>( + Updater.getMemorySSA()->getMemoryAccess(Malloc)); + auto *NewAccess = Updater.createMemoryAccessAfter( + cast<Instruction>(Calloc), LastDef, LastDef); + auto *NewAccessMD = cast<MemoryDef>(NewAccess); + Updater.insertDef(NewAccessMD, /*RenameUses=*/true); + Updater.removeMemoryAccess(Malloc); + Malloc->replaceAllUsesWith(Calloc); + Malloc->eraseFromParent(); + return true; + } + return false; + } + } + } } }
diff --git a/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll b/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll index 184653982a6a..12534b6047c5 100644 --- a/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll +++ b/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll @@ -1,9 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -basic-aa -dse -S | FileCheck %s -; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -S | FileCheck %s +; RUN: opt < %s -aa-pipeline=basic-aa -passes='dse,verify<memoryssa>' -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
-declare i8* @calloc(i64, i64) declare void @memset_pattern16(i8*, i8*, i64)
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind @@ -309,6 +308,156 @@ entry: ret void }
+declare noalias i8* @malloc(i64) +declare noalias i8* @_Znwm(i64) +declare void @clobber_memory(float*) + +; based on pr25892_lite +define i8* @zero_memset_after_malloc(i64 %size) { +; CHECK-LABEL: @zero_memset_after_malloc( +; CHECK-NEXT: [[CALL:%.*]] = call i8* @calloc(i64 1, i64 [[SIZE:%.*]]) +; CHECK-NEXT: ret i8* [[CALL]] +; + %call = call i8* @malloc(i64 %size) inaccessiblememonly + call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 %size, i1 false) + ret i8* %call +} + +; based on pr25892_lite +define i8* @zero_memset_after_malloc_with_intermediate_clobbering(i64 %size) { +; CHECK-LABEL: @zero_memset_after_malloc_with_intermediate_clobbering( +; CHECK-NEXT: [[CALL:%.*]] = call i8* @malloc(i64 [[SIZE:%.*]]) +; CHECK-NEXT: [[BC:%.*]] = bitcast i8* [[CALL]] to float* +; CHECK-NEXT: call void @clobber_memory(float* [[BC]]) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[CALL]], i8 0, i64 [[SIZE]], i1 false) +; CHECK-NEXT: ret i8* [[CALL]] +; + %call = call i8* @malloc(i64 %size) inaccessiblememonly + %bc = bitcast i8* %call to float* + call void @clobber_memory(float* %bc) + call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 %size, i1 false) + ret i8* %call +} + +; based on pr25892_lite +define i8* @zero_memset_after_malloc_with_different_sizes(i64 %size) { +; CHECK-LABEL: @zero_memset_after_malloc_with_different_sizes( +; CHECK-NEXT: [[CALL:%.*]] = call i8* @malloc(i64 [[SIZE:%.*]]) +; CHECK-NEXT: [[SIZE2:%.*]] = add nsw i64 [[SIZE]], -1 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[CALL]], i8 0, i64 [[SIZE2]], i1 false) +; CHECK-NEXT: ret i8* [[CALL]] +; + %call = call i8* @malloc(i64 %size) inaccessiblememonly + %size2 = add nsw i64 %size, -1 + call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 %size2, i1 false) + ret i8* %call +} + +; based on pr25892_lite +define i8* @zero_memset_after_new(i64 %size) { +; CHECK-LABEL: @zero_memset_after_new( +; CHECK-NEXT: [[CALL:%.*]] = call i8* @_Znwm(i64 [[SIZE:%.*]]) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[CALL]], i8 0, i64 [[SIZE]], i1 false) +; CHECK-NEXT: ret i8* [[CALL]] +; + %call = call i8* @_Znwm(i64 %size) + call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 %size, i1 false) + ret i8* %call +} + +; This should not create a calloc and should not crash the compiler. +define i8* @notmalloc_memset(i64 %size, i8*(i64)* %notmalloc) { +; CHECK-LABEL: @notmalloc_memset( +; CHECK-NEXT: [[CALL1:%.*]] = call i8* [[NOTMALLOC:%.*]](i64 [[SIZE:%.*]]) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[CALL1]], i8 0, i64 [[SIZE]], i1 false) +; CHECK-NEXT: ret i8* [[CALL1]] +; + %call1 = call i8* %notmalloc(i64 %size) + call void @llvm.memset.p0i8.i64(i8* %call1, i8 0, i64 %size, i1 false) + ret i8* %call1 +} + +; This should not create recursive call to calloc. +define i8* @calloc(i64 %nmemb, i64 %size) { +; CHECK-LABEL: @calloc( +; CHECK: entry: +; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[SIZE:%.*]], [[NMEMB:%.*]] +; CHECK-NEXT: [[CALL:%.*]] = tail call noalias align 16 i8* @malloc(i64 [[MUL]]) +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8* [[CALL]], null +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: tail call void @llvm.memset.p0i8.i64(i8* nonnull align 16 [[CALL]], i8 0, i64 [[MUL]], i1 false) +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: ret i8* [[CALL]] +; +entry: + %mul = mul i64 %size, %nmemb + %call = tail call noalias align 16 i8* @malloc(i64 %mul) + %tobool.not = icmp eq i8* %call, null + br i1 %tobool.not, label %if.end, label %if.then + +if.then: ; preds = %entry + tail call void @llvm.memset.p0i8.i64(i8* nonnull align 16 %call, i8 0, i64 %mul, i1 false) + br label %if.end + +if.end: ; preds = %if.then, %entry + ret i8* %call +} + +define float* @pr25892(i64 %size) { +; CHECK-LABEL: @pr25892( +; CHECK: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i8* @calloc(i64 1, i64 [[SIZE:%.*]]) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8* [[CALL]], null +; CHECK-NEXT: br i1 [[CMP]], label [[CLEANUP:%.*]], label [[IF_END:%.*]] +; CHECK: if.end: +; CHECK-NEXT: [[BC:%.*]] = bitcast i8* [[CALL]] to float* +; CHECK-NEXT: br label [[CLEANUP]] +; CHECK: cleanup: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi float* [ [[BC]], [[IF_END]] ], [ null, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret float* [[RETVAL_0]] +; +entry: + %call = call i8* @malloc(i64 %size) inaccessiblememonly + %cmp = icmp eq i8* %call, null + br i1 %cmp, label %cleanup, label %if.end +if.end: + %bc = bitcast i8* %call to float* + call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 %size, i1 false) + br label %cleanup +cleanup: + %retval.0 = phi float* [ %bc, %if.end ], [ null, %entry ] + ret float* %retval.0 +} + +define float* @pr25892_with_extra_store(i64 %size) { +; CHECK-LABEL: @pr25892_with_extra_store( +; CHECK: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i8* @calloc(i64 1, i64 [[SIZE:%.*]]) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8* [[CALL]], null +; CHECK-NEXT: br i1 [[CMP]], label [[CLEANUP:%.*]], label [[IF_END:%.*]] +; CHECK: if.end: +; CHECK-NEXT: [[BC:%.*]] = bitcast i8* [[CALL]] to float* +; CHECK-NEXT: br label [[CLEANUP]] +; CHECK: cleanup: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi float* [ [[BC]], [[IF_END]] ], [ null, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret float* [[RETVAL_0]] +; +entry: + %call = call i8* @malloc(i64 %size) inaccessiblememonly + %cmp = icmp eq i8* %call, null + br i1 %cmp, label %cleanup, label %if.end +if.end: + %bc = bitcast i8* %call to float* + call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 %size, i1 false) + store i8 0, i8* %call, align 1 + br label %cleanup +cleanup: + %retval.0 = phi float* [ %bc, %if.end ], [ null, %entry ] + ret float* %retval.0 +} + ; PR50143 define i8* @store_zero_after_calloc_inaccessiblememonly() { ; CHECK-LABEL: @store_zero_after_calloc_inaccessiblememonly( </cut>