diff --git a/src/pipeline.cpp b/src/pipeline.cpp index c4b07914907e67..3caf54824b867c 100644 --- a/src/pipeline.cpp +++ b/src/pipeline.cpp @@ -35,6 +35,7 @@ #include // NewPM needs to manually include all the pass headers +#include #include #include #include @@ -46,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -75,7 +77,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -209,10 +213,10 @@ namespace { .convertSwitchRangeToICmp(true) .convertSwitchToLookupTable(true) .forwardSwitchCondToPhi(true) + .needCanonicalLoops(false) //These mess with loop rotation, so only do them after that .hoistCommonInsts(true) - // Causes an SRET assertion error in late-gc-lowering - // .sinkCommonInsts(true) + .sinkCommonInsts(true) ; } #if JL_LLVM_VERSION < 150000 @@ -357,7 +361,7 @@ static void buildEarlySimplificationPipeline(ModulePassManager &MPM, PassBuilder if (O.getSpeedupLevel() >= 1) { #if JL_LLVM_VERSION >= 160000 // TODO check the LLVM 15 default. - FPM.addPass(SROAPass(SROAOptions::PreserveCFG)); + FPM.addPass(SROAPass(SROAOptions::ModifyCFG)); #else FPM.addPass(SROAPass()); #endif @@ -397,20 +401,23 @@ static void buildEarlyOptimizerPipeline(ModulePassManager &MPM, PassBuilder *PB, if (O.getSpeedupLevel() >= 2) { #if JL_LLVM_VERSION >= 160000 // TODO check the LLVM 15 default. - FPM.addPass(SROAPass(SROAOptions::PreserveCFG)); + FPM.addPass(SROAPass(SROAOptions::ModifyCFG)); #else FPM.addPass(SROAPass()); #endif // SROA can duplicate PHI nodes which can block LowerSIMD - FPM.addPass(InstCombinePass()); + FPM.addPass(EarlyCSEPass()); FPM.addPass(JumpThreadingPass()); FPM.addPass(CorrelatedValuePropagationPass()); + FPM.addPass(InstCombinePass()); + FPM.addPass(AggressiveInstCombinePass()); + FPM.addPass(LibCallsShrinkWrapPass()); + FPM.addPass(ReassociatePass()); - FPM.addPass(EarlyCSEPass()); JULIA_PASS(FPM.addPass(AllocOptPass())); } else { // if (O.getSpeedupLevel() >= 1) (exactly) - FPM.addPass(InstCombinePass()); FPM.addPass(EarlyCSEPass()); + FPM.addPass(InstCombinePass()); } invokePeepholeEPCallbacks(FPM, PB, O); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); @@ -472,16 +479,18 @@ static void buildScalarOptimizerPipeline(FunctionPassManager &FPM, PassBuilder * JULIA_PASS(FPM.addPass(AllocOptPass())); #if JL_LLVM_VERSION >= 160000 // TODO check the LLVM 15 default. - FPM.addPass(SROAPass(SROAOptions::PreserveCFG)); + FPM.addPass(SROAPass(SROAOptions::ModifyCFG)); #else FPM.addPass(SROAPass()); #endif + FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true)); FPM.addPass(InstSimplifyPass()); FPM.addPass(GVNPass()); FPM.addPass(MemCpyOptPass()); FPM.addPass(SCCPPass()); + FPM.addPass(BDCEPass()); FPM.addPass(CorrelatedValuePropagationPass()); - FPM.addPass(DCEPass()); + FPM.addPass(ADCEPass()); FPM.addPass(IRCEPass()); FPM.addPass(InstCombinePass()); FPM.addPass(JumpThreadingPass()); @@ -496,11 +505,12 @@ static void buildScalarOptimizerPipeline(FunctionPassManager &FPM, PassBuilder * JULIA_PASS(FPM.addPass(AllocOptPass())); { LoopPassManager LPM; - LPM.addPass(LoopDeletionPass()); - LPM.addPass(LoopInstSimplifyPass()); - FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM))); + LPM.addPass(LICMPass(LICMOptions())); + LPM.addPass(JuliaLICMPass()); + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */true)); } - FPM.addPass(LoopDistributePass()); + FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions())); + FPM.addPass(InstCombinePass()); } invokeScalarOptimizerCallbacks(FPM, PB, O); FPM.addPass(AfterScalarOptimizationMarkerPass()); @@ -509,6 +519,13 @@ static void buildScalarOptimizerPipeline(FunctionPassManager &FPM, PassBuilder * static void buildVectorPipeline(FunctionPassManager &FPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { FPM.addPass(BeforeVectorizationMarkerPass()); //TODO look into loop vectorize options + // Rerotate loops that might have been unrotated in the simplification + LoopPassManager LPM; + LPM.addPass(LoopRotatePass()); + LPM.addPass(LoopDeletionPass()); + FPM.addPass(createFunctionToLoopPassAdaptor( + std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false)); + FPM.addPass(LoopDistributePass()); FPM.addPass(InjectTLIMappings()); FPM.addPass(LoopVectorizePass()); FPM.addPass(LoopLoadEliminationPass()); @@ -517,11 +534,13 @@ static void buildVectorPipeline(FunctionPassManager &FPM, PassBuilder *PB, Optim FPM.addPass(SLPVectorizerPass()); invokeVectorizerCallbacks(FPM, PB, O); FPM.addPass(VectorCombinePass()); - FPM.addPass(ADCEPass()); + FPM.addPass(InstCombinePass()); //TODO add BDCEPass here? // This unroll will unroll vectorized loops // as well as loops that we tried but failed to vectorize FPM.addPass(LoopUnrollPass(LoopUnrollOptions(O.getSpeedupLevel(), /*OnlyWhenForced = */ false, /*ForgetSCEV = */false))); + FPM.addPass(SROAPass(SROAOptions::PreserveCFG)); + FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass(LICMOptions()), /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false)); FPM.addPass(AfterVectorizationMarkerPass()); }