From cedaa6613ec558a40ed43f8150705c6d624aa868 Mon Sep 17 00:00:00 2001 From: gbaraldi Date: Wed, 12 Feb 2025 17:54:27 -0300 Subject: [PATCH 1/6] Make late_gc_lowering more robust to cases where we optimize the sret usage a bit more --- src/llvm-final-gc-lowering.cpp | 35 +++++++++++++-- src/llvm-late-gc-lowering.cpp | 82 +++++++++++++++++++++++----------- 2 files changed, 89 insertions(+), 28 deletions(-) diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp index 76dcd944890ab..88ff81d0f280c 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -167,14 +167,14 @@ bool FinalLowerGC::runOnFunction(Function &F) initAll(*F.getParent()); if (!pgcstack_getter && !adoptthread_func) { LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Skipping function " << F.getName() << "\n"); - return false; + goto verify_skip; } // Look for a call to 'julia.get_pgcstack'. pgcstack = getPGCstack(F); if (!pgcstack) { LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Skipping function " << F.getName() << " no pgcstack\n"); - return false; + goto verify_skip; } LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Processing function " << F.getName() << "\n"); queueRootFunc = getOrDeclare(jl_well_known::GCQueueRoot); @@ -212,8 +212,37 @@ bool FinalLowerGC::runOnFunction(Function &F) #undef LOWER_INTRINSIC } } - return true; + // Verify that skipping was in fact correct + verify_skip: + #ifdef JL_VERIFY_PASSES + for (auto &BB : F) { + for (auto &I : make_early_inc_range(BB)) { + auto *CI = dyn_cast(&I); + if (!CI) + continue; + + Value *callee = CI->getCalledOperand(); + assert(callee); + auto IS_INTRINSIC = [&](auto intrinsic) { + auto intrinsic2 = getOrNull(intrinsic); + if (intrinsic2 == callee) { + errs() << "Final-GC-lowering didn't eliminate all intrinsics'" << F.getName() << "', dumping entire module!\n\n"; + errs() << *F.getParent() << "\n"; + abort(); + } + }; + IS_INTRINSIC(jl_intrinsics::newGCFrame); + IS_INTRINSIC(jl_intrinsics::pushGCFrame); + IS_INTRINSIC(jl_intrinsics::popGCFrame); + IS_INTRINSIC(jl_intrinsics::getGCFrameSlot); + IS_INTRINSIC(jl_intrinsics::GCAllocBytes); + IS_INTRINSIC(jl_intrinsics::queueGCRoot); + IS_INTRINSIC(jl_intrinsics::safepoint); + } + } + #endif + return false; } PreservedAnalyses FinalLowerGCPass::run(Function &F, FunctionAnalysisManager &AM) diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index 7d6fba65a79e7..6af9bb327ff96 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -1114,6 +1114,47 @@ void LateLowerGCFrame::FixUpRefinements(ArrayRef PHINumbers, State &S) } } +// Look through instructions to find all possible allocas that might become the sret argument +static SmallSetVector FindSretAllocas(Value* SRetArg) { + SmallSetVector allocas; + if (AllocaInst *OneSRet = dyn_cast(SRetArg)) { + allocas.insert(OneSRet); // We found the alloca directly + } else { // Look throught instructions until we find it + SmallSetVector worklist; + worklist.insert(SRetArg); + while (!worklist.empty()) { + Value *V = worklist.pop_back_val(); + if (AllocaInst *Alloca = dyn_cast(V->stripInBoundsOffsets())) { + allocas.insert(Alloca); + } else if (PHINode *Phi = dyn_cast(V)) { + for (Value *Incoming : Phi->incoming_values()) { + worklist.insert(Incoming); + } + } else if (SelectInst *SI = dyn_cast(SRetArg)) { + auto TrueBranch = SI->getTrueValue(); + auto FalseBranch = SI->getFalseValue(); + if (TrueBranch && FalseBranch) { + worklist.insert(TrueBranch); + worklist.insert(FalseBranch); + } else { + llvm_dump(SI); + assert(false && "Malformed Select"); + } + } else { + llvm_dump(V); + assert(false && "Unexpected SRet argument"); + } + } + } + assert(allocas.size() > 0); + assert(std::all_of(allocas.begin(), allocas.end(), [&] (AllocaInst* SRetAlloca){ + return (SRetAlloca->getArraySize() == allocas[0]->getArraySize() && + SRetAlloca->getAllocatedType() == allocas[0]->getAllocatedType()); + } + )); + return allocas; +} + State LateLowerGCFrame::LocalScan(Function &F) { State S(F); SmallVector PHINumbers; @@ -1165,9 +1206,11 @@ State LateLowerGCFrame::LocalScan(Function &F) { Type *ElT = getAttributeAtIndex(CI->getAttributes(), 1, Attribute::StructRet).getValueAsType(); auto tracked = CountTrackedPointers(ElT, true); if (tracked.count) { - AllocaInst *SRet = dyn_cast((CI->arg_begin()[0])->stripInBoundsOffsets()); - assert(SRet); - { + SmallSetVector allocas = FindSretAllocas((CI->arg_begin()[0])->stripInBoundsOffsets()); + // We know that with the right optimizations we can forward a sret directly from an argument + // This hasn't been seen without adding IPO effects to julia functions but it's possible we need to handle that too + // If they are tracked.all we can just pass through but if they have a roots bundle it's possible we need to emit some copies ¯\_(ツ)_/¯ + for (AllocaInst *SRet : allocas) { if (!(SRet->isStaticAlloca() && isa(ElT) && ElT->getPointerAddressSpace() == AddressSpace::Tracked)) { assert(!tracked.derived); if (tracked.all) { @@ -1175,36 +1218,23 @@ State LateLowerGCFrame::LocalScan(Function &F) { } else { Value *arg1 = (CI->arg_begin()[1])->stripInBoundsOffsets(); + SmallSetVector gc_allocas = FindSretAllocas(arg1); AllocaInst *SRet_gc = nullptr; - if (PHINode *Phi = dyn_cast(arg1)) { - for (Value *V : Phi->incoming_values()) { - if (AllocaInst *Alloca = dyn_cast(V->stripInBoundsOffsets())) { - if (SRet_gc == nullptr) { - SRet_gc = Alloca; - } else if (SRet_gc == Alloca) { - continue; - } else { - llvm_dump(Alloca); - llvm_dump(SRet_gc); - assert(false && "Allocas in Phi node should match"); - } - } else { - llvm_dump(V->stripInBoundsOffsets()); - assert(false && "Expected alloca"); - } - } - } else { - SRet_gc = dyn_cast(arg1); + if (gc_allocas.size() == 1) { + SRet_gc = gc_allocas.pop_back_val(); } - if (!SRet_gc) { + else { llvm_dump(CI); - llvm_dump(arg1); - assert(false && "Expected alloca"); + for (AllocaInst *Alloca : gc_allocas) { + llvm_dump(Alloca); + } + assert(false && "Expected single alloca"); } Type *ElT = SRet_gc->getAllocatedType(); if (!(SRet_gc->isStaticAlloca() && isa(ElT) && ElT->getPointerAddressSpace() == AddressSpace::Tracked)) { S.ArrayAllocas[SRet_gc] = tracked.count * cast(SRet_gc->getArraySize())->getZExtValue(); } + break; // Found our gc roots } } } @@ -1401,6 +1431,8 @@ State LateLowerGCFrame::LocalScan(Function &F) { return S; } + + static Value *ExtractScalar(Value *V, Type *VTy, bool isptr, ArrayRef Idxs, IRBuilder<> &irbuilder) { Type *T_int32 = Type::getInt32Ty(V->getContext()); if (isptr) { From a34a4a1baa2a6d547ce82fdae12c2edeea292600 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Thu, 13 Feb 2025 11:37:04 -0300 Subject: [PATCH 2/6] Add tests --- test/llvmpasses/late-lower-gc-sret.ll | 49 +++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 test/llvmpasses/late-lower-gc-sret.ll diff --git a/test/llvmpasses/late-lower-gc-sret.ll b/test/llvmpasses/late-lower-gc-sret.ll new file mode 100644 index 0000000000000..0c31edb3494d1 --- /dev/null +++ b/test/llvmpasses/late-lower-gc-sret.ll @@ -0,0 +1,49 @@ +; This file is a part of Julia. License is MIT: https://julialang.org/license + +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s + +declare ptr @julia.get_pgcstack() + +declare swiftcc void @sret_call(ptr noalias nocapture noundef nonnull sret([3 x ptr addrspace(10)]), ptr nonnull swiftself, ptr addrspace(10) nonnull) + +define hidden swiftcc nonnull ptr addrspace(10) @sret_select(ptr nonnull swiftself %0, ptr addrspace(10) noundef nonnull align 8 dereferenceable(88) %1, i1 %unpredictable) { + ; CHECK-LABEL: @sret_select + ; CHECK: %gcframe = call ptr @julia.new_gc_frame(i32 6) + ; CHECK: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 3) + ; CHECK: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 0) + ; CHECK: %pgcstack = call ptr @julia.get_pgcstack() + ; CHECK: call void @julia.push_gc_frame(ptr %gcframe, i32 6) + %pgcstack = call ptr @julia.get_pgcstack() + %3 = alloca [3 x i64], align 8 + %4 = alloca [3 x i64], align 8 + %5 = select i1 %unpredictable, ptr %3, ptr %4 + call swiftcc void @sret_call(ptr noalias nocapture noundef nonnull sret([3 x ptr addrspace(10)]) %5, ptr nonnull swiftself %0, ptr addrspace(10) nonnull %1) + ; CHECK: call void @julia.pop_gc_frame(ptr %gcframe) + ret ptr addrspace(10) %1 +} + +define hidden swiftcc nonnull ptr addrspace(10) @sret_phi(ptr nonnull swiftself %0, ptr addrspace(10) noundef nonnull align 8 dereferenceable(88) %1, i1 %unpredictable) { +top: + ; CHECK-LABEL: @sret_phi + ; CHECK: %gcframe = call ptr @julia.new_gc_frame(i32 6) + ; CHECK: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 3) + ; CHECK: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 0) + ; CHECK: %pgcstack = call ptr @julia.get_pgcstack() + ; CHECK: call void @julia.push_gc_frame(ptr %gcframe, i32 6) + %pgcstack = call ptr @julia.get_pgcstack() + %2 = alloca [3 x i64], align 8 + %3 = alloca [3 x i64], align 8 + br i1 %unpredictable, label %true, label %false + +true: ; preds = %top + br label %ret + +false: ; preds = %top + br label %ret + +ret: ; preds = %false, %true + %4 = phi ptr [ %2, %true ], [ %3, %false ] + call swiftcc void @sret_call(ptr noalias nocapture noundef nonnull sret([3 x ptr addrspace(10)]) %4, ptr nonnull swiftself %0, ptr addrspace(10) nonnull %1) + ; CHECK: call void @julia.pop_gc_frame(ptr %gcframe) + ret ptr addrspace(10) %1 +} From 52948d37a37e8a605ba9c8f02f8d4e6da76d5f64 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Thu, 13 Feb 2025 11:45:53 -0300 Subject: [PATCH 3/6] Remove redundant comments --- src/llvm-late-gc-lowering.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index 6af9bb327ff96..84311feb62323 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -1118,14 +1118,14 @@ void LateLowerGCFrame::FixUpRefinements(ArrayRef PHINumbers, State &S) static SmallSetVector FindSretAllocas(Value* SRetArg) { SmallSetVector allocas; if (AllocaInst *OneSRet = dyn_cast(SRetArg)) { - allocas.insert(OneSRet); // We found the alloca directly - } else { // Look throught instructions until we find it + allocas.insert(OneSRet); // Found it directly + } else { SmallSetVector worklist; worklist.insert(SRetArg); while (!worklist.empty()) { Value *V = worklist.pop_back_val(); if (AllocaInst *Alloca = dyn_cast(V->stripInBoundsOffsets())) { - allocas.insert(Alloca); + allocas.insert(Alloca); // Found a candidate } else if (PHINode *Phi = dyn_cast(V)) { for (Value *Incoming : Phi->incoming_values()) { worklist.insert(Incoming); From 91a14c4a16571503908da3274a5d0baab76eca24 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Thu, 13 Feb 2025 14:53:36 -0300 Subject: [PATCH 4/6] Add notsafepoint annotation --- src/llvm-late-gc-lowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index 84311feb62323..81d636b58c527 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -1147,7 +1147,7 @@ static SmallSetVector FindSretAllocas(Value* SRetArg) { } } assert(allocas.size() > 0); - assert(std::all_of(allocas.begin(), allocas.end(), [&] (AllocaInst* SRetAlloca){ + assert(std::all_of(allocas.begin(), allocas.end(), [&] (AllocaInst* SRetAlloca) JL_NOTSAFEPOINT { return (SRetAlloca->getArraySize() == allocas[0]->getArraySize() && SRetAlloca->getAllocatedType() == allocas[0]->getAllocatedType()); } From 1dbdf347d52c32408cfe9cc5011903c121c67432 Mon Sep 17 00:00:00 2001 From: gbaraldi Date: Fri, 14 Feb 2025 13:44:46 -0300 Subject: [PATCH 5/6] Fix final GC lowering detection --- src/llvm-final-gc-lowering.cpp | 26 +++++++++++++++++--------- src/llvm-gc-interface-passes.h | 2 ++ 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp index 88ff81d0f280c..b11e8100d7c4c 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -161,21 +161,29 @@ void FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) target->replaceAllUsesWith(newI); target->eraseFromParent(); } +bool FinalLowerGC::shouldRunFinalGC(Function &F) +{ + bool should_run = 0; + should_run |= getOrNull(jl_intrinsics ::newGCFrame) != nullptr; + should_run |= getOrNull(jl_intrinsics ::getGCFrameSlot) != nullptr; + should_run |= getOrNull(jl_intrinsics ::pushGCFrame) != nullptr; + should_run |= getOrNull(jl_intrinsics ::popGCFrame) != nullptr; + should_run |= getOrNull(jl_intrinsics ::GCAllocBytes) != nullptr; + should_run |= getOrNull(jl_intrinsics ::queueGCRoot) != nullptr; + should_run |= getOrNull(jl_intrinsics ::safepoint) != nullptr; + should_run |= pgcstack_getter != nullptr; + should_run |= adoptthread_func != nullptr; + should_run |= pgcstack != nullptr; + return should_run; +} bool FinalLowerGC::runOnFunction(Function &F) { initAll(*F.getParent()); - if (!pgcstack_getter && !adoptthread_func) { - LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Skipping function " << F.getName() << "\n"); - goto verify_skip; - } - - // Look for a call to 'julia.get_pgcstack'. pgcstack = getPGCstack(F); - if (!pgcstack) { - LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Skipping function " << F.getName() << " no pgcstack\n"); + if (!shouldRunFinalGC(F)) goto verify_skip; - } + LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Processing function " << F.getName() << "\n"); queueRootFunc = getOrDeclare(jl_well_known::GCQueueRoot); smallAllocFunc = getOrDeclare(jl_well_known::GCSmallAlloc); diff --git a/src/llvm-gc-interface-passes.h b/src/llvm-gc-interface-passes.h index 7b2a4bb033203..b899cd9494673 100644 --- a/src/llvm-gc-interface-passes.h +++ b/src/llvm-gc-interface-passes.h @@ -411,6 +411,8 @@ struct FinalLowerGC: private JuliaPassContext { // Lowers a `julia.safepoint` intrinsic. void lowerSafepoint(CallInst *target, Function &F); + // Check if the pass should be run + bool shouldRunFinalGC(Function &F); }; #endif // LLVM_GC_PASSES_H From 3e78f79786553250877caa328ed7de603c156621 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Mon, 17 Feb 2025 10:05:55 -0500 Subject: [PATCH 6/6] Update src/llvm-final-gc-lowering.cpp Co-authored-by: Jameson Nash --- src/llvm-final-gc-lowering.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp index b11e8100d7c4c..50bdd2251ab42 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -171,9 +171,6 @@ bool FinalLowerGC::shouldRunFinalGC(Function &F) should_run |= getOrNull(jl_intrinsics ::GCAllocBytes) != nullptr; should_run |= getOrNull(jl_intrinsics ::queueGCRoot) != nullptr; should_run |= getOrNull(jl_intrinsics ::safepoint) != nullptr; - should_run |= pgcstack_getter != nullptr; - should_run |= adoptthread_func != nullptr; - should_run |= pgcstack != nullptr; return should_run; }