Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make late_gc_lowering more robust #57380

Merged
merged 6 commits into from
Feb 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 45 additions & 11 deletions src/llvm-final-gc-lowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,21 +161,26 @@ void FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
target->replaceAllUsesWith(newI);
target->eraseFromParent();
}
bool FinalLowerGC::shouldRunFinalGC(Function &F)
{
bool should_run = 0;
should_run |= getOrNull(jl_intrinsics ::newGCFrame) != nullptr;
should_run |= getOrNull(jl_intrinsics ::getGCFrameSlot) != nullptr;
should_run |= getOrNull(jl_intrinsics ::pushGCFrame) != nullptr;
should_run |= getOrNull(jl_intrinsics ::popGCFrame) != nullptr;
should_run |= getOrNull(jl_intrinsics ::GCAllocBytes) != nullptr;
should_run |= getOrNull(jl_intrinsics ::queueGCRoot) != nullptr;
should_run |= getOrNull(jl_intrinsics ::safepoint) != nullptr;
return should_run;
}

bool FinalLowerGC::runOnFunction(Function &F)
{
initAll(*F.getParent());
if (!pgcstack_getter && !adoptthread_func) {
LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Skipping function " << F.getName() << "\n");
return false;
}

// Look for a call to 'julia.get_pgcstack'.
pgcstack = getPGCstack(F);
if (!pgcstack) {
LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Skipping function " << F.getName() << " no pgcstack\n");
return false;
}
if (!shouldRunFinalGC(F))
goto verify_skip;

LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Processing function " << F.getName() << "\n");
queueRootFunc = getOrDeclare(jl_well_known::GCQueueRoot);
smallAllocFunc = getOrDeclare(jl_well_known::GCSmallAlloc);
Expand Down Expand Up @@ -212,8 +217,37 @@ bool FinalLowerGC::runOnFunction(Function &F)
#undef LOWER_INTRINSIC
}
}

return true;
// Verify that skipping was in fact correct
verify_skip:
#ifdef JL_VERIFY_PASSES
for (auto &BB : F) {
for (auto &I : make_early_inc_range(BB)) {
auto *CI = dyn_cast<CallInst>(&I);
if (!CI)
continue;

Value *callee = CI->getCalledOperand();
assert(callee);
auto IS_INTRINSIC = [&](auto intrinsic) {
auto intrinsic2 = getOrNull(intrinsic);
if (intrinsic2 == callee) {
errs() << "Final-GC-lowering didn't eliminate all intrinsics'" << F.getName() << "', dumping entire module!\n\n";
errs() << *F.getParent() << "\n";
abort();
}
};
IS_INTRINSIC(jl_intrinsics::newGCFrame);
IS_INTRINSIC(jl_intrinsics::pushGCFrame);
IS_INTRINSIC(jl_intrinsics::popGCFrame);
IS_INTRINSIC(jl_intrinsics::getGCFrameSlot);
IS_INTRINSIC(jl_intrinsics::GCAllocBytes);
IS_INTRINSIC(jl_intrinsics::queueGCRoot);
IS_INTRINSIC(jl_intrinsics::safepoint);
}
}
#endif
return false;
}

PreservedAnalyses FinalLowerGCPass::run(Function &F, FunctionAnalysisManager &AM)
Expand Down
2 changes: 2 additions & 0 deletions src/llvm-gc-interface-passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,8 @@ struct FinalLowerGC: private JuliaPassContext {

// Lowers a `julia.safepoint` intrinsic.
void lowerSafepoint(CallInst *target, Function &F);
// Check if the pass should be run
bool shouldRunFinalGC(Function &F);
};

#endif // LLVM_GC_PASSES_H
82 changes: 57 additions & 25 deletions src/llvm-late-gc-lowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1114,6 +1114,47 @@ void LateLowerGCFrame::FixUpRefinements(ArrayRef<int> PHINumbers, State &S)
}
}

// Look through instructions to find all possible allocas that might become the sret argument
static SmallSetVector<AllocaInst *, 8> FindSretAllocas(Value* SRetArg) {
SmallSetVector<AllocaInst *, 8> allocas;
if (AllocaInst *OneSRet = dyn_cast<AllocaInst>(SRetArg)) {
allocas.insert(OneSRet); // Found it directly
} else {
SmallSetVector<Value *, 8> worklist;
worklist.insert(SRetArg);
while (!worklist.empty()) {
Value *V = worklist.pop_back_val();
if (AllocaInst *Alloca = dyn_cast<AllocaInst>(V->stripInBoundsOffsets())) {
allocas.insert(Alloca); // Found a candidate
} else if (PHINode *Phi = dyn_cast<PHINode>(V)) {
for (Value *Incoming : Phi->incoming_values()) {
worklist.insert(Incoming);
}
} else if (SelectInst *SI = dyn_cast<SelectInst>(SRetArg)) {
auto TrueBranch = SI->getTrueValue();
auto FalseBranch = SI->getFalseValue();
if (TrueBranch && FalseBranch) {
worklist.insert(TrueBranch);
worklist.insert(FalseBranch);
} else {
llvm_dump(SI);
assert(false && "Malformed Select");
}
} else {
llvm_dump(V);
assert(false && "Unexpected SRet argument");
}
}
}
assert(allocas.size() > 0);
assert(std::all_of(allocas.begin(), allocas.end(), [&] (AllocaInst* SRetAlloca) JL_NOTSAFEPOINT {
return (SRetAlloca->getArraySize() == allocas[0]->getArraySize() &&
SRetAlloca->getAllocatedType() == allocas[0]->getAllocatedType());
}
));
return allocas;
}

State LateLowerGCFrame::LocalScan(Function &F) {
State S(F);
SmallVector<int, 8> PHINumbers;
Expand Down Expand Up @@ -1165,46 +1206,35 @@ State LateLowerGCFrame::LocalScan(Function &F) {
Type *ElT = getAttributeAtIndex(CI->getAttributes(), 1, Attribute::StructRet).getValueAsType();
auto tracked = CountTrackedPointers(ElT, true);
if (tracked.count) {
AllocaInst *SRet = dyn_cast<AllocaInst>((CI->arg_begin()[0])->stripInBoundsOffsets());
assert(SRet);
{
SmallSetVector<AllocaInst *, 8> allocas = FindSretAllocas((CI->arg_begin()[0])->stripInBoundsOffsets());
// We know that with the right optimizations we can forward a sret directly from an argument
// This hasn't been seen without adding IPO effects to julia functions but it's possible we need to handle that too
// If they are tracked.all we can just pass through but if they have a roots bundle it's possible we need to emit some copies ¯\_(ツ)_/¯
for (AllocaInst *SRet : allocas) {
if (!(SRet->isStaticAlloca() && isa<PointerType>(ElT) && ElT->getPointerAddressSpace() == AddressSpace::Tracked)) {
assert(!tracked.derived);
if (tracked.all) {
S.ArrayAllocas[SRet] = tracked.count * cast<ConstantInt>(SRet->getArraySize())->getZExtValue();
}
else {
Value *arg1 = (CI->arg_begin()[1])->stripInBoundsOffsets();
SmallSetVector<AllocaInst *, 8> gc_allocas = FindSretAllocas(arg1);
AllocaInst *SRet_gc = nullptr;
if (PHINode *Phi = dyn_cast<PHINode>(arg1)) {
for (Value *V : Phi->incoming_values()) {
if (AllocaInst *Alloca = dyn_cast<AllocaInst>(V->stripInBoundsOffsets())) {
if (SRet_gc == nullptr) {
SRet_gc = Alloca;
} else if (SRet_gc == Alloca) {
continue;
} else {
llvm_dump(Alloca);
llvm_dump(SRet_gc);
assert(false && "Allocas in Phi node should match");
}
} else {
llvm_dump(V->stripInBoundsOffsets());
assert(false && "Expected alloca");
}
}
} else {
SRet_gc = dyn_cast<AllocaInst>(arg1);
if (gc_allocas.size() == 1) {
SRet_gc = gc_allocas.pop_back_val();
}
if (!SRet_gc) {
else {
llvm_dump(CI);
llvm_dump(arg1);
assert(false && "Expected alloca");
for (AllocaInst *Alloca : gc_allocas) {
llvm_dump(Alloca);
}
assert(false && "Expected single alloca");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason that this is an assert, somewhat annoying the message doesn't print on non assert builds.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pushed a fix to your branch :)

}
Type *ElT = SRet_gc->getAllocatedType();
if (!(SRet_gc->isStaticAlloca() && isa<PointerType>(ElT) && ElT->getPointerAddressSpace() == AddressSpace::Tracked)) {
S.ArrayAllocas[SRet_gc] = tracked.count * cast<ConstantInt>(SRet_gc->getArraySize())->getZExtValue();
}
break; // Found our gc roots
}
}
}
Expand Down Expand Up @@ -1401,6 +1431,8 @@ State LateLowerGCFrame::LocalScan(Function &F) {
return S;
}



static Value *ExtractScalar(Value *V, Type *VTy, bool isptr, ArrayRef<unsigned> Idxs, IRBuilder<> &irbuilder) {
Type *T_int32 = Type::getInt32Ty(V->getContext());
if (isptr) {
Expand Down
49 changes: 49 additions & 0 deletions test/llvmpasses/late-lower-gc-sret.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
; This file is a part of Julia. License is MIT: https://julialang.org/license

; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s

declare ptr @julia.get_pgcstack()

declare swiftcc void @sret_call(ptr noalias nocapture noundef nonnull sret([3 x ptr addrspace(10)]), ptr nonnull swiftself, ptr addrspace(10) nonnull)

define hidden swiftcc nonnull ptr addrspace(10) @sret_select(ptr nonnull swiftself %0, ptr addrspace(10) noundef nonnull align 8 dereferenceable(88) %1, i1 %unpredictable) {
; CHECK-LABEL: @sret_select
; CHECK: %gcframe = call ptr @julia.new_gc_frame(i32 6)
; CHECK: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 3)
; CHECK: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 0)
; CHECK: %pgcstack = call ptr @julia.get_pgcstack()
; CHECK: call void @julia.push_gc_frame(ptr %gcframe, i32 6)
%pgcstack = call ptr @julia.get_pgcstack()
%3 = alloca [3 x i64], align 8
%4 = alloca [3 x i64], align 8
%5 = select i1 %unpredictable, ptr %3, ptr %4
call swiftcc void @sret_call(ptr noalias nocapture noundef nonnull sret([3 x ptr addrspace(10)]) %5, ptr nonnull swiftself %0, ptr addrspace(10) nonnull %1)
; CHECK: call void @julia.pop_gc_frame(ptr %gcframe)
ret ptr addrspace(10) %1
}

define hidden swiftcc nonnull ptr addrspace(10) @sret_phi(ptr nonnull swiftself %0, ptr addrspace(10) noundef nonnull align 8 dereferenceable(88) %1, i1 %unpredictable) {
top:
; CHECK-LABEL: @sret_phi
; CHECK: %gcframe = call ptr @julia.new_gc_frame(i32 6)
; CHECK: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 3)
; CHECK: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 0)
; CHECK: %pgcstack = call ptr @julia.get_pgcstack()
; CHECK: call void @julia.push_gc_frame(ptr %gcframe, i32 6)
%pgcstack = call ptr @julia.get_pgcstack()
%2 = alloca [3 x i64], align 8
%3 = alloca [3 x i64], align 8
br i1 %unpredictable, label %true, label %false

true: ; preds = %top
br label %ret

false: ; preds = %top
br label %ret

ret: ; preds = %false, %true
%4 = phi ptr [ %2, %true ], [ %3, %false ]
call swiftcc void @sret_call(ptr noalias nocapture noundef nonnull sret([3 x ptr addrspace(10)]) %4, ptr nonnull swiftself %0, ptr addrspace(10) nonnull %1)
; CHECK: call void @julia.pop_gc_frame(ptr %gcframe)
ret ptr addrspace(10) %1
}