From 08bd84e8a6358eb412fcef279f8875e2d69a3374 Mon Sep 17 00:00:00 2001 From: Koakuma Date: Tue, 18 Oct 2022 00:01:55 +0000 Subject: [PATCH] [SPARC] Make calls to function with big return values work Implement CanLowerReturn and associated CallingConv changes for SPARC/SPARC64. In particular, for SPARC64 there are new `RetCC_Sparc64_*` functions that handle the return case of the calling convention. They use the same analysis as the `CC_Sparc64_*` family of functions, but fail if the return value doesn't fit into the return registers. This makes calls to functions with big return values converted to an sret function as expected, instead of crashing LLVM. Reviewed By: MaskRay Differential Revision: https://reviews.llvm.org/D132465 (cherry picked from commit d3fcbee10d893b9e01e563c3840414ba89283484) --- .../SelectionDAG/SelectionDAGBuilder.cpp | 1 + llvm/lib/Target/Sparc/SparcCallingConv.td | 10 +- llvm/lib/Target/Sparc/SparcISelLowering.cpp | 61 ++++- llvm/lib/Target/Sparc/SparcISelLowering.h | 5 + llvm/test/CodeGen/SPARC/64abi.ll | 27 -- llvm/test/CodeGen/SPARC/bigreturn.ll | 254 ++++++++++++++++++ 6 files changed, 322 insertions(+), 36 deletions(-) create mode 100644 llvm/test/CodeGen/SPARC/bigreturn.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 35650b9bd00e44..ecdaef0442dabf 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -9693,6 +9693,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Entry.Alignment = Alignment; CLI.getArgs().insert(CLI.getArgs().begin(), Entry); CLI.NumFixedArgs += 1; + CLI.getArgs()[0].IndirectType = CLI.RetTy; CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext()); // sret demotion isn't compatible with tail-calls, since the sret argument diff --git a/llvm/lib/Target/Sparc/SparcCallingConv.td b/llvm/lib/Target/Sparc/SparcCallingConv.td index 
e6d23f741ea5f8..8afd0a7fc09ad2 100644 --- a/llvm/lib/Target/Sparc/SparcCallingConv.td +++ b/llvm/lib/Target/Sparc/SparcCallingConv.td @@ -125,10 +125,14 @@ def CC_Sparc64 : CallingConv<[ def RetCC_Sparc64 : CallingConv<[ // A single f32 return value always goes in %f0. The ABI doesn't specify what // happens to multiple f32 return values outside a struct. - CCIfType<[f32], CCCustom<"CC_Sparc64_Half">>, + CCIfType<[f32], CCCustom<"RetCC_Sparc64_Half">>, - // Otherwise, return values are passed exactly like arguments. - CCDelegateTo + // Otherwise, return values are passed exactly like arguments, except that + // returns that are too big to fit into the registers is passed as an sret + // instead. + CCIfInReg>>, + CCIfType<[i32], CCPromoteToType>, + CCCustom<"RetCC_Sparc64_Full"> ]>; // Callee-saved registers are handled by the register window mechanism. diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp index 2cb74e7709c7b6..f5567508910284 100644 --- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp +++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp @@ -101,9 +101,9 @@ static bool CC_Sparc_Assign_Ret_Split_64(unsigned &ValNo, MVT &ValVT, } // Allocate a full-sized argument for the 64-bit ABI. -static bool CC_Sparc64_Full(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, CCState &State) { +static bool Analyze_CC_Sparc64_Full(bool IsReturn, unsigned &ValNo, MVT &ValVT, + MVT &LocVT, CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) { assert((LocVT == MVT::f32 || LocVT == MVT::f128 || LocVT.getSizeInBits() == 64) && "Can't handle non-64 bits locations"); @@ -133,6 +133,11 @@ static bool CC_Sparc64_Full(unsigned &ValNo, MVT &ValVT, return true; } + // Bail out if this is a return CC and we run out of registers to place + // values into. + if (IsReturn) + return false; + // This argument goes on the stack in an 8-byte slot. 
// When passing floats, LocVT is smaller than 8 bytes. Adjust the offset to // the right-aligned float. The first 4 bytes of the stack slot are undefined. @@ -146,9 +151,9 @@ static bool CC_Sparc64_Full(unsigned &ValNo, MVT &ValVT, // Allocate a half-sized argument for the 64-bit ABI. // // This is used when passing { float, int } structs by value in registers. -static bool CC_Sparc64_Half(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, CCState &State) { +static bool Analyze_CC_Sparc64_Half(bool IsReturn, unsigned &ValNo, MVT &ValVT, + MVT &LocVT, CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) { assert(LocVT.getSizeInBits() == 32 && "Can't handle non-32 bits locations"); unsigned Offset = State.AllocateStack(4, Align(4)); @@ -174,10 +179,43 @@ static bool CC_Sparc64_Half(unsigned &ValNo, MVT &ValVT, return true; } + // Bail out if this is a return CC and we run out of registers to place + // values into. 
+ if (IsReturn) + return false; + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); return true; } +static bool CC_Sparc64_Full(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) { + return Analyze_CC_Sparc64_Full(false, ValNo, ValVT, LocVT, LocInfo, ArgFlags, + State); +} + +static bool CC_Sparc64_Half(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) { + return Analyze_CC_Sparc64_Half(false, ValNo, ValVT, LocVT, LocInfo, ArgFlags, + State); +} + +static bool RetCC_Sparc64_Full(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) { + return Analyze_CC_Sparc64_Full(true, ValNo, ValVT, LocVT, LocInfo, ArgFlags, + State); +} + +static bool RetCC_Sparc64_Half(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) { + return Analyze_CC_Sparc64_Half(true, ValNo, ValVT, LocVT, LocInfo, ArgFlags, + State); +} + #include "SparcGenCallingConv.inc" // The calling conventions in SparcCallingConv.td are described in terms of the @@ -191,6 +229,15 @@ static unsigned toCallerWindow(unsigned Reg) { return Reg; } +bool SparcTargetLowering::CanLowerReturn( + CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, + const SmallVectorImpl &Outs, LLVMContext &Context) const { + SmallVector RVLocs; + CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); + return CCInfo.CheckReturn(Outs, Subtarget->is64Bit() ? RetCC_Sparc64 + : RetCC_Sparc32); +} + SDValue SparcTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, @@ -1031,6 +1078,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI, // Copy all of the result registers out of their specified physreg. 
for (unsigned i = 0; i != RVLocs.size(); ++i) { + assert(RVLocs[i].isRegLoc() && "Can only return in registers!"); if (RVLocs[i].getLocVT() == MVT::v2i32) { SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2i32); SDValue Lo = DAG.getCopyFromReg( @@ -1346,6 +1394,7 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI, // Copy all of the result registers out of their specified physreg. for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign &VA = RVLocs[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); unsigned Reg = toCallerWindow(VA.getLocReg()); // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.h b/llvm/lib/Target/Sparc/SparcISelLowering.h index 2768bb20566a5c..16e4f26870548a 100644 --- a/llvm/lib/Target/Sparc/SparcISelLowering.h +++ b/llvm/lib/Target/Sparc/SparcISelLowering.h @@ -144,6 +144,11 @@ namespace llvm { SDValue LowerCall_64(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const; + bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, + bool isVarArg, + const SmallVectorImpl &Outs, + LLVMContext &Context) const override; + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, diff --git a/llvm/test/CodeGen/SPARC/64abi.ll b/llvm/test/CodeGen/SPARC/64abi.ll index 6b181d8b343291..27865f718151ec 100644 --- a/llvm/test/CodeGen/SPARC/64abi.ll +++ b/llvm/test/CodeGen/SPARC/64abi.ll @@ -293,33 +293,6 @@ define void @call_inreg_ii(i32* %p, i32 %i1, i32 %i2) { ret void } -; Structs up to 32 bytes in size can be returned in registers. 
-; CHECK-LABEL: ret_i64_pair: -; CHECK: ldx [%i2], %i0 -; CHECK: ldx [%i3], %i1 -define { i64, i64 } @ret_i64_pair(i32 %a0, i32 %a1, i64* %p, i64* %q) { - %r1 = load i64, i64* %p - %rv1 = insertvalue { i64, i64 } undef, i64 %r1, 0 - store i64 0, i64* %p - %r2 = load i64, i64* %q - %rv2 = insertvalue { i64, i64 } %rv1, i64 %r2, 1 - ret { i64, i64 } %rv2 -} - -; CHECK-LABEL: call_ret_i64_pair: -; CHECK: call ret_i64_pair -; CHECK: stx %o0, [%i0] -; CHECK: stx %o1, [%i0] -define void @call_ret_i64_pair(i64* %i0) { - %rv = call { i64, i64 } @ret_i64_pair(i32 undef, i32 undef, - i64* undef, i64* undef) - %e0 = extractvalue { i64, i64 } %rv, 0 - store volatile i64 %e0, i64* %i0 - %e1 = extractvalue { i64, i64 } %rv, 1 - store i64 %e1, i64* %i0 - ret void -} - ; This is not a C struct, the i32 member uses 8 bytes, but the float only 4. ; CHECK-LABEL: ret_i32_float_pair: ; CHECK: ld [%i2], %i0 diff --git a/llvm/test/CodeGen/SPARC/bigreturn.ll b/llvm/test/CodeGen/SPARC/bigreturn.ll new file mode 100644 index 00000000000000..25b4eeecadc0f7 --- /dev/null +++ b/llvm/test/CodeGen/SPARC/bigreturn.ll @@ -0,0 +1,254 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=sparc -disable-sparc-delay-filler -disable-sparc-leaf-proc | FileCheck --check-prefix=SPARC %s +; RUN: llc < %s -mtriple=sparc64 -disable-sparc-delay-filler -disable-sparc-leaf-proc | FileCheck --check-prefix=SPARC64 %s + +;; Structs up to six registers in size can be returned in registers. +;; Note that the maximum return size and member placement is NOT +;; compatible with the C ABI - see SparcCallingConv.td. +define { i32, i32 } @ret_i32_pair(i32 %a0, i32 %a1, i32* %p, i32* %q) { +; SPARC-LABEL: ret_i32_pair: +; SPARC: .cfi_startproc +; SPARC-NEXT: ! 
%bb.0: +; SPARC-NEXT: save %sp, -96, %sp +; SPARC-NEXT: .cfi_def_cfa_register %fp +; SPARC-NEXT: .cfi_window_save +; SPARC-NEXT: .cfi_register %o7, %i7 +; SPARC-NEXT: ld [%i2], %i0 +; SPARC-NEXT: st %g0, [%i2] +; SPARC-NEXT: ld [%i3], %i1 +; SPARC-NEXT: restore +; SPARC-NEXT: retl +; SPARC-NEXT: nop +; +; SPARC64-LABEL: ret_i32_pair: +; SPARC64: .cfi_startproc +; SPARC64-NEXT: ! %bb.0: +; SPARC64-NEXT: save %sp, -128, %sp +; SPARC64-NEXT: .cfi_def_cfa_register %fp +; SPARC64-NEXT: .cfi_window_save +; SPARC64-NEXT: .cfi_register %o7, %i7 +; SPARC64-NEXT: ld [%i2], %i0 +; SPARC64-NEXT: st %g0, [%i2] +; SPARC64-NEXT: ld [%i3], %i1 +; SPARC64-NEXT: restore +; SPARC64-NEXT: retl +; SPARC64-NEXT: nop + %r1 = load i32, i32* %p + %rv1 = insertvalue { i32, i32 } undef, i32 %r1, 0 + store i32 0, i32* %p + %r2 = load i32, i32* %q + %rv2 = insertvalue { i32, i32 } %rv1, i32 %r2, 1 + ret { i32, i32 } %rv2 +} + +define void @call_ret_i32_pair(i32* %i0) { +; SPARC-LABEL: call_ret_i32_pair: +; SPARC: .cfi_startproc +; SPARC-NEXT: ! %bb.0: +; SPARC-NEXT: save %sp, -96, %sp +; SPARC-NEXT: .cfi_def_cfa_register %fp +; SPARC-NEXT: .cfi_window_save +; SPARC-NEXT: .cfi_register %o7, %i7 +; SPARC-NEXT: call ret_i32_pair +; SPARC-NEXT: nop +; SPARC-NEXT: st %o0, [%i0] +; SPARC-NEXT: st %o1, [%i0] +; SPARC-NEXT: restore +; SPARC-NEXT: retl +; SPARC-NEXT: nop +; +; SPARC64-LABEL: call_ret_i32_pair: +; SPARC64: .cfi_startproc +; SPARC64-NEXT: ! 
%bb.0: +; SPARC64-NEXT: save %sp, -176, %sp +; SPARC64-NEXT: .cfi_def_cfa_register %fp +; SPARC64-NEXT: .cfi_window_save +; SPARC64-NEXT: .cfi_register %o7, %i7 +; SPARC64-NEXT: call ret_i32_pair +; SPARC64-NEXT: nop +; SPARC64-NEXT: st %o0, [%i0] +; SPARC64-NEXT: st %o1, [%i0] +; SPARC64-NEXT: restore +; SPARC64-NEXT: retl +; SPARC64-NEXT: nop + %rv = call { i32, i32 } @ret_i32_pair(i32 undef, i32 undef, + i32* undef, i32* undef) + %e0 = extractvalue { i32, i32 } %rv, 0 + store volatile i32 %e0, i32* %i0 + %e1 = extractvalue { i32, i32 } %rv, 1 + store i32 %e1, i32* %i0 + ret void +} + +;; Functions returning structs more than six registers' worth of space +;; should be automatically treated as an sret function. +declare { [16 x i32] } @ret_i32_arr(i32 %input) + +define i32 @call_ret_i32_arr(i32 %0) { +; SPARC-LABEL: call_ret_i32_arr: +; SPARC: .cfi_startproc +; SPARC-NEXT: ! %bb.0: +; SPARC-NEXT: save %sp, -160, %sp +; SPARC-NEXT: .cfi_def_cfa_register %fp +; SPARC-NEXT: .cfi_window_save +; SPARC-NEXT: .cfi_register %o7, %i7 +; SPARC-NEXT: add %fp, -64, %i1 +; SPARC-NEXT: st %i1, [%sp+64] +; SPARC-NEXT: mov %i0, %o0 +; SPARC-NEXT: call ret_i32_arr +; SPARC-NEXT: nop +; SPARC-NEXT: unimp 64 +; SPARC-NEXT: ld [%fp+-4], %i0 +; SPARC-NEXT: restore +; SPARC-NEXT: retl +; SPARC-NEXT: nop +; +; SPARC64-LABEL: call_ret_i32_arr: +; SPARC64: .cfi_startproc +; SPARC64-NEXT: ! %bb.0: +; SPARC64-NEXT: save %sp, -240, %sp +; SPARC64-NEXT: .cfi_def_cfa_register %fp +; SPARC64-NEXT: .cfi_window_save +; SPARC64-NEXT: .cfi_register %o7, %i7 +; SPARC64-NEXT: add %fp, 1983, %o0 +; SPARC64-NEXT: mov %i0, %o1 +; SPARC64-NEXT: call ret_i32_arr +; SPARC64-NEXT: nop +; SPARC64-NEXT: ld [%fp+2043], %i0 +; SPARC64-NEXT: restore +; SPARC64-NEXT: retl +; SPARC64-NEXT: nop + %2 = call { [16 x i32] } @ret_i32_arr(i32 %0) + %3 = extractvalue { [16 x i32] } %2, 0 + %4 = extractvalue [16 x i32] %3, 15 + ret i32 %4 +} + +;; Structs up to six registers in size can be returned in registers. 
+;; Note that the maximum return size and member placement is NOT +;; compatible with the C ABI - see SparcCallingConv.td. +define { i64, i64 } @ret_i64_pair(i32 %a0, i32 %a1, i64* %p, i64* %q) { +; SPARC-LABEL: ret_i64_pair: +; SPARC: .cfi_startproc +; SPARC-NEXT: ! %bb.0: +; SPARC-NEXT: save %sp, -96, %sp +; SPARC-NEXT: .cfi_def_cfa_register %fp +; SPARC-NEXT: .cfi_window_save +; SPARC-NEXT: .cfi_register %o7, %i7 +; SPARC-NEXT: mov %g0, %i4 +; SPARC-NEXT: ldd [%i2], %i0 +; SPARC-NEXT: mov %i4, %i5 +; SPARC-NEXT: std %i4, [%i2] +; SPARC-NEXT: ldd [%i3], %i2 +; SPARC-NEXT: restore +; SPARC-NEXT: retl +; SPARC-NEXT: nop +; +; SPARC64-LABEL: ret_i64_pair: +; SPARC64: .cfi_startproc +; SPARC64-NEXT: ! %bb.0: +; SPARC64-NEXT: save %sp, -128, %sp +; SPARC64-NEXT: .cfi_def_cfa_register %fp +; SPARC64-NEXT: .cfi_window_save +; SPARC64-NEXT: .cfi_register %o7, %i7 +; SPARC64-NEXT: ldx [%i2], %i0 +; SPARC64-NEXT: stx %g0, [%i2] +; SPARC64-NEXT: ldx [%i3], %i1 +; SPARC64-NEXT: restore +; SPARC64-NEXT: retl +; SPARC64-NEXT: nop + %r1 = load i64, i64* %p + %rv1 = insertvalue { i64, i64 } undef, i64 %r1, 0 + store i64 0, i64* %p + %r2 = load i64, i64* %q + %rv2 = insertvalue { i64, i64 } %rv1, i64 %r2, 1 + ret { i64, i64 } %rv2 +} + +define void @call_ret_i64_pair(i64* %i0) { +; SPARC-LABEL: call_ret_i64_pair: +; SPARC: .cfi_startproc +; SPARC-NEXT: ! %bb.0: +; SPARC-NEXT: save %sp, -96, %sp +; SPARC-NEXT: .cfi_def_cfa_register %fp +; SPARC-NEXT: .cfi_window_save +; SPARC-NEXT: .cfi_register %o7, %i7 +; SPARC-NEXT: call ret_i64_pair +; SPARC-NEXT: nop +; SPARC-NEXT: ! kill: def $o0 killed $o0 killed $o0_o1 def $o0_o1 +; SPARC-NEXT: ! kill: def $o2 killed $o2 killed $o2_o3 def $o2_o3 +; SPARC-NEXT: ! kill: def $o1 killed $o1 killed $o0_o1 def $o0_o1 +; SPARC-NEXT: std %o0, [%i0] +; SPARC-NEXT: ! 
kill: def $o3 killed $o3 killed $o2_o3 def $o2_o3 +; SPARC-NEXT: std %o2, [%i0] +; SPARC-NEXT: restore +; SPARC-NEXT: retl +; SPARC-NEXT: nop +; +; SPARC64-LABEL: call_ret_i64_pair: +; SPARC64: .cfi_startproc +; SPARC64-NEXT: ! %bb.0: +; SPARC64-NEXT: save %sp, -176, %sp +; SPARC64-NEXT: .cfi_def_cfa_register %fp +; SPARC64-NEXT: .cfi_window_save +; SPARC64-NEXT: .cfi_register %o7, %i7 +; SPARC64-NEXT: call ret_i64_pair +; SPARC64-NEXT: nop +; SPARC64-NEXT: stx %o0, [%i0] +; SPARC64-NEXT: stx %o1, [%i0] +; SPARC64-NEXT: restore +; SPARC64-NEXT: retl +; SPARC64-NEXT: nop + %rv = call { i64, i64 } @ret_i64_pair(i32 undef, i32 undef, + i64* undef, i64* undef) + %e0 = extractvalue { i64, i64 } %rv, 0 + store volatile i64 %e0, i64* %i0 + %e1 = extractvalue { i64, i64 } %rv, 1 + store i64 %e1, i64* %i0 + ret void +} + +;; Functions returning structs more than six registers' worth of space +;; should be automatically treated as an sret function. +declare { [16 x i64] } @ret_i64_arr(i64 %input) + +define i64 @call_ret_i64_arr(i64 %0) { +; SPARC-LABEL: call_ret_i64_arr: +; SPARC: .cfi_startproc +; SPARC-NEXT: ! %bb.0: +; SPARC-NEXT: save %sp, -224, %sp +; SPARC-NEXT: .cfi_def_cfa_register %fp +; SPARC-NEXT: .cfi_window_save +; SPARC-NEXT: .cfi_register %o7, %i7 +; SPARC-NEXT: add %fp, -128, %i2 +; SPARC-NEXT: st %i2, [%sp+64] +; SPARC-NEXT: mov %i0, %o0 +; SPARC-NEXT: mov %i1, %o1 +; SPARC-NEXT: call ret_i64_arr +; SPARC-NEXT: nop +; SPARC-NEXT: unimp 128 +; SPARC-NEXT: ldd [%fp+-8], %i0 +; SPARC-NEXT: restore +; SPARC-NEXT: retl +; SPARC-NEXT: nop +; +; SPARC64-LABEL: call_ret_i64_arr: +; SPARC64: .cfi_startproc +; SPARC64-NEXT: ! 
%bb.0: +; SPARC64-NEXT: save %sp, -304, %sp +; SPARC64-NEXT: .cfi_def_cfa_register %fp +; SPARC64-NEXT: .cfi_window_save +; SPARC64-NEXT: .cfi_register %o7, %i7 +; SPARC64-NEXT: add %fp, 1919, %o0 +; SPARC64-NEXT: mov %i0, %o1 +; SPARC64-NEXT: call ret_i64_arr +; SPARC64-NEXT: nop +; SPARC64-NEXT: ldx [%fp+2039], %i0 +; SPARC64-NEXT: restore +; SPARC64-NEXT: retl +; SPARC64-NEXT: nop + %2 = call { [16 x i64] } @ret_i64_arr(i64 %0) + %3 = extractvalue { [16 x i64] } %2, 0 + %4 = extractvalue [16 x i64] %3, 15 + ret i64 %4 +}