Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix ByVal args handling #197

Merged
merged 8 commits into from
Nov 4, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -159,3 +159,9 @@ add_test(NAME test_roundtrip_translate_only
COMMAND scripts/roundtrip.py --translate-only $<TARGET_FILE:${RELLIC_DECOMP}> tests/tools/decomp/failing-rebuild/ "${CLANG_PATH}"
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
)

# Decompilation test: runs scripts/decompile.py with the built rellic-decomp
# binary over the inputs found in tests/tools/decomp/.
# The script and test paths are relative, so the test is executed from the
# root of the source tree.
add_test(NAME test_decompile
COMMAND scripts/decompile.py $<TARGET_FILE:${RELLIC_DECOMP}> tests/tools/decomp/
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
)
3 changes: 3 additions & 0 deletions include/rellic/AST/IRToASTVisitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#include <clang/AST/Decl.h>
#include <clang/AST/Stmt.h>
#include <llvm/IR/Argument.h>
#include <llvm/IR/InlineAsm.h>
#include <llvm/IR/InstVisitor.h>
#include <llvm/IR/Operator.h>
Expand All @@ -27,6 +28,7 @@ namespace rellic {
using IRToTypeDeclMap = std::unordered_map<llvm::Type *, clang::TypeDecl *>;
using IRToValDeclMap = std::unordered_map<llvm::Value *, clang::ValueDecl *>;
using IRToStmtMap = std::unordered_map<llvm::Value *, clang::Stmt *>;
using ArgToTempMap = std::unordered_map<llvm::Argument *, clang::VarDecl *>;

class IRToASTVisitor : public llvm::InstVisitor<IRToASTVisitor> {
private:
Expand All @@ -37,6 +39,7 @@ class IRToASTVisitor : public llvm::InstVisitor<IRToASTVisitor> {
IRToTypeDeclMap type_decls;
IRToValDeclMap value_decls;
IRToStmtMap stmts;
ArgToTempMap temp_decls;

clang::Expr *GetOperandExpr(llvm::Value *val);
clang::QualType GetQualType(llvm::Type *type);
Expand Down
70 changes: 65 additions & 5 deletions lib/AST/IRToASTVisitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,30 @@ clang::Expr *IRToASTVisitor::GetOperandExpr(llvm::Value *val) {
}
// Operand is a function argument or local variable
if (llvm::isa<llvm::Argument>(val)) {
return CreateRef();
auto arg{llvm::cast<llvm::Argument>(val)};
auto ref{CreateRef()};
if (arg->hasByValAttr()) {
// Arguments carrying the `byval` attribute are pointers in the IR but have
// pass-by-value semantics, so the decompiled parameter is declared by value.
// Every IR use of such an argument still expects a pointer, so we create an
// auxiliary pointer variable initialized to the address of the parameter
// and reference it instead of the parameter itself.
auto &temp{temp_decls[arg]};
if (!temp) {
auto addr_of_arg{ast.CreateAddrOf(ref)};
auto func{arg->getParent()};
auto fdecl{GetOrCreateDecl(func)->getAsFunction()};
auto argdecl{clang::cast<clang::ParmVarDecl>(value_decls[arg])};
temp = ast.CreateVarDecl(fdecl, GetQualType(arg->getType()),
argdecl->getName().str() + "_ptr");
temp->setInit(addr_of_arg);
fdecl->addDecl(temp);
}

return ast.CreateDeclRef(temp);
} else {
return ref;
}
}
// Operand is a result of an expression
if (auto inst = llvm::dyn_cast<llvm::Instruction>(val)) {
Expand Down Expand Up @@ -398,8 +421,40 @@ void IRToASTVisitor::VisitArgument(llvm::Argument &arg) {
// Get parent function declaration
auto func{arg.getParent()};
auto fdecl{clang::cast<clang::FunctionDecl>(GetOrCreateDecl(func))};
auto argtype{arg.getType()};
if (arg.hasByValAttr()) {
auto byval{arg.getAttribute(llvm::Attribute::ByVal)};
argtype = byval.getValueAsType();
}
// Create a declaration
parm = ast.CreateParamDecl(fdecl, GetQualType(arg.getType()), name);
parm = ast.CreateParamDecl(fdecl, GetQualType(argtype), name);
}

// Builds the function type used for the decompiled declaration of `func`,
// rewriting every parameter that carries the `byval` attribute from
// pointer-to-T into plain T. All other parameters, the return type and the
// variadic flag are copied unchanged.
//
// `byval` parameters need this treatment because, from a bitcode point of
// view, they are passed "by reference", with the caveat that the actual
// semantics are "create a copy of the pointee before calling, and pass a
// pointer to that copy instead" (this is done implicitly).
// Thus, a function type like
// i32 @do_foo(%struct.foo* byval(%struct.foo) align 4 %f)
// is converted into
// i32 @do_foo(%struct.foo %f)
static llvm::FunctionType *GetFixedFunctionType(llvm::Function &func) {
  std::vector<llvm::Type *> new_arg_types{};
  new_arg_types.reserve(func.arg_size());

  for (auto &arg : func.args()) {
    if (!arg.hasByValAttr()) {
      new_arg_types.push_back(arg.getType());
      continue;
    }

    // Take the value type from the attribute itself, which is the same source
    // the parameter declaration uses, so the function type and its parameter
    // declarations cannot disagree.
    auto byval{arg.getAttribute(llvm::Attribute::ByVal)};
    auto value_type{byval.getValueAsType()};
    if (!value_type) {
      // The attribute carries no type: fall back to the pointee type of the
      // pointer argument.
      auto ptrtype{llvm::cast<llvm::PointerType>(arg.getType())};
      value_type = ptrtype->getElementType();
    }
    new_arg_types.push_back(value_type);
  }

  return llvm::FunctionType::get(func.getReturnType(), new_arg_types,
                                 func.isVarArg());
}

void IRToASTVisitor::VisitFunctionDecl(llvm::Function &func) {
Expand All @@ -418,7 +473,7 @@ void IRToASTVisitor::VisitFunctionDecl(llvm::Function &func) {

DLOG(INFO) << "Creating FunctionDecl for " << name;
auto tudecl{ast_ctx.getTranslationUnitDecl()};
auto type{GetQualType(func.getFunctionType())};
auto type{GetQualType(GetFixedFunctionType(func))};
decl = ast.CreateFunctionDecl(tudecl, type, name);

tudecl->addDecl(decl);
Expand Down Expand Up @@ -470,8 +525,13 @@ void IRToASTVisitor::visitCallInst(llvm::CallInst &inst) {
}

std::vector<clang::Expr *> args;
for (auto &arg : inst.arg_operands()) {
args.push_back(GetOperandExpr(arg));
for (auto i{0U}; i < inst.getNumArgOperands(); ++i) {
auto &arg{inst.getArgOperandUse(i)};
auto opnd{GetOperandExpr(arg)};
if (inst.getParamAttr(i, llvm::Attribute::ByVal).isValid()) {
opnd = ast.CreateDeref(opnd);
}
args.push_back(opnd);
}

clang::Expr *callexpr{nullptr};
Expand Down
79 changes: 79 additions & 0 deletions scripts/decompile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#!/usr/bin/env python3

import unittest
import subprocess
import argparse
import tempfile
import os
import sys


class RunError(Exception):
    """Error raised when a command could not be started.

    The payload is kept on ``msg`` and rendered through ``str()`` on demand.
    """

    def __init__(self, msg):
        self.msg = msg

    def __str__(self):
        rendered = str(self.msg)
        return rendered


def run_cmd(cmd, timeout):
    """Run ``cmd`` and return the finished ``subprocess.CompletedProcess``.

    stdout and stderr are captured separately and decoded as text.
    ``timeout`` is forwarded unchanged to ``subprocess.run``.

    Raises:
        RunError: if the executable does not exist or cannot be executed.
    """
    try:
        return subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            timeout=timeout,
            universal_newlines=True,
        )
    except FileNotFoundError as err:
        message = 'Error: No such file or directory: "' + err.filename + '"'
        raise RunError(message)
    except PermissionError as err:
        message = 'Error: File "' + err.filename + '" is not an executable.'
        raise RunError(message)


def decompile(self, rellic, input, output, timeout):
    """Decompile ``input`` into ``output`` with the ``rellic`` binary.

    ``self`` is the running test case; it is used to assert that the tool
    exited with status 0 and wrote nothing to stderr. Returns the completed
    process object produced by ``run_cmd``.
    """
    options = [
        "--lower_switch",
        "--remove_phi_nodes",
        "--input",
        input,
        "--output",
        output,
    ]
    result = run_cmd([rellic] + options, timeout)

    failure_msg = "rellic-decomp failure: %s" % result.stderr
    self.assertEqual(result.returncode, 0, failure_msg)

    noise_msg = "errors or warnings during decompilation: %s" % result.stderr
    self.assertEqual(len(result.stderr), 0, noise_msg)

    return result

class TestDecompile(unittest.TestCase):
    # Intentionally empty: the __main__ block attaches one `test_<name>`
    # method per input file with setattr() before unittest.main() runs.
    pass


if __name__ == "__main__":
    # Command line: <rellic-decomp binary> <test directory> [-t SECONDS]
    parser = argparse.ArgumentParser()
    parser.add_argument("rellic", help="path to rellic-decomp")
    parser.add_argument("tests", help="path to test directory")
    parser.add_argument("-t", "--timeout", help="set timeout in seconds", type=int)

    args = parser.parse_args()

    def test_generator(path):
        """Return a test method that decompiles ``path`` into a temp dir."""

        def test(self):
            with tempfile.TemporaryDirectory() as tempdir:
                rt_c = os.path.join(tempdir, "rt.c")
                decompile(self, args.rellic, path, rt_c, args.timeout)

        return test

    # Register one test per bitcode / textual IR file in the test directory.
    for item in os.scandir(args.tests):
        if item.is_file():
            name, ext = os.path.splitext(item.name)
            # Allow for READMEs and data/headers
            # (".bc" must not carry trailing whitespace, otherwise bitcode
            # inputs are silently skipped.)
            if ext in (".bc", ".ll"):
                test_name = f"test_{name}"
                test = test_generator(item.path)
                setattr(TestDecompile, test_name, test)

    # Pass only the program name so unittest does not re-parse our own flags.
    unittest.main(argv=[sys.argv[0]])
17 changes: 17 additions & 0 deletions tests/tools/decomp/byval_struct.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
extern int printf(const char* f, ...);
extern int atoi(const char* s);

/* Aggregate of four `long long` fields used as the by-value argument below.
 * NOTE(review): presumably sized so that the target ABI passes it in memory
 * (LLVM `byval`) rather than in registers -- confirm for the target. */
struct foo {
long long x, y, z, w;
};

/* Receives `f` by value, triples the `x` field of that local copy and
 * returns the new value. The caller's struct is not modified, because the
 * parameter is a copy. */
long long get_3x(struct foo f) {
f.x = f.x * 3;
return f.x;
}

/* Builds a struct from atoi() results, passes it by value to get_3x(), then
 * prints the caller's own f.x next to the value returned by get_3x(). */
int main() {
/* NOTE(review): atoi() on literals presumably keeps the initializers from
 * being constant-folded -- confirm intent. */
struct foo f = {atoi("1"), atoi("2"), atoi("3"), atoi("4")};
long long x = get_3x(f);
/* f.x is read from the caller's struct, which get_3x() received as a copy. */
printf("%lld %lld\n", f.x, x);
}
61 changes: 61 additions & 0 deletions tests/tools/decomp/byval_tail_gep.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
; ModuleID = 'byval_tail_gep.cpp'
source_filename = "byval_tail_gep.cpp"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

%struct.big_derived = type { %struct.big_base }
%struct.big_base = type { [32 x i32] }

; Function Attrs: uwtable mustprogress
; foo (debug name, linkage name _Z3foo11big_derived): receives %x as a
; `byval(%struct.big_derived)` pointer, computes the address of its first
; field (the %struct.big_base member) and forwards that address to
; @_Z3bar8big_base as a `byval(%struct.big_base)` argument in a tail call.
define dso_local double @_Z3foo11big_derived(%struct.big_derived* nocapture readonly byval(%struct.big_derived) align 8 %x) local_unnamed_addr #0 !dbg !7 {
entry:
call void @llvm.dbg.declare(metadata %struct.big_derived* %x, metadata !23, metadata !DIExpression()), !dbg !24
; GEP with indices (0, 0): pointer to field 0 of %x, typed %struct.big_base*.
%tmpcast = getelementptr inbounds %struct.big_derived, %struct.big_derived* %x, i64 0, i32 0, !dbg !25
; The byval call operand is the GEP result, not the incoming argument itself.
%call = tail call double @_Z3bar8big_base(%struct.big_base* nonnull byval(%struct.big_base) align 8 %tmpcast), !dbg !26
ret double %call, !dbg !27
}

; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1

declare !dbg !28 dso_local double @_Z3bar8big_base(%struct.big_base* byval(%struct.big_base) align 8) local_unnamed_addr #2

attributes #0 = { uwtable mustprogress "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nofree nosync nounwind readnone speculatable willreturn }
attributes #2 = { "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5}
!llvm.ident = !{!6}

!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 12.0.1 (https://github.com/microsoft/vcpkg.git 2a31089e777fc187f1cc05338250b8e1810cfb52)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
!1 = !DIFile(filename: "byval_tail_gep.cpp", directory: "/")
!2 = !{}
!3 = !{i32 7, !"Dwarf Version", i32 4}
!4 = !{i32 2, !"Debug Info Version", i32 3}
!5 = !{i32 1, !"wchar_size", i32 4}
!6 = !{!"clang version 12.0.1 (https://github.com/microsoft/vcpkg.git 2a31089e777fc187f1cc05338250b8e1810cfb52)"}
!7 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foo11big_derived", scope: !8, file: !8, line: 8, type: !9, scopeLine: 8, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !22)
!8 = !DIFile(filename: "byval_tail_gep.cpp", directory: "/")
!9 = !DISubroutineType(types: !10)
!10 = !{!11, !12}
!11 = !DIBasicType(name: "double", size: 64, encoding: DW_ATE_float)
!12 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "big_derived", file: !8, line: 4, size: 1024, flags: DIFlagTypePassByValue, elements: !13, identifier: "_ZTS11big_derived")
!13 = !{!14}
!14 = !DIDerivedType(tag: DW_TAG_inheritance, scope: !12, baseType: !15, extraData: i32 0)
!15 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "big_base", file: !8, line: 1, size: 1024, flags: DIFlagTypePassByValue, elements: !16, identifier: "_ZTS8big_base")
!16 = !{!17}
!17 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !15, file: !8, line: 2, baseType: !18, size: 1024)
!18 = !DICompositeType(tag: DW_TAG_array_type, baseType: !19, size: 1024, elements: !20)
!19 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!20 = !{!21}
!21 = !DISubrange(count: 32)
!22 = !{!23}
!23 = !DILocalVariable(name: "x", arg: 1, scope: !7, file: !8, line: 8, type: !12)
!24 = !DILocation(line: 8, column: 31, scope: !7)
!25 = !DILocation(line: 9, column: 14, scope: !7)
!26 = !DILocation(line: 9, column: 10, scope: !7)
!27 = !DILocation(line: 9, column: 3, scope: !7)
!28 = !DISubprogram(name: "bar", linkageName: "_Z3bar8big_base", scope: !8, file: !8, line: 7, type: !29, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !2)
!29 = !DISubroutineType(types: !30)
!30 = !{!11, !15}
53 changes: 53 additions & 0 deletions tests/tools/decomp/byval_tail_nogep.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
; ModuleID = 'byval_tail_nogep.cpp'
source_filename = "byval_tail_nogep.cpp"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

%struct.big_base = type { [32 x i32] }

; Function Attrs: uwtable mustprogress
; foo (debug name, linkage name _Z3foo8big_base): receives %x as a
; `byval(%struct.big_base)` pointer and forwards it unchanged to
; @_Z3bar8big_base, again as a `byval(%struct.big_base)` argument, in a tail
; call.
define dso_local double @_Z3foo8big_base(%struct.big_base* nocapture readonly byval(%struct.big_base) align 8 %x) local_unnamed_addr #0 !dbg !7 {
entry:
call void @llvm.dbg.declare(metadata %struct.big_base* %x, metadata !20, metadata !DIExpression()), !dbg !21
; The incoming byval argument is used directly as the byval call operand.
%call = tail call double @_Z3bar8big_base(%struct.big_base* nonnull byval(%struct.big_base) align 8 %x), !dbg !22
ret double %call, !dbg !23
}

; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1

declare !dbg !24 dso_local double @_Z3bar8big_base(%struct.big_base* byval(%struct.big_base) align 8) local_unnamed_addr #2

attributes #0 = { uwtable mustprogress "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nofree nosync nounwind readnone speculatable willreturn }
attributes #2 = { "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5}
!llvm.ident = !{!6}

!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 12.0.1 (https://github.com/microsoft/vcpkg.git 2a31089e777fc187f1cc05338250b8e1810cfb52)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
!1 = !DIFile(filename: "byval_tail_nogep.cpp", directory: "/")
!2 = !{}
!3 = !{i32 7, !"Dwarf Version", i32 4}
!4 = !{i32 2, !"Debug Info Version", i32 3}
!5 = !{i32 1, !"wchar_size", i32 4}
!6 = !{!"clang version 12.0.1 (https://github.com/microsoft/vcpkg.git 2a31089e777fc187f1cc05338250b8e1810cfb52)"}
!7 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foo8big_base", scope: !8, file: !8, line: 5, type: !9, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !19)
!8 = !DIFile(filename: "byval_tail_nogep.cpp", directory: "/")
!9 = !DISubroutineType(types: !10)
!10 = !{!11, !12}
!11 = !DIBasicType(name: "double", size: 64, encoding: DW_ATE_float)
!12 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "big_base", file: !8, line: 1, size: 1024, flags: DIFlagTypePassByValue, elements: !13, identifier: "_ZTS8big_base")
!13 = !{!14}
!14 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !12, file: !8, line: 2, baseType: !15, size: 1024)
!15 = !DICompositeType(tag: DW_TAG_array_type, baseType: !16, size: 1024, elements: !17)
!16 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!17 = !{!18}
!18 = !DISubrange(count: 32)
!19 = !{!20}
!20 = !DILocalVariable(name: "x", arg: 1, scope: !7, file: !8, line: 5, type: !12)
!21 = !DILocation(line: 5, column: 28, scope: !7)
!22 = !DILocation(line: 6, column: 10, scope: !7)
!23 = !DILocation(line: 6, column: 3, scope: !7)
!24 = !DISubprogram(name: "bar", linkageName: "_Z3bar8big_base", scope: !8, file: !8, line: 4, type: !9, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !2)