Skip to content

Commit

Permalink
Added canonicalizer for MIR
Browse files Browse the repository at this point in the history
  • Loading branch information
SchwarzXia committed Nov 17, 2023
1 parent 87afff7 commit 47d8723
Show file tree
Hide file tree
Showing 5 changed files with 107 additions and 42 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,5 @@
/requirements.txt

/compile_commands.json

.vscode
5 changes: 2 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ FROM ubuntu:22.04
RUN apt-get update && apt-get install -y clang python3 python3-pip git curl
ARG bazelisk_version=1.17.0
RUN curl -L https://github.com/bazelbuild/bazelisk/releases/download/v${bazelisk_version}/bazelisk-linux-amd64 > /usr/bin/bazelisk && chmod +x /usr/bin/bazelisk && ln -s /usr/bin/bazelisk /usr/bin/bazel
WORKDIR /gematria
WORKDIR /granlte
COPY . .
RUN pip3 install -r requirements.in

RUN pip3 install -r requirements.in
2 changes: 1 addition & 1 deletion gematria/datasets/bhive_importer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ TEST_F(BHiveImporterTest, NonStandardColumns) {
}

TEST_F(BHiveImporterTest, MIRDatasetBasicTest) {
EXPECT_THAT(x86_bhive_importer_->LoadMIRModule("/u9/z277zhu/research/gematria/sample_dataset/data.mir"),
EXPECT_THAT(x86_bhive_importer_->LoadMIRModule("/granlte/sample_dataset/data.mir"),
IsOk());
EXPECT_THAT(x86_bhive_importer_->ParseMIRCsvLine(kSourceName, "a,b,BB_13,2.37", 2,
3, kScaling),
Expand Down
136 changes: 98 additions & 38 deletions gematria/llvm/canonicalizer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "lib/Target/X86/MCTargetDesc/X86BaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
Expand Down Expand Up @@ -67,6 +68,7 @@ void ReplaceExprOperands(llvm::MCInst& instruction) {
}

// TODO: Write a ReplaceExprOperands overload for llvm::MachineInstr that
// replaces unsupported operands. This may turn out not to be necessary.

} // namespace

Expand Down Expand Up @@ -101,6 +103,12 @@ std::string Canonicalizer::GetRegisterNameOrEmpty(
return target_machine_.getMCRegisterInfo()->getName(operand.getReg());
}

// Returns the canonical name of the register held by `operand`, or an empty
// string when the operand holds no register (register number 0).
//
// This method must not be called when `operand.isReg()` is false.
//
// Every non-empty register is currently mapped to the same placeholder name.
// TODO: decide whether all virtual registers should share a single name.
std::string Canonicalizer::GetRegisterNameOrEmpty(
    const llvm::MachineOperand& operand) const {
  assert(operand.isReg());
  // An empty register must canonicalize to an empty name, as the method name
  // promises. Callers that decode a memory reference pass the unused
  // base/index/segment components through this method and would otherwise
  // report them as real registers.
  if (operand.getReg() == 0) return std::string();
  return "register";
}

namespace {

llvm::SmallVector<std::string_view, 2> SplitByAny(std::string_view str,
Expand Down Expand Up @@ -253,7 +261,6 @@ X86Canonicalizer::X86Canonicalizer(const llvm::TargetMachine* target_machine)
X86Canonicalizer::~X86Canonicalizer() = default;


// TODO:PlatformSpecificInstructionFromMI(const llvm::MachineInstrunction)
Instruction X86Canonicalizer::PlatformSpecificInstructionFromMachineInstr(const llvm::MachineInstr & MI) const {
// NOTE (lukezhuz): For now, we assume that all memory references are aliased.
// This is an overly conservative but safe choice. Note that Ithemal chose the
Expand All @@ -268,47 +275,45 @@ Instruction X86Canonicalizer::PlatformSpecificInstructionFromMachineInstr(const
Instruction instruction;
instruction.llvm_mnemonic =
target_machine_.getMCInstrInfo()->getName(MI.getOpcode());
// TODO: Write AddX86VendorMnemonicAndPrefixes method for MI
AddMIRVendorMnemonicAndPrefixes(*target_machine_.getMCSubtargetInfo(), MI,
instruction);

// const llvm::MCInstrDesc& descriptor = instr_info.get(MI.getOpcode());
// if (descriptor.mayLoad()) {
// instruction.input_operands.push_back(
// InstructionOperand::MemoryLocation(kWholeMemoryAliasGroup));
// }
// if (descriptor.mayStore()) {
// instruction.output_operands.push_back(
// InstructionOperand::MemoryLocation(kWholeMemoryAliasGroup));
// }

// const int memory_operand_index = GetX86MemoryOperandPosition(descriptor);
// for (int operand_index = 0; operand_index < descriptor.getNumOperands();
// ++operand_index) {
// const bool is_output_operand = operand_index < descriptor.getNumDefs();
// const bool is_address_computation_tuple =
// operand_index == memory_operand_index;
// // TODO: Write AddOperand method for MI
// AddOperand(MI, /*operand_index=*/operand_index,
// /*is_output_operand=*/is_output_operand,
// /*is_address_computation_tuple=*/is_address_computation_tuple,
// instruction);
// if (is_address_computation_tuple) {
// // A memory reference is represented as a 5-tuple. The whole 5-tuple is
// // processed in one CanonicalizeOperand() call and we need to skip the
// // remaining 4 elements here.
// operand_index += 4;
// }
// }
const llvm::MCInstrDesc& descriptor = instr_info.get(MI.getOpcode());
if (descriptor.mayLoad()) {
instruction.input_operands.push_back(
InstructionOperand::MemoryLocation(kWholeMemoryAliasGroup));
}
if (descriptor.mayStore()) {
instruction.output_operands.push_back(
InstructionOperand::MemoryLocation(kWholeMemoryAliasGroup));
}

const int memory_operand_index = GetX86MemoryOperandPosition(descriptor);
for (int operand_index = 0; operand_index < descriptor.getNumOperands();
++operand_index) {
const bool is_output_operand = operand_index < descriptor.getNumDefs();
const bool is_address_computation_tuple =
operand_index == memory_operand_index;
AddOperand(MI, /*operand_index=*/operand_index,
/*is_output_operand=*/is_output_operand,
/*is_address_computation_tuple=*/is_address_computation_tuple,
instruction);
if (is_address_computation_tuple) {
// A memory reference is represented as a 5-tuple. The whole 5-tuple is
// processed in one CanonicalizeOperand() call and we need to skip the
// remaining 4 elements here.
operand_index += 4;
}
}

// for (llvm::MCPhysReg implicit_output_register : descriptor.implicit_defs()) {
// instruction.implicit_output_operands.push_back(InstructionOperand::Register(
// register_info.getName(implicit_output_register)));
// }
// for (llvm::MCPhysReg implicit_input_register : descriptor.implicit_uses()) {
// instruction.implicit_input_operands.push_back(InstructionOperand::Register(
// register_info.getName(implicit_input_register)));
// }
for (llvm::MCPhysReg implicit_output_register : descriptor.implicit_defs()) {
instruction.implicit_output_operands.push_back(InstructionOperand::Register(
register_info.getName(implicit_output_register)));
}
for (llvm::MCPhysReg implicit_input_register : descriptor.implicit_uses()) {
instruction.implicit_input_operands.push_back(InstructionOperand::Register(
register_info.getName(implicit_input_register)));
}
return instruction;
}

Expand Down Expand Up @@ -427,4 +432,59 @@ void X86Canonicalizer::AddOperand(const llvm::MCInst& mcinst, int operand_index,
}
}

// Converts operand number `operand_index` of the machine instruction `mi` to
// its canonicalized form and appends it to `instruction`.
//
// Args:
//   mi: the machine instruction whose operand is converted.
//   operand_index: the index of the operand in `mi`; must be a valid operand
//     index.
//   is_output_operand: when true, the operand is appended to
//     `instruction.output_operands`; otherwise it is appended to
//     `instruction.input_operands`.
//   is_address_computation_tuple: when true, `operand_index` is the first
//     element of an X86 memory reference 5-tuple (base register, scale, index
//     register, displacement, segment register), and all five operands are
//     consumed and emitted as a single address operand.
//   instruction: the canonicalized instruction that receives the operand.
//
// Empty register operands that are not part of a memory 5-tuple are skipped.
// Operands of an unsupported kind are printed to llvm::errs() and trigger an
// assertion failure.
void X86Canonicalizer::AddOperand(const llvm::MachineInstr& mi, int operand_index,
                                  bool is_output_operand,
                                  bool is_address_computation_tuple,
                                  Instruction& instruction) const {
  assert(operand_index < mi.getNumOperands());
  assert(!is_address_computation_tuple ||
         (operand_index + 5 <= mi.getNumOperands()));

  const llvm::MachineOperand& operand = mi.getOperand(operand_index);
  // Skip empty register operand, but not if they are part of a memory 5-tuple.
  // Empty register in a memory 5-tuple is for when the address computation uses
  // only a subset of components.
  if (!is_address_computation_tuple && operand.isReg() && operand.getReg() == 0)
    return;

  std::vector<InstructionOperand>& operand_list =
      is_output_operand ? instruction.output_operands
                        : instruction.input_operands;
  if (is_address_computation_tuple) {
    // TODO: Check if MIR has address computation tuple.
    // NOTE(review): the displacement and scale are read with getImm(); this
    // assumes both are plain immediates in MIR (i.e. not a global address,
    // constant pool index or similar) - confirm against the datasets.
    std::string base_register = GetRegisterNameOrEmpty(
        mi.getOperand(operand_index + llvm::X86::AddrBaseReg));
    const int64_t displacement =
        mi.getOperand(operand_index + llvm::X86::AddrDisp).getImm();
    std::string index_register = GetRegisterNameOrEmpty(
        mi.getOperand(operand_index + llvm::X86::AddrIndexReg));
    const int64_t scaling =
        mi.getOperand(operand_index + llvm::X86::AddrScaleAmt).getImm();
    std::string segment_register = GetRegisterNameOrEmpty(
        mi.getOperand(operand_index + llvm::X86::AddrSegmentReg));
    operand_list.push_back(InstructionOperand::Address(
        /* base_register= */ std::move(base_register),
        /* displacement= */ displacement,
        /* index_register= */ std::move(index_register),
        /* scaling= */ static_cast<int>(scaling),
        /* segment_register= */ std::move(segment_register)));
  } else if (operand.isReg()) {
    operand_list.push_back(
        InstructionOperand::Register(GetRegisterNameOrEmpty(operand)));
  } else if (operand.isImm()) {
    operand_list.push_back(
        InstructionOperand::ImmediateValue(operand.getImm()));
  } else if (operand.isCImm()) {
    operand_list.push_back(
        InstructionOperand::ImmediateValue(operand.getCImm()->getZExtValue()));
  } else if (operand.isFPImm()) {
    // MachineOperand::getFPImm() returns a pointer to an llvm::ConstantFP, not
    // the raw bits of the value; bit-casting it would reinterpret the pointer
    // itself as a double. Read the actual value and convert it to IEEE double
    // first, so that narrower or wider floating point types are handled too.
    llvm::APFloat fp_value = operand.getFPImm()->getValueAPF();
    bool loses_info = false;
    static_cast<void>(fp_value.convert(llvm::APFloat::IEEEdouble(),
                                       llvm::APFloat::rmNearestTiesToEven,
                                       &loses_info));
    operand_list.push_back(
        InstructionOperand::FpImmediateValue(fp_value.convertToDouble()));
  } else {
    llvm::errs() << "Unsupported operand type: ";
    operand.print(llvm::errs());
    llvm::errs() << "\n";
    assert(false);
  }
}

} // namespace gematria
4 changes: 4 additions & 0 deletions gematria/llvm/canonicalizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ class Canonicalizer {
// the operand is an "undefined" operand.
// This method must not be called when `operand.isReg()` is false.
std::string GetRegisterNameOrEmpty(const llvm::MCOperand& operand) const;
std::string GetRegisterNameOrEmpty(const llvm::MachineOperand& operand) const;

const llvm::TargetMachine& target_machine_;
};
Expand All @@ -87,6 +88,9 @@ class X86Canonicalizer final : public Canonicalizer {
void AddOperand(const llvm::MCInst& mcinst, int operand_index,
bool is_output_operand, bool is_address_computation_tuple,
Instruction& instruction) const;
void AddOperand(const llvm::MachineInstr& mi, int operand_index,
bool is_output_operand, bool is_address_computation_tuple,
Instruction& instruction) const;

std::unique_ptr<llvm::MCInstPrinter> mcinst_printer_;
};
Expand Down

0 comments on commit 47d8723

Please sign in to comment.