Merge branch 'main' into main
sacpis authored Aug 11, 2024
2 parents 52e8ed9 + 7924b17 commit 084aaff
Showing 13 changed files with 382 additions and 59 deletions.
2 changes: 2 additions & 0 deletions include/cudaq/Optimizer/Builder/Runtime.h
@@ -23,5 +23,7 @@ static constexpr unsigned cudaqGenPrefixLength = sizeof(cudaqGenPrefixName) - 1;
/// compile time (see `cudaqGenPrefixName`) or it can be rewritten to call back
/// to the runtime library (and be handled at runtime).
static constexpr const char launchKernelFuncName[] = "altLaunchKernel";
static constexpr const char launchKernelVersion2FuncName[] =
"altLaunchKernelUsingLocalJIT";

} // namespace cudaq::runtime
3 changes: 3 additions & 0 deletions include/cudaq/Optimizer/Transforms/Passes.h
@@ -80,4 +80,7 @@ inline std::unique_ptr<mlir::Pass> createQuantumMemToReg() {
return createMemToReg(m2rOpt);
}

/// Name of `quake.wire_set` generated prior to mapping
static constexpr const char topologyAgnosticWiresetName[] = "wires";

} // namespace cudaq::opt
27 changes: 26 additions & 1 deletion include/cudaq/Optimizer/Transforms/Passes.td
@@ -11,6 +11,23 @@

include "mlir/Pass/PassBase.td"

def AddWireset : Pass<"add-wireset", "mlir::ModuleOp"> {
let summary = "Adds a topology-less `quake.wireset` to the module";
let description = [{
Adds a `quake.wireset` operation without topological information to the module.
}];
}

def AssignWireIndices : Pass<"assign-wire-indices", "mlir::func::FuncOp"> {
let summary = "Replaces wires with wires from a `quake.wireset`";
let description = [{
Replaces all instances of `quake.null_wire_op` with `quake.borrow_wire_op`s
from a common `quake.wireset` without any topological information.
Each wire is assigned a unique identifier (the index into the
`quake.wireset`) through this process.
}];
}
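Both passes are exposed through the pipeline names declared above: `add-wireset` runs on the module and `assign-wire-indices` runs on each `func.func`. As a rough sketch only (the pass-manager setup and dialect/pass registration are assumed to happen elsewhere), they could be scheduled from C++ through MLIR's textual pipeline parser:

    #include "mlir/Pass/PassManager.h"
    #include "mlir/Pass/PassRegistry.h"

    // Sketch: append the two new wireset passes to a pass manager that is
    // assumed to be anchored on builtin.module, using the pipeline names
    // declared in Passes.td.
    mlir::LogicalResult scheduleWiresetPasses(mlir::OpPassManager &modulePM) {
      return mlir::parsePassPipeline(
          "add-wireset,func.func(assign-wire-indices)", modulePM);
    }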

def ApplyControlNegations :
Pass<"apply-control-negations", "mlir::func::FuncOp"> {
let summary =
@@ -279,6 +296,11 @@ def GenerateKernelExecution : Pass<"kernel-execution", "mlir::ModuleOp"> {
Generate the kernel execution thunks. The kernel execution thunks allow
the control side (C++ code) to launch quantum kernels. This pass
generates the required glue code.

Specifying the alt-launch=2 option will generate different code that makes
use of library-side argument conversion and the argument synthesis pass.
More generally, this option can be used when JIT compiling kernels on the
client/host/local processor.
}];

let dependentDialects = ["cudaq::cc::CCDialect", "mlir::LLVM::LLVMDialect"];
@@ -288,11 +310,14 @@ def GenerateKernelExecution : Pass<"kernel-execution", "mlir::ModuleOp"> {
/*default=*/"\"-\"", "Name of output file.">,
Option<"startingArgIdx", "starting-arg-idx", "std::size_t", /*default=*/"0",
"The starting argument index for the argsCreator.">,
Option<"altLaunchVersion", "alt-launch", "std::size_t", /*default=*/"1",
"Specify the version of altLaunchKernel to be used.">
];
}
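With the new option in place, the version-2 code path can be requested through the standard pass-option syntax on the `kernel-execution` pipeline entry (alt-launch defaults to 1). A minimal sketch, again assuming the surrounding pass-manager setup:

    #include "mlir/Pass/PassManager.h"
    #include "mlir/Pass/PassRegistry.h"

    // Sketch: request the library-side-conversion / local-JIT launch codegen
    // by setting alt-launch=2 on the kernel-execution pass.
    mlir::LogicalResult useLocalJitLaunch(mlir::OpPassManager &modulePM) {
      return mlir::parsePassPipeline("kernel-execution{alt-launch=2}", modulePM);
    }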

def GetConcreteMatrix : Pass<"get-concrete-matrix", "mlir::func::FuncOp"> {
let summary = "Replace the unitary matrix generator function with concrete matrix.";
let summary =
"Replace the unitary matrix generator function with concrete matrix.";
let description = [{
Given a custom operation whose generator attribute is another function
within the module, such that if `LiftArrayAlloc` pass has run, there will
6 changes: 6 additions & 0 deletions lib/Optimizer/Builder/Intrinsics.cpp
@@ -298,6 +298,12 @@ static constexpr IntrinsicCode intrinsicTable[] = {
R"#(
func.func private @altLaunchKernel(!cc.ptr<i8>, !cc.ptr<i8>, !cc.ptr<i8>, i64, i64) -> ())#"},

{cudaq::runtime::
launchKernelVersion2FuncName, // altLaunchKernelUsingLocalJIT
{},
R"#(
func.func private @altLaunchKernelUsingLocalJIT(!cc.ptr<i8>, !cc.ptr<i8>, !cc.ptr<i8>) -> ())#"},

{"free", {}, "func.func private @free(!cc.ptr<i8>) -> ()"},

{cudaq::llvmMemCopyIntrinsic, // llvm.memcpy.p0i8.p0i8.i64
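The MLIR-side declaration above takes three `!cc.ptr<i8>` operands and returns nothing. A plausible C-linkage prototype on the runtime-library side is sketched below; the parameter names and C++ types are assumptions inferred from how GenKernelExecution.cpp populates the call (kernel name, pointer to a std::vector<void*> of argument addresses, result buffer), not declarations taken from this commit.

    // Sketch only: a C-linkage prototype shape matching
    //   func.func private @altLaunchKernelUsingLocalJIT(
    //       !cc.ptr<i8>, !cc.ptr<i8>, !cc.ptr<i8>) -> ()
    // The real runtime declaration may use different (pointer-compatible) types.
    extern "C" void altLaunchKernelUsingLocalJIT(
        const char *kernelName, // registered kernel name
        void *argPtrVector,     // address of a std::vector<void*> of argument addresses
        void *resultBuffer);    // slot the runtime may fill with a result pointer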
1 change: 1 addition & 0 deletions lib/Optimizer/Transforms/CMakeLists.txt
@@ -49,6 +49,7 @@ add_cudaq_library(OptTransforms
RefToVeqAlloc.cpp
RegToMem.cpp
StatePreparation.cpp
WiresToWiresets.cpp

DEPENDS
OptTransformsPassIncGen
195 changes: 150 additions & 45 deletions lib/Optimizer/Transforms/GenKernelExecution.cpp
@@ -1116,10 +1116,11 @@ class GenerateKernelExecution
/// library. Pass along the thunk, so the runtime can call the quantum
/// circuit. These entry points are `operator()` member functions in a class,
/// so account for the `this` argument here.
void genNewHostEntryPoint(Location loc, OpBuilder &builder,
FunctionType funcTy, cudaq::cc::StructType structTy,
LLVM::GlobalOp kernelNameObj, func::FuncOp thunk,
func::FuncOp rewriteEntry, bool addThisPtr) {
void genNewHostEntryPoint1(Location loc, OpBuilder &builder,
FunctionType funcTy,
cudaq::cc::StructType structTy,
LLVM::GlobalOp kernelNameObj, func::FuncOp thunk,
func::FuncOp rewriteEntry, bool addThisPtr) {
auto *ctx = builder.getContext();
auto i64Ty = builder.getI64Type();
auto offset = funcTy.getNumInputs();
@@ -1392,6 +1393,91 @@ class GenerateKernelExecution
builder.create<func::ReturnOp>(loc, results);
}

void genNewHostEntryPoint2(Location loc, OpBuilder &builder,
FunctionType devFuncTy,
LLVM::GlobalOp kernelNameObj,
func::FuncOp hostFunc, bool addThisPtr) {
const bool hiddenSRet = cudaq::opt::factory::hasHiddenSRet(devFuncTy);
const unsigned count =
cudaq::cc::numberOfHiddenArgs(addThisPtr, hiddenSRet);
auto *ctx = builder.getContext();
auto i8PtrTy = cudaq::cc::PointerType::get(builder.getI8Type());

// 0) Point our builder into the entry block of the function.
Block *hostFuncEntryBlock = hostFunc.addEntryBlock();

OpBuilder::InsertionGuard guard(builder);
builder.setInsertionPointToStart(hostFuncEntryBlock);

// 1) Allocate and initialize a std::vector<void*> object.
auto stdVec = builder.create<cudaq::cc::AllocaOp>(
loc, cudaq::opt::factory::stlVectorType(i8PtrTy));
auto arrPtrTy = cudaq::cc::ArrayType::get(ctx, i8PtrTy, count);
Value buffer = builder.create<cudaq::cc::AllocaOp>(loc, arrPtrTy);
auto i64Ty = builder.getI64Type();
auto buffSize = builder.create<cudaq::cc::SizeOfOp>(loc, i64Ty, arrPtrTy);
auto ptrPtrTy = cudaq::cc::PointerType::get(i8PtrTy);
auto cast1 = builder.create<cudaq::cc::CastOp>(loc, ptrPtrTy, buffer);
auto ptr3Ty = cudaq::cc::PointerType::get(ptrPtrTy);
auto stdVec0 = builder.create<cudaq::cc::CastOp>(loc, ptr3Ty, stdVec);
builder.create<cudaq::cc::StoreOp>(loc, cast1, stdVec0);
auto cast2 = builder.create<cudaq::cc::CastOp>(loc, i64Ty, buffer);
auto endBuff = builder.create<arith::AddIOp>(loc, cast2, buffSize);
auto cast3 = builder.create<cudaq::cc::CastOp>(loc, ptrPtrTy, endBuff);
auto stdVec1 = builder.create<cudaq::cc::ComputePtrOp>(
loc, ptr3Ty, stdVec, ArrayRef<cudaq::cc::ComputePtrArg>{1});
builder.create<cudaq::cc::StoreOp>(loc, cast3, stdVec1);
auto stdVec2 = builder.create<cudaq::cc::ComputePtrOp>(
loc, ptr3Ty, stdVec, ArrayRef<cudaq::cc::ComputePtrArg>{2});
builder.create<cudaq::cc::StoreOp>(loc, cast3, stdVec2);
auto zero = builder.create<arith::ConstantIntOp>(loc, 0, 64);
auto nullPtr = builder.create<cudaq::cc::CastOp>(loc, i8PtrTy, zero);

// 2) Iterate over the arguments passed in and populate the vector.
SmallVector<BlockArgument> blockArgs{dropAnyHiddenArguments(
hostFuncEntryBlock->getArguments(), devFuncTy, addThisPtr)};
for (auto iter : llvm::enumerate(blockArgs)) {
std::int32_t i = iter.index();
auto pos = builder.create<cudaq::cc::ComputePtrOp>(
loc, ptrPtrTy, buffer, ArrayRef<cudaq::cc::ComputePtrArg>{i});
auto blkArg = iter.value();
if (isa<cudaq::cc::PointerType>(blkArg.getType())) {
auto castArg = builder.create<cudaq::cc::CastOp>(loc, i8PtrTy, blkArg);
builder.create<cudaq::cc::StoreOp>(loc, castArg, pos);
continue;
}
auto temp = builder.create<cudaq::cc::AllocaOp>(loc, blkArg.getType());
builder.create<cudaq::cc::StoreOp>(loc, blkArg, temp);
auto castTemp = builder.create<cudaq::cc::CastOp>(loc, i8PtrTy, temp);
builder.create<cudaq::cc::StoreOp>(loc, castTemp, pos);
}

auto resultBuffer = builder.create<cudaq::cc::AllocaOp>(loc, i8PtrTy);
builder.create<cudaq::cc::StoreOp>(loc, nullPtr, resultBuffer);
auto castResultBuffer =
builder.create<cudaq::cc::CastOp>(loc, i8PtrTy, resultBuffer);
auto castStdvec = builder.create<cudaq::cc::CastOp>(loc, i8PtrTy, stdVec);
Value loadKernName = builder.create<LLVM::AddressOfOp>(
loc, cudaq::opt::factory::getPointerType(kernelNameObj.getType()),
kernelNameObj.getSymName());
auto castKernelNameObj =
builder.create<cudaq::cc::CastOp>(loc, i8PtrTy, loadKernName);
builder.create<func::CallOp>(
loc, std::nullopt, cudaq::runtime::launchKernelVersion2FuncName,
ArrayRef<Value>{castKernelNameObj, castStdvec, castResultBuffer});

// FIXME: Drop any results on the floor for now and return random data left
// on the stack. (Maintains parity with existing kernel launch.)
if (hostFunc.getFunctionType().getResults().empty()) {
builder.create<func::ReturnOp>(loc);
return;
}
// There can only be 1 return type in C++, so this is safe.
Value garbage = builder.create<cudaq::cc::UndefOp>(
loc, hostFunc.getFunctionType().getResult(0));
builder.create<func::ReturnOp>(loc, garbage);
}
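To make the builder calls above easier to follow, here is a hand-written C++ approximation of what genNewHostEntryPoint2 emits for a hypothetical kernel taking (double, int*). The function and kernel names are illustrative, and details such as hidden this/sret arguments are omitted.

    #include <vector>

    extern "C" void altLaunchKernelUsingLocalJIT(const char *, void *, void *);

    // Approximation of the generated host entry point for a kernel(double, int*).
    // Value arguments are spilled to a stack slot and their address is stored;
    // pointer arguments are stored directly.
    void hostEntryPointApprox(double d, int *p) {
      std::vector<void *> args(2);
      double dSlot = d;       // stack temporary for the by-value argument
      args[0] = &dSlot;
      args[1] = p;            // pointer argument forwarded as-is
      void *result = nullptr; // null result buffer, as in the generated code
      altLaunchKernelUsingLocalJIT("kernelName", &args, &result);
      // Any kernel result is currently dropped (see the FIXME above).
    }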

/// A kernel function that takes a quantum type argument (also known as a pure
/// device kernel) cannot be called directly from C++ (classical) code. It
/// must be called via other quantum code.
@@ -1422,11 +1508,18 @@ class GenerateKernelExecution
if (!mangledNameMap || mangledNameMap.empty())
return;
auto irBuilder = cudaq::IRBuilder::atBlockEnd(module.getBody());
if (failed(irBuilder.loadIntrinsic(module,
cudaq::runtime::launchKernelFuncName))) {
module.emitError("could not load altLaunchKernel intrinsic.");
return;
}
if (altLaunchVersion == 1)
if (failed(irBuilder.loadIntrinsic(
module, cudaq::runtime::launchKernelFuncName))) {
module.emitError("could not load altLaunchKernel intrinsic.");
return;
}
if (altLaunchVersion == 2)
if (failed(irBuilder.loadIntrinsic(
module, cudaq::runtime::launchKernelVersion2FuncName))) {
module.emitError("could not load altLaunchKernelUsingLocalJIT intrinsic.");
return;
}

auto loc = module.getLoc();
auto ptrType = cudaq::cc::PointerType::get(builder.getI8Type());
@@ -1526,37 +1619,47 @@ class GenerateKernelExecution
cudaq::opt::factory::toHostSideFuncType(funcTy, hasThisPtr, module);
}

// Generate the function that computes the return offset.
genReturnOffsetFunction(loc, builder, funcTy, structTy, classNameStr);
func::FuncOp thunk;
func::FuncOp argsCreatorFunc;

// Generate thunk, `<kernel>.thunk`, to call back to the MLIR code.
auto thunk = genThunkFunction(loc, builder, classNameStr, structTy,
funcTy, funcOp);
if (altLaunchVersion == 1) {
// Generate the function that computes the return offset.
genReturnOffsetFunction(loc, builder, funcTy, structTy, classNameStr);

// Generate the argsCreator function used by synthesis.
mlir::func::FuncOp argsCreatorFunc;
if (startingArgIdx == 0) {
argsCreatorFunc =
genKernelArgsCreatorFunction(loc, builder, funcTy, structTy,
classNameStr, hostFuncTy, hasThisPtr);
} else {
// We are operating in a very special case where we want the argsCreator
// function to ignore the first `startingArgIdx` arguments. In this
// situation, the argsCreator function will not be compatible with the
// other helper functions created in this pass, so it is assumed that
// the caller is OK with that.
auto structTy_argsCreator =
cudaq::opt::factory::buildInvokeStructType(funcTy, startingArgIdx);
argsCreatorFunc = genKernelArgsCreatorFunction(
loc, builder, funcTy, structTy_argsCreator, classNameStr,
hostFuncTy, hasThisPtr);
// Generate thunk, `<kernel>.thunk`, to call back to the MLIR code.
thunk = genThunkFunction(loc, builder, classNameStr, structTy, funcTy,
funcOp);

// Generate the argsCreator function used by synthesis.
if (startingArgIdx == 0) {
argsCreatorFunc = genKernelArgsCreatorFunction(
loc, builder, funcTy, structTy, classNameStr, hostFuncTy,
hasThisPtr);
} else {
// We are operating in a very special case where we want the
// argsCreator function to ignore the first `startingArgIdx`
// arguments. In this situation, the argsCreator function will not be
// compatible with the other helper functions created in this pass, so
// it is assumed that the caller is OK with that.
auto structTy_argsCreator =
cudaq::opt::factory::buildInvokeStructType(funcTy,
startingArgIdx);
argsCreatorFunc = genKernelArgsCreatorFunction(
loc, builder, funcTy, structTy_argsCreator, classNameStr,
hostFuncTy, hasThisPtr);
}
}

// Generate a new mangled function on the host side to call the
// callback function.
if (hostEntryNeeded)
genNewHostEntryPoint(loc, builder, funcTy, structTy, kernelNameObj,
thunk, hostFunc, hasThisPtr);
if (hostEntryNeeded) {
if (altLaunchVersion == 1)
genNewHostEntryPoint1(loc, builder, funcTy, structTy, kernelNameObj,
thunk, hostFunc, hasThisPtr);
else
genNewHostEntryPoint2(loc, builder, funcTy, kernelNameObj, hostFunc,
hasThisPtr);
}

// Generate a function at startup to register this kernel as having
// been processed for kernel execution.
@@ -1576,17 +1679,19 @@
builder.create<func::CallOp>(loc, std::nullopt, cudaqRegisterKernelName,
ValueRange{castKernRef});

// Register the argsCreator too
auto ptrPtrType = cudaq::cc::PointerType::get(ptrType);
auto argsCreatorFuncType = FunctionType::get(
ctx, {ptrPtrType, ptrPtrType}, {builder.getI64Type()});
Value loadArgsCreator = builder.create<func::ConstantOp>(
loc, argsCreatorFuncType, argsCreatorFunc.getName());
auto castLoadArgsCreator = builder.create<cudaq::cc::FuncToPtrOp>(
loc, ptrType, loadArgsCreator);
builder.create<func::CallOp>(
loc, std::nullopt, cudaqRegisterArgsCreator,
ValueRange{castKernRef, castLoadArgsCreator});
if (altLaunchVersion == 1) {
// Register the argsCreator too
auto ptrPtrType = cudaq::cc::PointerType::get(ptrType);
auto argsCreatorFuncType = FunctionType::get(
ctx, {ptrPtrType, ptrPtrType}, {builder.getI64Type()});
Value loadArgsCreator = builder.create<func::ConstantOp>(
loc, argsCreatorFuncType, argsCreatorFunc.getName());
auto castLoadArgsCreator = builder.create<cudaq::cc::FuncToPtrOp>(
loc, ptrType, loadArgsCreator);
builder.create<func::CallOp>(
loc, std::nullopt, cudaqRegisterArgsCreator,
ValueRange{castKernRef, castLoadArgsCreator});
}

// Check if this is a lambda mangled name
auto demangledPtr = abi::__cxa_demangle(mangledName.str().c_str(),
(Diffs for the remaining 7 changed files are not shown.)
