Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/NVIDIA/cuda-quantum into st…
Browse files Browse the repository at this point in the history
…ate-synthesis-remote-sim-new
  • Loading branch information
annagrin committed Aug 15, 2024
2 parents 7021438 + a817c65 commit 60c1389
Show file tree
Hide file tree
Showing 13 changed files with 635 additions and 451 deletions.
5 changes: 3 additions & 2 deletions include/cudaq/Optimizer/Builder/Runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ static constexpr unsigned cudaqGenPrefixLength = sizeof(cudaqGenPrefixName) - 1;
/// compile time (see `cudaqGenPrefixName`) or it can be rewritten to call back
/// to the runtime library (and be handled at runtime).
static constexpr const char launchKernelFuncName[] = "altLaunchKernel";
static constexpr const char launchKernelVersion2FuncName[] =
"altLaunchKernelUsingLocalJIT";
static constexpr const char launchKernelStreamlinedFuncName[] =
"streamlinedLaunchKernel";
static constexpr const char launchKernelHybridFuncName[] = "hybridLaunchKernel";

} // namespace cudaq::runtime
25 changes: 23 additions & 2 deletions include/cudaq/Optimizer/Transforms/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,27 @@ def GenerateKernelExecution : Pass<"kernel-execution", "mlir::ModuleOp"> {
use of library side argument conversion and the argument synthesis pass.
More generally, this option can be used when JIT compiling kernels on the
client/host/local processor.

There are multiple code generation kinds that are supported for flexibility
and streamlining the kernel launch process. These tend to be related to the
target and runtime environment the compiler is being run in and can involve
some technical issues that require deeper understanding of the entire
process. In general, it is not recommended that users change this value.

```
codegen kind description

0         Hybrid. A combination of 1 and 2 that allows early and
streamlined JIT compilation but also supports return values
and dynamic parameters.
1 Client-server interchange format. Supports kernels that
return results and dynamic parameters.
2 Streamlined for JIT. The kernel will be converted to a
nullary function with no results. Return values from the
kernel are ignored, if present. All parameter values are to
be inlined by the JIT compiler, so this codegen kind does not
support any dynamic parameters.
```
}];

let dependentDialects = ["cudaq::cc::CCDialect", "mlir::LLVM::LLVMDialect"];
Expand All @@ -311,8 +332,8 @@ def GenerateKernelExecution : Pass<"kernel-execution", "mlir::ModuleOp"> {
/*default=*/"\"-\"", "Name of output file.">,
Option<"startingArgIdx", "starting-arg-idx", "std::size_t", /*default=*/"0",
"The starting argument index for the argsCreator.">,
Option<"altLaunchVersion", "alt-launch", "std::size_t", /*default=*/"1",
"Specify the version of altLaunchKernel to be used.">
Option<"codegenKind", "codegen", "std::size_t", /*default=*/"1",
"Set the kind of code to generate for the launches.">
];
}

Expand Down
22 changes: 15 additions & 7 deletions lib/Optimizer/Builder/Intrinsics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -293,25 +293,33 @@ static constexpr IntrinsicCode intrinsicTable[] = {
return %3 : !cc.struct<{!cc.ptr<i8>, i64}>
})#"},

{cudaq::runtime::launchKernelFuncName, // altLaunchKernel
// altLaunchKernel(kernelName, thunk, commBuffer, buffSize, resultOffset)
{cudaq::runtime::launchKernelFuncName,
{},
R"#(
func.func private @altLaunchKernel(!cc.ptr<i8>, !cc.ptr<i8>, !cc.ptr<i8>, i64, i64) -> ())#"},

{cudaq::runtime::
launchKernelVersion2FuncName, // altLaunchKernelUsingLocalJIT
{"free", {}, "func.func private @free(!cc.ptr<i8>) -> ()"},

// hybridLaunchKernel(kernelName, thunk, commBuffer, buffSize,
// resultOffset, vectorArgPtrs)
{cudaq::runtime::launchKernelHybridFuncName,
{},
R"#(
func.func private @altLaunchKernelUsingLocalJIT(!cc.ptr<i8>, !cc.ptr<i8>, !cc.ptr<i8>) -> ())#"},

{"free", {}, "func.func private @free(!cc.ptr<i8>) -> ()"},
func.func private @hybridLaunchKernel(!cc.ptr<i8>, !cc.ptr<i8>, !cc.ptr<i8>, i64, i64, !cc.ptr<i8>) -> ())#"},

{cudaq::llvmMemCopyIntrinsic, // llvm.memcpy.p0i8.p0i8.i64
{},
R"#(
func.func private @llvm.memcpy.p0i8.p0i8.i64(!cc.ptr<i8>, !cc.ptr<i8>, i64, i1) -> ())#"},

{"malloc", {}, "func.func private @malloc(i64) -> !cc.ptr<i8>"}};
{"malloc", {}, "func.func private @malloc(i64) -> !cc.ptr<i8>"},

// streamlinedLaunchKernel(kernelName, vectorArgPtrs)
{cudaq::runtime::launchKernelStreamlinedFuncName,
{},
R"#(
func.func private @streamlinedLaunchKernel(!cc.ptr<i8>, !cc.ptr<i8>) -> ())#"}};

static constexpr std::size_t intrinsicTableSize =
sizeof(intrinsicTable) / sizeof(IntrinsicCode);
Expand Down
34 changes: 31 additions & 3 deletions lib/Optimizer/CodeGen/CCToLLVM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -495,6 +495,33 @@ class SizeOfOpPattern : public ConvertOpToLLVMPattern<cudaq::cc::SizeOfOp> {
}
};

class OffsetOfOpPattern : public ConvertOpToLLVMPattern<cudaq::cc::OffsetOfOp> {
public:
  using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern;

  /// Lower cc.offsetof by computing a GEP from a null base pointer and
  /// casting the resulting address to the op's result type.
  // NOTE: LLVM plans to eventually drop support for this null-GEP offset
  // idiom. See: https://github.com/llvm/llvm-project/issues/71507
  LogicalResult
  matchAndRewrite(cudaq::cc::OffsetOfOp offsetOp, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    auto loc = offsetOp.getLoc();
    auto structTy = offsetOp.getInputType();
    auto offsetTy = offsetOp.getType();

    // Gather the constant member indices used for the address computation.
    SmallVector<cudaq::cc::ComputePtrArg> indices;
    for (std::int32_t idx : offsetOp.getConstantIndices())
      indices.push_back(idx);

    // TODO: replace this with some target-specific memory layout computation
    // when we upgrade to a newer MLIR.
    auto ptrTy = cudaq::cc::PointerType::get(structTy);
    auto zero = rewriter.create<arith::ConstantIntOp>(loc, 0, 64);
    auto nullBase = rewriter.create<cudaq::cc::CastOp>(loc, ptrTy, zero);
    Value addr =
        rewriter.create<cudaq::cc::ComputePtrOp>(loc, ptrTy, nullBase, indices);
    rewriter.replaceOpWithNewOp<cudaq::cc::CastOp>(offsetOp, offsetTy, addr);
    return success();
  }
};

class StdvecDataOpPattern
: public ConvertOpToLLVMPattern<cudaq::cc::StdvecDataOp> {
public:
Expand Down Expand Up @@ -647,7 +674,8 @@ void cudaq::opt::populateCCToLLVMPatterns(LLVMTypeConverter &typeConverter,
ComputePtrOpPattern, CreateStringLiteralOpPattern,
ExtractValueOpPattern, FuncToPtrOpPattern, GlobalOpPattern,
InsertValueOpPattern, InstantiateCallableOpPattern,
LoadOpPattern, PoisonOpPattern, SizeOfOpPattern,
StdvecDataOpPattern, StdvecInitOpPattern, StdvecSizeOpPattern,
StoreOpPattern, UndefOpPattern>(typeConverter);
LoadOpPattern, OffsetOfOpPattern, PoisonOpPattern,
SizeOfOpPattern, StdvecDataOpPattern, StdvecInitOpPattern,
StdvecSizeOpPattern, StoreOpPattern, UndefOpPattern>(
typeConverter);
}
Loading

0 comments on commit 60c1389

Please sign in to comment.