From e97f14bb8c366e9289769f1f2a370b3898601c75 Mon Sep 17 00:00:00 2001
From: "Adam T. Geller" <adgeller@nvidia.com>
Date: Thu, 15 Aug 2024 16:41:35 -0700
Subject: [PATCH] Replace base profile DependencyAnalysis with WIP adaptive
 profile DependencyAnalysis (#2095)

* WIP of dep-analysis with ifs

* Make LifeTimeSet a stack of frames

* Make getting first use separate from detecting allocs

* Add fixes to assign-ids

* Add support for lifting operations from blocks

* Formatting

* Make analysis work inside-out, move scheduling later in the process

* OpName needs to be public, no reason to be optional

* Myriad bug fixes

* Ignore lifting ops that depend on allocs
* Makes gatherOperands use the no. of dependencies as the no. of operands
* Fix an issue getting the QID for the wire output of a measurement
* Make generation of block arguments from dependency blocks more flexible
* Make result types of IfDependencyNodes more flexible
* IfDependencyNodes now invokes codegen on successors correctly
* TerminatorDependencyNode constructor used so virtual method lookup works properly

* Support for contracting allocs, and WIP refactoring to separate analysis into "passes"

* Formatting

* Add functionality to update height metadata after lifting

* Fix a few bugs related to lifting

* Single graph per block, based on terminator

* Some more clean up and bug fixes

* Refactor allocation pass

* Formatting

* Update pipelines

* Remove outdated tests

* Formatting
---
 include/cudaq/Optimizer/Transforms/Passes.h   |    2 -
 include/cudaq/Optimizer/Transforms/Passes.td  |   15 +-
 lib/Optimizer/Transforms/AssignIDs.cpp        |  174 --
 lib/Optimizer/Transforms/CMakeLists.txt       |    1 -
 .../Transforms/DependencyAnalysis.cpp         | 1839 ++++++++++++-----
 runtime/common/RuntimeMLIR.cpp                |    1 -
 runtime/common/RuntimeMLIR.h                  |    3 +-
 runtime/common/RuntimeMLIRCommonImpl.h        |    7 +-
 .../default/rest/helpers/ionq/ionq.yml        |    2 +-
 .../platform/default/rest/helpers/oqc/oqc.yml |    2 +-
 .../rest/helpers/quantinuum/quantinuum.yml    |    2 +-
 .../qubit_management/classical_example1.cpp   |   33 -
 .../qubit_management/classical_example2.cpp   |   38 -
 .../qubit_management/classical_example3.cpp   |   34 -
 .../qubit_management/reuse_example1.cpp       |   44 -
 .../qubit_management/scheduling_test1.cpp     |   35 -
 .../qubit_management/scheduling_test2.cpp     |   40 -
 17 files changed, 1379 insertions(+), 893 deletions(-)
 delete mode 100644 lib/Optimizer/Transforms/AssignIDs.cpp
 delete mode 100644 targettests/execution/qubit_management/classical_example1.cpp
 delete mode 100644 targettests/execution/qubit_management/classical_example2.cpp
 delete mode 100644 targettests/execution/qubit_management/classical_example3.cpp
 delete mode 100644 targettests/execution/qubit_management/reuse_example1.cpp
 delete mode 100644 targettests/execution/qubit_management/scheduling_test1.cpp
 delete mode 100644 targettests/execution/qubit_management/scheduling_test2.cpp

diff --git a/include/cudaq/Optimizer/Transforms/Passes.h b/include/cudaq/Optimizer/Transforms/Passes.h
index 650fd6f500..56f65f4f79 100644
--- a/include/cudaq/Optimizer/Transforms/Passes.h
+++ b/include/cudaq/Optimizer/Transforms/Passes.h
@@ -30,8 +30,6 @@ void registerAggressiveEarlyInlining();
 
 void registerUnrollingPipeline();
 
-void registerQubitManagementPipeline();
-
 std::unique_ptr<mlir::Pass> createApplyOpSpecializationPass();
 std::unique_ptr<mlir::Pass>
 createApplyOpSpecializationPass(bool computeActionOpt);
diff --git a/include/cudaq/Optimizer/Transforms/Passes.td b/include/cudaq/Optimizer/Transforms/Passes.td
index a2d0390bb8..af8ce55bbe 100644
--- a/include/cudaq/Optimizer/Transforms/Passes.td
+++ b/include/cudaq/Optimizer/Transforms/Passes.td
@@ -100,18 +100,7 @@ def ArgumentSynthesis : Pass<"argument-synthesis", "mlir::func::FuncOp"> {
     "mlir::cf::ControlFlowDialect"];
 }
 
-def AssignIDs : Pass<"assign-ids", "mlir::func::FuncOp"> {
-  let summary = "Generate and assign unique identifiers for virtual qubits.";
-  let description = [{
-   Attachs a matching unique ID attribute to each `quake.null_wire` and
-   corresponding `quake.sink`. This is used in DependencyAnalysis to associate
-   allocations and deallocations for tracking qubit lifetimes.
-  }];
-
-  let dependentDialects = ["quake::QuakeDialect"];
-}
-
-def BasisConversionPass : Pass<"basis-conversion", "mlir::ModuleOp"> {
+def BasisConversionPass: Pass<"basis-conversion", "mlir::ModuleOp"> {
   let summary = "Converts kernels to a set of basis operations.";
   let description = [{
     This pass takes as input a list of target (allowed) quantum operations.
@@ -226,7 +215,7 @@ def DecompositionPass: Pass<"decomposition", "mlir::ModuleOp"> {
   ];
 }
 
-def DependencyAnalysis : Pass<"dep-analysis", "mlir::func::FuncOp"> {
+def DependencyAnalysis : Pass<"dep-analysis", "mlir::ModuleOp"> {
   let summary = "Maps qubits and reorders operations based on dependency graph.";
   let description = [{
     A dependency graph is a Directed Acyclic Graph (DAG) where each node
diff --git a/lib/Optimizer/Transforms/AssignIDs.cpp b/lib/Optimizer/Transforms/AssignIDs.cpp
deleted file mode 100644
index 55ece36c81..0000000000
--- a/lib/Optimizer/Transforms/AssignIDs.cpp
+++ /dev/null
@@ -1,174 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates.                  *
- * All rights reserved.                                                        *
- *                                                                             *
- * This source code and the accompanying materials are made available under    *
- * the terms of the Apache License 2.0 which accompanies this distribution.    *
- ******************************************************************************/
-
-#include "cudaq/Frontend/nvqpp/AttributeNames.h"
-#include "cudaq/Optimizer/Dialect/CC/CCDialect.h"
-#include "cudaq/Optimizer/Dialect/Quake/QuakeDialect.h"
-#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h"
-#include "cudaq/Optimizer/Transforms/Passes.h"
-#include "mlir/IR/PatternMatch.h"
-#include "mlir/IR/Threading.h"
-#include "mlir/InitAllDialects.h"
-#include "mlir/Rewrite/FrozenRewritePatternSet.h"
-#include "mlir/Transforms/DialectConversion.h"
-
-using namespace mlir;
-
-//===----------------------------------------------------------------------===//
-// Generated logic
-//===----------------------------------------------------------------------===//
-namespace cudaq::opt {
-#define GEN_PASS_DEF_ASSIGNIDS
-#include "cudaq/Optimizer/Transforms/Passes.h.inc"
-} // namespace cudaq::opt
-
-namespace {
-bool isMeasureOp(Operation *op) {
-  return dyn_cast<quake::MxOp>(*op) || dyn_cast<quake::MyOp>(*op) ||
-         dyn_cast<quake::MzOp>(*op);
-}
-
-int numClassicalInput(Operation *op) {
-  if (dyn_cast<quake::RxOp>(*op) || dyn_cast<quake::RyOp>(*op) ||
-      dyn_cast<quake::RzOp>(*op))
-    return 1;
-
-  if (dyn_cast<quake::PhasedRxOp>(*op))
-    return 2;
-
-  return 0;
-}
-
-class NullWirePat : public OpRewritePattern<quake::NullWireOp> {
-public:
-  unsigned *counter;
-
-  NullWirePat(MLIRContext *context, unsigned *c)
-      : OpRewritePattern<quake::NullWireOp>(context), counter(c) {}
-
-  LogicalResult matchAndRewrite(quake::NullWireOp alloc,
-                                PatternRewriter &rewriter) const override {
-    if (alloc->hasAttr("qid"))
-      return failure();
-
-    auto qid = (*counter)++;
-
-    rewriter.startRootUpdate(alloc);
-    alloc->setAttr("qid", rewriter.getUI32IntegerAttr(qid));
-    rewriter.finalizeRootUpdate(alloc);
-
-    return success();
-  }
-};
-
-std::optional<uint> findQid(Value v) {
-  auto defop = v.getDefiningOp();
-  if (!defop)
-    return std::nullopt;
-
-  if (defop->getRegions().size() != 0) {
-    defop->emitOpError(
-        "AssignIDsPass cannot handle non-function operations with regions."
-        " Do you have if statements in a Base Profile QIR program?");
-    return std::nullopt;
-  }
-
-  if (!isa<quake::WireType>(v.getType()))
-    return std::nullopt;
-
-  assert(quake::isLinearValueForm(defop) &&
-         "AssignIDsPass requires operations to be in value form");
-
-  if (defop->hasAttr("qid")) {
-    uint qid = defop->getAttr("qid").cast<IntegerAttr>().getUInt();
-    return std::optional<uint>(qid);
-  }
-
-  // Figure out matching operand
-  size_t i = 0;
-  for (; i < defop->getNumResults(); i++)
-    if (defop->getResult(i) == v)
-      break;
-
-  // Special cases where result # != operand #:
-  // Wire is second output but sole input
-  if (isMeasureOp(defop))
-    i = 0;
-  // Classical values preceding wires as input are consumed and not part of the results
-  i += numClassicalInput(defop);
-  // Swap op swaps wires
-  if (dyn_cast<quake::SwapOp>(defop))
-    i = (i == 1 ? 0 : 1);
-
-  return findQid(defop->getOperand(i));
-}
-
-class SinkOpPat : public OpRewritePattern<quake::SinkOp> {
-public:
-  SinkOpPat(MLIRContext *context) : OpRewritePattern<quake::SinkOp>(context) {}
-
-  LogicalResult matchAndRewrite(quake::SinkOp release,
-                                PatternRewriter &rewriter) const override {
-    auto qid = findQid(release.getOperand());
-
-    if (!qid.has_value())
-      return failure();
-
-    rewriter.startRootUpdate(release);
-    release->setAttr("qid", rewriter.getUI32IntegerAttr(qid.value()));
-    rewriter.finalizeRootUpdate(release);
-
-    return success();
-  }
-};
-
-//===----------------------------------------------------------------------===//
-// Pass implementation
-//===----------------------------------------------------------------------===//
-
-struct AssignIDsPass : public cudaq::opt::impl::AssignIDsBase<AssignIDsPass> {
-  using AssignIDsBase::AssignIDsBase;
-
-  void runOnOperation() override {
-    auto func = getOperation();
-
-    if (!func->hasAttr("cudaq-kernel") || func.getBlocks().empty())
-      return;
-
-    if (!func.getFunctionBody().hasOneBlock()) {
-      func.emitError("AssignIDsPass cannot handle multiple blocks. Do "
-                     "you have if statements in a Base Profile QIR program?");
-      signalPassFailure();
-      return;
-    }
-
-    assign();
-  }
-
-  void assign() {
-    auto *ctx = &getContext();
-    func::FuncOp func = getOperation();
-    RewritePatternSet patterns(ctx);
-    unsigned x = 0;
-    patterns.insert<NullWirePat>(ctx, &x);
-    patterns.insert<SinkOpPat>(ctx);
-    ConversionTarget target(*ctx);
-    target.addLegalDialect<quake::QuakeDialect>();
-    target.addDynamicallyLegalOp<quake::NullWireOp>(
-        [&](quake::NullWireOp alloc) { return alloc->hasAttr("qid"); });
-    target.addDynamicallyLegalOp<quake::SinkOp>(
-        [&](quake::SinkOp sink) { return sink->hasAttr("qid"); });
-    if (failed(applyPartialConversion(func.getOperation(), target,
-                                      std::move(patterns)))) {
-      func.emitOpError("Assigning qids failed");
-      signalPassFailure();
-    }
-  }
-};
-
-} // namespace
diff --git a/lib/Optimizer/Transforms/CMakeLists.txt b/lib/Optimizer/Transforms/CMakeLists.txt
index 39a185d895..8862e6022e 100644
--- a/lib/Optimizer/Transforms/CMakeLists.txt
+++ b/lib/Optimizer/Transforms/CMakeLists.txt
@@ -16,7 +16,6 @@ add_cudaq_library(OptTransforms
   ApplyControlNegations.cpp
   ApplyOpSpecialization.cpp
   ArgumentSynthesis.cpp
-  AssignIDs.cpp
   BasisConversion.cpp
   CombineQuantumAlloc.cpp
   ConstPropComplex.cpp
diff --git a/lib/Optimizer/Transforms/DependencyAnalysis.cpp b/lib/Optimizer/Transforms/DependencyAnalysis.cpp
index de5757338c..4c4ac9a130 100644
--- a/lib/Optimizer/Transforms/DependencyAnalysis.cpp
+++ b/lib/Optimizer/Transforms/DependencyAnalysis.cpp
@@ -8,6 +8,7 @@
 
 #include "PassDetails.h"
 #include "cudaq/Frontend/nvqpp/AttributeNames.h"
+#include "cudaq/Optimizer/Dialect/CC/CCOps.h"
 #include "cudaq/Optimizer/Dialect/Quake/QuakeDialect.h"
 #include "cudaq/Optimizer/Dialect/Quake/QuakeTypes.h"
 #include "cudaq/Optimizer/Transforms/Passes.h"
@@ -28,13 +29,50 @@ namespace cudaq::opt {
 } // namespace cudaq::opt
 
 namespace {
-bool isBeginOp(Operation *op) {
-  return dyn_cast<quake::UnwrapOp>(*op) || dyn_cast<quake::ExtractRefOp>(*op) ||
-         dyn_cast<quake::NullWireOp>(*op);
+typedef size_t PhysicalQID;
+typedef size_t VirtualQID;
+
+[[maybe_unused]] bool isMeasureOp(Operation *op) {
+  return isa<quake::MxOp>(*op) || isa<quake::MyOp>(*op) ||
+         isa<quake::MzOp>(*op);
+}
+
+[[maybe_unused]] bool isBeginOp(Operation *op) {
+  return isa<quake::NullWireOp>(*op);
+}
+
+[[maybe_unused]] bool isEndOp(Operation *op) { return isa<quake::SinkOp>(*op); }
+
+[[maybe_unused]] size_t getOperandIDXFromResultIDX(size_t resultidx,
+                                                   Operation *op) {
+  if (isMeasureOp(op))
+    return 0;
+  if (isa<quake::SwapOp>(op))
+    return (resultidx == 0 ? 1 : 0);
+  // Currently, all classical operands precede all quantum operands
+  for (auto type : op->getOperandTypes()) {
+    if (!quake::isQuantumType(type))
+      resultidx++;
+    else
+      break;
+  }
+  return resultidx;
 }
 
-bool isEndOp(Operation *op) {
-  return dyn_cast<quake::DeallocOp>(*op) || dyn_cast<quake::SinkOp>(*op);
+[[maybe_unused]] size_t getResultIDXFromOperandIDX(size_t operand_idx,
+                                                   Operation *op) {
+  if (isMeasureOp(op))
+    return 1;
+  if (isa<quake::SwapOp>(op))
+    return (operand_idx == 0 ? 1 : 0);
+  // Currently, all classical operands precede all quantum operands
+  for (auto type : op->getOperandTypes()) {
+    if (!quake::isQuantumType(type))
+      operand_idx--;
+    else
+      break;
+  }
+  return operand_idx;
 }
 
 class LifeTime {
@@ -43,8 +81,8 @@ class LifeTime {
   uint end;
 
 public:
-  LifeTime(uint _begin, uint _end) : begin(_begin), end(_end) {
-    assert(_end >= _begin && "invalid lifetime");
+  LifeTime(uint begin, uint end) : begin(begin), end(end) {
+    assert(end >= begin && "invalid lifetime");
   };
 
   bool isAfter(LifeTime *other) { return begin > other->end; }
@@ -59,55 +97,157 @@ class LifeTime {
     return std::max(begin, other->begin) - std::min(end, other->end);
   }
 
-  LifeTime *combine(LifeTime *other) {
-    return new LifeTime(std::min(begin, other->begin),
-                        std::max(end, other->end));
+  void combine(LifeTime *other) {
+    begin = std::min(begin, other->begin);
+    end = std::max(end, other->end);
   }
 
   uint getBegin() { return begin; }
   uint getEnd() { return end; }
 };
 
+class LifeTimeAnalysis {
+private:
+  StringRef name;
+  SmallVector<LifeTime *> lifetimes;
+  SetVector<PhysicalQID> frame;
+  // DenseMap<VirtualQID, PhysicalQID> allocMap;
+
+  /// Given a candidate lifetime, tries to find a qubit to reuse,
+  /// and otherwise allocates a new qubit
+  PhysicalQID allocatePhysical(LifeTime *lifetime) {
+    std::optional<size_t> best_reuse = std::nullopt;
+    std::optional<size_t> empty = std::nullopt;
+    uint best_distance = INT_MAX;
+
+    for (uint i = 0; i < lifetimes.size(); i++) {
+      if (!lifetimes[i]) {
+        empty = i;
+        continue;
+      }
+
+      auto other = lifetimes[i];
+      auto distance = lifetime->distance(other);
+      if (!lifetime->isOverlapping(other) && distance < best_distance) {
+        best_reuse = i;
+        best_distance = distance;
+      }
+    }
+
+    // Reuse a qubit based on its lifetime in the same scope
+    if (best_reuse) {
+      auto physical = best_reuse.value();
+      lifetimes[physical]->combine(lifetime);
+      return physical;
+    }
+
+    // Reuse a qubit without a lifetime (used in a different frame)
+    if (empty) {
+      auto physical = empty.value();
+      lifetimes[physical] = lifetime;
+      return physical;
+    }
+
+    // Fall back: allocate a new qubit
+    lifetimes.push_back(lifetime);
+    return lifetimes.size() - 1;
+  }
+
+public:
+  LifeTimeAnalysis(StringRef name) : name(name), lifetimes(), frame() {}
+
+  PhysicalQID allocatePhysical(VirtualQID qid, LifeTime *lifetime) {
+    auto phys = allocatePhysical(lifetime);
+    frame.insert(phys);
+    return phys;
+  }
+
+  // quake::BorrowWireOp genBorrow(VirtualQID qid, OpBuilder &builder) {
+  //   // auto phys = mapToPhysical(lifetime);
+  //   // frames.back().insert(phys);
+
+  //   auto wirety = quake::WireType::get(builder.getContext());
+  //   return builder.create<quake::BorrowWireOp>(builder.getUnknownLoc(),
+  //   wirety, set, virToPhys[qid]);
+  // }
+
+  void pushFrame() {
+    // TODO: anything here?
+  }
+
+  SetVector<PhysicalQID> popFrame() {
+    for (uint i = 0; i < lifetimes.size(); i++)
+      lifetimes[i] = nullptr;
+    auto pqids = SetVector<PhysicalQID>(frame);
+    frame.clear();
+    return pqids;
+  }
+
+  void reallocatePhysical(PhysicalQID phys, LifeTime *lifetime) {
+    lifetimes[phys] = lifetime;
+  }
+
+  size_t getCount() { return lifetimes.size(); }
+
+  void print() {
+    llvm::outs() << "# qubits: " << getCount() << ", cycles: ";
+    for (auto *lt : lifetimes) // slots cleared by popFrame() are null
+      if (lt)
+        llvm::outs() << lt->getBegin() << " - " << lt->getEnd() << " ";
+    llvm::outs() << "\n";
+  }
+
+  StringRef getName() { return name; }
+};
+
 class DependencyNode {
   friend class DependencyGraph;
+  friend class OpDependencyNode;
+  friend class IfDependencyNode;
+  friend class ArgDependencyNode;
+  friend class RootDependencyNode;
+
+public:
+  struct DependencyEdge {
+  public:
+    DependencyNode *node;
+    // If a given dependency appears multiple times,
+    // (e.g., multiple results of the dependency are used by this node),
+    // it is important to know which result from the dependency
+    // corresponds to which operand.
+    // Otherwise, the dependency will be code gen'ed first, and it will
+    // be impossible to know (e.g., which result is a control and which is a
+    // target). Resultidx tracks this information.
+    size_t resultidx;
+    std::optional<VirtualQID> qid;
+
+    DependencyEdge() : node(nullptr), resultidx(INT_MAX), qid(std::nullopt) {}
+
+    DependencyEdge(DependencyNode *node, size_t resultidx)
+        : node(node), resultidx(resultidx) {
+      assert(node && "DependencyEdge: node cannot be null");
+      qid = node->getQIDForResult(resultidx);
+    }
+
+    /// Returns the underlying DependencyNode * without attached metadata
+    DependencyNode *operator->() { return node; }
+
+    /// Returns the value represented by this DependencyEdge
+    Value getValue() { return node->getResult(resultidx); }
+  };
 
 protected:
   SetVector<DependencyNode *> successors;
-  // Dependencies are in the order of operands of associated
-  SmallVector<DependencyNode *> dependencies;
-  // If a given dependency appears multiple times,
-  // (e.g., multiple results of the dependency are used by associated),
-  // it is important to know which result from the dependency
-  // corresponds to which operand of associated.
-  // Otherwise, the dependency will be code gen'ed first, and it will
-  // be impossible to know (e.g., which result is a control and which is a
-  // target). Result_idxs tracks this information, and therefore should be
-  // exactly the same size/order as dependencies.
-  SmallVector<size_t> result_idxs;
-  SetVector<size_t> qids;
-  Operation *associated;
+  // Dependencies are in the order of operands
+  SmallVector<DependencyEdge> dependencies;
+  SetVector<VirtualQID> qids;
   uint cycle = INT_MAX;
   bool hasCodeGen = false;
   uint height;
-  bool quantumOp;
   bool isScheduled;
 
-  void printNode() {
-    llvm::outs() << "QIDs: ";
-    bool printComma = false;
-    for (auto qid : qids) {
-      if (printComma)
-        llvm::outs() << ", ";
-      llvm::outs() << qid;
-      printComma = true;
-    }
-    if (isScheduled)
-      llvm::outs() << " @ " << cycle;
-    llvm::outs() << " | ";
-    associated->dump();
-  }
+  virtual void printNode() = 0;
 
-  // Print with tab index to should depth in graph
   void printSubGraph(int tabIndex) {
     for (int i = 0; i < tabIndex; i++) {
       llvm::outs() << "\t";
@@ -119,21 +259,46 @@ class DependencyNode {
       dependency->printSubGraph(tabIndex + 1);
   }
 
-  bool isRoot() { return isEndOp(associated); }
+  virtual bool isAlloc() { return false; }
+  virtual bool isRoot() { return successors.size() == 0; };
+  virtual bool isLeaf() { return dependencies.size() == 0; };
+  virtual bool isSkip() { return numTicks() == 0; };
+  virtual bool isQuantumOp() = 0;
+  virtual uint numTicks() = 0;
+  virtual Value getResult(uint resultidx) = 0;
+  virtual ValueRange getResults() = 0;
+  virtual SetVector<PhysicalQID> mapToPhysical(LifeTimeAnalysis &set) {
+    return {};
+  }
+  virtual void codeGen(OpBuilder &builder, LifeTimeAnalysis &set) = 0;
+
+  /// Recursively find nodes scheduled at a given cycle
+  SetVector<DependencyNode *>
+  getNodesAtCycle(uint _cycle, SetVector<DependencyNode *> &seen) {
+    SetVector<DependencyNode *> nodes;
 
-  bool isLeaf() { return isBeginOp(associated); }
+    if (cycle < _cycle || seen.contains(this))
+      return nodes;
+    else if (cycle == _cycle && !isSkip()) {
+      nodes.insert(this);
+      return nodes;
+    }
 
-  bool isSkip() { return isRoot() || isLeaf() || !quantumOp; }
+    seen.insert(this);
 
-  bool isQuantumDependent() { return qids.size() > 0; }
+    for (auto dependency : dependencies)
+      nodes.set_union(dependency->getNodesAtCycle(_cycle, seen));
 
-  uint numTicks() { return isSkip() ? 0 : 1; }
+    return nodes;
+  }
 
-  /// This function guarantees that nodes are scheduled after their predecessors
-  /// and before their successors, and that every node is scheduled at a cycle
-  /// between 0 and the height of the graph to which they belong.
+  /// Assigns cycles to quantum operations. A node must be scheduled after all
+  /// of its dependencies, and before all of its successors. A node cannot be
+  /// scheduled at a negative cycle, nor can it be scheduled at a cycle greater
+  /// than or equal to the height of the graph to which it belongs.
   ///
-  /// The scheduling algorithm works by always following the longest path first.
+  /// The scheduling algorithm (as currently implemented) works by always
+  /// following the longest path first.
   /// The longest path will always be "saturated" with an operation every cycle,
   /// so we know exactly when to schedule every operation along that path.
   /// Then, every successor (not on the path) of an operation on the path should
@@ -154,7 +319,7 @@ class DependencyNode {
   void schedule(uint level) {
     isScheduled = true;
     // Ignore classical values that don't depend on quantum values
-    if (!quantumOp && !isQuantumDependent())
+    if (!isQuantumDependent())
       return;
 
     // The height of a node (minus numTicks()) is the earliest a node can be
@@ -172,10 +337,10 @@ class DependencyNode {
     // Without this, two dependencies may be scheduled at the same cycle,
     // even if one of the dependencies depends on the other.
     // This sort of mimics working over a transitive reduction of the graph.
-    SmallVector<DependencyNode *> sorted(dependencies);
+    SmallVector<DependencyEdge> sorted(dependencies);
     std::sort(sorted.begin(), sorted.end(),
-              [](DependencyNode *x, DependencyNode *y) {
-                return x->getHeight() > y->getHeight();
+              [](DependencyEdge x, DependencyEdge y) {
+                return x.node->getHeight() > y.node->getHeight();
               });
 
     // Schedule dependencies as late as possible (right before this operation)
@@ -189,59 +354,214 @@ class DependencyNode {
         successor->schedule(current + numTicks() + successor->numTicks());
   }
 
-  /// Returns the index of the result of the dependency corresponding to the
-  /// \p operand_idx'th operand of this node
-  size_t getResultIdx(size_t operand_idx) { return result_idxs[operand_idx]; }
+  virtual bool equivalentTo(DependencyNode *other) {
+    if (getOpName() != other->getOpName())
+      return false;
+    if (height != other->height)
+      return false;
+    if (dependencies.size() != other->dependencies.size())
+      return false;
+    for (uint i = 0; i < dependencies.size(); i++) {
+      if (!dependencies[i].node->equivalentTo(other->dependencies[i].node) ||
+          dependencies[i]->isAlloc())
+        return false;
+    }
+    return true;
+  }
 
-  /// Recursively find nodes scheduled at a given cycle
-  SetVector<DependencyNode *> getNodesAtCycle(uint _cycle, SetVector<DependencyNode *> &seen) {
-    SetVector<DependencyNode *> nodes;
+  virtual std::optional<VirtualQID> getQIDForResult(size_t resultidx) = 0;
 
-    if (cycle < _cycle || seen.contains(this))
-      return nodes;
-    else if (cycle == _cycle && !isSkip()) {
-      nodes.insert(this);
-      return nodes;
+  virtual void performLifting() {}
+
+  virtual void updateHeight() {
+    height = 0;
+    for (auto edge : dependencies) {
+      if (edge->getHeight() > height)
+        height = edge->getHeight();
     }
+    height += numTicks();
+  }
 
-    seen.insert(this);
+public:
+  DependencyNode()
+      : successors(), dependencies({}), qids({}), height(0),
+        isScheduled(false) {}
 
+  uint getHeight() { return height; };
+
+  void print() { printSubGraph(0); }
+
+  virtual bool isQuantumDependent() {
+    if (isQuantumOp())
+      return true;
     for (auto dependency : dependencies)
-      nodes.set_union(dependency->getNodesAtCycle(_cycle, seen));
+      if (dependency->isQuantumDependent())
+        return true;
+    return false;
+  };
 
-    return nodes;
+  virtual void schedulingPass() {
+    assert(false && "schedulingPass can only be called on an IfDependencyNode");
   }
 
-  /// Generates a new operation for this node in the dependency graph
-  /// using the dependencies of the node as operands.
-  void codeGen(OpBuilder &builder) {
-    if (hasCodeGen || isRoot() || isLeaf())
-      return;
+  virtual void contractAllocsPass() {
+    assert(false &&
+           "contractAllocsPass can only be called on an IfDependencyNode");
+  }
 
-    if (!quantumOp)
-      for (auto dependency : dependencies)
-        if (!dependency->hasCodeGen)
-          return;
+  virtual void performLiftingPass() {
+    assert(false &&
+           "performLiftingPass can only be called on an IfDependencyNode");
+  }
 
-    auto oldOp = associated;
-    SmallVector<mlir::Value> operands(oldOp->getNumOperands());
+  virtual void allocationPass(LifeTimeAnalysis &set) {
+    assert(false && "allocationPass can only be called on an IfDependencyNode");
+  }
+
+  // virtual void performAnalysis(LifeTimeAnalysis &set) {
+  //   assert(false && "performAnalysis can only be called on an
+  //   IfDependencyNode");
+  // }
+
+  virtual void moveAllocIntoBlock(DependencyNode *init, DependencyNode *root,
+                                  VirtualQID alloc) {
+    assert(false &&
+           "moveAllocIntoBlock can only be called on an IfDependencyNode");
+  }
+
+  virtual std::string getOpName() = 0;
+
+  virtual bool isContainer() { return false; }
+};
+
+class InitDependencyNode : public DependencyNode {
+  friend class DependencyGraph;
+
+protected:
+  Value wire;
+  PhysicalQID pqid = INT_MAX;
+
+  void printNode() override {
+    llvm::outs() << "Initial value for QID " << getQID();
+    if (pqid != INT_MAX)
+      llvm::outs() << "=" << pqid;
+    llvm::outs() << ": ";
+    wire.dump();
+  }
+
+  bool isAlloc() override { return true; }
+  uint numTicks() override { return 0; }
+  bool isQuantumOp() override { return true; }
 
+  Value getResult(uint resultidx) override {
+    assert(resultidx == 0 && "Illegal resultidx");
+    return wire;
+  }
+
+  ValueRange getResults() override { return ValueRange({wire}); }
+
+  void codeGen(OpBuilder &builder, LifeTimeAnalysis &set) override {
+    auto wirety = quake::WireType::get(builder.getContext());
+    auto alloc = builder.create<quake::BorrowWireOp>(
+        builder.getUnknownLoc(), wirety, set.getName(), pqid);
+    wire = alloc.getResult();
+    hasCodeGen = true;
+  }
+
+  void assignToPhysical(PhysicalQID phys) { pqid = phys; }
+
+  VirtualQID getQID() { return qids.front(); }
+
+  std::optional<VirtualQID> getQIDForResult(size_t resultidx) override {
+    assert(resultidx == 0 && "Invalid resultidx");
+    return std::optional(getQID());
+  }
+
+public:
+  InitDependencyNode(quake::BorrowWireOp op) : wire(op.getResult()) {
+    // No `qid` attribute is needed here; the op's identity is used instead
+
+    // Lookup qid
+    auto qid = op.getIdentity();
+    qids.insert(qid);
+  };
+
+  virtual std::string getOpName() override { return "init"; };
+};
+
+class OpDependencyNode : public DependencyNode {
+  friend class IfDependencyNode;
+
+protected:
+  Operation *associated;
+  bool quantumOp;
+
+  void printNode() override {
+    llvm::outs() << "QIDs: ";
+    bool printComma = false;
+    for (auto qid : qids) {
+      if (printComma)
+        llvm::outs() << ", ";
+      llvm::outs() << qid;
+      printComma = true;
+    }
+    if (isScheduled)
+      llvm::outs() << " @ " << cycle;
+    llvm::outs() << " | " << height << ", " << numTicks() << " | ";
+    associated->dump();
+  }
+
+  uint numTicks() override { return isQuantumOp() ? 1 : 0; }
+  bool isQuantumOp() override { return quantumOp; }
+
+  Value getResult(uint resultidx) override {
+    return associated->getResult(resultidx);
+  }
+
+  ValueRange getResults() override { return associated->getResults(); }
+
+  SmallVector<mlir::Value> gatherOperands(OpBuilder &builder,
+                                          LifeTimeAnalysis &set) {
+    SmallVector<mlir::Value> operands(dependencies.size());
     for (size_t i = 0; i < dependencies.size(); i++) {
       auto dependency = dependencies[i];
 
-      // Ensure classical values are available
-      if (!dependency->quantumOp)
-        dependency->codeGen(builder);
+      // Ensure classical values are available and that any allocs are added
+      if (dependency->isSkip())
+        dependency->codeGen(builder, set);
 
       assert(dependency->hasCodeGen &&
              "Generating code for successor before dependency");
 
       // Get relevant result from dependency's updated op
       // to use as the relevant operand
-      auto result_idx = getResultIdx(i);
-      operands[i] = dependency->associated->getResult(result_idx);
+      operands[i] = dependency->getResult(dependency.resultidx);
     }
 
+    return operands;
+  }
+
+  /// Generates a new operation for this node in the dependency graph
+  /// using the dependencies of the node as operands.
+  virtual void codeGen(OpBuilder &builder, LifeTimeAnalysis &set) override {
+    if (hasCodeGen)
+      return;
+
+    // Non-quake operations have code generated aggressively
+    // This ensures that code gen is not too aggressive
+    if (isSkip())
+      for (auto dependency : dependencies)
+        if (!dependency->hasCodeGen) {
+          if (dependency->isQuantumDependent())
+            // Wait for quantum op dependency to be codeGen'ed
+            return;
+          else
+            dependency->codeGen(builder, set);
+        }
+
+    auto oldOp = associated;
+    auto operands = gatherOperands(builder, set);
+
     associated =
         Operation::create(oldOp->getLoc(), oldOp->getName(),
                           oldOp->getResultTypes(), operands, oldOp->getAttrs());
@@ -249,51 +569,29 @@ class DependencyNode {
     builder.insert(associated);
     hasCodeGen = true;
 
+    // Ensure classical values are generated
     for (auto successor : successors)
-      // Ensure classical values are generated
-      if (!successor->quantumOp)
-        successor->codeGen(builder);
+      if (successor->isSkip())
+        successor->codeGen(builder, set);
   }
 
-  /// Replaces the null_wire op for \p qid with \p init
-  void initializeWire(size_t qid, Operation *init, uint result_idx) {
-    if (!qids.contains(qid))
-      return;
-
-    if (isLeaf()) {
-      associated = init;
-      hasCodeGen = true;
-
-      // Update result_idxs of successors
-      for (auto successor : successors)
-        for (uint i = 0; i < successor->dependencies.size(); i++)
-          if (successor->dependencies[i] == this)
-            successor->result_idxs[i] = result_idx;
-
-      return;
-    }
-
-    for (auto dependency : dependencies)
-      dependency->initializeWire(qid, init, result_idx);
+  std::optional<VirtualQID> getQIDForResult(size_t resultidx) override {
+    if (!isQuantumOp())
+      return std::nullopt;
+    auto operand = getOperandIDXFromResultIDX(resultidx, associated);
+    if (operand >= dependencies.size())
+      return std::nullopt;
+    return dependencies[operand].qid;
   }
 
 public:
-  DependencyNode(Operation *op, SmallVector<DependencyNode *> _dependencies)
-      : dependencies(_dependencies), qids(), associated(op) {
+  OpDependencyNode(Operation *op, SmallVector<DependencyEdge> _dependencies)
+      : associated(op) {
     assert(op && "Cannot make dependency node for null op");
     assert(_dependencies.size() == op->getNumOperands() &&
            "Wrong # of dependencies to construct node");
-    successors = SetVector<DependencyNode *>();
-    result_idxs = SmallVector<size_t>(dependencies.size(), INT_MAX);
 
-    if (isBeginOp(op) || isEndOp(op)) {
-      // Should be ensured by assign-ids pass
-      assert(op->hasAttr("qid") && "quake.null_wire or quake.sink missing qid");
-
-      // Lookup qid
-      auto qid = op->getAttrOfType<IntegerAttr>("qid").getUInt();
-      qids.insert(qid);
-    }
+    dependencies = _dependencies;
 
     quantumOp = isQuakeOperation(op);
     if (dyn_cast<quake::DiscriminateOp>(op))
@@ -302,62 +600,95 @@ class DependencyNode {
     height = 0;
     // Ingest dependencies, setting up metadata
     for (size_t i = 0; i < dependencies.size(); i++) {
-      auto dependency = dependencies[i];
-      auto operand = associated->getOperand(i);
-
-      assert(dependency && "Invalid dependency");
-
-      // Figure out result_idx
-      size_t result_idx = 0;
-      auto results = dependency->associated->getResults();
-      for (; result_idx < results.size(); result_idx++)
-        if (results[result_idx] == operand)
-          break;
+      auto edge = dependencies[i];
 
-      assert(result_idx < results.size() &&
-             "Node passed as dependency isn't actually a dependency!");
-
-      result_idxs[i] = result_idx;
-      // Set relevant successor of dependency to this
-      dependency->successors.insert(this);
+      assert(edge->getResult(edge.resultidx) == associated->getOperand(i) &&
+             "Dependency isn't actually a dependency!");
+      // Add this as a successor to each dependency
+      edge->successors.insert(this);
 
       // Update metadata
-      if (dependency->height > height)
-        height = dependency->height;
-      if (!isEndOp(op))
-        qids.set_union(dependency->qids);
+      if (edge.qid.has_value())
+        qids.insert(edge.qid.value());
     }
 
-    height += numTicks();
+    updateHeight();
   };
 
   void print() { printSubGraph(0); }
 
   uint getHeight() { return height; }
 
-  /// Assuming this is a root, replaces the old sink operation
-  /// with a new one for the "physical" wire we replaced the virtual wire with
-  void addCleanUp(OpBuilder &builder) {
-    assert(isRoot() && "Can only call addCleanUp on a root node");
-    auto last_use = dependencies[0];
-    auto result_idx = getResultIdx(0);
-    auto wire = last_use->associated->getResult(result_idx);
-    auto newOp = builder.create<quake::SinkOp>(builder.getUnknownLoc(), wire);
-    newOp->setAttrs(associated->getAttrs());
-    associated->removeAttr("dnodeid");
-    associated = newOp;
-    hasCodeGen = true;
+  DependencyEdge getDependencyForResult(size_t resultidx) {
+    return dependencies[getOperandIDXFromResultIDX(resultidx, associated)];
+  }
+
+  size_t getResultForDependency(size_t operandidx) {
+    return getResultIDXFromOperandIDX(operandidx, associated);
   }
+
+  /// Remove this dependency node from the path for \p qid by replacing
+  /// successor dependencies on \p qid with the relevant dependency from this
+  /// node.
+  virtual void eraseQID(VirtualQID qid) {
+    for (auto successor : successors) {
+      for (uint j = 0; j < successor->dependencies.size(); j++) {
+        auto edge = successor->dependencies[j];
+        if (edge.node == this && edge.qid == qid) {
+          auto dep = getDependencyForResult(edge.resultidx);
+          successor->dependencies[j] = dep;
+          dep->successors.remove(this);
+          dep->successors.insert(successor);
+        }
+      }
+    }
+    qids.remove(qid);
+  }
+
+  /// Remove this dependency node from the graph by replacing all successor
+  /// dependencies with the relevant dependency from this node.
+  void erase() {
+    for (auto successor : successors) {
+      for (uint j = 0; j < successor->dependencies.size(); j++) {
+        auto edge = successor->dependencies[j];
+        if (edge.node == this) {
+          auto dep = getDependencyForResult(edge.resultidx);
+          successor->dependencies[j] = dep;
+          dep->successors.remove(this);
+          dep->successors.insert(successor);
+        }
+      }
+    }
+  }
+
+  /// Returns this node's name as a string: the literal value for arith
+  /// float/index/int constants, otherwise the associated operation's name.
+  std::string getOpName() override {
+    if (auto cstf = dyn_cast<arith::ConstantFloatOp>(associated)) {
+      auto value = cstf.getValue().cast<FloatAttr>().getValueAsDouble();
+      return std::to_string(value);
+    } else if (auto cstidx = dyn_cast<arith::ConstantIndexOp>(associated)) {
+      auto value = cstidx.getValue().cast<IntegerAttr>().getInt();
+      return std::to_string(value);
+    } else if (auto cstint = dyn_cast<arith::ConstantIntOp>(associated)) {
+      auto value = cstint.getValue().cast<IntegerAttr>().getInt();
+      return std::to_string(value);
+    }
+    return associated->getName().getStringRef().str();
+  };
 };
 
 class DependencyGraph {
 private:
   SetVector<DependencyNode *> roots;
-  SetVector<size_t> qids;
+  DenseMap<VirtualQID, InitDependencyNode *> allocs;
+  DenseMap<VirtualQID, DependencyNode *> leafs;
+  SetVector<VirtualQID> qids;
   uint total_height;
-  DenseMap<size_t, DependencyNode *> firstUses;
   bool isScheduled = false;
   DependencyNode *tallest = nullptr;
+  uint shift;
+  SetVector<DependencyNode *> containers;
 
   /// Starting from \p next, searches through \p next's family
   /// (excluding already seen nodes) to find all the interconnected roots
@@ -377,19 +708,26 @@ class DependencyGraph {
     }
 
     seen.insert(next);
-    qids.set_union(next->qids);
 
-    if (isBeginOp(next->associated))
-      firstUses.insert({next->qids.front(), next->successors.front()});
+    if (next->isLeaf() && next->isQuantumOp()) {
+      leafs.insert({next->qids.front(), next});
+      qids.set_union(next->qids);
+    }
+
+    if (next->isAlloc()) {
+      auto init = static_cast<InitDependencyNode *>(next);
+      allocs[init->getQID()] = init;
+    }
+
+    if (next->isContainer())
+      containers.insert(next);
 
     for (auto successor : next->successors)
       gatherRoots(seen, successor);
     for (auto dependency : next->dependencies)
-      gatherRoots(seen, dependency);
+      gatherRoots(seen, dependency.node);
   }
 
-  void scheduleNodes() { tallest->schedule(total_height); }
-
   SetVector<DependencyNode *> getNodesAtCycle(uint cycle) {
     SetVector<DependencyNode *> nodes;
     SetVector<DependencyNode *> seen;
@@ -398,98 +736,149 @@ class DependencyGraph {
     return nodes;
   }
 
-  /// Ensures that the node is valid and is scheduled properly.
-  /// This is an expensive check that should only be used for
-  /// testing/debugging.
-  void validateNode(DependencyNode *node, uint parent_cycle) {
-    assert(node && "Null node in graph");
-    if (!node->isSkip()) {
-      assert(node->cycle < parent_cycle && "Node scheduled too late");
-      parent_cycle = node->cycle;
-    }
-    for (auto dependency : node->dependencies)
-      validateNode(dependency, parent_cycle);
+  void updateHeight(SetVector<DependencyNode *> &seen, DependencyNode *next) {
+    if (seen.contains(next))
+      return;
+
+    seen.insert(next);
+
+    for (auto dependency : next->dependencies)
+      updateHeight(seen, dependency.node);
+
+    next->updateHeight();
   }
 
 public:
   DependencyGraph(DependencyNode *root) {
+    shift = 0;
     total_height = 0;
     SetVector<DependencyNode *> seen;
     gatherRoots(seen, root);
-    scheduleNodes();
+    if (roots.size() == 0)
+      return;
+
+    qids = SetVector<size_t>();
+    for (auto root : roots) {
+      qids.set_union(root->qids);
+    }
   }
 
   SetVector<DependencyNode *> &getRoots() { return roots; }
 
-  SetVector<size_t> &getQIDs() { return qids; }
+  SetVector<VirtualQID> &getQIDs() { return qids; }
 
   size_t getNumQIDs() { return qids.size(); }
 
-  LifeTime *getLifeTimeForQID(size_t qid) {
-    assert(qids.contains(qid) && "Given qid not in dependency graph");
-    uint first = getFirstUseOf(qid)->cycle;
-    auto last = getLastUseOf(qid)->cycle;
+  LifeTime *getLifeTimeForQID(VirtualQID qid) {
+    uint first = getFirstUseOf(qid)->cycle + shift;
+    auto last = getLastUseOf(qid)->cycle + shift;
 
     return new LifeTime(first, last);
   }
 
-  DependencyNode *getFirstUseOf(size_t qid) {
+  OpDependencyNode *getFirstUseOf(VirtualQID qid) {
     assert(qids.contains(qid) && "Given qid not in dependency graph");
-    return firstUses[qid];
+    DependencyNode *firstUse = leafs[qid]->successors[0];
+    if (firstUse->isRoot())
+      return nullptr;
+    return static_cast<OpDependencyNode *>(firstUse);
   }
 
-  DependencyNode *getLastUseOf(size_t qid) {
+  OpDependencyNode *getLastUseOf(VirtualQID qid) {
     assert(qids.contains(qid) && "Given qid not in dependency graph");
-    return getRootForQID(qid)->dependencies[0];
+    DependencyNode *lastUse = getRootForQID(qid)->dependencies[0].node;
+    if (lastUse->isLeaf())
+      return nullptr;
+    return static_cast<OpDependencyNode *>(lastUse);
   }
 
-  DependencyNode *getRootForQID(size_t qid) {
+  DependencyNode *getRootForQID(VirtualQID qid) {
     assert(qids.contains(qid) && "Given qid not in dependency graph");
     for (auto root : roots)
-      if (root->associated->getAttr("qid").cast<IntegerAttr>().getUInt() == qid)
+      if (root->qids.contains(qid))
         return root;
     return nullptr;
   }
 
-  void codeGenAt(uint cycle, OpBuilder &builder) {
+  InitDependencyNode *getAllocForQID(VirtualQID qid) {
+    assert(allocs.count(qid) == 1 && "Given qid not allocated in graph");
+    return allocs[qid];
+  }
+
+  void codeGenAt(uint cycle, OpBuilder &builder, LifeTimeAnalysis &set) {
     SetVector<DependencyNode *> nodes = getNodesAtCycle(cycle);
 
     for (auto node : nodes)
-      node->codeGen(builder);
+      node->codeGen(builder, set);
+  }
+
+  void allocationPass(LifeTimeAnalysis &set) {
+    for (auto container : containers)
+      container->allocationPass(set);
   }
 
   uint getHeight() { return total_height; }
 
-  SmallVector<size_t> getFirstUsedAtCycle(uint cycle) {
-    SmallVector<size_t> cycles;
-    for (auto qid : qids) {
-      auto first = getFirstUseOf(qid);
-      if (first->cycle == cycle)
-        cycles.push_back(qid);
-    }
+  // SmallVector<VirtualQID> getFirstUsedAtCycle(uint cycle) {
+  //   SmallVector<VirtualQID> fresh;
+  //   for (auto [qid, _] : allocs)
+  //     if (getFirstUseOf(qid)->cycle == cycle)
+  //       fresh.push_back(qid);
+
+  //   return fresh;
+  // }
+
+  // SmallVector<VirtualQID> getLastUsedAtCycle(uint cycle) {
+  //   SmallVector<VirtualQID> stale;
+  //   for (auto [qid, _] : allocs)
+  //     if (getLastUseOf(qid)->cycle == cycle)
+  //       stale.push_back(qid);
+
+  //   return stale;
+  // }
+
+  SetVector<VirtualQID> getAllocs() {
+    SetVector<VirtualQID> allocated;
+    for (auto [qid, _] : allocs)
+      allocated.insert(qid);
+    return allocated;
+  }
 
-    return cycles;
+  void assignToPhysical(VirtualQID qid, PhysicalQID phys) {
+    if (allocs.count(qid) == 1)
+      allocs[qid]->assignToPhysical(phys);
   }
 
-  /// Creates a new physical null wire to replace the
-  /// "virtual" qubit represented by \p qid
-  void initializeWire(size_t qid, OpBuilder &builder) {
-    assert(qids.contains(qid) && "Given qid not in dependency graph");
-    auto ctx = builder.getContext();
-    auto wireTy = quake::WireType::get(ctx);
-    auto initOp =
-        builder.create<quake::NullWireOp>(builder.getUnknownLoc(), wireTy);
-    getFirstUseOf(qid)->initializeWire(qid, initOp, 0);
+  /// Qubits allocated within a dependency block that are only used inside an
+  /// `if` in that block, can be moved inside the `if`.
+  ///
+  /// Works outside-in, to contract as tightly as possible.
+  void contractAllocsPass() {
+    for (auto container : containers)
+      container->contractAllocsPass();
   }
 
-  /// Replaces the "virtual" qubit represented by \p qid with the same
-  /// physical qubit as \p init, which is assumed to be the last use.
-  void initializeWireFromRoot(size_t qid, DependencyNode *init) {
-    assert(qids.contains(qid) && "Given qid not in dependency graph");
-    assert(init && init->isRoot() &&
-           "Can only initialize wire from a valid root");
-    auto lastOp = init->dependencies[0]->associated;
-    getFirstUseOf(qid)->initializeWire(qid, lastOp, init->result_idxs[0]);
+  /// Assigns a cycle to every quantum operation in each dependency graph
+  /// (including `if`s containing quantum operations).
+  ///
+  /// Currently works inside-out, but scheduling is order-agnostic
+  /// as inner-blocks don't rely on parent schedules, and vice-versa.
+  ///
+  /// TODO: should be able to parallelize this across all blocks
+  void schedulingPass() {
+    if (!tallest) {
+      assert(roots.empty() &&
+             "updateHeight not invoked before scheduling graph!");
+      return;
+    }
+    for (auto container : containers)
+      container->schedulingPass();
+    tallest->schedule(total_height);
+  }
+
+  void performLiftingPass() {
+    for (auto container : containers)
+      container->performLiftingPass();
   }
 
   void print() {
@@ -499,102 +888,747 @@ class DependencyGraph {
     llvm::outs() << "Graph End\n";
   }
 
-  Location getIntroductionLoc(size_t qid) {
-    assert(qids.contains(qid) && "Given qid not in dependency graph");
-    return getFirstUseOf(qid)->associated->getLoc();
+  void setCycleOffset(uint cycle) { this->shift = cycle; }
+
+  // void performAnalysis(LifeTimeAnalysis &set) {
+  //   for (auto container : containers)
+  //     container->performAnalysis(set);
+  // }
+
+  // TODO: Cleanup duplicated code to replace/swap nodes (here, in replaceRoot,
+  // and in IfDependencyNode::liftOp)
+  void replaceLeafWithAlloc(VirtualQID qid, DependencyNode *new_leaf) {
+    assert(new_leaf->qids.contains(qid) &&
+           "Replacement dependency has a different QID!");
+    assert(new_leaf->isAlloc() && "replaceLeafWithAlloc passed non-alloc");
+    auto first_use = getFirstUseOf(qid);
+    auto old_leaf = leafs[qid];
+    leafs[qid] = new_leaf;
+    for (uint i = 0; i < first_use->dependencies.size(); i++)
+      if (first_use->dependencies[i].node == old_leaf)
+        first_use->dependencies[i] =
+            DependencyNode::DependencyEdge(new_leaf, 0);
+    old_leaf->successors.remove(first_use);
+    new_leaf->successors.clear();
+    new_leaf->successors.insert(first_use);
+    allocs[qid] = static_cast<InitDependencyNode *>(new_leaf);
   }
 
-  /// Ensures that the graph is valid.
-  /// This is an expensive check that should only be used for
-  /// testing/debugging.
-  void validate() {
-    for (auto root : roots)
-      validateNode(root, total_height);
+  void replaceRoot(VirtualQID qid, DependencyNode *root) {
+    DependencyNode *old_root = getRootForQID(qid);
+    auto use = std::find_if(old_root->dependencies.begin(),
+                            old_root->dependencies.end(),
+                            [&](DependencyNode::DependencyEdge dep) -> bool {
+                              return dep.qid == qid;
+                            });
+    assert(use != old_root->dependencies.end() && "No edge for qid on root");
+    // Patch the node feeding this edge; getLastUseOf(qid) is null for a leaf
+    DependencyNode *last_use = use->node;
+
+    root->dependencies[0] = *use;
+    old_root->dependencies.erase(use);
+    if (old_root->dependencies.size() == 0)
+      roots.remove(old_root);
+
+    root->updateHeight();
+    if (tallest == old_root)
+      tallest = root;
+
+    roots.insert(root);
+    last_use->successors.remove(old_root);
+    last_use->successors.insert(root);
+  }
+
+  void removeAlloc(VirtualQID qid) {
+    assert(allocs.count(qid) == 1 && "Given qid not allocated in graph");
+    allocs.erase(allocs.find(qid));
+    auto toRemove = getRootForQID(qid);
+    roots.remove(toRemove);
+    // Reset tallest if needed
+    updateHeight();
+
+    removeQID(qid);
+  }
+
+  void removeQID(VirtualQID qid) {
+    leafs.erase(qid); // erase-by-key is a no-op when qid has no leaf entry
+    qids.remove(qid);
+  }
+
+  void updateHeight() {
+    total_height = 0;
+    tallest = nullptr;
+    SetVector<DependencyNode *> seen;
+    for (auto root : roots) {
+      updateHeight(seen, root);
+      if (!tallest || root->height > total_height) {
+        tallest = root;
+        total_height = root->height;
+      }
+    }
   }
 };
 
-struct DependencyAnalysisPass
-    : public cudaq::opt::impl::DependencyAnalysisBase<DependencyAnalysisPass> {
-  using DependencyAnalysisBase::DependencyAnalysisBase;
-  SmallVector<DependencyNode *> perOp;
+class RootDependencyNode : public OpDependencyNode {
+protected:
+  void printNode() override {
+    llvm::outs() << "Dealloc for QID ";
+    for (auto qid : qids)
+      llvm::outs() << qid;
+    llvm::outs() << ": ";
+    associated->dump();
+  }
 
-  /// Validates that \p op meets the assumptions:
-  /// * control flow operations are not allowed
-  bool validateOp(Operation *op) {
-    assert((!isQuakeOperation(op) || quake::isLinearValueForm(op) ||
-      dyn_cast<quake::DiscriminateOp>(op)) && "DependencyAnalysisPass requires all quake operations to be in value form");
-
-    if (op->getRegions().size() != 0) {
-      op->emitOpError(
-        "DependencyAnalysisPass cannot handle non-function operations with regions."
-        " Do you have if statements in a Base Profile QIR program?");
-      signalPassFailure();
-      return false;
+  bool isSkip() override { return true; }
+
+  uint numTicks() override { return 0; }
+
+  void codeGen(OpBuilder &builder, LifeTimeAnalysis &set) override {
+    assert(!hasCodeGen && "Returning same wire twice");
+    auto wire = dependencies[0].getValue();
+    auto newOp =
+        builder.create<quake::ReturnWireOp>(builder.getUnknownLoc(), wire);
+    newOp->setAttrs(associated->getAttrs());
+    newOp->removeAttr("dnodeid");
+    associated = newOp;
+    hasCodeGen = true;
+  }
+
+public:
+  RootDependencyNode(quake::ReturnWireOp op,
+                     SmallVector<DependencyEdge> dependencies)
+      : OpDependencyNode(op, dependencies) {
+    // numTicks won't be properly calculated by OpDependencyNode constructor,
+    // so have to recompute height here
+    updateHeight();
+  };
+};
+
+class ArgDependencyNode : public DependencyNode {
+  friend class DependencyBlock;
+  friend class IfDependencyNode;
+
+protected:
+  BlockArgument barg;
+
+  void printNode() override { barg.dump(); }
+
+  bool isRoot() override { return false; }
+  bool isLeaf() override { return true; }
+  bool isQuantumOp() override { return quake::isQuantumType(barg.getType()); }
+  uint numTicks() override { return 0; }
+
+  Value getResult(uint resultidx) override {
+    assert(resultidx == 0 && "Invalid resultidx");
+    return barg;
+  }
+
+  ValueRange getResults() override { return ValueRange({barg}); }
+
+  void codeGen(OpBuilder &builder, LifeTimeAnalysis &set) override{};
+
+  std::optional<VirtualQID> getQIDForResult(size_t resultidx) override {
+    assert(resultidx == 0 && "Invalid resultidx");
+    return qids.empty() ? std::nullopt : std::optional(qids.front());
+  }
+
+public:
+  ArgDependencyNode(BlockArgument arg, DependencyEdge val) : barg(arg) {
+    auto qid = val->getQIDForResult(val.resultidx);
+    if (qid.has_value())
+      qids.insert(qid.value());
+  }
+
+  ArgDependencyNode(BlockArgument arg) : barg(arg) {}
+
+  virtual std::string getOpName() override {
+    return std::to_string(barg.getArgNumber()).append("arg");
+  };
+};
+
+class TerminatorDependencyNode : public OpDependencyNode {
+protected:
+  void printNode() override {
+    llvm::outs() << "Block Terminator With QIDs ";
+    bool printComma = false;
+    for (auto qid : qids) {
+      if (printComma)
+        llvm::outs() << ", ";
+      llvm::outs() << qid;
+      printComma = true;
     }
+    llvm::outs() << ": ";
+    associated->dump();
+  }
 
-    if (auto br = dyn_cast<mlir::BranchOpInterface>(op)) {
-      br.emitOpError(
-        "DependencyAnalysisPass cannot handle branching operations."
-        " Do you have if statements in a Base Profile QIR program?");
-      signalPassFailure();
-      return false;
+  void codeGen(OpBuilder &builder, LifeTimeAnalysis &set) override{};
+
+public:
+  TerminatorDependencyNode(Operation *terminator,
+                           SmallVector<DependencyEdge> dependencies)
+      : OpDependencyNode(terminator, dependencies) {
+    assert(terminator->hasTrait<mlir::OpTrait::ReturnLike>() &&
+           "Invalid terminator");
+  }
+
+  void genTerminator(OpBuilder &builder, LifeTimeAnalysis &set) {
+    OpDependencyNode::codeGen(builder, set);
+  }
+
+  void eraseQID(VirtualQID qid) override {
+    for (uint i = dependencies.size(); i-- > 0;) // backwards: erase-safe
+      if (dependencies[i].qid == qid)
+        dependencies.erase(dependencies.begin() + i);
+  }
+};
+
+class DependencyBlock {
+private:
+  SmallVector<ArgDependencyNode *> argdnodes;
+  DependencyGraph *graph;
+  Block *block;
+  TerminatorDependencyNode *terminator;
+  uint height;
+  SetVector<size_t> pqids;
+
+public:
+  DependencyBlock(SmallVector<ArgDependencyNode *> argdnodes,
+                  DependencyGraph *graph, Block *block,
+                  TerminatorDependencyNode *terminator)
+      : argdnodes(argdnodes), graph(graph), block(block),
+        terminator(terminator), pqids() {
+    height = graph->getHeight();
+  }
+
+  uint getHeight() { return height; }
+
+  void setCycle(uint cycle) { graph->setCycleOffset(cycle); }
+
+  SetVector<VirtualQID> getAllocs() { return graph->getAllocs(); }
+
+  SetVector<VirtualQID> getQIDs() { return graph->getQIDs(); }
+
+  OpDependencyNode *getFirstUseOf(VirtualQID qid) {
+    return graph->getFirstUseOf(qid);
+  }
+
+  OpDependencyNode *getLastUseOf(VirtualQID qid) {
+    return graph->getLastUseOf(qid);
+  }
+
+  void allocationPass(LifeTimeAnalysis &set) {
+    // Perform mapping inside-out
+    // New physical qubits will be captured by `set`
+    graph->allocationPass(set);
+
+    for (auto qid : getAllocs()) {
+      auto lifetime = graph->getLifeTimeForQID(qid);
+      LLVM_DEBUG(llvm::dbgs() << "Qid " << qid);
+      LLVM_DEBUG(llvm::dbgs()
+                 << " is in use from cycle " << lifetime->getBegin());
+      LLVM_DEBUG(llvm::dbgs() << " through cycle " << lifetime->getEnd());
+      LLVM_DEBUG(llvm::dbgs() << "\n");
+
+      auto phys = set.allocatePhysical(qid, lifetime);
+      LLVM_DEBUG(llvm::dbgs()
+                 << "\tIt is mapped to the physical qubit " << phys);
+      LLVM_DEBUG(llvm::dbgs() << "\n\n");
+
+      graph->assignToPhysical(qid, phys);
     }
+  }
 
-    if (auto call = dyn_cast<mlir::CallOpInterface>(op)) {
-      call.emitOpError("DependencyAnalysisPass does not support function calls");
-      signalPassFailure();
-      return false;
+  /// Up to caller to move builder outside block after construction
+  Block *codeGen(OpBuilder &builder, Region *region, LifeTimeAnalysis &set) {
+    Block *newBlock = builder.createBlock(region);
+    for (uint i = 0; i < argdnodes.size(); i++) {
+      auto old_barg = argdnodes[i]->barg;
+      argdnodes[i]->barg =
+          newBlock->addArgument(old_barg.getType(), old_barg.getLoc());
+      argdnodes[i]->hasCodeGen = true;
     }
 
-    return true;
+    builder.setInsertionPointToStart(newBlock);
+
+    for (uint cycle = 0; cycle < height; cycle++)
+      graph->codeGenAt(cycle, builder, set);
+
+    terminator->genTerminator(builder, set);
+
+    block = newBlock;
+
+    return newBlock;
   }
 
-  /// Validates that \p func meets the assumptions:
-  /// * function bodies contain a single block
-  /// * functions have no arguments
-  /// * functions have no results
-  bool validateFunc(func::FuncOp func) {
-    if (!func.getFunctionBody().hasOneBlock()) {
-      func.emitError("DependencyAnalysisPass cannot handle multiple blocks. Do "
-                     "you have if statements in a Base Profile QIR program?");
-      signalPassFailure();
-      return false;
+  void print() {
+    llvm::outs() << "Block with (" << argdnodes.size() << ") args:\n";
+    // block->dump();
+    // llvm::outs() << "Block graph:\n";
+    graph->print();
+    llvm::outs() << "End block\n";
+  }
+
+  void updateHeight() {
+    graph->updateHeight();
+    height = graph->getHeight();
+  }
+
+  // void performAnalysis(LifeTimeAnalysis &set) {
+  //   // First, move allocs in, this works outside-in
+
+  //   for (auto alloc : getAllocs()) {
+  //     auto first_use = getFirstUseOf(alloc);
+  //     auto last_use = getLastUseOf(alloc);
+  //     if (first_use == last_use && first_use->isContainer()) {
+  //       // TODO: move alloc inside
+  //       auto graph = graphMap[alloc];
+  //       auto root = graph->getRootForQID(alloc);
+  //       auto init = graph->getAllocForQID(alloc);
+  //       first_use->moveAllocIntoBlock(init, root, alloc);
+  //       graph->removeQID(alloc);
+  //     }
+  //   }
+  //   // Then, everything else works inside-out, so is handled elsewhere
+  //   for (auto graph : graphs)
+  //     graph->performAnalysis(set);
+  // }
+
+  /// Checks to see if qubits allocated within a block are only used
+  /// inside an `if` in that block, in which case they can be moved
+  /// inside the `if`.
+  ///
+  /// Works outside-in, to contract as tightly as possible.
+  void contractAllocsPass() {
+    // Look for contract-able allocations in this block
+    for (auto alloc : getAllocs()) {
+      auto first_use = getFirstUseOf(alloc);
+      auto last_use = getLastUseOf(alloc);
+      if (first_use == last_use && first_use->isContainer()) {
+        // Move alloc inside
+        auto root = graph->getRootForQID(alloc);
+        auto init = graph->getAllocForQID(alloc);
+        first_use->moveAllocIntoBlock(init, root, alloc);
+        // Qid is no longer used in this block, remove related metadata
+        graph->removeAlloc(alloc);
+      }
     }
 
-    // TODO: I think synthesis and inlining should cover this
-    //       so it may make sense to turn into an assert
-    if (func.getArguments().size() != 0) {
-      func.emitError(
-          "DependencyAnalysisPass cannot handle kernel arguments. "
-          "Was quake synthesis run before this pass?");
-      signalPassFailure();
-      return false;
+    // Outside-in, so recur only after applying pass to this block
+    graph->contractAllocsPass();
+  }
+
+  void performLiftingPass() { graph->performLiftingPass(); }
+
+  void moveAllocIntoBlock(DependencyNode *init, DependencyNode *root,
+                          VirtualQID qid) {
+    // Drop the block argument(s) for qid; go backwards so erase skips nothing
+    for (uint i = argdnodes.size(); i-- > 0;)
+      if (argdnodes[i]->qids.contains(qid))
+        argdnodes.erase(argdnodes.begin() + i);
+    graph->replaceLeafWithAlloc(qid, init);
+    graph->replaceRoot(qid, root);
+  }
+
+  void schedulingPass() { graph->schedulingPass(); }
+
+  void removeQID(VirtualQID qid) {
+    for (uint i = 0; i < argdnodes.size(); i++)
+      if (argdnodes[i]->qids.contains(qid)) {
+        argdnodes.erase(argdnodes.begin() + i);
+        break;
+      }
+
+    terminator->eraseQID(qid);
+    graph->removeQID(qid);
+  }
+};
+
+class IfDependencyNode : public OpDependencyNode {
+  friend class ArgDependencyNode;
+
+protected:
+  DependencyBlock *then_block;
+  DependencyBlock *else_block;
+  SmallVector<Type> results;
+
+  // TODO: figure out nice way to display
+  void printNode() override {
+    this->OpDependencyNode::printNode();
+    llvm::outs() << "Then ";
+    then_block->print();
+    llvm::outs() << "Else ";
+    else_block->print();
+  }
+
+  uint numTicks() override {
+    return std::max(then_block->getHeight(), else_block->getHeight());
+  }
+
+  bool isSkip() override { return numTicks() == 0; }
+
+  bool isQuantumOp() override { return numTicks() > 0; }
+
+  void liftOp(OpDependencyNode *op) {
+    auto newDeps = SmallVector<DependencyEdge>();
+
+    // Construct new dependencies
+    for (uint i = 0; i < op->dependencies.size(); i++) {
+      auto dependency = op->dependencies[i];
+      assert(!dependency->isAlloc() && "TODO");
+
+      if (!dependency->isQuantumOp()) {
+        newDeps.push_back(dependency);
+      } else if (dependency->isLeaf()) {
+        ArgDependencyNode *arg =
+            static_cast<ArgDependencyNode *>(dependency.node);
+        auto num = arg->barg.getArgNumber();
+        auto newDep = dependencies[num + 1];
+        newDeps.push_back(newDep);
+        newDep->successors.remove(this);
+        newDep->successors.insert(op);
+        arg->successors.remove(this);
+
+        dependencies[num + 1] =
+            DependencyEdge{op, op->getResultForDependency(i)};
+      }
     }
 
-    if (func.getNumResults() != 0) {
-      func.emitError(
-          "DependencyAnalysisPass cannot handle non-void return types for kernels");
-      signalPassFailure();
-      return false;
+    // Patch successors
+    op->erase();
+
+    op->successors.insert(this);
+    op->dependencies = newDeps;
+  }
+
+  void combineAllocs(SetVector<PhysicalQID> then_allocs,
+                     SetVector<PhysicalQID> else_allocs,
+                     LifeTimeAnalysis &set) {
+    SetVector<PhysicalQID> combined;
+    /*while (!then_allocs.empty() && !else_allocs.empty()) {
+      auto then_alloc = then_allocs.front();
+      then_allocs.erase(then_allocs.begin());
+      auto else_alloc = else_allocs.front();
+      else_allocs.erase(else_allocs.begin());
+      combined.insert(then_alloc);
+    }*/
+    combined.set_union(then_allocs);
+    combined.set_union(else_allocs);
+
+    for (auto pqid : combined)
+      set.reallocatePhysical(pqid, new LifeTime(cycle, cycle + numTicks()));
+  }
+
+  void allocationPass(LifeTimeAnalysis &set) override {
+    then_block->setCycle(cycle);
+    else_block->setCycle(cycle);
+    // set.pushFrame();
+    then_block->allocationPass(set);
+    auto then_allocs = set.popFrame();
+    // set.pushFrame();
+    else_block->allocationPass(set);
+    auto else_allocs = set.popFrame();
+    // TODO: function for combining pqids
+    combineAllocs(then_allocs, else_allocs, set);
+  };
+
+  void codeGen(OpBuilder &builder, LifeTimeAnalysis &set) override {
+    if (hasCodeGen)
+      return;
+
+    // Check dependencies before gathering operands (the same order used by
+    // OpDependencyNode::codeGen): gatherOperands asserts they have codegen
+    if (isSkip())
+      for (auto dependency : dependencies)
+        if (!dependency->hasCodeGen) {
+          if (dependency->isQuantumDependent())
+            // Wait for quantum op dependency to be codeGen'ed
+            return;
+          else
+            dependency->codeGen(builder, set);
+        }
+
+    cudaq::cc::IfOp oldOp = dyn_cast<cudaq::cc::IfOp>(associated);
+    auto operands = gatherOperands(builder, set);
+    auto newIf =
+        builder.create<cudaq::cc::IfOp>(oldOp->getLoc(), results, operands);
+    auto *then_region = &newIf.getThenRegion();
+    then_block->codeGen(builder, then_region, set);
+    auto *else_region = &newIf.getElseRegion();
+    else_block->codeGen(builder, else_region, set);
+
+    associated = newIf;
+    builder.setInsertionPointAfter(associated);
+    hasCodeGen = true;
+
+    // Ensure classical values are generated
+    for (auto successor : successors)
+      if (successor->isSkip())
+        successor->codeGen(builder, set);
+  };
+
+  /// Height = tallest dependency + numTicks(); the branch blocks update after.
+  void updateHeight() override {
+    height = 0;
+    for (auto dep : dependencies)
+      height = dep->getHeight() > height ? dep->getHeight() : height;
+    height += numTicks();
+    then_block->updateHeight();
+    else_block->updateHeight();
+  }
+
+public:
+  /// Creates the node for \p op from its operand \p dependencies and the
+  /// already-built dependency blocks of its then and else regions.
+  IfDependencyNode(cudaq::cc::IfOp op, SmallVector<DependencyEdge> dependencies,
+                   DependencyBlock *then_block, DependencyBlock *else_block)
+      : OpDependencyNode(op.getOperation(), dependencies),
+        then_block(then_block), else_block(else_block) {
+    results = SmallVector<mlir::Type>(op.getResultTypes());
+    // Base constructor can't compute numTicks() properly, so redo the height
+    height = 0;
+    for (auto dep : dependencies)
+      height = dep->getHeight() > height ? dep->getHeight() : height;
+    height += numTicks();
+  }
+
+  // Scheduling and alloc contraction are simply forwarded to both branches.
+  void schedulingPass() override {
+    then_block->schedulingPass();
+    else_block->schedulingPass();
+  }
+  void contractAllocsPass() override {
+    then_block->contractAllocsPass();
+    else_block->contractAllocsPass();
+  }
+
+  void eraseQID(VirtualQID qid) override {
+    // Skip i == 0: `results.begin() + i - 1` would precede the first result.
+    for (uint i = 1; i < dependencies.size(); i++)
+      if (dependencies[i].qid == qid)
+        results.erase(results.begin() + i - 1);
+    then_block->removeQID(qid);
+    else_block->removeQID(qid);
+    this->OpDependencyNode::eraseQID(qid);
+  }
+
+  /// Lifts operations common to both branches out of the `if`.
+  /// The branches are processed first (inside out). Then, for each qid of
+  /// this node, the first use in each branch is compared: if neither branch
+  /// uses the qid it is erased from this node; if both first uses are
+  /// equivalent the then-use is lifted via liftOp and the else-use erased.
+  /// The scan repeats until a full sweep lifts nothing.
+  void performLiftingPass() override {
+    then_block->performLiftingPass();
+    else_block->performLiftingPass();
+
+    bool run_more = true;
+
+    // Inside out, so recur first, then apply pass to this node
+    while (run_more) {
+      run_more = false;
+      // NOTE(review): eraseQID/liftOp run while `qids` is being iterated; if
+      // either mutates `qids` this loop is invalidated -- confirm.
+      for (auto qid : qids) {
+        auto then_use = then_block->getFirstUseOf(qid);
+        auto else_use = else_block->getFirstUseOf(qid);
+
+        if (!then_use || !else_use) {
+          if (!then_use && !else_use)
+            eraseQID(qid);
+          continue;
+        }
+
+        if (then_use->equivalentTo(else_use)) {
+          liftOp(then_use);
+          else_use->erase();
+          run_more = true;
+        }
+      }
     }
-    return true;
+
+    // TODO (alloc case): operations on qubits allocated inside the branches
+    // are not lifted yet. Sketch of the intended approach: for every pair
+    // (then_alloc, else_alloc), compare
+    //   then_block->getFirstUseOf(then_alloc)
+    // with
+    //   else_block->getFirstUseOf(else_alloc)
+    // using equivalentTo(); equivalent first uses could be lifted.
+  }
+
+  // void performAnalysis(LifeTimeAnalysis &set) override {
+  //   set.pushFrame();
+  //   // First, recur to settle Ifs inside blocks
+  //   then_block->performAnalysis(set);
+  //   else_block->performAnalysis(set);
+  //   // Lift operations as possible
+  //   performLifting();
+  //   // Recompute block heights after lifting
+  //   then_block->updateHeight();
+  //   else_block->updateHeight();
+  //   // TODO: mapToPhysical - update with context
+  //   mapToPhysical(set);
+  //   auto pqids = set.popFrame();
+  // }
+  /// Always true for an `if` node (presumably since it owns nested blocks).
+  bool isContainer() override { return true; }
+
+  /// Moves the alloc (\p init) and sink (\p root) of \p qid into both branches.
+  void moveAllocIntoBlock(DependencyNode *init, DependencyNode *root,
+                          VirtualQID qid) override {
+    assert(successors.contains(root) && "Illegal root for contractAlloc");
+    assert(init->successors.contains(this) && "Illegal init for contractAlloc");
+    auto alloc = static_cast<InitDependencyNode *>(init);
+    auto alloc_copy = new InitDependencyNode(*alloc);
+    auto sink = static_cast<RootDependencyNode *>(root);
+    auto sink_copy = new RootDependencyNode(*sink);
+    init->successors.remove(this);
+    successors.remove(root);
+    then_block->moveAllocIntoBlock(alloc, root, qid);
+    else_block->moveAllocIntoBlock(alloc_copy, sink_copy, qid);
+    auto iter =
+        std::find_if(dependencies.begin(), dependencies.end(),
+                     [init](const auto &edge) { return edge.node == init; });
+    assert(iter != dependencies.end() && "init is not a dependency of the if");
+    size_t offset = iter - dependencies.begin();
+    associated->eraseOperand(offset);
+    // NOTE(review): eraseQID erases result i - 1 for dependency i -- confirm
+    results.erase(results.begin() + offset);
+    dependencies.erase(iter);
+    // Since we're removing a result, update the result indices of successors
+    for (auto successor : successors)
+      for (auto &edge : successor->dependencies)
+        if (edge.node == this && edge.resultidx >= offset)
+          edge.resultidx--;
+  }
+};
+
+/// Validates that \p op meets the assumptions of dep-analysis, emitting an
+/// error on \p op and returning false if it does not:
+/// * quake operations, except discriminate, must be in linear value form
+/// * `cc.if` is the only operation that may have regions
+/// * branching operations and function calls are not allowed
+bool validateOp(Operation *op) {
+  if (isQuakeOperation(op) && !quake::isLinearValueForm(op) &&
+      !isa<quake::DiscriminateOp>(op)) {
+    op->emitOpError(
+        "dep-analysis requires all quake operations to be in value form");
+    return false;
+  }
+  if (op->getRegions().size() != 0 && !isa<cudaq::cc::IfOp>(op)) {
+    op->emitOpError(
+        "control flow operations not currently supported in dep-analysis");
+    return false;
+  }
+  if (isa<mlir::BranchOpInterface>(op)) {
+    op->emitOpError(
+        "branching operations not currently supported in dep-analysis");
+    return false;
+  }
+  if (isa<mlir::CallOpInterface>(op)) {
+    op->emitOpError("function calls not currently supported in dep-analysis");
+    return false;
+  }
+
+  return true;
+}
+
+/// Validates the one assumption dep-analysis checks on \p func itself:
+/// the function body must consist of exactly one block.
+/// Any other block count, including an empty body, emits an error on
+/// \p func and yields false.
+/// Arguments and results of \p func are not checked here.
+[[maybe_unused]] bool validateFunc(func::FuncOp func) {
+  if (func.getBlocks().size() == 1)
+    return true;
+
+  func.emitOpError("multiple blocks not currently supported in dep-analysis");
+  return false;
+}
+
+class DependencyAnalysisEngine {
+private:
+  // perOp: nodes looked up by an op's "dnodeid"; argMap: node per block arg.
+  SmallVector<DependencyNode *> perOp;
+  DenseMap<BlockArgument, ArgDependencyNode *> argMap;
+public:
+  DependencyAnalysisEngine() : perOp({}), argMap({}) {}
+
+  /// Builds the DependencyBlock for \p b: an ArgDependencyNode per block
+  /// argument, a node per operation (visitOp) and a DependencyGraph rooted at
+  /// the terminator's node. \p dependencies are the edges of the enclosing
+  /// op (empty for an entry block); argument k uses dependencies[k + 1].
+  /// Returns nullptr if visitOp fails; warns about end ops outside the graph.
+  DependencyBlock *
+  visitBlock(mlir::Block *b,
+             SmallVector<DependencyNode::DependencyEdge> dependencies) {
+    SmallVector<ArgDependencyNode *> argdnodes;
+    for (auto targ : b->getArguments()) {
+      ArgDependencyNode *dnode;
+      // Entry block has no argument dependencies
+      if (dependencies.size() > 0)
+        dnode =
+            new ArgDependencyNode(targ, dependencies[targ.getArgNumber() + 1]);
+      else
+        dnode = new ArgDependencyNode(targ);
+      argMap[targ] = dnode;
+      argdnodes.push_back(dnode);
+    }
+    DenseMap<DependencyNode *, Operation *> roots;
+    TerminatorDependencyNode *terminator = nullptr;
+    Operation *terminatorOp = b->getTerminator();
+    for (auto &op : b->getOperations()) {
+      bool isTerminator = (&op == terminatorOp);
+      auto node = visitOp(&op, isTerminator);
+      if (!node)
+        return nullptr;
+      if (isEndOp(&op))
+        roots[node] = &op;
+      if (isTerminator) {
+        assert(op.hasTrait<mlir::OpTrait::IsTerminator>() &&
+               "Illegal terminator op!");
+        terminator = static_cast<TerminatorDependencyNode *>(node);
+      }
+    }
+
+    DependencyGraph *new_graph = new DependencyGraph(terminator);
+    auto included = new_graph->getRoots();
+    for (auto [root, op] : roots)
+      if (!included.contains(root))
+        op->emitWarning(
+            "DependencyAnalysisPass: Wire is dead code and its operations will "
+            "be deleted (did you forget to return a value?)");
+    return new DependencyBlock(argdnodes, new_graph, b, terminator);
   }
 
   /// Creates and returns a new dependency node for \p op, connecting it to the
   /// nodes created for the defining operations of the operands of \p op
-  DependencyNode *visitOp(Operation *op) {
+  DependencyNode *visitOp(Operation *op, bool isTerminator) {
     if (!validateOp(op))
       return nullptr;
 
-    SmallVector<DependencyNode *> dependencies(op->getNumOperands());
-    for (uint i = 0; i < op->getNumOperands(); i++) {
-      auto dependency = visitValue(op->getOperand(i));
-      assert(dependency && "dependency node not found for dependency");
-      dependencies[i] = dependency;
+    SmallVector<DependencyNode::DependencyEdge> dependencies;
+    for (uint i = 0; i < op->getNumOperands(); i++)
+      dependencies.push_back(visitValue(op->getOperand(i)));
+
+    DependencyNode *newNode;
+
+    if (auto init = dyn_cast<quake::BorrowWireOp>(op))
+      newNode = new InitDependencyNode(init);
+    else if (auto sink = dyn_cast<quake::ReturnWireOp>(op))
+      newNode = new RootDependencyNode(sink, dependencies);
+    else if (auto ifop = dyn_cast<cudaq::cc::IfOp>(op)) {
+      auto then_block = visitBlock(ifop.getThenEntryBlock(), dependencies);
+      auto else_block = visitBlock(ifop.getElseEntryBlock(), dependencies);
+      if (!then_block || !else_block)
+        return nullptr;
+      newNode =
+          new IfDependencyNode(ifop, dependencies, then_block, else_block);
+    } else if (isTerminator) {
+      newNode = new TerminatorDependencyNode(op, dependencies);
+    } else {
+      newNode = new OpDependencyNode(op, dependencies);
     }
 
-    DependencyNode *newNode = new DependencyNode(op, dependencies);
-
     // Dnodeid is the next slot of the dnode vector
     auto id = perOp.size();
 
@@ -609,212 +1643,79 @@ struct DependencyAnalysisPass
   /// Returns the dependency node for the defining operation of \p v
   /// Assumption: defining operation for \p v exists and already has been
   /// visited
-  DependencyNode *visitValue(Value v) {
-    // Skip classical values
-    auto defOp = v.getDefiningOp();
-    if (defOp) {
-      // Since we walk forward through the ast, every value should be defined
-      // before it is used, so we should have already visited defOp,
-      // and thus should have a memoized dnode for defOp, fail if not
-      assert(defOp->hasAttr("dnodeid") &&
-             "Error: no dnodeid found for operation");
-
-      auto id = defOp->getAttr("dnodeid").cast<IntegerAttr>().getUInt();
-      auto dnode = perOp[id];
-      return dnode;
-    }
-
-    // This means that v is a block argument which is not allowed
-    // Return null so the error can be handled nicely by visitOp
-    return nullptr;
-  }
-
-  /// Given a set of qubit lifetimes and a candidate lifetime,
-  /// tries to find a qubit to reuse.
-  /// The result either contains the optimal qubit to reuse,
-  /// or contains no value if no qubit can be reused
-  std::optional<size_t> findBestQubit(SmallVector<LifeTime *> lifetimes,
-                                      LifeTime *lifetime) {
-    std::optional<size_t> best;
-    uint best_distance = INT_MAX;
-    for (uint i = 0; i < lifetimes.size(); i++) {
-      LifeTime *other = lifetimes[i];
-      auto distance = lifetime->distance(other);
-      if (lifetime->isAfter(other) && distance < best_distance) {
-        best = i;
-        best_distance = distance;
-      }
-    }
-
-    return best;
-  }
-
-  uint getTotalCycles(SmallVector<DependencyGraph> &graphs) {
-    uint total = 0;
-    SmallVector<LifeTime *> lifetimes;
-    SmallVector<DependencyNode *> live_wires;
-
-    for (auto graph : graphs)
-      if (graph.getHeight() > total)
-        total = graph.getHeight();
-
-    return total;
-  }
-
-  /// Reorders the program based on the dependency graphs to reuse qubits
-  void codeGen(SmallVector<DependencyGraph> &graphs, OpBuilder &builder) {
-    SmallVector<LifeTime *> lifetimes;
-    SmallVector<DependencyNode *> sinks;
-    numCycles = getTotalCycles(graphs);
-
-    for (uint cycle = 0; cycle < numCycles; cycle++) {
-      for (auto graph : graphs) {
-        // For every "new" qubit, try to find an existing out-of-use qubit
-        // that we can reuse. Failing that, use a new qubit.
-        auto qids = graph.getFirstUsedAtCycle(cycle);
-        std::sort(qids.begin(), qids.end());
-        for (auto qid : qids) {
-          auto lifetime = graph.getLifeTimeForQID(qid);
-          LLVM_DEBUG(llvm::dbgs() << "Qid " << qid);
-          LLVM_DEBUG(llvm::dbgs()
-                     << " is in use from cycle " << lifetime->getBegin());
-          LLVM_DEBUG(llvm::dbgs() << " through cycle " << lifetime->getEnd());
-          LLVM_DEBUG(llvm::dbgs() << "\n");
-
-          auto new_qid = findBestQubit(lifetimes, lifetime);
-          if (!new_qid) {
-            // Can't reuse any qubits, have to allocate a new one
-            new_qid = lifetimes.size();
-            lifetimes.push_back(lifetime);
-            sinks.push_back(graph.getRootForQID(qid));
-            // Initialize the qubit with a null wire op
-            graph.initializeWire(qid, builder);
-            numPhysicalQubits++;
-          } else {
-            // We found a qubit we can reuse!
-            lifetimes[new_qid.value()] =
-                lifetime->combine(lifetimes[new_qid.value()]);
-
-            auto last_user = sinks[new_qid.value()];
-            // We assume that the result of the last use of the old qubit
-            // must have been a null wire (e.g., it was reset),
-            // so we can reuse that result as the initial value to reuse it
-            graph.initializeWireFromRoot(qid, last_user);
-            sinks[new_qid.value()] = graph.getRootForQID(qid);
-          }
-          
-          LLVM_DEBUG(llvm::dbgs() << "It is mapped to physical qubit ");
-          LLVM_DEBUG(llvm::dbgs() << new_qid.value() << "\n\n");
-        }
+  DependencyNode::DependencyEdge visitValue(Value v) {
+    if (auto barg = dyn_cast<BlockArgument>(v))
+      return DependencyNode::DependencyEdge{argMap[barg], 0};
 
-        graph.codeGenAt(cycle, builder);
-      }
-    }
+    auto defOp = v.getDefiningOp();
 
-    // Add teardown instructions
-    for (auto sink : sinks)
-      sink->addCleanUp(builder);
+    assert(defOp &&
+           "Cannot handle value that is neither a BlockArgument nor OpResult");
+    // cast<> (not dyn_cast<>): a null OpResult must never be dereferenced
+    auto resultidx = cast<OpResult>(v).getResultNumber();
+    // Since we walk forward through the ast, every value should be defined
+    // before it is used, so we should have already visited defOp,
+    // and thus should have a memoized dnode for defOp, fail if not
+    assert(defOp->hasAttr("dnodeid") && "No dnodeid found for operation");
+    auto id = defOp->getAttr("dnodeid").cast<IntegerAttr>().getUInt();
+    auto dnode = perOp[id];
+    return DependencyNode::DependencyEdge{dnode, resultidx};
   }
+};
 
+struct DependencyAnalysisPass
+    : public cudaq::opt::impl::DependencyAnalysisBase<DependencyAnalysisPass> {
+  using DependencyAnalysisBase::DependencyAnalysisBase;
   void runOnOperation() override {
-    auto func = getOperation();
-    // Ignore non-quantum functions
-    if (!func->hasAttr("cudaq-kernel") || func.getBlocks().empty())
-      return;
+    // Rewrites each 'cudaq-kernel' function that has a body and results.
+    auto mod = getOperation();
 
-    if (!validateFunc(func))
-      return;
+    for (auto &op : mod) {
+      if (auto func = dyn_cast<func::FuncOp>(op)) {
+        if (!func->hasAttr("cudaq-kernel") || func.getBlocks().empty())
+          continue;
 
-    SetVector<DependencyNode *> roots;
+        if (func.getNumResults() == 0) {
+          func->emitRemark("Function marked 'cudaq-kernel' returns no results, "
+                           "qubit management skipped.");
+          continue;
+        }
 
-    for (auto &op : func.front().getOperations()) {
-      if (dyn_cast<func::ReturnOp>(op))
-        continue;
+        if (!validateFunc(func)) {
+          signalPassFailure();
+          return;
+        }
 
-      auto node = visitOp(&op);
+        Block *oldBlock = &func.front();
+        auto engine = DependencyAnalysisEngine();
 
-      if (!node) {
-        signalPassFailure();
-        return;
-      }
+        auto body = engine.visitBlock(oldBlock, {});
 
-      if (isBeginOp(&op))
-        numVirtualQubits++;
-      if (isEndOp(&op))
-        roots.insert(node);
-    }
-
-    assert(numVirtualQubits == roots.size() && "Too few sinks for qubits -- was add-dealloc run?");
+        if (!body) {
+          signalPassFailure();
+          return;
+        }
 
-    // Construct graphs from roots
-    SmallVector<DependencyGraph> graphs;
-    while (!roots.empty()) {
-      DependencyGraph new_graph(roots.front());
-      roots.set_subtract(new_graph.getRoots());
-      graphs.push_back(new_graph);
+        OpBuilder builder(func);
+        LifeTimeAnalysis set(cudaq::opt::topologyAgnosticWiresetName);
+        // Move allocs in as deep as possible
+        body->contractAllocsPass();
+        // Lift common operations out of `if`s, then refresh the heights
+        body->performLiftingPass();
+        body->updateHeight();
+        // Assign cycles to operations
+        body->schedulingPass();
+        // Using cycle information, allocate physical qubits
+        body->allocationPass(set);
+        // Finally, perform code generation to move back to quake
+        body->codeGen(builder, &func.getRegion(), set);
+        builder.setInsertionPointToStart(mod.getBody());
+        // Replace old block
+        oldBlock->erase();
+      }
     }
-
-    // Validate the graphs only in debug mode
-    LLVM_DEBUG(for (auto graph : graphs) graph.validate(););
-
-    // Setup new block to replace function body
-    OpBuilder builder(func.getOperation());
-    Block *oldBlock = &func.front();
-    Block *newBlock = builder.createBlock(&func.getRegion());
-    SmallVector<mlir::Location> locs;
-    for (auto arg : oldBlock->getArguments())
-      locs.push_back(arg.getLoc());
-    newBlock->addArguments(oldBlock->getArgumentTypes(), locs);
-    builder.setInsertionPointToStart(newBlock);
-    // Generate optimized instructions in new block
-    codeGen(graphs, builder);
-    builder.create<func::ReturnOp>(builder.getUnknownLoc());
-
-    // Replace old block
-    oldBlock->erase();
   }
 };
 
-struct ManageQubitsPipelineOptions
-    : public PassPipelineOptions<ManageQubitsPipelineOptions> {
-  PassOptions::Option<bool> runQubitManagement{
-      *this, "run-qubit-management",
-      llvm::cl::desc(
-          "Runs qubit management pipeline. (default: true)"),
-      llvm::cl::init(true)};
-  PassOptions::Option<bool> dumpNumQubits{
-      *this, "dump-num-qubits",
-      llvm::cl::desc(
-          "Dumps the number of physical qubits used to STDERR. (default: false)"),
-      llvm::cl::init(false)};
-};
 } // namespace
-
-
-// TODO: ensure this is run only with BASE profile
-static void createQubitManagementPipeline(OpPassManager &pm) {
-  pm.addPass(createCanonicalizerPass());
-  pm.addPass(createCSEPass());
-  pm.addNestedPass<func::FuncOp>(cudaq::opt::createQuakeAddDeallocs());
-  pm.addNestedPass<func::FuncOp>(cudaq::opt::createExpandControlVeqs());
-  pm.addNestedPass<func::FuncOp>(cudaq::opt::createFactorQuantumAllocations());
-  pm.addNestedPass<func::FuncOp>(cudaq::opt::createQuantumMemToReg());
-  pm.addPass(createCanonicalizerPass());
-  pm.addPass(createCSEPass());
-  pm.addNestedPass<func::FuncOp>(cudaq::opt::createAssignIDs());
-  pm.addNestedPass<func::FuncOp>(cudaq::opt::createDependencyAnalysis());
-  pm.addNestedPass<func::FuncOp>(cudaq::opt::createRegToMem());
-  pm.addNestedPass<func::FuncOp>(cudaq::opt::createCombineQuantumAllocations());
-  pm.addNestedPass<func::FuncOp>(cudaq::opt::createDelayMeasurementsPass());
-  pm.addPass(createCanonicalizerPass());
-  pm.addPass(createCSEPass());
-}
-
-void cudaq::opt::registerQubitManagementPipeline() {
-  PassPipelineRegistration<ManageQubitsPipelineOptions>(
-      "qubit-management-pipeline",
-      "Map virtual qubits to physical qubits, minimizing the # of physical qubits.",
-      [](OpPassManager &pm, const ManageQubitsPipelineOptions &upo) {
-        createQubitManagementPipeline(pm);
-      });
-}
diff --git a/runtime/common/RuntimeMLIR.cpp b/runtime/common/RuntimeMLIR.cpp
index 3be57071c5..c8f0d27117 100644
--- a/runtime/common/RuntimeMLIR.cpp
+++ b/runtime/common/RuntimeMLIR.cpp
@@ -82,7 +82,6 @@ std::unique_ptr<MLIRContext> initializeMLIR() {
     registerToOpenQASMTranslation();
     registerToIQMJsonTranslation();
     cudaq::opt::registerUnrollingPipeline();
-    cudaq::opt::registerQubitManagementPipeline();
     cudaq::opt::registerTargetPipelines();
     mlirLLVMInitialized = true;
   }
diff --git a/runtime/common/RuntimeMLIR.h b/runtime/common/RuntimeMLIR.h
index 428061ca40..64049875a3 100644
--- a/runtime/common/RuntimeMLIR.h
+++ b/runtime/common/RuntimeMLIR.h
@@ -62,8 +62,7 @@ class Translation {
   mlir::LogicalResult operator()(mlir::Operation *op,
                                  llvm::raw_string_ostream &output,
                                  const std::string &additionalPasses,
-                                 bool printIR,
-                                 bool printIntermediateMLIR,
+                                 bool printIR, bool printIntermediateMLIR,
                                  bool printStats) const {
     return function(op, output, additionalPasses, printIR,
                     printIntermediateMLIR, printStats);
diff --git a/runtime/common/RuntimeMLIRCommonImpl.h b/runtime/common/RuntimeMLIRCommonImpl.h
index 076ee2aa7e..256343ad4f 100644
--- a/runtime/common/RuntimeMLIRCommonImpl.h
+++ b/runtime/common/RuntimeMLIRCommonImpl.h
@@ -471,10 +471,9 @@ void registerToQIRTranslation() {
       [](mlir::Operation *op, llvm::raw_string_ostream &output,                \
          const std::string &additionalPasses, bool printIR,                    \
          bool printIntermediateMLIR, bool printStats) {                        \
-        return qirProfileTranslationFunction(_profile, op, output,             \
-                                             additionalPasses, printIR,        \
-                                             printIntermediateMLIR,            \
-                                             printStats);                      \
+        return qirProfileTranslationFunction(                                  \
+            _profile, op, output, additionalPasses, printIR,                   \
+            printIntermediateMLIR, printStats);                                \
       })
 
   // Base Profile and Adaptive Profile are very similar, so they use the same
diff --git a/runtime/cudaq/platform/default/rest/helpers/ionq/ionq.yml b/runtime/cudaq/platform/default/rest/helpers/ionq/ionq.yml
index adb60c5754..bd6753e31e 100644
--- a/runtime/cudaq/platform/default/rest/helpers/ionq/ionq.yml
+++ b/runtime/cudaq/platform/default/rest/helpers/ionq/ionq.yml
@@ -16,7 +16,7 @@ config:
   # Add the rest-qpu library to the link list
   link-libs: ["-lcudaq-rest-qpu"]
   # Define the lowering pipeline
-  platform-lowering-config: "const-prop-complex,canonicalize,cse,lift-array-value,state-prep,expand-measurements,unrolling-pipeline,decomposition{enable-patterns=U3ToRotations},func.func(lower-to-cfg),canonicalize,func.func(multicontrol-decomposition),qubit-management-pipeline,ionq-gate-set-mapping"
+  platform-lowering-config: "const-prop-complex,canonicalize,cse,lift-array-value,state-prep,expand-measurements,unrolling-pipeline,decomposition{enable-patterns=U3ToRotations},func.func(lower-to-cfg),canonicalize,func.func(multicontrol-decomposition),ionq-gate-set-mapping"
   # Tell the rest-qpu that we are generating QIR.
   codegen-emission: qir-base
   # Additional passes to run after lowering to QIR
diff --git a/runtime/cudaq/platform/default/rest/helpers/oqc/oqc.yml b/runtime/cudaq/platform/default/rest/helpers/oqc/oqc.yml
index 7bed15d759..5cd17d7f57 100644
--- a/runtime/cudaq/platform/default/rest/helpers/oqc/oqc.yml
+++ b/runtime/cudaq/platform/default/rest/helpers/oqc/oqc.yml
@@ -16,7 +16,7 @@ config:
   # Add the rest-qpu library to the link list
   link-libs: ["-lcudaq-rest-qpu"]
   # Define the lowering pipeline
-  platform-lowering-config: "const-prop-complex,canonicalize,cse,lift-array-value,state-prep,expand-measurements,unrolling-pipeline,decomposition{enable-patterns=U3ToRotations},func.func(lower-to-cfg),canonicalize,qubit-management-pipeline,func.func(multicontrol-decomposition),oqc-gate-set-mapping,func.func(add-dealloc,combine-quantum-alloc,canonicalize,factor-quantum-alloc,memtoreg,qubit-mapping{device=file(%QPU_ARCH%)},regtomem)"
+  platform-lowering-config: "const-prop-complex,canonicalize,cse,lift-array-value,state-prep,expand-measurements,unrolling-pipeline,decomposition{enable-patterns=U3ToRotations},func.func(lower-to-cfg),canonicalize,func.func(multicontrol-decomposition),oqc-gate-set-mapping,func.func(add-dealloc,combine-quantum-alloc,canonicalize,factor-quantum-alloc,memtoreg,canonicalize,cse,qubit-mapping{device=file(%QPU_ARCH%)},regtomem)"
   # Tell the rest-qpu that we are generating QIR.
   codegen-emission: qir-base
   # Library mode is only for simulators, physical backends must turn this off
diff --git a/runtime/cudaq/platform/default/rest/helpers/quantinuum/quantinuum.yml b/runtime/cudaq/platform/default/rest/helpers/quantinuum/quantinuum.yml
index 4346f68dee..7c4ca36536 100644
--- a/runtime/cudaq/platform/default/rest/helpers/quantinuum/quantinuum.yml
+++ b/runtime/cudaq/platform/default/rest/helpers/quantinuum/quantinuum.yml
@@ -16,7 +16,7 @@ config:
   # Add the rest-qpu library to the link list
   link-libs: ["-lcudaq-rest-qpu"]
   # Define the lowering pipeline
-  platform-lowering-config: "const-prop-complex,canonicalize,cse,lift-array-value,state-prep,expand-measurements,unrolling-pipeline,decomposition{enable-patterns=U3ToRotations},func.func(lower-to-cfg),canonicalize,func.func(multicontrol-decomposition),quantinuum-gate-set-mapping"
+  platform-lowering-config: "const-prop-complex,canonicalize,cse,lift-array-value,state-prep,expand-measurements,unrolling-pipeline,decomposition{enable-patterns=U3ToRotations},func.func(lower-to-cfg),canonicalize,func.func(multicontrol-decomposition),quantinuum-gate-set-mapping,func.func(add-dealloc,combine-quantum-alloc,canonicalize,factor-quantum-alloc,memtoreg),canonicalize,cse,add-wireset,func.func(assign-wire-indices),dep-analysis"
   # Tell the rest-qpu that we are generating Adaptive QIR.
   codegen-emission: qir-adaptive
   # Library mode is only for simulators, physical backends must turn this off
diff --git a/targettests/execution/qubit_management/classical_example1.cpp b/targettests/execution/qubit_management/classical_example1.cpp
deleted file mode 100644
index 23f7eadda4..0000000000
--- a/targettests/execution/qubit_management/classical_example1.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates.                  *
- * All rights reserved.                                                        *
- *                                                                             *
- * This source code and the accompanying materials are made available under    *
- * the terms of the Apache License 2.0 which accompanies this distribution.    *
- ******************************************************************************/
-
-// clang-format off
-// RUN: CUDAQ_MLIR_PASS_STATISTICS=true nvq++ %cpp_std --target ionq --emulate %s -o %t && %t |& FileCheck %s
-// RUN: CUDAQ_MLIR_PASS_STATISTICS=true nvq++ %cpp_std --target oqc  --emulate %s -o %t && %t |& FileCheck %s
-// RUN: nvq++ -std=c++17 --enable-mlir %s -o %t
-
-#include <cudaq.h>
-
-struct run_test {
-  __qpu__ auto operator()() {
-    cudaq::qubit q;
-    double x = 1.;
-
-    rx(x, q);
-    mz(q);
-  }
-};
-
-int main() {
-  auto counts = cudaq::sample(run_test{});
-  return 0;
-}
-
-// CHECK: (S) 1 num-cycles
-// CHECK: (S) 1 num-physical-qubits
-// CHECK: (S) 1 num-virtual-qubits
\ No newline at end of file
diff --git a/targettests/execution/qubit_management/classical_example2.cpp b/targettests/execution/qubit_management/classical_example2.cpp
deleted file mode 100644
index 6b49d5fa68..0000000000
--- a/targettests/execution/qubit_management/classical_example2.cpp
+++ /dev/null
@@ -1,38 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates.                  *
- * All rights reserved.                                                        *
- *                                                                             *
- * This source code and the accompanying materials are made available under    *
- * the terms of the Apache License 2.0 which accompanies this distribution.    *
- ******************************************************************************/
-
-// clang-format off
-// RUN: CUDAQ_MLIR_PASS_STATISTICS=true nvq++ %cpp_std --target ionq --emulate %s -o %t && %t |& FileCheck %s
-// RUN: CUDAQ_MLIR_PASS_STATISTICS=true nvq++ %cpp_std --target oqc  --emulate %s -o %t && %t |& FileCheck %s
-// RUN: nvq++ -std=c++17 --enable-mlir %s -o %t
-
-#include <cudaq.h>
-
-struct run_test {
-  __qpu__ auto operator()() {
-    cudaq::qubit q,p;
-    double d = 1.;
-
-    rx(d, q);
-    x(q);
-    y(q);
-    rx(d,p);
-    x<cudaq::ctrl>(q,p);
-    mz(q);
-    mz(p);
-  }
-};
-
-int main() {
-  auto counts = cudaq::sample(run_test{});
-  return 0;
-}
-
-// CHECK: (S) 5 num-cycles
-// CHECK: (S) 2 num-physical-qubits
-// CHECK: (S) 2 num-virtual-qubits
\ No newline at end of file
diff --git a/targettests/execution/qubit_management/classical_example3.cpp b/targettests/execution/qubit_management/classical_example3.cpp
deleted file mode 100644
index affcdd7bf0..0000000000
--- a/targettests/execution/qubit_management/classical_example3.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates.                  *
- * All rights reserved.                                                        *
- *                                                                             *
- * This source code and the accompanying materials are made available under    *
- * the terms of the Apache License 2.0 which accompanies this distribution.    *
- ******************************************************************************/
-
-// clang-format off
-// RUN: CUDAQ_MLIR_PASS_STATISTICS=true nvq++ %cpp_std --target ionq --emulate %s -o %t && %t |& FileCheck %s
-// RUN: CUDAQ_MLIR_PASS_STATISTICS=true nvq++ %cpp_std --target oqc  --emulate %s -o %t && %t |& FileCheck %s
-// RUN: nvq++ -std=c++17 --enable-mlir %s -o %t
-
-#include <cudaq.h>
-
-struct run_test {
-  __qpu__ auto operator()() {
-    cudaq::qubit q,p;
-
-    h(q);
-    double d = (double)mz(q);
-    rx(d,p);
-    mz(p);
-  }
-};
-
-int main() {
-  auto counts = cudaq::sample(run_test{});
-  return 0;
-}
-
-// CHECK: (S) 3 num-cycles
-// CHECK: (S) 1 num-physical-qubits
-// CHECK: (S) 2 num-virtual-qubits
\ No newline at end of file
diff --git a/targettests/execution/qubit_management/reuse_example1.cpp b/targettests/execution/qubit_management/reuse_example1.cpp
deleted file mode 100644
index b97490c1c4..0000000000
--- a/targettests/execution/qubit_management/reuse_example1.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates.                  *
- * All rights reserved.                                                        *
- *                                                                             *
- * This source code and the accompanying materials are made available under    *
- * the terms of the Apache License 2.0 which accompanies this distribution.    *
- ******************************************************************************/
-
-// clang-format off
-// RUN: CUDAQ_MLIR_PASS_STATISTICS=true nvq++ %cpp_std --target ionq --emulate %s -o %t && %t |& FileCheck %s
-// RUN: CUDAQ_MLIR_PASS_STATISTICS=true nvq++ %cpp_std --target oqc  --emulate %s -o %t && %t |& FileCheck %s
-// RUN: nvq++ -std=c++17 --enable-mlir %s -o %t
-
-#include <cudaq.h>
-
-struct run_test {
-  __qpu__ auto operator()() {
-    cudaq::qubit q,p,r;
-
-    h(q);
-    h(p);
-    h(r);
-    x<cudaq::ctrl>(q,p);
-    // Ops on p
-    y(p);
-    z(p);
-    x<cudaq::ctrl>(r,p);
-    // Reset q
-    h(q);
-    // Reset r
-    h(r);
-    // Measure p
-    mz(p);
-  }
-};
-
-int main() {
-  auto counts = cudaq::sample(run_test{});
-  return 0;
-}
-
-// CHECK: (S) 6 num-cycles
-// CHECK: (S) 2 num-physical-qubits
-// CHECK: (S) 3 num-virtual-qubits
\ No newline at end of file
diff --git a/targettests/execution/qubit_management/scheduling_test1.cpp b/targettests/execution/qubit_management/scheduling_test1.cpp
deleted file mode 100644
index 4d25804118..0000000000
--- a/targettests/execution/qubit_management/scheduling_test1.cpp
+++ /dev/null
@@ -1,35 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates.                  *
- * All rights reserved.                                                        *
- *                                                                             *
- * This source code and the accompanying materials are made available under    *
- * the terms of the Apache License 2.0 which accompanies this distribution.    *
- ******************************************************************************/
-
-// clang-format off
-// RUN: CUDAQ_MLIR_PASS_STATISTICS=true nvq++ %cpp_std --target ionq --emulate %s -o %t && %t |& FileCheck %s
-// RUN: CUDAQ_MLIR_PASS_STATISTICS=true nvq++ %cpp_std --target oqc  --emulate %s -o %t && %t |& FileCheck %s
-// RUN: nvq++ -std=c++17 --enable-mlir %s -o %t
-
-#include <cudaq.h>
-
-struct run_test {
-  __qpu__ auto operator()() {
-    cudaq::qubit q,p;
-
-    x<cudaq::ctrl>(q,p);
-    h(p);
-    x<cudaq::ctrl>(q,p);
-    mz(q);
-    mz(p);
-  }
-};
-
-int main() {
-  auto counts = cudaq::sample(run_test{});
-  return 0;
-}
-
-// CHECK: (S) 4 num-cycles
-// CHECK: (S) 2 num-physical-qubits
-// CHECK: (S) 2 num-virtual-qubits
\ No newline at end of file
diff --git a/targettests/execution/qubit_management/scheduling_test2.cpp b/targettests/execution/qubit_management/scheduling_test2.cpp
deleted file mode 100644
index 6ccc4eced6..0000000000
--- a/targettests/execution/qubit_management/scheduling_test2.cpp
+++ /dev/null
@@ -1,40 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates.                  *
- * All rights reserved.                                                        *
- *                                                                             *
- * This source code and the accompanying materials are made available under    *
- * the terms of the Apache License 2.0 which accompanies this distribution.    *
- ******************************************************************************/
-
-// clang-format off
-// RUN: CUDAQ_MLIR_PASS_STATISTICS=true nvq++ %cpp_std --target ionq --emulate %s -o %t && %t |& FileCheck %s
-// RUN: CUDAQ_MLIR_PASS_STATISTICS=true nvq++ %cpp_std --target oqc  --emulate %s -o %t && %t |& FileCheck %s
-// RUN: nvq++ -std=c++17 --enable-mlir %s -o %t
-
-#include <cudaq.h>
-
-struct run_test {
-  __qpu__ auto operator()() {
-    cudaq::qubit q,p,r;
-
-    h(r);
-    x(r);
-    y(r);
-    h(p);
-    x<cudaq::ctrl>(q,p);
-    x<cudaq::ctrl>(q,r);
-    z(p);
-    y(p);
-    x(p);
-    mz(p);
-  }
-};
-
-int main() {
-  auto counts = cudaq::sample(run_test{});
-  return 0;
-}
-
-// CHECK: (S) 6 num-cycles
-// CHECK: (S) 3 num-physical-qubits
-// CHECK: (S) 3 num-virtual-qubits
\ No newline at end of file