Merge branch 'main' of https://github.com/NVIDIA/cuda-quantum into useless-stores

Signed-off-by: Anna Gringauze <agringauze@nvidia.com>
NVIDIA · Feb 2, 2025 · 0b739e8 · 0b739e8
2 parents 9b55600 + 3b0f04c
commit 0b739e8
Show file tree

Hide file tree

Showing 5 changed files with 100 additions and 35 deletions.
diff --git a/python/cudaq/kernel/ast_bridge.py b/python/cudaq/kernel/ast_bridge.py
@@ -1089,6 +1089,8 @@ def visit_Assign(self, node):
                 self.subscriptPushPointerValue = True
                 # Visit the subscript node, get the pointer value
                 self.visit(node.targets[0])
+                # Reset the push pointer value flag
+                self.subscriptPushPointerValue = False
                 ptrVal = self.popValue()
                 if not cc.PointerType.isinstance(ptrVal.type):
                     self.emitFatalError(
@@ -1108,8 +1110,6 @@ def visit_Assign(self, node):
                 valueToStore = self.popValue()
                 # Store the value
                 cc.StoreOp(valueToStore, ptrVal)
-                # Reset the push pointer value flag
-                self.subscriptPushPointerValue = False
                 return
 
         else:
@@ -1456,11 +1456,13 @@ def get_full_module_path(partial_path):
 
                 # The total number of elements in the iterable
                 # we are generating should be `N == endVal - startVal`
-                totalSize = math.AbsIOp(arith.SubIOp(endVal,
-                                                     startVal).result).result
+                actualSize = arith.SubIOp(endVal, startVal).result
+                totalSize = math.AbsIOp(actualSize).result
 
                 # If the step is not == 1, then we also have
                 # to update the total size for the range iterable
+                actualSize = arith.DivSIOp(actualSize,
+                                           math.AbsIOp(stepVal).result).result
                 totalSize = arith.DivSIOp(totalSize,
                                           math.AbsIOp(stepVal).result).result
 
@@ -1499,7 +1501,7 @@ def bodyBuilder(iterVar):
                                             isDecrementing=isDecrementing)
 
                 self.pushValue(iterable)
-                self.pushValue(totalSize)
+                self.pushValue(actualSize)
                 return
 
             if node.func.id == 'enumerate':

diff --git a/python/tests/kernel/test_kernel_features.py b/python/tests/kernel/test_kernel_features.py
@@ -557,6 +557,74 @@ def kernel(myList: list[int]):
     assert '1' * 5 in counts
 
 
+def test_list_boundaries():
+
+    @cudaq.kernel
+    def kernel1():
+        qubits = cudaq.qvector(2)
+        r = range(0, 0)
+        for i in r:
+            x(qubits[i])
+
+    counts = cudaq.sample(kernel1)
+    assert len(counts) == 1
+    assert '00' in counts
+
+    @cudaq.kernel
+    def kernel2():
+        qubits = cudaq.qvector(2)
+        r = range(1, 0)
+        for i in r:
+            x(qubits[i])
+
+    counts = cudaq.sample(kernel2)
+    assert len(counts) == 1
+    assert '00' in counts
+
+    @cudaq.kernel
+    def kernel3():
+        qubits = cudaq.qvector(2)
+        for i in range(-1):
+            x(qubits[i])
+
+    counts = cudaq.sample(kernel3)
+    assert len(counts) == 1
+    assert '00' in counts
+
+    @cudaq.kernel
+    def kernel4():
+        qubits = cudaq.qvector(4)
+        r = [i * 2 + 1 for i in range(-1)]
+        for i in r:
+            x(qubits[i])
+
+    counts = cudaq.sample(kernel4)
+    assert len(counts) == 1
+    assert '0000' in counts
+
+    @cudaq.kernel
+    def kernel5():
+        qubits = cudaq.qvector(4)
+        r = [i * 2 + 1 for i in range(0)]
+        for i in r:
+            x(qubits[i])
+
+    counts = cudaq.sample(kernel5)
+    assert len(counts) == 1
+    assert '0000' in counts
+
+    @cudaq.kernel
+    def kernel6():
+        qubits = cudaq.qvector(4)
+        r = [i * 2 + 1 for i in range(2)]
+        for i in r:
+            x(qubits[i])
+
+    counts = cudaq.sample(kernel6)
+    assert len(counts) == 1
+    assert '0101' in counts
+
+
 def test_control_operations():
 
     @cudaq.kernel

diff --git a/python/tests/mlir/test_output_qir.py b/python/tests/mlir/test_output_qir.py
@@ -25,9 +25,7 @@ def ghz(numQubits: int):
     print(cudaq.translate(ghz_synth, format='qir-base'))
 
 
-# CHECK:    %[[VAL_0:.*]] = tail call
-# CHECK:    %[[VAL_1:.*]]* @__quantum__rt__qubit_allocate_array(i64
-# CHECK:    %[[VAL_2:.*]])
+# CHECK:         %[[VAL_0:.*]] = tail call %[[VAL_1:.*]]* @__quantum__rt__qubit_allocate_array(i64 %[[VAL_2:.*]])
 # CHECK:         %[[VAL_3:.*]] = tail call i8* @__quantum__rt__array_get_element_ptr_1d(%[[VAL_1]]* %[[VAL_0]], i64 0)
 # CHECK:         %[[VAL_4:.*]] = bitcast i8* %[[VAL_3]] to %[[VAL_5:.*]]**
 # CHECK:         %[[VAL_6:.*]] = load %[[VAL_5]]*, %[[VAL_5]]** %[[VAL_4]], align 8
@@ -43,25 +41,24 @@ def ghz(numQubits: int):
 # CHECK:         store i64 %[[VAL_14]], i64* %[[VAL_16]], align 8
 # CHECK:         %[[VAL_15]] = add nuw nsw i64 %[[VAL_14]], 1
 # CHECK:         %[[VAL_17:.*]] = icmp slt i64 %[[VAL_15]], %[[VAL_7]]
-# CHECK:         br i1 %[[VAL_17]], label %[[VAL_11]], label %[[VAL_12]]
-# CHECK:       ._crit_edge:                                      ; preds = %[[VAL_11]], %[[VAL_13]]
-# CHECK:         %[[VAL_18:.*]] = alloca { i64, i64 }, i64 %[[VAL_8]], align 8
-# CHECK:         %[[VAL_19:.*]] = icmp sgt i64 %[[VAL_8]], 0
-# CHECK:         br i1 %[[VAL_19]], label %[[VAL_20:.*]], label %[[VAL_21:.*]]
+# CHECK:         br i1 %[[VAL_17]], label %[[VAL_11]], label %[[VAL_21:.*]]
+# CHECK:       ._crit_edge:                                      ; preds = %[[VAL_11]]
+# CHECK:         %[[VAL_18:.*]] = alloca { i64, i64 }, i64 %[[VAL_7]], align 8
+# CHECK:         br i1 %[[VAL_10]], label %[[VAL_20:.*]], label %[[VAL_21]]
 # CHECK:       .preheader:                                       ; preds = %[[VAL_20]]
-# CHECK:         br i1 %[[VAL_19]], label %[[VAL_22:.*]], label %[[VAL_21]]
-# CHECK:       .lr.ph9:                                          ; preds = %[[VAL_12]], %[[VAL_20]]
-# CHECK:         %[[VAL_23:.*]] = phi i64 [ %[[VAL_24:.*]], %[[VAL_20]] ], [ 0, %[[VAL_12]] ]
+# CHECK:         br i1 %[[VAL_10]], label %[[VAL_22:.*]], label %[[VAL_21]]
+# CHECK:       .lr.ph10:                                          ; preds = %[[VAL_21]], %[[VAL_20]]
+# CHECK:         %[[VAL_23:.*]] = phi i64 [ %[[VAL_24:.*]], %[[VAL_20]] ], [ 0, %[[VAL_21]] ]
 # CHECK:         %[[VAL_25:.*]] = getelementptr i64, i64* %[[VAL_9]], i64 %[[VAL_23]]
 # CHECK:         %[[VAL_26:.*]] = load i64, i64* %[[VAL_25]], align 8
 # CHECK:         %[[VAL_27:.*]] = getelementptr { i64, i64 }, { i64, i64 }* %[[VAL_18]], i64 %[[VAL_23]], i32 0
 # CHECK:         store i64 %[[VAL_23]], i64* %[[VAL_27]], align 8
 # CHECK:         %[[VAL_28:.*]] = getelementptr { i64, i64 }, { i64, i64 }* %[[VAL_18]], i64 %[[VAL_23]], i32 1
 # CHECK:         store i64 %[[VAL_26]], i64* %[[VAL_28]], align 8
 # CHECK:         %[[VAL_24]] = add nuw nsw i64 %[[VAL_23]], 1
-# CHECK:         %[[VAL_29:.*]] = icmp slt i64 %[[VAL_24]], %[[VAL_8]]
+# CHECK:         %[[VAL_29:.*]] = icmp slt i64 %[[VAL_24]], %[[VAL_7]]
 # CHECK:         br i1 %[[VAL_29]], label %[[VAL_20]], label %[[VAL_30:.*]]
-# CHECK:       .lr.ph10:                                         ; preds = %[[VAL_30]], %[[VAL_22]]
+# CHECK:       .lr.ph11:                                         ; preds = %[[VAL_30]], %[[VAL_22]]
 # CHECK:         %[[VAL_31:.*]] = phi i64 [ %[[VAL_32:.*]], %[[VAL_22]] ], [ 0, %[[VAL_30]] ]
 # CHECK:         %[[VAL_33:.*]] = getelementptr { i64, i64 }, { i64, i64 }* %[[VAL_18]], i64 %[[VAL_31]], i32 0
 # CHECK:         %[[VAL_34:.*]] = load i64, i64* %[[VAL_33]], align 8
@@ -76,9 +73,9 @@ def ghz(numQubits: int):
 # CHECK:         %[[VAL_43:.*]] = load %[[VAL_5]]*, %[[VAL_5]]** %[[VAL_42]], align 8
 # CHECK:         tail call void (i64, void (%[[VAL_1]]*, %[[VAL_5]]*)*, ...) @invokeWithControlQubits(i64 1, void (%[[VAL_1]]*, %[[VAL_5]]*)* nonnull @__quantum__qis__x__ctl, %[[VAL_5]]* %[[VAL_39]], %[[VAL_5]]* %[[VAL_43]])
 # CHECK:         %[[VAL_32]] = add nuw nsw i64 %[[VAL_31]], 1
-# CHECK:         %[[VAL_44:.*]] = icmp slt i64 %[[VAL_32]], %[[VAL_8]]
+# CHECK:         %[[VAL_44:.*]] = icmp slt i64 %[[VAL_32]], %[[VAL_7]]
 # CHECK:         br i1 %[[VAL_44]], label %[[VAL_22]], label %[[VAL_21]]
-# CHECK:       ._crit_edge11:                                    ; preds = %[[VAL_22]], %[[VAL_12]], %[[VAL_30]]
+# CHECK:       ._crit_edge12:                                    ; preds = %[[VAL_22]], %[[VAL_13]], %[[VAL_21]], %[[VAL_30]]
 # CHECK:         tail call void @__quantum__rt__qubit_release_array(%[[VAL_1]]* %[[VAL_0]])
 # CHECK:         ret void
 

diff --git a/python/tests/mlir/test_output_translate_qir.py b/python/tests/mlir/test_output_translate_qir.py
@@ -25,9 +25,7 @@ def ghz(numQubits: int):
     print(cudaq.translate(ghz_synth, format='qir-base'))
 
 
-# CHECK:    %[[VAL_0:.*]] = tail call
-# CHECK:    %[[VAL_1:.*]]* @__quantum__rt__qubit_allocate_array(i64
-# CHECK:    %[[VAL_2:.*]])
+# CHECK:         %[[VAL_0:.*]] = tail call %[[VAL_1:.*]]* @__quantum__rt__qubit_allocate_array(i64 %[[VAL_2:.*]])
 # CHECK:         %[[VAL_3:.*]] = tail call i8* @__quantum__rt__array_get_element_ptr_1d(%[[VAL_1]]* %[[VAL_0]], i64 0)
 # CHECK:         %[[VAL_4:.*]] = bitcast i8* %[[VAL_3]] to %[[VAL_5:.*]]**
 # CHECK:         %[[VAL_6:.*]] = load %[[VAL_5]]*, %[[VAL_5]]** %[[VAL_4]], align 8
@@ -43,25 +41,24 @@ def ghz(numQubits: int):
 # CHECK:         store i64 %[[VAL_14]], i64* %[[VAL_16]], align 8
 # CHECK:         %[[VAL_15]] = add nuw nsw i64 %[[VAL_14]], 1
 # CHECK:         %[[VAL_17:.*]] = icmp slt i64 %[[VAL_15]], %[[VAL_7]]
-# CHECK:         br i1 %[[VAL_17]], label %[[VAL_11]], label %[[VAL_12]]
-# CHECK:       ._crit_edge:                                      ; preds = %[[VAL_11]], %[[VAL_13]]
-# CHECK:         %[[VAL_18:.*]] = alloca { i64, i64 }, i64 %[[VAL_8]], align 8
-# CHECK:         %[[VAL_19:.*]] = icmp sgt i64 %[[VAL_8]], 0
-# CHECK:         br i1 %[[VAL_19]], label %[[VAL_20:.*]], label %[[VAL_21:.*]]
+# CHECK:         br i1 %[[VAL_17]], label %[[VAL_11]], label %[[VAL_21:.*]]
+# CHECK:       ._crit_edge:                                      ; preds = %[[VAL_11]]
+# CHECK:         %[[VAL_18:.*]] = alloca { i64, i64 }, i64 %[[VAL_7]], align 8
+# CHECK:         br i1 %[[VAL_10]], label %[[VAL_20:.*]], label %[[VAL_21]]
 # CHECK:       .preheader:                                       ; preds = %[[VAL_20]]
-# CHECK:         br i1 %[[VAL_19]], label %[[VAL_22:.*]], label %[[VAL_21]]
-# CHECK:       .lr.ph9:                                          ; preds = %[[VAL_12]], %[[VAL_20]]
-# CHECK:         %[[VAL_23:.*]] = phi i64 [ %[[VAL_24:.*]], %[[VAL_20]] ], [ 0, %[[VAL_12]] ]
+# CHECK:         br i1 %[[VAL_10]], label %[[VAL_22:.*]], label %[[VAL_21]]
+# CHECK:       .lr.ph10:                                          ; preds = %[[VAL_21]], %[[VAL_20]]
+# CHECK:         %[[VAL_23:.*]] = phi i64 [ %[[VAL_24:.*]], %[[VAL_20]] ], [ 0, %[[VAL_21]] ]
 # CHECK:         %[[VAL_25:.*]] = getelementptr i64, i64* %[[VAL_9]], i64 %[[VAL_23]]
 # CHECK:         %[[VAL_26:.*]] = load i64, i64* %[[VAL_25]], align 8
 # CHECK:         %[[VAL_27:.*]] = getelementptr { i64, i64 }, { i64, i64 }* %[[VAL_18]], i64 %[[VAL_23]], i32 0
 # CHECK:         store i64 %[[VAL_23]], i64* %[[VAL_27]], align 8
 # CHECK:         %[[VAL_28:.*]] = getelementptr { i64, i64 }, { i64, i64 }* %[[VAL_18]], i64 %[[VAL_23]], i32 1
 # CHECK:         store i64 %[[VAL_26]], i64* %[[VAL_28]], align 8
 # CHECK:         %[[VAL_24]] = add nuw nsw i64 %[[VAL_23]], 1
-# CHECK:         %[[VAL_29:.*]] = icmp slt i64 %[[VAL_24]], %[[VAL_8]]
+# CHECK:         %[[VAL_29:.*]] = icmp slt i64 %[[VAL_24]], %[[VAL_7]]
 # CHECK:         br i1 %[[VAL_29]], label %[[VAL_20]], label %[[VAL_30:.*]]
-# CHECK:       .lr.ph10:                                         ; preds = %[[VAL_30]], %[[VAL_22]]
+# CHECK:       .lr.ph11:                                         ; preds = %[[VAL_30]], %[[VAL_22]]
 # CHECK:         %[[VAL_31:.*]] = phi i64 [ %[[VAL_32:.*]], %[[VAL_22]] ], [ 0, %[[VAL_30]] ]
 # CHECK:         %[[VAL_33:.*]] = getelementptr { i64, i64 }, { i64, i64 }* %[[VAL_18]], i64 %[[VAL_31]], i32 0
 # CHECK:         %[[VAL_34:.*]] = load i64, i64* %[[VAL_33]], align 8
@@ -76,9 +73,9 @@ def ghz(numQubits: int):
 # CHECK:         %[[VAL_43:.*]] = load %[[VAL_5]]*, %[[VAL_5]]** %[[VAL_42]], align 8
 # CHECK:         tail call void (i64, void (%[[VAL_1]]*, %[[VAL_5]]*)*, ...) @invokeWithControlQubits(i64 1, void (%[[VAL_1]]*, %[[VAL_5]]*)* nonnull @__quantum__qis__x__ctl, %[[VAL_5]]* %[[VAL_39]], %[[VAL_5]]* %[[VAL_43]])
 # CHECK:         %[[VAL_32]] = add nuw nsw i64 %[[VAL_31]], 1
-# CHECK:         %[[VAL_44:.*]] = icmp slt i64 %[[VAL_32]], %[[VAL_8]]
+# CHECK:         %[[VAL_44:.*]] = icmp slt i64 %[[VAL_32]], %[[VAL_7]]
 # CHECK:         br i1 %[[VAL_44]], label %[[VAL_22]], label %[[VAL_21]]
-# CHECK:       ._crit_edge11:                                    ; preds = %[[VAL_22]], %[[VAL_12]], %[[VAL_30]]
+# CHECK:       ._crit_edge12:                                    ; preds = %[[VAL_22]], %[[VAL_13]], %[[VAL_21]], %[[VAL_30]]
 # CHECK:         tail call void @__quantum__rt__qubit_release_array(%[[VAL_1]]* %[[VAL_0]])
 # CHECK:         ret void
 

diff --git a/runtime/nvqir/cudensitymat/CMakeLists.txt b/runtime/nvqir/cudensitymat/CMakeLists.txt
@@ -19,6 +19,7 @@ find_file(CUDENSITYMAT_INC
         $ENV{CUQUANTUM_INSTALL_PREFIX}/include      
         /usr/include    
         ENV CPATH
+    REQUIRED
 )
 
 message(STATUS "cudensitymat header: ${CUDENSITYMAT_INC}")