From 6eeba935cddca44cd7670aa86ecb57c8b6b01d89 Mon Sep 17 00:00:00 2001
From: ademeure <arundemeure@gmail.com>
Date: Sat, 20 Jul 2024 12:12:15 +0000
Subject: [PATCH] Uncomment asserts in column_reduction_kernel

---
 dev/cuda/matmul_backward_bias.cu | 4 ++--
 llmc/matmul.cuh                  | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/dev/cuda/matmul_backward_bias.cu b/dev/cuda/matmul_backward_bias.cu
index 82db70da2..147c96306 100644
--- a/dev/cuda/matmul_backward_bias.cu
+++ b/dev/cuda/matmul_backward_bias.cu
@@ -477,8 +477,8 @@ __global__ void reduce_add_sum_kernel(floatX* dst, const float* src, size_t n, s
 template <int block_dim_x=2, int block_dim_y=512, bool accumulate=true, typename OutFloat=floatX>
 __global__ void column_reduction_kernel(OutFloat* output, const floatX* input,
                                         int num_rows, int num_columns, int row_stride) {
-    //assert(block_dim_x == blockDim.x && block_dim_y == blockDim.y); // check template parameters
-    //assert(num_columns == gridDim.x * block_dim_x * x128::size); // must match, no partial blocks
+    assert(block_dim_x == blockDim.x && block_dim_y == blockDim.y); // check template parameters
+    assert(num_columns == gridDim.x * block_dim_x * x128::size); // must match, no partial blocks
     constexpr int block_size = block_dim_x * block_dim_y;
     __shared__ float smem[block_size * x128::size];
 
diff --git a/llmc/matmul.cuh b/llmc/matmul.cuh
index fa26b3c0a..cf37cb5f5 100644
--- a/llmc/matmul.cuh
+++ b/llmc/matmul.cuh
@@ -24,8 +24,8 @@ Matrix Multiplication, with help from cuBLASLt
 template <int block_dim_x=2, int block_dim_y=512, bool accumulate=true, typename OutFloat=floatX>
 __global__ void column_reduction_kernel(OutFloat* output, const floatX* input,
                                         int num_rows, int num_columns, int row_stride) {
-    //assert(block_dim_x == blockDim.x && block_dim_y == blockDim.y); // check template parameters
-    //assert(num_columns == gridDim.x * block_dim_x * x128::size); // must match, no partial blocks
+    assert(block_dim_x == blockDim.x && block_dim_y == blockDim.y); // check template parameters
+    assert(num_columns == gridDim.x * block_dim_x * x128::size); // must match, no partial blocks
     constexpr int block_size = block_dim_x * block_dim_y;
     __shared__ float smem[block_size * x128::size];