Skip to content

Commit

Permalink
uncommented asserts in kernel
Browse files Browse the repository at this point in the history
  • Loading branch information
ademeure committed Jul 20, 2024
1 parent 27f0654 commit 6eeba93
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
4 changes: 2 additions & 2 deletions dev/cuda/matmul_backward_bias.cu
Original file line number Diff line number Diff line change
Expand Up @@ -477,8 +477,8 @@ __global__ void reduce_add_sum_kernel(floatX* dst, const float* src, size_t n, s
template <int block_dim_x=2, int block_dim_y=512, bool accumulate=true, typename OutFloat=floatX>
__global__ void column_reduction_kernel(OutFloat* output, const floatX* input,
int num_rows, int num_columns, int row_stride) {
-//assert(block_dim_x == blockDim.x && block_dim_y == blockDim.y); // check template parameters
-//assert(num_columns == gridDim.x * block_dim_x * x128::size); // must match, no partial blocks
+assert(block_dim_x == blockDim.x && block_dim_y == blockDim.y); // check template parameters
+assert(num_columns == gridDim.x * block_dim_x * x128::size); // must match, no partial blocks
constexpr int block_size = block_dim_x * block_dim_y;
__shared__ float smem[block_size * x128::size];

Expand Down
4 changes: 2 additions & 2 deletions llmc/matmul.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ Matrix Multiplication, with help from cuBLASLt
template <int block_dim_x=2, int block_dim_y=512, bool accumulate=true, typename OutFloat=floatX>
__global__ void column_reduction_kernel(OutFloat* output, const floatX* input,
int num_rows, int num_columns, int row_stride) {
-//assert(block_dim_x == blockDim.x && block_dim_y == blockDim.y); // check template parameters
-//assert(num_columns == gridDim.x * block_dim_x * x128::size); // must match, no partial blocks
+assert(block_dim_x == blockDim.x && block_dim_y == blockDim.y); // check template parameters
+assert(num_columns == gridDim.x * block_dim_x * x128::size); // must match, no partial blocks
constexpr int block_size = block_dim_x * block_dim_y;
__shared__ float smem[block_size * x128::size];

Expand Down

0 comments on commit 6eeba93

Please sign in to comment.