Encoder fwd tested & working

karpathy · Jul 23, 2024 · eab8ac9
1 parent 1de35ca
commit eab8ac9
Show file tree

Hide file tree

Showing 2 changed files with 1 addition and 4 deletions.
diff --git a/llmc/encoder.cuh b/llmc/encoder.cuh
@@ -169,9 +169,6 @@ void encoder_forward(floatX* out,
     const int grid_size = CEIL_DIV(N, (int)(block_size * x128::size));
     encoder_forward_kernel3<<<grid_size, block_size, 0, stream>>>(out, inp, wte, wpe, B, T, C, use_kv);
 
-    // Create a CPU B*T*C size buffer and memcopy the out tensor to it
-
-
     if (use_kv) {
         inp -= kv_offset;
         wpe -= kv_offset * C;

diff --git a/train_gpt2.cu b/train_gpt2.cu
@@ -1744,7 +1744,7 @@ int main(int argc, char *argv[]) {
             }
             // now sample from the model autoregressively
             printf("generating:\n---\n");
-            model.use_kv = 0; // we need to use the KV cache for generation
+            model.use_kv = 1; // we need to use the KV cache for generation
             for (int t = 1; t < genT; t++) {
                 NvtxRange generation_range("Generation step", t);
                 // we try not to be too wasteful for inference by not calculating all of B,T