Skip to content

Commit

Permalink
Encoder fwd tested & working
Browse files Browse the repository at this point in the history
  • Loading branch information
gordicaleksa committed Jul 23, 2024
1 parent 1de35ca commit eab8ac9
Show file tree
Hide file tree
Showing 2 changed files with 1 addition and 4 deletions.
3 changes: 0 additions & 3 deletions llmc/encoder.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -169,9 +169,6 @@ void encoder_forward(floatX* out,
const int grid_size = CEIL_DIV(N, (int)(block_size * x128::size));
encoder_forward_kernel3<<<grid_size, block_size, 0, stream>>>(out, inp, wte, wpe, B, T, C, use_kv);

- // Create a CPU B*T*C size buffer and memcopy the out tensor to it
-
-
if (use_kv) {
inp -= kv_offset;
wpe -= kv_offset * C;
Expand Down
2 changes: 1 addition & 1 deletion train_gpt2.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1744,7 +1744,7 @@ int main(int argc, char *argv[]) {
}
// now sample from the model autoregressively
printf("generating:\n---\n");
- model.use_kv = 0; // we need to use the KV cache for generation
+ model.use_kv = 1; // we need to use the KV cache for generation
for (int t = 1; t < genT; t++) {
NvtxRange generation_range("Generation step", t);
// we try not to be too wasteful for inference by not calculating all of B,T
Expand Down

0 comments on commit eab8ac9

Please sign in to comment.