Skip to content

Commit

Permalink
Added compilation steps in makefile for permute.cu
Browse files: browse the repository at this point in the history
  • Loading branch information
indianspeedster committed Jul 25, 2024
1 parent 8952c52 commit a80bce6
Showing 1 changed file with 3 additions and 1 deletion.
4 changes: 3 additions & 1 deletion dev/cuda/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ MPI_PATHS = -I/usr/lib/x86_64-linux-gnu/openmpi/include -L/usr/lib/x86_64-linux-
$(NVCC) $(CFLAGS) $(NVCCFLAGS) $< -o $@

# Build all targets
# TARGETS is assigned twice below. The two lists are identical except that the
# second one ends with `permute`. When a variable is assigned twice with `=`,
# make uses the value from the later assignment, so the list that includes
# `permute` is the one in effect.
TARGETS = adamw attention_backward attention_forward classifier_fused crossentropy_forward crossentropy_softmax_backward encoder_backward encoder_forward gelu_backward gelu_forward layernorm_backward layernorm_forward matmul_backward matmul_backward_bias matmul_forward nccl_all_reduce residual_forward softmax_forward trimat_forward fused_residual_forward global_norm
TARGETS = adamw attention_backward attention_forward classifier_fused crossentropy_forward crossentropy_softmax_backward encoder_backward encoder_forward gelu_backward gelu_forward layernorm_backward layernorm_forward matmul_backward matmul_backward_bias matmul_forward nccl_all_reduce residual_forward softmax_forward trimat_forward fused_residual_forward global_norm permute
# `all` depends on every name in TARGETS. `all_ptx` and `all_sass` depend on
# the same names with `.ptx` / `.sass` appended (substitution `%` -> `%.ptx`,
# `%` -> `%.sass`). None of the three has a recipe of its own here.
all: $(TARGETS)
all_ptx: $(TARGETS:%=%.ptx)
all_sass: $(TARGETS:%=%.sass)
Expand Down Expand Up @@ -64,6 +64,8 @@ matmul_backward: matmul_backward.cu
# Each rule below only declares the matching .cu source as a prerequisite and
# carries no recipe of its own.
# NOTE(review): presumably the build command comes from a generic rule defined
# elsewhere in this Makefile -- confirm.
adamw: adamw.cu
global_norm: global_norm.cu

# Permute kernel: builds `permute` from permute.cu.
permute: permute.cu

# NCCL communication kernels
# nccl_all_reduce has an explicit recipe: it links against the MPI and NCCL
# libraries (-lmpi -lnccl) and passes $(MPI_PATHS) in addition to $(NVCCFLAGS).
#   $<  expands to the first prerequisite (nccl_all_reduce.cu)
#   $@  expands to the target            (nccl_all_reduce)
# The recipe line must begin with a hard TAB, otherwise make stops with
# "missing separator".
nccl_all_reduce: nccl_all_reduce.cu
	$(NVCC) -lmpi -lnccl $(NVCCFLAGS) $(MPI_PATHS) $< -o $@
Expand Down

0 comments on commit a80bce6

Please sign in to comment.