Updated modal script for updated cudnn version, fixing read errors
vyom1611 committed Jul 18, 2024
1 parent 85d17f4 commit 6abbbb8
Showing 2 changed files with 11 additions and 8 deletions.
15 changes: 9 additions & 6 deletions dev/cuda/benchmark_on_modal.py
@@ -62,10 +62,10 @@
         "rm cmake-3.28.1-Linux-x86_64.sh",
         "ln -s /usr/local/bin/cmake /usr/bin/cmake",)
     .run_commands(
-        "apt-get install -y --allow-change-held-packages libcudnn8 libcudnn8-dev",
+        "apt-get install -y --allow-change-held-packages libcudnn9-cuda-12 libcudnn9-dev-cuda-12",
         "apt-get install -y openmpi-bin openmpi-doc libopenmpi-dev kmod sudo",
         "git clone https://github.com/NVIDIA/cudnn-frontend.git /root/cudnn-frontend",
-        "cd /root/cudnn-frontend && mkdir build && cd build && cmake .. && make"
+        "cd /root/cudnn-frontend && mkdir build && cd build && cmake .. && make -j$(nproc)"
     )
     .run_commands(
         "wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-ubuntu2204.pin && \
@@ -75,6 +75,8 @@
         apt-get update"
     ).run_commands(
         "apt-get install -y nsight-systems-2023.3.3"
+    ).run_commands(
+        "apt-get install -y curl"
     )
 )

@@ -98,11 +100,12 @@ def execute_command(command: str):
     # using in a directory in your volume, where the name contains the timestamp unique id.
     # This script will generate a "report1_{timestamp} folder in volume"
     # and you can download it with 'modal volume get {volume-name} report1_{timestamp}
-    volumes={"/cuda-env": modal.Volume.from_name("cuda-env")},
+    volumes={"/llmc": modal.Volume.from_name("llmc")},
 )
-def run_benchmark(compile_command: str, run_command: str):
+def run_benchmark(data_command: str, compile_command: str, run_command: str):
     execute_command("pwd")
     execute_command("ls")
+    execute_command(data_command)
     execute_command(compile_command)
     execute_command(run_command)
     # Use this section if you want to profile using nsight system and install the reports on your volume to be locally downloaded
@@ -116,6 +119,6 @@ def run_benchmark(compile_command: str, run_command: str):
     return None

 @stub.local_entrypoint()
-def inference_main(compile_command: str, run_command: str):
-    results = run_benchmark.remote(compile_command, run_command)
+def inference_main(data_command: str, compile_command: str, run_command: str):
+    results = run_benchmark.remote(data_command, compile_command, run_command)
     return results
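
Note: the new data_command parameter is threaded from the local entrypoint into run_benchmark, so a dataset can be fetched inside the container (for example with the curl that the image now installs) before the compile and run steps. A sketch of what an invocation might look like, assuming Modal's usual mapping of local_entrypoint parameters to CLI flags; the URL and commands are placeholders:

    modal run dev/cuda/benchmark_on_modal.py \
        --data-command "curl -O https://example.com/input_tokens.bin" \
        --compile-command "nvcc -O3 --use_fast_math attention_forward.cu -o attention_forward -lcublas -lcublasLt" \
        --run-command "./attention_forward 1"

Profiling reports land on the volume now mounted at /llmc, so per the comment in the script they can be pulled locally with something like modal volume get llmc report1_{timestamp}.
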
4 changes: 2 additions & 2 deletions llmc/cuda_common.h
@@ -179,7 +179,7 @@ inline void file_to_device(void* dest, FILE* src, size_t num_bytes, size_t buffe
     // prime the read buffer;
     char* gpu_write_ptr = (char*)dest;
     size_t copy_amount = std::min(buffer_size, num_bytes);
-    freadCheck(read_buffer, 1, copy_amount, src);
+    // freadCheck(read_buffer, 1, copy_amount, src);

     size_t rest_bytes = num_bytes - copy_amount;
     size_t write_buffer_size = copy_amount;
@@ -192,7 +192,7 @@ inline void file_to_device(void* dest, FILE* src, size_t num_bytes, size_t buffe
         cudaCheck(cudaMemcpyAsync(gpu_write_ptr, write_buffer, write_buffer_size, cudaMemcpyHostToDevice, stream));
         gpu_write_ptr += write_buffer_size;
         // while this is going on, read from disk
-        freadCheck(read_buffer, 1, copy_amount, src);
+        //freadCheck(read_buffer, 1, copy_amount, src);
         cudaCheck(cudaStreamSynchronize(stream)); // wait for both buffers to be ready.

         std::swap(read_buffer, write_buffer);
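
For context on the cuda_common.h hunks: file_to_device streams a file to the GPU by double-buffering, reading the next chunk from disk into one pinned buffer while the previous chunk is copied host-to-device from the other; this commit comments out the freadCheck calls that fill the read buffer. A minimal standalone sketch of that overlap pattern, with hypothetical names, plain fread/assert instead of llm.c's checked helpers, and no CUDA error handling:

// Sketch only: stream the contents of `src` into device memory at `dest`,
// overlapping disk reads with asynchronous host-to-device copies.
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <cuda_runtime.h>

static void stream_file_to_device(void* dest, FILE* src, size_t num_bytes,
                                  size_t buffer_size, cudaStream_t stream) {
    // two pinned host buffers, allocated back to back
    char* buffers = nullptr;
    cudaMallocHost((void**)&buffers, 2 * buffer_size);
    char* read_buffer  = buffers;
    char* write_buffer = buffers + buffer_size;

    // prime the read buffer with the first chunk
    char* gpu_write_ptr = (char*)dest;
    size_t copy_amount = std::min(buffer_size, num_bytes);
    size_t got = fread(read_buffer, 1, copy_amount, src);
    assert(got == copy_amount);

    size_t rest_bytes = num_bytes - copy_amount;
    size_t write_buffer_size = copy_amount;
    std::swap(read_buffer, write_buffer);   // the filled buffer becomes the write buffer

    while (rest_bytes > 0) {
        // kick off the async copy of the chunk that was just read ...
        cudaMemcpyAsync(gpu_write_ptr, write_buffer, write_buffer_size,
                        cudaMemcpyHostToDevice, stream);
        gpu_write_ptr += write_buffer_size;
        // ... and read the next chunk from disk while the copy is in flight
        copy_amount = std::min(buffer_size, rest_bytes);
        got = fread(read_buffer, 1, copy_amount, src);
        assert(got == copy_amount);
        cudaStreamSynchronize(stream);      // wait until the in-flight copy has drained
        std::swap(read_buffer, write_buffer);
        rest_bytes -= copy_amount;
        write_buffer_size = copy_amount;
    }

    // copy the final chunk
    cudaMemcpyAsync(gpu_write_ptr, write_buffer, write_buffer_size,
                    cudaMemcpyHostToDevice, stream);
    cudaStreamSynchronize(stream);
    cudaFreeHost(buffers);
}

The cudaStreamSynchronize after each read is what makes the buffer swap safe: by that point the previous copy has finished, so the freshly read chunk can become the next write buffer.
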
