From 5971c26363991ced5130698bb9bcc32aeb94c882 Mon Sep 17 00:00:00 2001 From: Eli Uriegas <1700823+seemethere@users.noreply.github.com> Date: Wed, 10 Apr 2024 12:45:50 -0700 Subject: [PATCH] .github: Add basic gpu test workflow (#106) --- .github/workflows/compile_t4.yml | 185 +++++++++++++++---------------- 1 file changed, 92 insertions(+), 93 deletions(-) diff --git a/.github/workflows/compile_t4.yml b/.github/workflows/compile_t4.yml index ed853a330..3c9c33570 100644 --- a/.github/workflows/compile_t4.yml +++ b/.github/workflows/compile_t4.yml @@ -1,106 +1,105 @@ -name: Compile main +name: Run compile tests on: + pull_request: push: branches: - main - pull_request: workflow_dispatch: jobs: - run-tinystories: - strategy: - matrix: - runner: [4-core-ubuntu-gpu-t4] - runs-on: ${{matrix.runner}} - steps: - - name: Checkout repo - uses: actions/checkout@v2 - - name: Setup Python - uses: actions/setup-python@v2 - with: - python-version: 3.11 - - name: Print machine info - run: | - uname -a - if [ $(uname -s) == Darwin ]; then - sysctl machdep.cpu.brand_string - sysctl machdep.cpu.core_count - fi - - name: Install requirements - run: | - pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu121 - pip install -r requirements.txt - - name: Download checkpoints - run: | - mkdir -p checkpoints/stories15M - pushd checkpoints/stories15M - wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt - wget https://github.com/karpathy/llama2.c/raw/master/tokenizer.model - popd - - name: Run inference - run: | - export MODEL_PATH=checkpoints/stories15M/stories15M.pt - export MODEL_NAME=stories15M - export MODEL_DIR=/tmp - python generate.py --device cuda --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager - cat ./output_eager - python generate.py --device cuda --compile --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled - cat ./output_compiled - python export.py --device cuda 
--checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so - python generate.py --device cuda --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti - cat ./output_aoti + test-cuda: + uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + with: + runner: linux.g5.4xlarge.nvidia.gpu + gpu-arch-type: cuda + gpu-arch-version: "12.1" + script: | + echo "::group::Print machine info" + uname -a + if [ $(uname -s) == Darwin ]; then + sysctl machdep.cpu.brand_string + sysctl machdep.cpu.core_count + fi + echo "::endgroup::" + + echo "::group::Install requirements" + # Install requirements + pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu121 + pip install -r requirements.txt + echo "::endgroup::" + + echo "::group::Download checkpoints" + mkdir -p checkpoints/stories15M + pushd checkpoints/stories15M + wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt + wget https://github.com/karpathy/llama2.c/raw/master/tokenizer.model + popd + echo "::endgroup::" + + echo "::group::Run inference" + export MODEL_PATH=checkpoints/stories15M/stories15M.pt + export MODEL_NAME=stories15M + export MODEL_DIR=/tmp + python generate.py --device cuda --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager + cat ./output_eager + python generate.py --device cuda --compile --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled + cat ./output_compiled + python export.py --device cuda --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so + python generate.py --device cuda --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti + cat ./output_aoti - echo "******************************************" - echo "******* Emb: channel-wise quantized ******" - echo "******************************************" - python generate.py --device cuda --quant 
'{"embedding" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager - cat ./output_eager - python generate.py --device cuda --compile --quant '{"embedding" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled - cat ./output_compiled - python export.py --device cuda --quant '{"embedding" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so - python generate.py --device cuda --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti - cat ./output_aoti + echo "******************************************" + echo "******* Emb: channel-wise quantized ******" + echo "******************************************" + # python generate.py --device cuda --quant '{"embedding" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager + # cat ./output_eager + # python generate.py --device cuda --compile --quant '{"embedding" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled + # cat ./output_compiled + # python export.py --device cuda --quant '{"embedding" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so + # python generate.py --device cuda --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti + # cat ./output_aoti - echo "******************************************" - echo "******** Emb: group-wise quantized *******" - echo "******************************************" - python generate.py --device cuda --quant '{"embedding" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager - cat ./output_eager - python generate.py --device cuda --compile --quant '{"embedding" : {"bitwidth": 8, "group_size": 8}}' 
--checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled - cat ./output_compiled - python export.py --device cuda --quant '{"embedding" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so - python generate.py --device cuda --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti - cat ./output_aoti + echo "******************************************" + echo "******** Emb: group-wise quantized *******" + echo "******************************************" + # python generate.py --device cuda --quant '{"embedding" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager + # cat ./output_eager + # python generate.py --device cuda --compile --quant '{"embedding" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled + # cat ./output_compiled + # python export.py --device cuda --quant '{"embedding" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so + # python generate.py --device cuda --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti + # cat ./output_aoti - echo "******************************************" - echo "******* INT8 channel-wise quantized ******" - echo "******************************************" - python generate.py --device cuda --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager - cat ./output_eager - python generate.py --device cuda --compile --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled - cat ./output_compiled - python export.py --device cuda --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --output-dso-path 
${MODEL_DIR}/${MODEL_NAME}.so - python generate.py --device cuda --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti - cat ./output_aoti + echo "******************************************" + echo "******* INT8 channel-wise quantized ******" + echo "******************************************" + # python generate.py --device cuda --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager + # cat ./output_eager + # python generate.py --device cuda --compile --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled + # cat ./output_compiled + # python export.py --device cuda --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so + # python generate.py --device cuda --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti + # cat ./output_aoti - echo "******************************************" - echo "******** INT8 group-wise quantized *******" - echo "******************************************" - python generate.py --device cuda --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager - cat ./output_eager - python generate.py --device cuda --compile --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled - cat ./output_compiled - python export.py --device cuda --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so - python generate.py --device cuda --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti - cat ./output_aoti + echo "******************************************" + 
echo "******** INT8 group-wise quantized *******" + echo "******************************************" + # python generate.py --device cuda --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager + # cat ./output_eager + # python generate.py --device cuda --compile --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled + # cat ./output_compiled + # python export.py --device cuda --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so + # python generate.py --device cuda --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti + # cat ./output_aoti - echo "tests complete" - echo "******************************************" - # echo "********* EAGER vs TORCH.COMPILE *********" - # echo "******************************************" - # diff output_eager output_compiled - # echo "******************************************" - # echo "********* EAGER vs AOT INDUCTOR *********" - # echo "******************************************" - # diff output_eager output_aoti + echo "tests complete" + echo "******************************************" + echo "::endgroup::" + # echo "********* EAGER vs TORCH.COMPILE *********" + # echo "******************************************" + # diff output_eager output_compiled + # echo "******************************************" + # echo "********* EAGER vs AOT INDUCTOR *********" + # echo "******************************************" + # diff output_eager output_aoti