Skip to content

Commit

Permalink
Merge branch 'main' into Jack-Khuu-patch-23
Browse files Browse the repository at this point in the history
  • Loading branch information
Jack-Khuu authored Feb 25, 2025
2 parents a3d8cd8 + 2766a95 commit 5d5615f
Show file tree
Hide file tree
Showing 27 changed files with 543 additions and 278 deletions.
12 changes: 12 additions & 0 deletions .ci/scripts/check_gibberish
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,18 @@ else
fi
fi

#######################################################################
#
# check whether the aspell spell checker is available

# Without aspell there is nothing to check with: report that the gibberish
# check was skipped and finish successfully instead of failing the caller.
if ! command -v aspell > /dev/null 2>&1; then
  echo "Aspell is not installed or not in PATH."
  echo "Gibberish unchecked in $TMPFILE"
  exit 0
fi
echo "Checking $TMPFILE for gibberish"

#######################################################################
#
# run spell check on the extracted sequence
Expand Down
206 changes: 66 additions & 140 deletions .ci/scripts/run-docs
Original file line number Diff line number Diff line change
@@ -1,145 +1,71 @@
# /bin/bash -x
#!/bin/bash -x

# Check if an argument was provided. $1 names the document whose commands
# are turned into a script and run further down; without it there is
# nothing to do, so report the problem and fail.
# (A single test is used here: the previous text opened two `if` statements
# but closed only one, which left the rest of the script unparsable.)
if [ -z "$1" ]; then
  echo "Must specify document to run"
  exit 1
fi

# Target "readme": generate ./run-readme.sh from README.md with updown.py,
# print it, run it under `bash -x`, then leave the script with status 0.
case "$1" in
  readme)
    echo "::group::Create script to run README"
    {
      python3 torchchat/utils/scripts/updown.py \
        --create-sections \
        --file README.md \
        --replace 'llama3.1:stories15M,-l 3:-l 2' \
        --suppress huggingface-cli,HF_TOKEN
      # for good measure, if something happened to updown processor,
      # and it did not error out, fail with an exit 1
      echo "exit 1"
    } > ./run-readme.sh
    echo "::endgroup::"

    echo "::group::Run README"
    echo "*******************************************"
    cat ./run-readme.sh
    echo "*******************************************"
    bash -x ./run-readme.sh
    echo "::endgroup::"

    exit 0
    ;;
esac

# Target "quantization": generate ./run-quantization.sh from
# docs/quantization.md with updown.py, print it, run it under `bash -x`,
# then leave the script with status 0.
case "$1" in
  quantization)
    echo "::group::Create script to run quantization"
    {
      python3 torchchat/utils/scripts/updown.py \
        --create-sections \
        --file docs/quantization.md \
        --replace llama3:stories15M \
        --suppress huggingface-cli,HF_TOKEN
      # for good measure, if something happened to updown processor,
      # and it did not error out, fail with an exit 1
      echo "exit 1"
    } > ./run-quantization.sh
    echo "::endgroup::"

    echo "::group::Run quantization"
    echo "*******************************************"
    cat ./run-quantization.sh
    echo "*******************************************"
    bash -x ./run-quantization.sh
    echo "::endgroup::"

    exit 0
    ;;
esac

# Target "gguf": generate ./run-gguf.sh from docs/GGUF.md with updown.py,
# print it and run it under `bash -x`. Execution then continues with the
# checks that follow (there is no explicit exit here).
case "$1" in
  gguf)
    echo "::group::Create script to run gguf"
    {
      python3 torchchat/utils/scripts/updown.py \
        --file docs/GGUF.md \
        --replace 'llama3:stories15M,-l 3:-l 2' \
        --suppress huggingface-cli,HF_TOKEN
      # for good measure, if something happened to updown processor,
      # and it did not error out, fail with an exit 1
      echo "exit 1"
    } > ./run-gguf.sh
    echo "::endgroup::"

    echo "::group::Run gguf"
    echo "*******************************************"
    cat ./run-gguf.sh
    echo "*******************************************"
    bash -x ./run-gguf.sh
    echo "::endgroup::"
    ;;
esac


# Target "advanced": generate ./run-advanced.sh from docs/ADVANCED-USERS.md
# with updown.py, print it and run it under `bash -x`. Execution then
# continues with the checks that follow (there is no explicit exit here).
case "$1" in
  advanced)
    echo "::group::Create script to run advanced"
    {
      python3 torchchat/utils/scripts/updown.py \
        --file docs/ADVANCED-USERS.md \
        --replace 'llama3:stories15M,-l 3:-l 2' \
        --suppress huggingface-cli,HF_TOKEN
      # for good measure, if something happened to updown processor,
      # and it did not error out, fail with an exit 1
      echo "exit 1"
    } > ./run-advanced.sh
    echo "::endgroup::"

    echo "::group::Run advanced"
    echo "*******************************************"
    cat ./run-advanced.sh
    echo "*******************************************"
    bash -x ./run-advanced.sh
    echo "::endgroup::"
    ;;
esac

# Target "evaluation": generate ./run-evaluation.sh from
# torchchat/utils/docs/evaluation.md with updown.py, print it and run it
# under `bash -x`.
if [ "$1" == "evaluation" ]; then
  echo "::group::Create script to run evaluation"
  python3 torchchat/utils/scripts/updown.py --file torchchat/utils/docs/evaluation.md --replace 'llama3:stories15M,-l 3:-l 2' --suppress huggingface-cli,HF_TOKEN > ./run-evaluation.sh
  # for good measure, if something happened to updown processor,
  # and it did not error out, fail with an exit 1
  echo "exit 1" >> ./run-evaluation.sh
  echo "::endgroup::"

  echo "::group::Run evaluation"
  echo "*******************************************"
  cat ./run-evaluation.sh
  echo "*******************************************"
  bash -x ./run-evaluation.sh
  # Close the "Run evaluation" log group, as every other target does;
  # it was previously left open.
  echo "::endgroup::"
fi

# Target "multimodal": generate ./run-multimodal.sh from docs/multimodal.md
# with updown.py (no --replace/--suppress rules), print it and run it under
# `bash -x`.
case "$1" in
  multimodal)
    # Expecting that this might fail this test as-is, because
    # it's the first on-pr test depending on github secrets for access with HF token access

    echo "::group::Create script to run multimodal"
    {
      python3 torchchat/utils/scripts/updown.py --file docs/multimodal.md
      # for good measure, if something happened to updown processor,
      # and it did not error out, fail with an exit 1
      echo "exit 1"
    } > ./run-multimodal.sh
    echo "::endgroup::"

    echo "::group::Run multimodal"
    echo "*******************************************"
    cat ./run-multimodal.sh
    echo "*******************************************"
    bash -x ./run-multimodal.sh
    echo "::endgroup::"
    ;;
esac

# Target "native": generate ./run-native.sh from docs/native-execution.md
# with updown.py (no --replace/--suppress rules), print it and run it under
# `bash -x`.
case "$1" in
  native)
    echo "::group::Create script to run native-execution"
    {
      python3 torchchat/utils/scripts/updown.py --file docs/native-execution.md
      # for good measure, if something happened to updown processor,
      # and it did not error out, fail with an exit 1
      echo "exit 1"
    } > ./run-native.sh
    echo "::endgroup::"

    echo "::group::Run native-execution"
    echo "*******************************************"
    cat ./run-native.sh
    echo "*******************************************"
    bash -x ./run-native.sh
    echo "::endgroup::"
    ;;
esac

# Target "distributed": generate ./run-distributed.sh from
# docs/distributed.md with updown.py, print it and run it under `bash -x`.
if [ "$1" == "distributed" ]; then

echo "::group::Create script to run distributed"
# NOTE(review): the two invocations below both truncate and write
# ./run-distributed.sh, so only the output of the second one (without
# --replace) survives. This looks like both sides of a merge kept side by
# side - confirm which variant is intended and drop the other.
python3 torchchat/utils/scripts/updown.py --file docs/distributed.md --replace 'llama3.1:stories110M,-l 3:-l 2' --suppress huggingface-cli,HF_TOKEN > ./run-distributed.sh
python3 torchchat/utils/scripts/updown.py --file docs/distributed.md --suppress huggingface-cli,HF_TOKEN > ./run-distributed.sh
# for good measure, if something happened to updown processor,
# and it did not error out, fail with an exit 1
echo "exit 1" >> ./run-distributed.sh
echo "::endgroup::"

# Show the generated script in the log, then execute it with tracing.
echo "::group::Run distributed"
echo "*******************************************"
cat ./run-distributed.sh
echo "*******************************************"
bash -x ./run-distributed.sh
echo "::endgroup::"
fi
# Translate the requested document ($1) into the settings used below:
#   script_name - path the generated script is written to
#   filepath    - markdown document handed to updown.py
#   parameters  - extra updown.py options, kept as one space-separated string
script_name="./run-${1}.sh"
filepath=""
# Default options. cuda supports padding, so no need to replace quantization
# for now; otherwise add 'cuda.json:cuda-32.json' to the replace rules.
parameters="--replace llama3:stories15M,-l3:-l2,mobile.json:mobile-32.json --suppress huggingface-cli,HF_TOKEN"

# Pass 1: which document belongs to the target. Unknown targets are fatal.
case "$1" in
  readme)       filepath="README.md" ;;
  quantization) filepath="docs/quantization.md" ;;
  gguf)         filepath="docs/GGUF.md" ;;
  advanced)     filepath="docs/ADVANCED-USERS.md" ;;
  evaluation)   filepath="torchchat/utils/docs/evaluation.md" ;;
  multimodal)   filepath="docs/multimodal.md" ;;
  native)       filepath="docs/native-execution.md" ;;
  distributed)  filepath="docs/distributed.md" ;;
  local)        filepath="docs/local-model.md" ;;
  *)
    echo "Unknown option: $1"
    exit 1
    ;;
esac

# Pass 2: targets that do not use the default options.
case "$1" in
  readme)
    parameters="--replace llama3.1:stories15M,-l3:-l2,mobile.json:mobile-32.json --suppress huggingface-cli,HF_TOKEN"
    ;;
  multimodal | native | local)
    # These documents run without any replace/suppress rules.
    parameters=""
    ;;
  distributed)
    # Use stories110M to avoid need for authentication
    parameters="--replace llama3.1:stories110M,-l3:-l2 --suppress huggingface-cli,HF_TOKEN"
    ;;
esac

# Generate the script
echo "::group::Create script to run $1"
# $parameters is deliberately unquoted: it is a space-separated option
# string that must be split into individual updown.py arguments.
# shellcheck disable=SC2086
python3 torchchat/utils/scripts/updown.py --file "$filepath" $parameters > "$script_name"
# if something happened to updown processor, and it did not error out, fail with an exit 1
echo "exit 1" >> "$script_name"
echo "::endgroup::"

# Run the script
echo "::group::Run $1"
echo "*******************************************"
cat "$script_name"
echo "*******************************************"
# The generated script is sourced and ends in `exit` (at the latest the
# "exit 1" appended above), so it terminates this shell and no statement
# placed after the source would normally run. Close the log group from an
# EXIT trap instead; the trap does not change the exit status.
trap 'echo "::endgroup::"' EXIT
set -x
# Keep this as the last command: should the source ever return instead of
# exiting, its status becomes the status of this script rather than being
# masked by a trailing echo.
. "$script_name"
69 changes: 67 additions & 2 deletions .github/workflows/more-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ jobs:
gpu-arch-version: "12.4"
timeout: 60
script: |
set -xeou pipefail
echo "::group::Print machine info"
uname -a
echo "::endgroup::"
Expand All @@ -39,9 +40,10 @@ jobs:
echo "::endgroup::"
echo "::group::Run inference"
export MODEL_PATH=checkpoints/stories15M/stories15M.pt
export MODEL_DIR=checkpoints/stories15M/
export MODEL_PATH=${MODEL_DIR}/stories15M.pt
export MODEL_NAME=stories15M
export MODEL_DIR=/tmp
for DTYPE in bfloat16 float16 float32; do
###################################################################
Expand Down Expand Up @@ -83,3 +85,66 @@ jobs:
echo "tests complete"
echo "******************************************"
echo "::endgroup::"
# Exports the stories15M model for every device / dtype / SDPA backend
# combination and runs each export both through Python (DSO) and through the
# native aoti_run binary.
test-sdpa-backends-export:
  permissions:
    id-token: write
    contents: read
  uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
  with:
    runner: linux.g5.4xlarge.nvidia.gpu
    gpu-arch-type: cuda
    gpu-arch-version: "12.4"
    timeout: 60
    script: |
      set -xeou pipefail
      echo "::group::Print machine info"
      uname -a
      echo "::endgroup::"

      # This group installs the requirements; it used to reuse the
      # "Download checkpoints" title of the following group.
      echo "::group::Install requirements"
      # Install requirements
      ./install/install_requirements.sh cuda
      pip3 list
      python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
      echo "::endgroup::"

      echo "::group::Download checkpoints"
      mkdir -p checkpoints/stories15M
      pushd checkpoints/stories15M
      wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt
      wget https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
      popd
      echo "::endgroup::"

      echo "::group::Run inference"
      export MODEL_DIR=checkpoints/stories15M/
      export MODEL_PATH=${MODEL_DIR}/stories15M.pt
      export MODEL_NAME=stories15M

      # Build the native runner used by the aoti_run step below.
      ./torchchat/utils/scripts/build_native.sh aoti

      for DEVICE in cpu cuda; do
        # depending on how the parameter passing works, may only be able to do bfloat16 for aoti_run, similar to runner-cuda-dtype.yml
        # (although the runner environment should not have an opinion what we use in the artifact, and we might suitably abstract that)
        for DTYPE in bfloat16 float16 float32; do
          for SDPA in 'math' 'flash_attention' 'efficient_attention' 'cudnn_attention'; do
            echo "***************************************************************"
            echo "*** $DEVICE $DTYPE $SDPA"
            ###################################################################
            # Export DSO and run with Python
            python torchchat.py export --output-dso dso.so --checkpoint-path ${MODEL_PATH} --attention-backend ${SDPA} --device ${DEVICE} --dtype ${DTYPE}
            python torchchat.py generate --dso-path dso.so --checkpoint-path ${MODEL_PATH} --attention-backend ${SDPA} --device ${DEVICE} --dtype ${DTYPE} --temperature 0 --prompt "Once upon a time"
            ###################################################################
            # Export AOTI and run with aoti_run
            python torchchat.py export --output-aoti /tmp/model.pt2 --checkpoint-path ${MODEL_PATH} --attention-backend ${SDPA} --device ${DEVICE} --dtype ${DTYPE}
            ./cmake-out/aoti_run /tmp/model.pt2 -z ${MODEL_DIR}/tokenizer.model -i "Once upon a time"
            ###################################################################
          done
        done
      done

      echo "tests complete"
      echo "******************************************"
      echo "::endgroup::"
Loading

0 comments on commit 5d5615f

Please sign in to comment.