Merge branch 'main' into new-intx-quantizer
Jack-Khuu authored Feb 20, 2025
2 parents 76e8ec5 + f810de3 commit c2108d6
Showing 6 changed files with 35 additions and 15 deletions.
33 changes: 25 additions & 8 deletions .github/workflows/pull.yml
@@ -291,6 +291,16 @@ jobs:
bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-bfloat16"
echo "::endgroup::"
echo "::group::Run inference with quantize file"
+ for DEVICE in cpu; do # cuda
+   # cuda - fails because `AttributeError: 'Linear' object has no attribute '_linear_extra_repr'`
+   # follow up with torchao as a separate PR
+   echo "saving snapshot for device ${DEVICE} and dtype bfloat16, and reloading as snapshot"
+   python3 torchchat.py export --device ${DEVICE} --output-snap model.tc --dtype bfloat16 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+   python3 torchchat.py generate --device ${DEVICE} --snap model.tc --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+ done
+ echo "::endgroup::"
test-gpu-aoti-float32:
permissions:
id-token: write
@@ -335,6 +345,11 @@ jobs:
fi
echo "::endgroup::"
+ # echo "::group::Run inference with quantize file"
+ # python3 torchchat.py export --output-snap model.tc --dtype float32 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+ # python3 torchchat.py generate --snap model.tc --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+ # echo "::endgroup::"
test-gpu-aoti-float16:
permissions:
id-token: write
@@ -376,10 +391,15 @@ jobs:
echo "::group::Run inference with quantize file"
if [ $(uname -s) == Darwin ]; then
python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
fi
echo "::endgroup::"
+ # echo "::group::Run inference with quantize file"
+ # python3 torchchat.py export --output-snap model.tc --dtype float16 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+ # python3 torchchat.py generate --snap model.tc --dtype float16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+ # echo "::endgroup::"
test-gpu-eval-sanity-check:
permissions:
id-token: write
@@ -495,10 +515,11 @@ jobs:
python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
echo "******************************************"
echo "*** --quantize torchchat/quant_config/mobile.json ***"
echo "*** can't test --quantize torchchat/quant_config/mobile.json ***"
echo "*** testing --quantize torchchat/quant_config/mobile-32.json ***"
echo "******************************************"
- # python torchchat.py export --quantize torchchat/quant_config/mobile.json --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
- # python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
+ python torchchat.py export --quantize torchchat/quant_config/mobile-32.json --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
+ python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
echo "******************************************"
@@ -1147,10 +1168,6 @@ jobs:
run: |
echo "Installing runner"
bash torchchat/utils/scripts/build_native.sh et link_torchao_ops
- - name: Install runner AOTI
-   id: install-runner-aoti
-   run: |
-     bash torchchat/utils/scripts/build_native.sh aoti link_torchao_ops
- name: Run inference
run: |
python torchchat.py download stories110M
2 changes: 1 addition & 1 deletion install/.pins/et-pin.txt
@@ -1 +1 @@
- 9836b39fe690e1906f133b4a233863149c30d499
+ 791472d6706b027552f39f11b28d034e4839c9af
6 changes: 3 additions & 3 deletions install/install_requirements.sh
@@ -51,13 +51,13 @@ echo "Using pip executable: $PIP_EXECUTABLE"
# NOTE: If a newly-fetched version of the executorch repo changes the value of
# PYTORCH_NIGHTLY_VERSION, you should re-run this script to install the necessary
# package versions.
- PYTORCH_NIGHTLY_VERSION=dev20250124
+ PYTORCH_NIGHTLY_VERSION=dev20250131

# Nightly version for torchvision
- VISION_NIGHTLY_VERSION=dev20250124
+ VISION_NIGHTLY_VERSION=dev20250131

# Nightly version for torchtune
- TUNE_NIGHTLY_VERSION=dev20250124
+ TUNE_NIGHTLY_VERSION=dev20250131

# The pip repository that hosts nightly torch packages. cpu by default.
# If cuda is available, based on presence of nvidia-smi, install the pytorch nightly
2 changes: 1 addition & 1 deletion torchchat/cli/cli.py
@@ -549,7 +549,7 @@ def arg_init(args):
precision_handler = args.quantize.get("precision", None)
if precision_handler:
if precision_handler["dtype"] != args.dtype:
- print('overriding json-specified dtype {precision_handler["dtype"]} with cli dtype {args.dtype}')
+ print(f'overriding json-specified dtype {precision_handler["dtype"]} with cli dtype {args.dtype}')
precision_handler["dtype"] = args.dtype

if getattr(args, "output_pte_path", None):
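
Note: the fix above is the missing f prefix; without it, Python prints the brace expressions literally instead of interpolating them. A minimal sketch of the before/after behavior:

    dtype_from_json, dtype_from_cli = "float16", "bfloat16"

    # Before the fix: no f prefix, so the braces are printed verbatim.
    print('overriding json-specified dtype {dtype_from_json} with cli dtype {dtype_from_cli}')
    # -> overriding json-specified dtype {dtype_from_json} with cli dtype {dtype_from_cli}

    # After the fix: the f-string interpolates the actual values.
    print(f'overriding json-specified dtype {dtype_from_json} with cli dtype {dtype_from_cli}')
    # -> overriding json-specified dtype float16 with cli dtype bfloat16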
3 changes: 3 additions & 0 deletions torchchat/utils/scripts/build_native.sh
@@ -86,6 +86,9 @@ if [[ "$TARGET" == "et" ]]; then
EXECUTORCH_LIBRARIES="${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libexecutorch_no_prim_ops.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libextension_threadpool.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libcpuinfo.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libpthreadpool.a"
install_torchao_executorch_ops
fi
+ elif [[ "$LINK_TORCHAO_OPS" == "ON" ]]; then
+   # Install OMP when using AOTI with linked torchao ops
+   brew install libomp
fi
popd

4 changes: 2 additions & 2 deletions torchchat/utils/scripts/install_utils.sh
@@ -88,10 +88,10 @@ install_executorch_python_libs() {
echo "Building and installing python libraries"
if [ "${ENABLE_ET_PYBIND}" = false ]; then
echo "Not installing pybind"
- bash ./install_requirements.sh --pybind off
+ bash ./install_executorch.sh --pybind off
else
echo "Installing pybind"
- bash ./install_requirements.sh --pybind xnnpack
+ bash ./install_executorch.sh --pybind xnnpack
fi

# TODO: figure out the root cause of 'AttributeError: module 'evaluate'
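
Note: a quick way to sanity-check the pybind install this function performs -- a hedged sketch, assuming executorch's module layout around this pin (the exact import path can vary across versions):

    # Assumed smoke test, not part of the diff: if `--pybind xnnpack` succeeded,
    # the portable runtime bindings should import cleanly.
    from executorch.extension.pybindings import portable_lib  # noqa: F401
    print("executorch pybindings import OK")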
