diff --git a/.ci/scripts/convert_checkpoint.sh b/.ci/scripts/convert_checkpoint.sh
new file mode 100644
index 000000000..c83edc92e
--- /dev/null
+++ b/.ci/scripts/convert_checkpoint.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+set -eu
+
+function convert_checkpoint() {
+  local MODEL_REPO="$1"
+  local CHECKPOINT_NAME="${MODEL_REPO##*/}"
+
+  if [[ $CHECKPOINT_NAME == *"stories15M"* || $CHECKPOINT_NAME == *"stories42M"* || $CHECKPOINT_NAME == *"stories110M"* ]]; then
+    # Rename the checkpoint to model.pth so the workflow is uniform across models; convert_hf_checkpoint always writes model.pth
+    pushd "checkpoints/${MODEL_REPO}"
+    if [ ! -f "model.pth" ]; then
+      mv "$CHECKPOINT_NAME.pt" "model.pth"
+    fi
+    popd
+    return 0
+  fi
+
+  if [ -f "checkpoints/$MODEL_REPO/model.pth" ]; then
+    echo "Converted checkpoint already exists. Skipping conversion for $MODEL_REPO."
+    return 0
+  fi
+  echo "Convert Hugging Face checkpoint for $MODEL_REPO"
+  python scripts/convert_hf_checkpoint.py --checkpoint-dir "checkpoints/$MODEL_REPO"
+}
+
+
+convert_checkpoint "$1"
diff --git a/.ci/scripts/gather_test_models.py b/.ci/scripts/gather_test_models.py
new file mode 100644
index 000000000..0583a569c
--- /dev/null
+++ b/.ci/scripts/gather_test_models.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import itertools
+import json
+import os
+from typing import Any
+
+
+MODEL_REPOS = {
+    "tinyllamas/stories15M": "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt,https://github.com/karpathy/llama2.c/raw/master/tokenizer.model,https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin",
+    # "tinyllamas/stories42M": "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories42M.pt,https://github.com/karpathy/llama2.c/raw/master/tokenizer.model,https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin",
+    "tinyllamas/stories110M": "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.pt,https://github.com/karpathy/llama2.c/raw/master/tokenizer.model,https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin",
+}
+
+JOB_RUNNERS = {
+    "32-core-ubuntu": "linux x86",
+    "macos-13": "macos x86",
+    "macos-14": "macos M1",
+}
+
+
+def set_output(name: str, val: Any) -> None:
+    """
+    Set the GitHub output so that it can be accessed by other jobs
+    """
+    print(f"Setting {name} to {val} in GitHub output")
+
+    if os.getenv("GITHUB_OUTPUT"):
+        with open(str(os.getenv("GITHUB_OUTPUT")), "a") as env:
+            print(f"{name}={val}", file=env)
+    else:
+        print(f"::set-output name={name}::{val}")
+
+
+def export_models_for_ci() -> None:
+    """
+    Gather all the models that we want to test on GitHub OSS CI
+    """
+
+    # This is the JSON syntax for the configuration matrix used by GitHub
+    # https://docs.github.com/en/actions/using-jobs/using-a-matrix-for-your-jobs
+    models = {"include": []}
+
+    for repo_name, runner in itertools.product(
+        MODEL_REPOS.keys(),
+        JOB_RUNNERS.keys(),
+    ):
+        record = {
+            "repo_name": repo_name,
+            "resources": MODEL_REPOS[repo_name],
+            "runner": runner,
+            "platform": JOB_RUNNERS[runner],
+            "timeout": 90,
+        }
+
+        models["include"].append(record)
+
+    set_output("models", json.dumps(models))
+
+
+if __name__ == "__main__":
+    export_models_for_ci()
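For context: gather_test_models.py emits the matrix in the "include" form that GitHub's fromJSON() expects, one record per (model, runner) pair. A minimal sketch of exercising it locally and inspecting the line it appends to GITHUB_OUTPUT (the JSON is abbreviated here; the real value is one line built from MODEL_REPOS and JOB_RUNNERS above):

    # Point GITHUB_OUTPUT at a scratch file; in CI the runner provides it.
    export GITHUB_OUTPUT=/tmp/github_output.txt
    PYTHONPATH="${PWD}" python .ci/scripts/gather_test_models.py
    cat "$GITHUB_OUTPUT"
    # models={"include": [{"repo_name": "tinyllamas/stories15M", "resources": "...",
    #                      "runner": "32-core-ubuntu", "platform": "linux x86", "timeout": 90}, ...]}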
set_output("models", json.dumps(models)) + + +if __name__ == "__main__": + export_models_for_ci() diff --git a/.ci/scripts/validate.sh b/.ci/scripts/validate.sh new file mode 100644 index 000000000..ca9766d4c --- /dev/null +++ b/.ci/scripts/validate.sh @@ -0,0 +1,60 @@ +#!/bin/bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + + +set -u + +function generate_eager_model_output() { + local CHECKPOINT_PATH="$1" + local TARGET_DEVICE="${2:-cpu}" + local MODEL_DIR="${CHECKPOINT_PATH%/*}" + local MODEL_NAME=$(basename "$CHECKPOINT_PATH" | sed 's/\.[^.]*$//') + echo "Run inference with eager model for $MODEL_NAME" + python -W ignore generate.py --checkpoint-path "$CHECKPOINT_PATH" --prompt "$PROMPT" --device "$TARGET_DEVICE" > "$MODEL_DIR/output_eager" + cat "$MODEL_DIR/output_eager" +} + +function generate_compiled_model_output() { + local CHECKPOINT_PATH="$1" + local TARGET_DEVICE="${2:-cpu}" + local MODEL_DIR="${CHECKPOINT_PATH%/*}" + local MODEL_NAME=$(basename "$CHECKPOINT_PATH" | sed 's/\.[^.]*$//') + echo ""############### Run inference with torch.compile for $MODEL_NAME "###############" + python -W ignore generate.py --compile --checkpoint-path "$CHECKPOINT_PATH" --prompt "$PROMPT" --device "$TARGET_DEVICE" > "$MODEL_DIR/output_compiled" + cat "$MODEL_DIR/output_compiled" +} + +function generate_aoti_model_output() { + local CHECKPOINT_PATH="$1" + local TARGET_DEVICE="${2:-cpu}" + local MODEL_DIR="${CHECKPOINT_PATH%/*}" + local MODEL_NAME=$(basename "$CHECKPOINT_PATH" | sed 's/\.[^.]*$//') + echo ""############### Run inference with AOTInductor for $MODEL_NAME "###############" + python -W ignore export.py --checkpoint-path "$CHECKPOINT_PATH" --output-dso-path "${MODEL_DIR}/${MODEL_NAME}.so" --device "$TARGET_DEVICE" + python -W ignore generate.py --checkpoint-path "$CHECKPOINT_PATH" --dso-path "$MODEL_DIR/${MODEL_NAME}.so" --prompt "$PROMPT" > "$MODEL_DIR/output_aoti" + cat "$MODEL_DIR/output_aoti" +} + +function generate_executorch_model_output() { + local CHECKPOINT_PATH="$1" + local TARGET_DEVICE="${2:-cpu}" + local MODEL_DIR="${CHECKPOINT_PATH%/*}" + local MODEL_NAME=$(basename "$CHECKPOINT_PATH" | sed 's/\.[^.]*$//') + echo ""############### Run inference with ExecuTorch using XNNPACK for $MODEL_NAME "###############" + python -W ignore export.py --checkpoint-path "$CHECKPOINT_PATH" --output-pte-path "$MODEL_DIR/${MODEL_NAME}.pte" -d "fp32" + python -W ignore generate.py --checkpoint-path "$CHECKPOINT_PATH" --prompt "$PROMPT" --device "$TARGET_DEVICE" --pte-path "$MODEL_DIR/${MODEL_NAME}.pte" > "$MODEL_DIR/output_et" + cat "$MODEL_DIR/output_et" +} + + +CHECKPOINT_PATH="$1" +TARGET_DEVICE="${2:-cpu}" +PROMPT="Hello, my name is" + +generate_compiled_model_output $CHECKPOINT_PATH $TARGET_DEVICE +generate_aoti_model_output $CHECKPOINT_PATH $TARGET_DEVICE +generate_executorch_model_output $CHECKPOINT_PATH $TARGET_DEVICE diff --git a/.ci/scripts/wget_checkpoint.sh b/.ci/scripts/wget_checkpoint.sh new file mode 100644 index 000000000..eb9e59cfc --- /dev/null +++ b/.ci/scripts/wget_checkpoint.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
diff --git a/.ci/scripts/wget_checkpoint.sh b/.ci/scripts/wget_checkpoint.sh
new file mode 100644
index 000000000..eb9e59cfc
--- /dev/null
+++ b/.ci/scripts/wget_checkpoint.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -exu
+
+MODEL_REPO="$1"
+RESOURCES_STRING="$2"
+CHECKPOINT_NAME="${MODEL_REPO##*/}"
+
+pushd "${LLAMA_FAST_ROOT}" || exit
+
+# Create the directory for the checkpoint
+mkdir -p "checkpoints/${MODEL_REPO}"
+cd "checkpoints/${MODEL_REPO}" || exit
+
+# Download all resources
+IFS=',' # Set the field separator to comma
+for resource in $RESOURCES_STRING; do
+  echo "Downloading: $resource"
+  if ! wget "$resource" 2>&1; then
+    echo "Error: Failed to download $resource" >&2
+    exit 1
+  fi
+done
+
+popd || exit
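A usage sketch for wget_checkpoint.sh, assuming LLAMA_FAST_ROOT is set (the pull.yml workflow below exports it); the second argument is the same comma-separated resource list that gather_test_models.py attaches to each matrix record:

    export LLAMA_FAST_ROOT="${PWD}"
    bash .ci/scripts/wget_checkpoint.sh tinyllamas/stories15M \
        "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt,https://github.com/karpathy/llama2.c/raw/master/tokenizer.model,https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin"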
diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
new file mode 100644
index 000000000..5576f1d4d
--- /dev/null
+++ b/.github/workflows/pull.yml
@@ -0,0 +1,59 @@
+name: pull
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+  workflow_dispatch:
+
+jobs:
+  gather-models:
+    runs-on: ubuntu-22.04
+    outputs:
+      models: ${{ steps.gather-models.outputs.models }}
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          submodules: 'false'
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.11'
+      - name: Extract the list of models to test
+        id: gather-models
+        run: |
+          set -eux
+          PYTHONPATH="${PWD}" python .ci/scripts/gather_test_models.py
+  test-cpu:
+    name: test-cpu (${{ matrix.platform }}, ${{ matrix.repo_name }})
+    needs: gather-models
+    strategy:
+      matrix: ${{ fromJSON(needs.gather-models.outputs.models) }}
+      fail-fast: false
+    runs-on: ${{ matrix.runner }}
+    env:
+      LLAMA_FAST_ROOT: ${{ github.workspace }}
+      REPO_NAME: ${{ matrix.repo_name }}
+      ENABLE_ET_PYBIND: ${{ matrix.runner == 'macos-14' && 'false' || 'true' }}
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v3
+      - name: Setup Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.11'
+      - name: Print machine info
+        run: |
+          uname -a
+      - name: Install dependencies
+        run: |
+          bash ${LLAMA_FAST_ROOT}/scripts/install_et.sh $ENABLE_ET_PYBIND
+      - name: Download checkpoints
+        run: |
+          bash ${LLAMA_FAST_ROOT}/.ci/scripts/wget_checkpoint.sh ${{ matrix.repo_name }} "${{ matrix.resources }}"
+      - name: Run validation
+        run: |
+          pushd ${LLAMA_FAST_ROOT}
+          export CHECKPOINT_PATH=${LLAMA_FAST_ROOT}/checkpoints/${REPO_NAME}/model.pth
+          bash ${LLAMA_FAST_ROOT}/.ci/scripts/convert_checkpoint.sh ${REPO_NAME}
+          bash ${LLAMA_FAST_ROOT}/.ci/scripts/validate.sh ${CHECKPOINT_PATH}
diff --git a/.github/workflows/runner_et.yml b/.github/workflows/runner_et.yml
index da84507fc..a8da11487 100644
--- a/.github/workflows/runner_et.yml
+++ b/.github/workflows/runner_et.yml
@@ -37,8 +37,8 @@ jobs:
         pip install -r requirements.txt
 
         export LLAMA_FAST_ROOT=${PWD}
-        export ET_NO_PYBIND=1
-        ./scripts/install_et.sh
+        export ENABLE_ET_PYBIND=false
+        ./scripts/install_et.sh $ENABLE_ET_PYBIND
         cmake -S ./runner-et -B build/cmake-out -G Ninja
         cmake --build ./build/cmake-out
     - name: Download checkpoints
diff --git a/scripts/install_et.sh b/scripts/install_et.sh
index a40c07f01..7537c95b3 100755
--- a/scripts/install_et.sh
+++ b/scripts/install_et.sh
@@ -1,32 +1,62 @@
-cd ${LLAMA_FAST_ROOT}
-echo "Inside: $LLAMA_FAST_ROOT"
-
-echo "Cloning executorch to ${LLAMA_FAST_ROOT}/build/src"
-rm -rf ${LLAMA_FAST_ROOT}/build
-mkdir -p ${LLAMA_FAST_ROOT}/build/src
-cd ${LLAMA_FAST_ROOT}/build/src
-git clone https://github.com/pytorch/executorch.git
-cd executorch
-echo "Install executorch: submodule update"
-git submodule sync
-git submodule update --init
-
-echo "Applying fixes"
-cp ${LLAMA_FAST_ROOT}/scripts/fixes_et/module.cpp ${LLAMA_FAST_ROOT}/build/src/executorch/extension/module/module.cpp # ET uses non-standard C++ that does not compile in GCC
-cp ${LLAMA_FAST_ROOT}/scripts/fixes_et/managed_tensor.h ${LLAMA_FAST_ROOT}/build/src/executorch/extension/runner_util/managed_tensor.h # ET is missing headers for vector/memory. This causes downstream issues when building runner-et.
-
-echo "Building and installing python libraries"
-if [ -n "${ET_NO_PYBIND}" ]; then
-  echo "Not installing pybind"
-  ./install_requirements.sh
-else
-  echo "Installing pybind"
-  ./install_requirements.sh --pybind xnnpack
-fi
-
-echo "Building and installing C++ libraries"
-echo "Inside: ${PWD}"
-mkdir cmake-out
-cmake -DCMAKE_BUILD_TYPE=Release -DEXECUTORCH_BUILD_OPTIMIZED=ON -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON -DEXECUTORCH_BUILD_XNNPACK=ON -S . -B cmake-out -G Ninja
-cmake --build cmake-out
-cmake --install cmake-out --prefix ${LLAMA_FAST_ROOT}/build/install
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -exu
+
+install_pip_dependencies() {
+  echo "Installing common pip packages"
+
+  pip install wheel
+  pip install cmake
+  pip install ninja
+  pip install zstd
+  pushd ${LLAMA_FAST_ROOT}
+  pip install -r ./requirements.txt
+  popd
+}
+
+install_executorch() {
+  echo "Cloning executorch to ${LLAMA_FAST_ROOT}/build/src"
+  rm -rf ${LLAMA_FAST_ROOT}/build
+  mkdir -p ${LLAMA_FAST_ROOT}/build/src
+  pushd ${LLAMA_FAST_ROOT}/build/src
+  git clone https://github.com/pytorch/executorch.git
+  cd executorch
+  echo "Install executorch: submodule update"
+  git submodule sync
+  git submodule update --init
+
+  echo "Applying fixes"
+  cp ${LLAMA_FAST_ROOT}/scripts/fixes_et/module.cpp ${LLAMA_FAST_ROOT}/build/src/executorch/extension/module/module.cpp # ET uses non-standard C++ that does not compile in GCC
+  cp ${LLAMA_FAST_ROOT}/scripts/fixes_et/managed_tensor.h ${LLAMA_FAST_ROOT}/build/src/executorch/extension/runner_util/managed_tensor.h # ET is missing headers for vector/memory. This causes downstream issues when building runner-et.
+
+  echo "Building and installing python libraries"
+  if [ "${ENABLE_ET_PYBIND}" = false ]; then
+    echo "Not installing pybind"
+    bash ./install_requirements.sh
+  else
+    echo "Installing pybind"
+    bash ./install_requirements.sh --pybind xnnpack
+  fi
+  pip list
+
+  echo "Building and installing C++ libraries"
+  echo "Inside: ${PWD}"
+  mkdir cmake-out
+  cmake -DCMAKE_BUILD_TYPE=Release -DEXECUTORCH_BUILD_OPTIMIZED=ON -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON -DEXECUTORCH_BUILD_XNNPACK=ON -S . -B cmake-out -G Ninja
+  cmake --build cmake-out
+  cmake --install cmake-out --prefix ${LLAMA_FAST_ROOT}/build/install
+  popd
+}
+
+
+ENABLE_ET_PYBIND="${1:-true}"
+
+pushd ${LLAMA_FAST_ROOT}
+install_pip_dependencies
+install_executorch "$ENABLE_ET_PYBIND"
+popd
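After this change install_et.sh takes the pybind toggle as its first positional argument, defaulting to true when omitted. A sketch of both call styles, run from the repo root:

    export LLAMA_FAST_ROOT="${PWD}"
    # Build ExecuTorch with the XNNPACK pybind, as the Linux and macos-13 CI jobs do:
    bash ./scripts/install_et.sh true
    # Skip the pybind, as the macos-14 (M1) job and runner_et.yml do:
    bash ./scripts/install_et.sh false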
diff --git a/scripts/workflow.sh b/scripts/workflow.sh
new file mode 100644
index 000000000..00c31266e
--- /dev/null
+++ b/scripts/workflow.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+set -eu
+
+function download_tinyllamas() {
+  local MODEL_REPO="$1"
+  local FORCE_DOWNLOAD="${2:-false}"
+  local CHECKPOINT_DIR="checkpoints/$MODEL_REPO"
+  local MODEL_NAME="${MODEL_REPO##*/}"
+
+  if [ "$FORCE_DOWNLOAD" = true ] || [ ! -d "$CHECKPOINT_DIR" ] || [ -z "$(ls -A "$CHECKPOINT_DIR")" ]; then
+    echo "Download checkpoint for $MODEL_REPO"
+    rm -rf "$CHECKPOINT_DIR"
+
+    mkdir -p "checkpoints/$MODEL_REPO"
+    pushd "checkpoints/$MODEL_REPO"
+    wget "https://huggingface.co/karpathy/tinyllamas/resolve/main/${MODEL_NAME}.pt"
+    wget "https://github.com/karpathy/llama2.c/raw/master/tokenizer.model"
+    wget "https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin"
+    popd
+  else
+    echo "Checkpoint directory for $MODEL_REPO is not empty. Skipping download."
+  fi
+}
+
+function download_checkpoint() {
+  local MODEL_REPO="$1"
+  local FORCE_DOWNLOAD="${2:-false}"
+  local CHECKPOINT_DIR="checkpoints/$MODEL_REPO"
+
+  if [ "$MODEL_REPO" = "tinyllamas/stories15M" ] || [ "$MODEL_REPO" = "tinyllamas/stories42M" ] || [ "$MODEL_REPO" = "tinyllamas/stories110M" ]; then
+    echo "Download checkpoint for $MODEL_REPO"
+    download_tinyllamas "$MODEL_REPO" "$FORCE_DOWNLOAD"
+    return 0
+  fi
+
+  if [ "$FORCE_DOWNLOAD" = true ] || [ ! -d "$CHECKPOINT_DIR" ] || [ -z "$(ls -A "$CHECKPOINT_DIR")" ]; then
+    echo "Download checkpoint for $MODEL_REPO"
+    rm -rf "$CHECKPOINT_DIR"
+    python scripts/download.py --repo-id "$MODEL_REPO"
+  else
+    echo "Checkpoint directory for $MODEL_REPO is not empty. Skipping download."
+  fi
+}
+
+
+# List of models to validate
+MODEL_REPOS=(
+  "tinyllamas/stories15M"
+  # "tinyllamas/stories42M"
+  "tinyllamas/stories110M"
+  # "mistralai/Mistral-7B-v0.1"
+  # "mistralai/Mistral-7B-Instruct-v0.1"
+  # "mistralai/Mistral-7B-Instruct-v0.2"
+  # "openlm-research/open_llama_7b"
+  # "codellama/CodeLlama-7b-Python-hf"
+  # "codellama/CodeLlama-34b-Python-hf"
+  # "meta-llama/Llama-2-7b-chat-hf"
+  # "meta-llama/Llama-2-13b-chat-hf"
+  # "meta-llama/Llama-2-70b-chat-hf"
+)
+
+PROMPT="Hello, my name is"
+DEVICE="${1:-cpu}"
+CHECKPOINT_FILENAME="model.pth"
+
+echo "###############################################################"
+echo "############## Start Llama-fast Model Validation ##############"
+echo "###############################################################"
+for MODEL_REPO in "${MODEL_REPOS[@]}"; do
+  echo "############### Validating ${MODEL_REPO##*/} ###############"
+  download_checkpoint "$MODEL_REPO"
+  bash .ci/scripts/convert_checkpoint.sh "$MODEL_REPO"
+
+  set +e
+  CHECKPOINT_PATH="checkpoints/$MODEL_REPO/$CHECKPOINT_FILENAME"
+  bash .ci/scripts/validate.sh "$CHECKPOINT_PATH" "$DEVICE"
+done
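Putting the pieces together, a hypothetical end-to-end local run from the repo root; the single optional argument is the target device and defaults to cpu:

    # Downloads, converts, and validates every uncommented entry in MODEL_REPOS.
    bash ./scripts/workflow.sh cpu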