diff --git a/.github/actions/release/action.yaml b/.github/actions/release/action.yaml new file mode 100644 index 000000000..38157f59e --- /dev/null +++ b/.github/actions/release/action.yaml @@ -0,0 +1,174 @@ +name: release +description: "Cut a release of all LeapfrogAI artifacts" + +inputs: + releaseTag: + description: The release tag to be published, cannot be left empty + required: true + subRepository: + description: The sub-repository to publish the artifacts to + required: false + default: /uds/ + registry1Username: + description: Registry1 Username + registry1Password: + description: Registry1 Password + ghToken: + description: GitHub Token + chainguardIdentity: + description: Chainguard login identity + +runs: + using: composite + + steps: + - name: Setup UDS Environment + uses: defenseunicorns/uds-common/.github/actions/setup@e3008473beab00b12a94f9fcc7340124338d5c08 # v0.13.1 + with: + registry1Username: ${{ inputs.registry1Username }} + registry1Password: ${{ inputs.registry1Password }} + ghToken: ${{ inputs.ghToken }} + chainguardIdentity: ${{ inputs.chainguardIdentity }} + + - name: Set up QEMU + uses: docker/setup-qemu-action@68827325e0b33c7199eb31dd4e31fbe9023e06e3 # v3.0.0 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@0d103c3126aa41d772a8362f6aa67afac040f80c # v3.1.0 + + - name: Setup Python + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 + with: + python-version-file: "pyproject.toml" + + - name: Install Dev Dependencies + shell: bash + run: | + python -m pip install ".[dev]" ".[dev-vllm]" ".[dev-whisper]" + + - name: Build and Publish K3d GPU + shell: bash + run: | + cd packages/k3d-gpu + docker build \ + --platform linux/amd64 \ + -t ghcr.io/defenseunicorns/leapfrogai/k3d-gpu:${{ inputs.releaseTag }} . + docker push ghcr.io/defenseunicorns/leapfrogai/k3d-gpu:${{ inputs.releaseTag }} + cd ../.. + + - name: Download Python Wheels and Publish Builder Image + shell: bash + run: | + docker buildx build --platform amd64,arm64 -t ghcr.io/defenseunicorns/leapfrogai/leapfrogai-sdk:${{ inputs.releaseTag }} --push -f src/leapfrogai_sdk/Dockerfile . + + - name: Install Zarf + uses: defenseunicorns/setup-zarf@10e539efed02f75ec39eb8823e22a5c795f492ae #v1.0.1 + + - name: Build and Publish API + shell: bash + run: | + docker buildx build --platform amd64,arm64 --build-arg LOCAL_VERSION=${{ inputs.releaseTag }} -t ghcr.io/defenseunicorns/leapfrogai/leapfrogai-api:${{ inputs.releaseTag }} --push -f packages/api/Dockerfile . + docker buildx build --platform amd64,arm64 -t ghcr.io/defenseunicorns/leapfrogai/api-migrations:${{ inputs.releaseTag }} --push -f Dockerfile.migrations --build-arg="MIGRATIONS_DIR=packages/api/supabase/migrations" . 
+ + zarf package create packages/api --set=IMAGE_VERSION=${{ inputs.releaseTag }} --architecture amd64 --flavor upstream --confirm + zarf package create packages/api --set=IMAGE_VERSION=${{ inputs.releaseTag }} --architecture arm64 --flavor upstream --confirm + + zarf package publish zarf-package-leapfrogai-api-amd64-${{ inputs.releaseTag }}.tar.zst oci://ghcr.io/defenseunicorns/packages${{ inputs.subRepository }}leapfrogai + zarf package publish zarf-package-leapfrogai-api-arm64-${{ inputs.releaseTag }}.tar.zst oci://ghcr.io/defenseunicorns/packages${{ inputs.subRepository }}leapfrogai + + docker image prune -af + rm zarf-package-leapfrogai-api-*.tar.zst + + - name: Build and Publish UI + shell: bash + run: | + docker buildx build --platform amd64,arm64 -t ghcr.io/defenseunicorns/leapfrogai/leapfrogai-ui:${{ inputs.releaseTag }} --push src/leapfrogai_ui + docker buildx build --platform amd64,arm64 -t ghcr.io/defenseunicorns/leapfrogai/ui-migrations:${{ inputs.releaseTag }} --push -f Dockerfile.migrations --build-arg="MIGRATIONS_DIR=src/leapfrogai_ui/supabase/migrations" . + + zarf package create packages/ui --set=IMAGE_VERSION=${{ inputs.releaseTag }} --architecture amd64 --flavor upstream --confirm + zarf package create packages/ui --set=IMAGE_VERSION=${{ inputs.releaseTag }} --architecture arm64 --flavor upstream --confirm + + zarf package publish zarf-package-leapfrogai-ui-amd64-${{ inputs.releaseTag }}.tar.zst oci://ghcr.io/defenseunicorns/packages${{ inputs.subRepository }}leapfrogai + zarf package publish zarf-package-leapfrogai-ui-arm64-${{ inputs.releaseTag }}.tar.zst oci://ghcr.io/defenseunicorns/packages${{ inputs.subRepository }}leapfrogai + + docker image prune -af + rm zarf-package-leapfrogai-ui-*.tar.zst + + - name: Build and Publish Supabase + shell: bash + run: | + docker buildx build --platform amd64,arm64 -t ghcr.io/defenseunicorns/leapfrogai/supabase-migrations:${{ inputs.releaseTag }} --push -f Dockerfile.migrations --build-arg="MIGRATIONS_DIR=packages/supabase/migrations" . + + zarf package create packages/supabase --set=IMAGE_VERSION=${{ inputs.releaseTag }} --architecture amd64 --flavor upstream --confirm + zarf package create packages/supabase --set=IMAGE_VERSION=${{ inputs.releaseTag }} --architecture arm64 --flavor upstream --confirm + + zarf package publish zarf-package-supabase-amd64-${{ inputs.releaseTag }}.tar.zst oci://ghcr.io/defenseunicorns/packages${{ inputs.subRepository }}leapfrogai + zarf package publish zarf-package-supabase-arm64-${{ inputs.releaseTag }}.tar.zst oci://ghcr.io/defenseunicorns/packages${{ inputs.subRepository }}leapfrogai + + - name: Build and Publish Repeater + shell: bash + run: | + docker buildx build --platform amd64,arm64 --build-arg LOCAL_VERSION=${{ inputs.releaseTag }} -t ghcr.io/defenseunicorns/leapfrogai/repeater:${{ inputs.releaseTag }} --push -f packages/repeater/Dockerfile . 
+ + zarf package create packages/repeater --set=IMAGE_VERSION=${{ inputs.releaseTag }} --architecture amd64 --flavor upstream --confirm + zarf package create packages/repeater --set=IMAGE_VERSION=${{ inputs.releaseTag }} --architecture arm64 --flavor upstream --confirm + + zarf package publish zarf-package-repeater-amd64-${{ inputs.releaseTag }}.tar.zst oci://ghcr.io/defenseunicorns/packages${{ inputs.subRepository }}leapfrogai + zarf package publish zarf-package-repeater-arm64-${{ inputs.releaseTag }}.tar.zst oci://ghcr.io/defenseunicorns/packages${{ inputs.subRepository }}leapfrogai + + docker image prune -af + rm zarf-package-repeater-*.tar.zst + + - name: Build and Publish LLaMA-CPP-Python + shell: bash + run: | + docker buildx build --platform amd64,arm64 --build-arg LOCAL_VERSION=${{ inputs.releaseTag }} -t ghcr.io/defenseunicorns/leapfrogai/llama-cpp-python:${{ inputs.releaseTag }} --push -f packages/llama-cpp-python/Dockerfile . + + zarf package create packages/llama-cpp-python --set=IMAGE_VERSION=${{ inputs.releaseTag }} --architecture amd64 --flavor upstream --confirm + zarf package create packages/llama-cpp-python --set=IMAGE_VERSION=${{ inputs.releaseTag }} --architecture arm64 --flavor upstream --confirm + + zarf package publish zarf-package-llama-cpp-python-amd64-${{ inputs.releaseTag }}.tar.zst oci://ghcr.io/defenseunicorns/packages${{ inputs.subRepository }}leapfrogai + zarf package publish zarf-package-llama-cpp-python-arm64-${{ inputs.releaseTag }}.tar.zst oci://ghcr.io/defenseunicorns/packages${{ inputs.subRepository }}leapfrogai + + docker image prune -af + rm zarf-package-llama-*.tar.zst + + - name: Build and Publish vLLM + shell: bash + run: | + docker buildx build --build-arg LOCAL_VERSION=${{ inputs.releaseTag }} -t ghcr.io/defenseunicorns/leapfrogai/vllm:${{ inputs.releaseTag }} --push -f packages/vllm/Dockerfile . + + ZARF_CONFIG=packages/vllm/zarf-config.yaml zarf package create packages/vllm --set=IMAGE_VERSION=${{ inputs.releaseTag }} --flavor upstream --confirm + + zarf package publish zarf-package-vllm-amd64-${{ inputs.releaseTag }}.tar.zst oci://ghcr.io/defenseunicorns/packages${{ inputs.subRepository }}leapfrogai + + docker image prune -af + rm zarf-package-vllm-*.tar.zst + + - name: Build and Publish Text-Embeddings + shell: bash + run: | + docker buildx build --platform amd64,arm64 --build-arg LOCAL_VERSION=${{ inputs.releaseTag }} -t ghcr.io/defenseunicorns/leapfrogai/text-embeddings:${{ inputs.releaseTag }} --push -f packages/text-embeddings/Dockerfile . + + zarf package create packages/text-embeddings --set=IMAGE_VERSION=${{ inputs.releaseTag }} --architecture amd64 --flavor upstream --confirm + zarf package create packages/text-embeddings --set=IMAGE_VERSION=${{ inputs.releaseTag }} --architecture arm64 --flavor upstream --confirm + + zarf package publish zarf-package-text-embeddings-amd64-${{ inputs.releaseTag }}.tar.zst oci://ghcr.io/defenseunicorns/packages${{ inputs.subRepository }}leapfrogai + zarf package publish zarf-package-text-embeddings-arm64-${{ inputs.releaseTag }}.tar.zst oci://ghcr.io/defenseunicorns/packages${{ inputs.subRepository }}leapfrogai + + docker image prune -af + rm zarf-package-text-embeddings-*.tar.zst + + - name: Build and Publish Whisper + shell: bash + run: | + docker buildx build --platform amd64,arm64 --build-arg LOCAL_VERSION=${{ inputs.releaseTag }} -t ghcr.io/defenseunicorns/leapfrogai/whisper:${{ inputs.releaseTag }} --push -f packages/whisper/Dockerfile . 
+ + zarf package create packages/whisper --set=IMAGE_VERSION=${{ inputs.releaseTag }} --architecture amd64 --flavor upstream --confirm + zarf package create packages/whisper --set=IMAGE_VERSION=${{ inputs.releaseTag }} --architecture arm64 --flavor upstream --confirm + + zarf package publish zarf-package-whisper-amd64-${{ inputs.releaseTag }}.tar.zst oci://ghcr.io/defenseunicorns/packages${{ inputs.subRepository }}leapfrogai + zarf package publish zarf-package-whisper-arm64-${{ inputs.releaseTag }}.tar.zst oci://ghcr.io/defenseunicorns/packages${{ inputs.subRepository }}leapfrogai + + docker image prune -af + rm zarf-package-whisper-*.tar.zst diff --git a/.github/actions/uds-cluster/action.yaml b/.github/actions/uds-cluster/action.yaml index 34620025b..97396397b 100644 --- a/.github/actions/uds-cluster/action.yaml +++ b/.github/actions/uds-cluster/action.yaml @@ -8,6 +8,8 @@ inputs: description: Registry1 Password ghToken: description: GitHub Token + chainguardIdentity: + description: Chainguard login identity runs: using: composite @@ -18,10 +20,10 @@ runs: registry1Username: ${{ inputs.registry1Username }} registry1Password: ${{ inputs.registry1Password }} ghToken: ${{ inputs.ghToken }} - udsCliVersion: 0.14.0 + chainguardIdentity: ${{ inputs.chainguardIdentity }} - name: Checkout Repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Create UDS Cluster shell: bash diff --git a/.github/release-please-config.json b/.github/release-please-config.json index 6cf564cdb..8f475204d 100644 --- a/.github/release-please-config.json +++ b/.github/release-please-config.json @@ -26,6 +26,11 @@ "path": "**/zarf.yaml", "glob": true }, + { + "type": "generic", + "path": "**/zarf-config.yaml", + "glob": true + }, { "type": "generic", "path": "**/uds-bundle.yaml", diff --git a/.github/scripts/uds_verification_report.py b/.github/scripts/uds_verification_report.py new file mode 100755 index 000000000..0e4d4e8fe --- /dev/null +++ b/.github/scripts/uds_verification_report.py @@ -0,0 +1,180 @@ +#!/usr/bin/env python3 + +import os +import re + + +def remove_ansi_escape_sequences(text): + ansi_escape = re.compile(r"\x1B\[[0-?]*[ -/]*[@-~]") + return ansi_escape.sub("", text) + + +# Capabilities that affect the entire capability, not just a single package +def uds_capability_wide_errors(text: str) -> bool: + if "Not all pods have the istio sidecar" in text: + return True + return False + + +# CI environment variable enables GitHub annotations +def print_package_info( + package_name, + failures_count, + errors_count, + warnings_count, + failure_descriptions, + error_descriptions, + warning_descriptions, + uds_capability_wide_errors_count, +): + if uds_capability_wide_errors_count >= 1: + errors_count -= uds_capability_wide_errors_count + if package_name: + print("-----------------------------") + if os.getenv("CI") == "true": + print(f"::group::{package_name}") + print(f"Package: {package_name}\n") + if failures_count > 0: + if os.getenv("CI") == "true": + print("::error::", end="") + print(f"⛔ Failures: {failures_count}") + else: + if errors_count > 0: + if os.getenv("CI") == "true": + print("::error::", end="") + print(f"❌ Errors: {errors_count}") + if warnings_count > 0: + if os.getenv("CI") == "true": + print("::warning::", end="") + print(f"⚠️ Warnings: {warnings_count}") + if failures_count > 0: + print("\n⛔ Failure Descriptions:") + for desc in failure_descriptions: + print(f" - {desc}") + else: + if 
errors_count > 0: + print("\n❌ Error Descriptions:") + for desc in error_descriptions: + print(f" - {desc}") + if warnings_count > 0: + print("\n⚠️ Warning Descriptions:") + for desc in warning_descriptions: + print(f" - {desc}") + if os.getenv("CI") == "true": + print("::endgroup::") + + +def main(): + # Read data from the specified file instead of stdin + file_path = os.path.join( + os.getenv("GITHUB_WORKSPACE", ""), "reports/intermediate-report.txt" + ) + with open(file_path, mode="r", encoding="utf-8", errors="ignore") as file: + data = file.read() + # Remove ANSI escape sequences + clean_data = remove_ansi_escape_sequences(data) + # Initialize variables + package_name = "" + failures_count = 0 + errors_count = 0 + warnings_count = 0 + uds_capability_wide_errors_count = 0 + failure_descriptions = [] + error_descriptions = [] + warning_descriptions = [] + uds_capability_wide_error_descriptions = [] + previous_package_name = None + + # Process each line + for line in clean_data.splitlines(): + # Remove leading and trailing whitespace + line = line.strip() + + # Match and extract the package name + match = re.match(r"^ℹ️\s+Package\s+Name:\s+(.*)$", line) + if match: + # Print the previous package's info before starting a new one + if previous_package_name is not None: + print_package_info( + previous_package_name, + failures_count, + errors_count, + warnings_count, + failure_descriptions, + error_descriptions, + warning_descriptions, + uds_capability_wide_errors_count, + ) + # Reset variables for the new package + package_name = match.group(1) + failures_count = 0 + errors_count = 0 + warnings_count = 0 + failure_descriptions = [] + error_descriptions = [] + warning_descriptions = [] + previous_package_name = package_name + continue + + if uds_capability_wide_errors(line): + uds_capability_wide_errors_count = 1 + uds_capability_wide_error_descriptions = [ + "Not all pods have the istio sidecar" + ] + continue + else: + # Match and extract counts for failures, errors, and warnings + match = re.match(r"^(❌|⚠️|⛔)\s+(\d+)\s+([a-z]+)\s+found$", line) + if match: + count = int(match.group(2)) + type_ = match.group(3) + if type_ == "errors": + errors_count = count + elif type_ == "warnings": + warnings_count = count + elif type_ == "failures": + failures_count = count + continue + + # Match and collect issue descriptions + match = re.match(r"^(❌|⚠️|⛔)\s+(.*)$", line) + if match: + emoji = match.group(1) + description = match.group(2) + if emoji == "❌": + error_descriptions.append(description) + elif emoji == "⚠️": + warning_descriptions.append(description) + elif emoji == "⛔": + failure_descriptions.append(description) + continue + + # Print the last package's information + if previous_package_name is not None: + print_package_info( + previous_package_name, + failures_count, + errors_count, + warnings_count, + failure_descriptions, + error_descriptions, + warning_descriptions, + uds_capability_wide_errors_count, + ) + if uds_capability_wide_errors_count >= 1: + print("-----------------------------") + if os.getenv("CI") == "true": + print("::group::UDS Capability-Wide Issues") + print("::error::", end="") + print("UDS Capability Issues") + print("\n❌ Error Descriptions:") + for desc in uds_capability_wide_error_descriptions: + print(f" - {desc}") + if os.getenv("CI") == "true": + print("::endgroup::") + + +if __name__ == "__main__": + main() + # Print the final ending separator + print("-----------------------------") diff --git a/.github/workflows/commit-lint.yaml 
b/.github/workflows/commit-lint.yaml index 3d8cd67ff..9c4b1c8b6 100644 --- a/.github/workflows/commit-lint.yaml +++ b/.github/workflows/commit-lint.yaml @@ -22,7 +22,7 @@ jobs: steps: - name: Checkout - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 with: fetch-depth: 0 diff --git a/.github/workflows/docker-lint.yaml b/.github/workflows/docker-lint.yaml index c46e5557d..98f2124d0 100644 --- a/.github/workflows/docker-lint.yaml +++ b/.github/workflows/docker-lint.yaml @@ -31,7 +31,7 @@ jobs: password: ${{ secrets.GITHUB_TOKEN }} - name: Checkout Repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - uses: hadolint/hadolint-action@54c9adbab1582c2ef04b2016b760714a4bfde3cf # v3.1.0 with: diff --git a/.github/workflows/e2e-llama-cpp-python.yaml b/.github/workflows/e2e-llama-cpp-python.yaml index e3d573bba..e116616e7 100644 --- a/.github/workflows/e2e-llama-cpp-python.yaml +++ b/.github/workflows/e2e-llama-cpp-python.yaml @@ -32,6 +32,7 @@ on: # Ignore local development files - "!.pre-commit-config.yaml" + - "!tasks.yaml" # Ignore non e2e tests changes - "!tests/pytest/**" @@ -56,9 +57,14 @@ jobs: runs-on: ai-ubuntu-big-boy-8-core if: ${{ !github.event.pull_request.draft }} + permissions: + contents: read + packages: read + id-token: write # This is needed for OIDC federation. + steps: - name: Checkout Repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Setup Python uses: ./.github/actions/python @@ -69,6 +75,7 @@ jobs: registry1Username: ${{ secrets.IRON_BANK_ROBOT_USERNAME }} registry1Password: ${{ secrets.IRON_BANK_ROBOT_PASSWORD }} ghToken: ${{ secrets.GITHUB_TOKEN }} + chainguardIdentity: ${{ secrets.CHAINGUARD_IDENTITY }} - name: Setup API and Supabase uses: ./.github/actions/lfai-core diff --git a/.github/workflows/e2e-playwright.yaml b/.github/workflows/e2e-playwright.yaml index 7200155fe..3b6464dbd 100644 --- a/.github/workflows/e2e-playwright.yaml +++ b/.github/workflows/e2e-playwright.yaml @@ -34,6 +34,7 @@ on: # Ignore local development files - "!.pre-commit-config.yaml" + - "!tasks.yaml" # Ignore non e2e tests changes - "!tests/pytest/**" @@ -57,9 +58,14 @@ jobs: runs-on: ai-ubuntu-big-boy-8-core if: ${{ !github.event.pull_request.draft }} + permissions: + contents: read + packages: read + id-token: write # This is needed for OIDC federation. 
+ steps: - name: Checkout Repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Setup Node uses: actions/setup-node@0a44ba7841725637a19e28fa30b79a866c81b0a6 # v4.0.4 @@ -82,6 +88,7 @@ jobs: registry1Username: ${{ secrets.IRON_BANK_ROBOT_USERNAME }} registry1Password: ${{ secrets.IRON_BANK_ROBOT_PASSWORD }} ghToken: ${{ secrets.GITHUB_TOKEN }} + chainguardIdentity: ${{ secrets.CHAINGUARD_IDENTITY }} - name: Create Test User run: | @@ -120,7 +127,7 @@ jobs: - name: UI/API/Supabase E2E Playwright Tests run: | cp src/leapfrogai_ui/.env.example src/leapfrogai_ui/.env - rm src/leapfrogai_ui/tests/global.teardown.ts + rm src/leapfrogai_ui/tests/global.teardown.ts mkdir -p src/leapfrogai_ui/playwright/.auth SERVICE_ROLE_KEY=$(uds zarf tools kubectl get secret -n leapfrogai supabase-bootstrap-jwt -o jsonpath={.data.service-key} | base64 -d) echo "::add-mask::$SERVICE_ROLE_KEY" diff --git a/.github/workflows/e2e-text-backend-full-cpu.yaml b/.github/workflows/e2e-text-backend-full-cpu.yaml index 6e8507ae3..bca3364b4 100644 --- a/.github/workflows/e2e-text-backend-full-cpu.yaml +++ b/.github/workflows/e2e-text-backend-full-cpu.yaml @@ -32,6 +32,7 @@ on: # Ignore local development files - "!.pre-commit-config.yaml" + - "!tasks.yaml" # Ignore non e2e tests changes - "!tests/pytest/**" @@ -57,9 +58,14 @@ jobs: runs-on: ai-ubuntu-big-boy-8-core if: ${{ !github.event.pull_request.draft }} + permissions: + contents: read + packages: read + id-token: write # This is needed for OIDC federation. + steps: - name: Checkout Repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Setup Python uses: ./.github/actions/python @@ -69,6 +75,8 @@ jobs: with: registry1Username: ${{ secrets.IRON_BANK_ROBOT_USERNAME }} registry1Password: ${{ secrets.IRON_BANK_ROBOT_PASSWORD }} + ghToken: ${{ secrets.GITHUB_TOKEN }} + chainguardIdentity: ${{ secrets.CHAINGUARD_IDENTITY }} - name: Setup LFAI-API and Supabase uses: ./.github/actions/lfai-core @@ -97,5 +105,7 @@ jobs: # Test ########## - name: Test Text Backend + env: + LEAPFROGAI_MODEL: llama-cpp-python run: | python -m pytest ./tests/e2e/test_text_backend_full.py -v diff --git a/.github/workflows/e2e-text-embeddings.yaml b/.github/workflows/e2e-text-embeddings.yaml index 20f7eb97a..c61f77fcd 100644 --- a/.github/workflows/e2e-text-embeddings.yaml +++ b/.github/workflows/e2e-text-embeddings.yaml @@ -32,6 +32,7 @@ on: # Ignore local development files - "!.pre-commit-config.yaml" + - "!tasks.yaml" # Ignore non e2e tests changes - "!tests/pytest/**" @@ -58,9 +59,14 @@ jobs: runs-on: ai-ubuntu-big-boy-8-core if: ${{ !github.event.pull_request.draft }} + permissions: + contents: read + packages: read + id-token: write # This is needed for OIDC federation. 
+ steps: - name: Checkout Repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Setup Python uses: ./.github/actions/python @@ -71,6 +77,7 @@ jobs: registry1Username: ${{ secrets.IRON_BANK_ROBOT_USERNAME }} registry1Password: ${{ secrets.IRON_BANK_ROBOT_PASSWORD }} ghToken: ${{ secrets.GITHUB_TOKEN }} + chainguardIdentity: ${{ secrets.CHAINGUARD_IDENTITY }} - name: Setup LFAI-API and Supabase uses: ./.github/actions/lfai-core diff --git a/.github/workflows/e2e-vllm.yaml b/.github/workflows/e2e-vllm.yaml index 07e9f046f..585e8b6a8 100644 --- a/.github/workflows/e2e-vllm.yaml +++ b/.github/workflows/e2e-vllm.yaml @@ -32,6 +32,7 @@ on: # Ignore local development files - "!.pre-commit-config.yaml" + - "!tasks.yaml" # Ignore non e2e tests changes - "!tests/pytest/**" @@ -58,9 +59,14 @@ jobs: runs-on: ai-ubuntu-big-boy-8-core if: ${{ !github.event.pull_request.draft }} + permissions: + contents: read + packages: read + id-token: write # This is needed for OIDC federation. + steps: - name: Checkout Repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Setup Python uses: ./.github/actions/python @@ -73,7 +79,7 @@ jobs: registry1Username: ${{ secrets.IRON_BANK_ROBOT_USERNAME }} registry1Password: ${{ secrets.IRON_BANK_ROBOT_PASSWORD }} ghToken: ${{ secrets.GITHUB_TOKEN }} - udsCliVersion: 0.14.0 + chainguardIdentity: ${{ secrets.CHAINGUARD_IDENTITY }} ########## # vLLM @@ -82,4 +88,4 @@ jobs: ########## - name: Build vLLM run: | - make build-vllm LOCAL_VERSION=e2e-test + make build-vllm LOCAL_VERSION=e2e-test ZARF_CONFIG=packages/vllm/zarf-config.yaml diff --git a/.github/workflows/e2e-whisper.yaml b/.github/workflows/e2e-whisper.yaml index dee2cf45a..a4620b89a 100644 --- a/.github/workflows/e2e-whisper.yaml +++ b/.github/workflows/e2e-whisper.yaml @@ -32,6 +32,7 @@ on: # Ignore local development files - "!.pre-commit-config.yaml" + - "!tasks.yaml" # Ignore non e2e tests changes - "!tests/pytest/**" @@ -56,9 +57,14 @@ jobs: runs-on: ai-ubuntu-big-boy-8-core if: ${{ !github.event.pull_request.draft }} + permissions: + contents: read + packages: read + id-token: write # This is needed for OIDC federation. 
+ steps: - name: Checkout Repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Setup Python uses: ./.github/actions/python @@ -71,6 +77,7 @@ jobs: registry1Username: ${{ secrets.IRON_BANK_ROBOT_USERNAME }} registry1Password: ${{ secrets.IRON_BANK_ROBOT_PASSWORD }} ghToken: ${{ secrets.GITHUB_TOKEN }} + chainguardIdentity: ${{ secrets.CHAINGUARD_IDENTITY }} - name: Setup LFAI-API and Supabase uses: ./.github/actions/lfai-core diff --git a/.github/workflows/helm-lint.yaml b/.github/workflows/helm-lint.yaml index 14bd6b107..b5f085944 100644 --- a/.github/workflows/helm-lint.yaml +++ b/.github/workflows/helm-lint.yaml @@ -31,7 +31,7 @@ jobs: password: ${{ secrets.GITHUB_TOKEN }} - name: Checkout Repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Setup Helm uses: azure/setup-helm@fe7b79cd5ee1e45176fcad797de68ecaf3ca4814 # v4.2.0 diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index 7f51aec04..c7e43d2bf 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -16,7 +16,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout Repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Validate Lint uses: chartboost/ruff-action@e18ae971ccee1b2d7bbef113930f00c670b78da4 # v1.0.0 diff --git a/.github/workflows/markdown-lint.yaml b/.github/workflows/markdown-lint.yaml index 45fea49ce..b34888167 100644 --- a/.github/workflows/markdown-lint.yaml +++ b/.github/workflows/markdown-lint.yaml @@ -32,7 +32,7 @@ jobs: steps: - name: Checkout Repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - uses: DavidAnson/markdownlint-cli2-action@db43aef879112c3119a410d69f66701e0d530809 # v17.0.0 with: diff --git a/.github/workflows/nightly-snapshot-release.yaml b/.github/workflows/nightly-snapshot-release.yaml new file mode 100644 index 000000000..82936a339 --- /dev/null +++ b/.github/workflows/nightly-snapshot-release.yaml @@ -0,0 +1,212 @@ +name: nightly-snapshot-release + +on: + schedule: + - cron: "0 8 * * *" # Runs daily at 12 AM PST + workflow_dispatch: # trigger manually as needed + pull_request: + types: + - opened # default trigger + - reopened # default trigger + - synchronize # default trigger + - ready_for_review # don't run on draft PRs + - milestoned # allows us to trigger on bot PRs + paths: + - .github/workflows/nightly-snapshot-release.yaml + +concurrency: + group: nightly-snapshot-release-${{ github.ref }} + cancel-in-progress: true + +defaults: + run: + shell: bash + +env: + SNAPSHOT_VERSION: snapshot-latest + SNAPSHOT_SUB_REPOSITORY: /uds/snapshots/ + +permissions: + contents: read + packages: write + id-token: write # This is needed for OIDC federation. 
+ +jobs: + snapshot-release: + runs-on: ai-ubuntu-big-boy-8-core + name: nightly_snapshot_release + if: ${{ !github.event.pull_request.draft }} + + steps: + - name: Checkout Repo + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + + - name: Release LeapfrogAI ${{ env.SNAPSHOT_VERSION }} + uses: ./.github/actions/release + with: + releaseTag: ${{ env.SNAPSHOT_VERSION }} + subRepository: ${{ env.SNAPSHOT_SUB_REPOSITORY }} + registry1Username: ${{ secrets.IRON_BANK_ROBOT_USERNAME }} + registry1Password: ${{ secrets.IRON_BANK_ROBOT_PASSWORD }} + ghToken: ${{ secrets.GITHUB_TOKEN }} + chainguardIdentity: ${{ secrets.CHAINGUARD_IDENTITY }} + + test-snapshot: + runs-on: ai-ubuntu-big-boy-8-core + name: nightly_test_snapshot + if: ${{ !github.event.pull_request.draft }} + needs: snapshot-release + + permissions: + contents: read + packages: write + id-token: write # This is needed for OIDC federation. + + steps: + # Checkout main just to see the latest release in the release-please manifest + - name: Checkout Repo (main) + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + with: + ref: main + + - name: Get Latest Release Version + id: get_version + run: | + LFAI_VERSION=$(jq -r '.["."]' .github/.release-please-manifest.json) + echo "LFAI_VERSION=$LFAI_VERSION" >> $GITHUB_OUTPUT + + ################ + # LATEST RELEASE + ################ + + # Checkout the latest release in the release-please manifest + - name: Checkout Repo (v${{ steps.get_version.outputs.LFAI_VERSION }}) + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + with: + ref: v${{ steps.get_version.outputs.LFAI_VERSION }} + + - name: Setup UDS Cluster (v${{ steps.get_version.outputs.LFAI_VERSION }}) + uses: ./.github/actions/uds-cluster + with: + registry1Username: ${{ secrets.IRON_BANK_ROBOT_USERNAME }} + registry1Password: ${{ secrets.IRON_BANK_ROBOT_PASSWORD }} + ghToken: ${{ secrets.GITHUB_TOKEN }} + chainguardIdentity: ${{ secrets.CHAINGUARD_IDENTITY }} + + # This is needed due to delay in tagged releases versus the version refs within the UDS bundles + - name: Mutation of the UDS Bundle (v${{ steps.get_version.outputs.LFAI_VERSION }}) + run: | + uds zarf tools yq -i '.metadata.version = "v${{ steps.get_version.outputs.LFAI_VERSION }}"' bundles/latest/cpu/uds-bundle.yaml + + uds zarf tools yq -i '.packages[].ref |= sub("^[^ ]+-upstream$", "${{ steps.get_version.outputs.LFAI_VERSION }}-upstream")' bundles/latest/cpu/uds-bundle.yaml + + - name: Create and Deploy UDS Bundle (v${{ steps.get_version.outputs.LFAI_VERSION }}) + run: | + cd bundles/latest/cpu + uds create . 
--confirm && \ + uds deploy uds-bundle-leapfrogai-amd64-v${{ steps.get_version.outputs.LFAI_VERSION }}.tar.zst --confirm --no-progress && \ + rm -rf uds-bundle-leapfrogai-amd64-v${{ steps.get_version.outputs.LFAI_VERSION }}.tar.zst && \ + docker system prune -af + + ################# + # MAIN (SNAPSHOT) + ################# + + - name: Checkout Repo (main) + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + with: + ref: main + + - name: Print the Commit SHA (main) + run: | + COMMIT_SHA=$(git rev-parse HEAD) + echo "The latest commit on the main branch is: $COMMIT_SHA" + + - name: Setup Python (main) + uses: ./.github/actions/python + + # Set UDS CPU bundle refs and repositories to snapshot-latest + - name: Mutation of the UDS Bundle (main) + run: | + uds zarf tools yq -i '.metadata.version = "${{ env.SNAPSHOT_VERSION }}"' bundles/latest/cpu/uds-bundle.yaml + + uds zarf tools yq -i '.packages[].ref |= sub("^[^ ]+-upstream$", "${{ env.SNAPSHOT_VERSION }}-upstream")' bundles/latest/cpu/uds-bundle.yaml + + uds zarf tools yq -i '.packages[].repository |= sub("/uds/", "/uds/snapshots/")' bundles/latest/cpu/uds-bundle.yaml + + - name: Create and Deploy UDS Bundle (main) + run: | + cd bundles/latest/cpu + uds create . --confirm && \ + uds deploy uds-bundle-leapfrogai-amd64-${{ env.SNAPSHOT_VERSION }}.tar.zst --confirm --no-progress && \ + rm -rf uds-bundle-leapfrogai-amd64-${{ env.SNAPSHOT_VERSION }}.tar.zst && \ + docker system prune -af + + ######### + # TESTING + ######### + + - name: Generate Secrets + id: generate_secrets + run: | + ANON_KEY=$(uds zarf tools kubectl get secret supabase-bootstrap-jwt -n leapfrogai -o jsonpath='{.data.anon-key}' | base64 -d) + echo "::add-mask::$ANON_KEY" + echo "ANON_KEY=$ANON_KEY" >> $GITHUB_OUTPUT + FAKE_PASSWORD=$(cat <(openssl rand -base64 32 | tr -dc 'a-zA-Z0-9!@#$%^&*()_+-=[]{}|;:,.<>?' 
| head -c 20) <(echo '!@1Aa') | fold -w1 | shuf | tr -d '\n') + echo "::add-mask::$FAKE_PASSWORD" + echo "FAKE_PASSWORD=$FAKE_PASSWORD" >> $GITHUB_OUTPUT + SERVICE_KEY=$(uds zarf tools kubectl get secret -n leapfrogai supabase-bootstrap-jwt -o jsonpath={.data.service-key} | base64 -d) + echo "::add-mask::$SERVICE_KEY" + echo "SERVICE_KEY=$SERVICE_KEY" >> $GITHUB_OUTPUT + + - name: Verify Secrets + run: | + echo "FAKE_PASSWORD is set: ${{ steps.generate_secrets.outputs.FAKE_PASSWORD != '' }}" + echo "ANON_KEY is set: ${{ steps.generate_secrets.outputs.ANON_KEY != '' }}" + echo "SERVICE_KEY is set: ${{ steps.generate_secrets.outputs.SERVICE_KEY != '' }}" + + # Backends + - name: Run Backend E2E Tests + env: + ANON_KEY: ${{ steps.generate_secrets.outputs.ANON_KEY }} + SERVICE_KEY: ${{ steps.generate_secrets.outputs.SERVICE_KEY }} + LEAPFROGAI_MODEL: llama-cpp-python + run: | + python -m pytest -vvv -s ./tests/e2e + + - name: Setup Playwright + run: | + npm --prefix src/leapfrogai_ui ci + npx --prefix src/leapfrogai_ui playwright install + + - name: Run Playwright E2E Tests + env: + SERVICE_ROLE_KEY: ${{ steps.generate_secrets.outputs.SERVICE_KEY }} + FAKE_E2E_USER_PASSWORD: ${{ steps.generate_secrets.outputs.FAKE_PASSWORD }} + ANON_KEY: ${{ steps.generate_secrets.outputs.ANON_KEY }} + run: | + chmod +x ./.github/scripts/createUser.sh + ./.github/scripts/createUser.sh + + cp src/leapfrogai_ui/.env.example src/leapfrogai_ui/.env + mkdir -p playwright/auth + touch playwright/auth.user.json + + SERVICE_ROLE_KEY=$SERVICE_ROLE_KEY TEST_ENV=CI USERNAME=doug PASSWORD=$FAKE_E2E_USER_PASSWORD PUBLIC_SUPABASE_ANON_KEY=$ANON_KEY DEFAULT_MODEL=llama-cpp-python npm --prefix src/leapfrogai_ui run test:integration:ci + + - name: Archive Playwright Report + uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6 + if: ${{ !cancelled() }} + with: + name: playwright-report + path: src/leapfrogai_ui/e2e-report/ + retention-days: 30 + + - name: Get Cluster Debug Information + id: debug + if: ${{ !cancelled() }} + uses: defenseunicorns/uds-common/.github/actions/debug-output@e3008473beab00b12a94f9fcc7340124338d5c08 # v0.13.1 + + - name: Get Cluster Debug Information + if: ${{ !cancelled() && steps.debug.conclusion == 'success' }} + uses: defenseunicorns/uds-common/.github/actions/save-logs@e3008473beab00b12a94f9fcc7340124338d5c08 # v0.13.1 diff --git a/.github/workflows/nightly-uds-badge-verification.yaml b/.github/workflows/nightly-uds-badge-verification.yaml new file mode 100644 index 000000000..d500b9693 --- /dev/null +++ b/.github/workflows/nightly-uds-badge-verification.yaml @@ -0,0 +1,94 @@ +name: nightly-uds-badge-verification + +on: + schedule: + - cron: "0 11 * * *" # Runs daily at 3 AM PST + workflow_dispatch: # trigger manually as needed + pull_request: + paths: + - .github/workflows/nightly-uds-badge-verification.yaml + - tasks.yaml + +concurrency: + group: nightly-uds-badge-verification-${{ github.ref }} + cancel-in-progress: true + +defaults: + run: + shell: bash + +env: + SNAPSHOT_VERSION: snapshot-latest + +permissions: + contents: read + packages: read + id-token: write # This is needed for OIDC federation. 
+ +jobs: + uds-badge-verification: + runs-on: ai-ubuntu-big-boy-8-core + name: nightly_uds_badge_verification + + steps: + - name: Checkout Repo + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + with: + ref: main + + - name: Setup UDS Cluster + uses: ./.github/actions/uds-cluster + with: + registry1Username: ${{ secrets.IRON_BANK_ROBOT_USERNAME }} + registry1Password: ${{ secrets.IRON_BANK_ROBOT_PASSWORD }} + ghToken: ${{ secrets.GITHUB_TOKEN }} + chainguardIdentity: ${{ secrets.CHAINGUARD_IDENTITY }} + + - name: Print the Commit SHA + run: | + COMMIT_SHA=$(git rev-parse HEAD) + echo "The latest commit on the main branch is: $COMMIT_SHA" + + # Set UDS CPU bundle refs and repositories to snapshot-latest + - name: Mutation of the UDS Bundle + run: | + uds zarf tools yq -i '.metadata.version = "${{ env.SNAPSHOT_VERSION }}"' bundles/latest/cpu/uds-bundle.yaml + + uds zarf tools yq -i '.packages[].ref |= sub("^[^ ]+-upstream$", "${{ env.SNAPSHOT_VERSION }}-upstream")' bundles/latest/cpu/uds-bundle.yaml + + uds zarf tools yq -i '.packages[].repository |= sub("/uds/", "/uds/snapshots/")' bundles/latest/cpu/uds-bundle.yaml + + - name: Create and Deploy UDS Bundle (${{ env.SNAPSHOT_VERSION }}) + run: | + cd bundles/latest/cpu + uds create . --confirm && \ + uds deploy uds-bundle-leapfrogai-amd64-${{ env.SNAPSHOT_VERSION }}.tar.zst --confirm --no-progress && \ + rm -rf uds-bundle-leapfrogai-amd64-${{ env.SNAPSHOT_VERSION }}.tar.zst && \ + docker system prune -af + + # Workaround for handling emojis in the upstream badge verification UDS task + - name: Set Locale to UTF-8 + run: | + sudo apt-get update + sudo apt-get install -y locales + sudo locale-gen en_US.UTF-8 + export LANG=en_US.UTF-8 + export LANGUAGE=en_US:en + export LC_ALL=en_US.UTF-8 + + # Setup Python for the report cleaning script in the next step + - name: Set up Python + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 + with: + python-version-file: "pyproject.toml" + + - name: Run UDS Badge Verification Task + run: | + uds run nightly-uds-badge-verification --no-progress + + - name: Archive UDS Badge Verification Report + uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6 + with: + name: uds-badge-verification-report + path: reports + retention-days: 7 diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index 93d0f0832..f906032a3 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -31,7 +31,10 @@ on: - "!packages/ui/**" # Declare default permissions as read only. -permissions: read-all +permissions: + contents: read + packages: read + id-token: write # This is needed for OIDC federation. 
concurrency: group: pytest-integration-${{ github.ref }} @@ -43,7 +46,7 @@ jobs: steps: - name: Checkout Repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Setup Python uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 @@ -64,6 +67,7 @@ jobs: run: make test-api-unit env: LFAI_RUN_REPEATER_TESTS: true + DEV: true integration: runs-on: ai-ubuntu-big-boy-8-core @@ -74,7 +78,7 @@ jobs: steps: - name: Checkout Repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Setup Python uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 @@ -97,6 +101,7 @@ jobs: registry1Username: ${{ secrets.IRON_BANK_ROBOT_USERNAME }} registry1Password: ${{ secrets.IRON_BANK_ROBOT_PASSWORD }} ghToken: ${{ secrets.GITHUB_TOKEN }} + chainguardIdentity: ${{ secrets.CHAINGUARD_IDENTITY }} - name: Setup API and Supabase uses: ./.github/actions/lfai-core diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 298f361e6..58336ef0a 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -1,12 +1,11 @@ name: Publish Release Artifacts -on: - workflow_call +on: workflow_call permissions: contents: read packages: write - + id-token: write # This is needed for OIDC federation. jobs: build-and-publish-artifacts: @@ -14,25 +13,7 @@ jobs: steps: - name: Checkout Repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - - - name: Login to GitHub Container Registry - uses: docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d # v3.0.0 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Set up QEMU - uses: docker/setup-qemu-action@68827325e0b33c7199eb31dd4e31fbe9023e06e3 # v3.0.0 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@0d103c3126aa41d772a8362f6aa67afac040f80c # v3.1.0 - - - name: Setup Python - uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 - with: - python-version-file: "pyproject.toml" + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Get Version id: get_version @@ -40,123 +21,11 @@ jobs: LFAI_VERSION=$(jq -r '.["."]' .github/.release-please-manifest.json) echo "LFAI_VERSION=$LFAI_VERSION" >> $GITHUB_OUTPUT - - name: Instal Python Deps - run: | - python -m pip install ".[dev,dev-whisper,dev-vllm]" - - - name: Build and Publish k3d-gpu image - run: | - cd packages/k3d-gpu - docker build \ - --platform linux/amd64 \ - -t ghcr.io/defenseunicorns/leapfrogai/k3d-gpu:${{ steps.get_version.outputs.LFAI_VERSION }} . - docker push ghcr.io/defenseunicorns/leapfrogai/k3d-gpu:${{ steps.get_version.outputs.LFAI_VERSION }} - cd ../.. - - - name: Download Python Wheels and Publish Builder Image - run: | - docker buildx build --platform amd64,arm64 -t ghcr.io/defenseunicorns/leapfrogai/leapfrogai-sdk:${{ steps.get_version.outputs.LFAI_VERSION }} --push -f src/leapfrogai_sdk/Dockerfile . 
- - - name: Install Zarf - uses: defenseunicorns/setup-zarf@10e539efed02f75ec39eb8823e22a5c795f492ae #v1.0.1 - - - name: Build and Publish API - run: | - docker buildx build --platform amd64,arm64 --build-arg LOCAL_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} -t ghcr.io/defenseunicorns/leapfrogai/leapfrogai-api:${{ steps.get_version.outputs.LFAI_VERSION }} --push -f packages/api/Dockerfile . - docker buildx build --platform amd64,arm64 -t ghcr.io/defenseunicorns/leapfrogai/api-migrations:${{ steps.get_version.outputs.LFAI_VERSION }} --push -f Dockerfile.migrations --build-arg="MIGRATIONS_DIR=packages/api/supabase/migrations" . - - zarf package create packages/api --set=IMAGE_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} --architecture amd64 --flavor upstream --confirm - zarf package create packages/api --set=IMAGE_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} --architecture arm64 --flavor upstream --confirm - - zarf package publish zarf-package-leapfrogai-api-amd64-${{ steps.get_version.outputs.LFAI_VERSION }}.tar.zst oci://ghcr.io/defenseunicorns/packages/uds/leapfrogai - zarf package publish zarf-package-leapfrogai-api-arm64-${{ steps.get_version.outputs.LFAI_VERSION }}.tar.zst oci://ghcr.io/defenseunicorns/packages/uds/leapfrogai - - docker image prune -af - rm zarf-package-leapfrogai-api-*.tar.zst - - - name: Build and Publish UI - run: | - docker buildx build --platform amd64,arm64 -t ghcr.io/defenseunicorns/leapfrogai/leapfrogai-ui:${{ steps.get_version.outputs.LFAI_VERSION }} --push src/leapfrogai_ui - docker buildx build --platform amd64,arm64 -t ghcr.io/defenseunicorns/leapfrogai/ui-migrations:${{ steps.get_version.outputs.LFAI_VERSION }} --push -f Dockerfile.migrations --build-arg="MIGRATIONS_DIR=src/leapfrogai_ui/supabase/migrations" . - - zarf package create packages/ui --set=IMAGE_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} --architecture amd64 --flavor upstream --confirm - zarf package create packages/ui --set=IMAGE_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} --architecture arm64 --flavor upstream --confirm - - zarf package publish zarf-package-leapfrogai-ui-amd64-${{ steps.get_version.outputs.LFAI_VERSION }}.tar.zst oci://ghcr.io/defenseunicorns/packages/uds/leapfrogai - zarf package publish zarf-package-leapfrogai-ui-arm64-${{ steps.get_version.outputs.LFAI_VERSION }}.tar.zst oci://ghcr.io/defenseunicorns/packages/uds/leapfrogai - - docker image prune -af - rm zarf-package-leapfrogai-ui-*.tar.zst - - - name: Build and Publish Supabase - run: | - docker buildx build --platform amd64,arm64 -t ghcr.io/defenseunicorns/leapfrogai/supabase-migrations:${{ steps.get_version.outputs.LFAI_VERSION }} --push -f Dockerfile.migrations --build-arg="MIGRATIONS_DIR=packages/supabase/migrations" . 
- - zarf package create packages/supabase --set=IMAGE_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} --architecture amd64 --flavor upstream --confirm - zarf package create packages/supabase --set=IMAGE_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} --architecture arm64 --flavor upstream --confirm - - zarf package publish zarf-package-supabase-amd64-${{ steps.get_version.outputs.LFAI_VERSION }}.tar.zst oci://ghcr.io/defenseunicorns/packages/uds/leapfrogai - zarf package publish zarf-package-supabase-arm64-${{ steps.get_version.outputs.LFAI_VERSION }}.tar.zst oci://ghcr.io/defenseunicorns/packages/uds/leapfrogai - - - name: Build and Publish repeater - run: | - docker buildx build --platform amd64,arm64 --build-arg LOCAL_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} -t ghcr.io/defenseunicorns/leapfrogai/repeater:${{ steps.get_version.outputs.LFAI_VERSION }} --push -f packages/repeater/Dockerfile . - - zarf package create packages/repeater --set=IMAGE_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} --architecture amd64 --flavor upstream --confirm - zarf package create packages/repeater --set=IMAGE_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} --architecture arm64 --flavor upstream --confirm - - zarf package publish zarf-package-repeater-amd64-${{ steps.get_version.outputs.LFAI_VERSION }}.tar.zst oci://ghcr.io/defenseunicorns/packages/uds/leapfrogai - zarf package publish zarf-package-repeater-arm64-${{ steps.get_version.outputs.LFAI_VERSION }}.tar.zst oci://ghcr.io/defenseunicorns/packages/uds/leapfrogai - - docker image prune -af - rm zarf-package-repeater-*.tar.zst - - - name: Build and Publish llama - run: | - docker buildx build --platform amd64,arm64 --build-arg LOCAL_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} -t ghcr.io/defenseunicorns/leapfrogai/llama-cpp-python:${{ steps.get_version.outputs.LFAI_VERSION }} --push -f packages/llama-cpp-python/Dockerfile . - - zarf package create packages/llama-cpp-python --set=IMAGE_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} --architecture amd64 --flavor upstream --confirm - zarf package create packages/llama-cpp-python --set=IMAGE_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} --architecture arm64 --flavor upstream --confirm - - zarf package publish zarf-package-llama-cpp-python-amd64-${{ steps.get_version.outputs.LFAI_VERSION }}.tar.zst oci://ghcr.io/defenseunicorns/packages/uds/leapfrogai - zarf package publish zarf-package-llama-cpp-python-arm64-${{ steps.get_version.outputs.LFAI_VERSION }}.tar.zst oci://ghcr.io/defenseunicorns/packages/uds/leapfrogai - - docker image prune -af - rm zarf-package-llama-*.tar.zst - - - name: Build and Publish vLLM - run: | - docker buildx build --build-arg LOCAL_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} -t ghcr.io/defenseunicorns/leapfrogai/vllm:${{ steps.get_version.outputs.LFAI_VERSION }} --push -f packages/vllm/Dockerfile . 
- - zarf package create packages/vllm --set=IMAGE_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} --flavor upstream --confirm - - zarf package publish zarf-package-vllm-amd64-${{ steps.get_version.outputs.LFAI_VERSION }}.tar.zst oci://ghcr.io/defenseunicorns/packages/uds/leapfrogai - - docker image prune -af - rm zarf-package-vllm-*.tar.zst - - - name: Build and Publish Text-Embeddings - run: | - docker buildx build --platform amd64,arm64 --build-arg LOCAL_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} -t ghcr.io/defenseunicorns/leapfrogai/text-embeddings:${{ steps.get_version.outputs.LFAI_VERSION }} --push -f packages/text-embeddings/Dockerfile . - - zarf package create packages/text-embeddings --set=IMAGE_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} --architecture amd64 --flavor upstream --confirm - zarf package create packages/text-embeddings --set=IMAGE_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} --architecture arm64 --flavor upstream --confirm - - zarf package publish zarf-package-text-embeddings-amd64-${{ steps.get_version.outputs.LFAI_VERSION }}.tar.zst oci://ghcr.io/defenseunicorns/packages/uds/leapfrogai - zarf package publish zarf-package-text-embeddings-arm64-${{ steps.get_version.outputs.LFAI_VERSION }}.tar.zst oci://ghcr.io/defenseunicorns/packages/uds/leapfrogai - - docker image prune -af - rm zarf-package-text-embeddings-*.tar.zst - - - name: Build and Publish whisper - run: | - docker buildx build --platform amd64,arm64 --build-arg LOCAL_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} -t ghcr.io/defenseunicorns/leapfrogai/whisper:${{ steps.get_version.outputs.LFAI_VERSION }} --push -f packages/whisper/Dockerfile . - - zarf package create packages/whisper --set=IMAGE_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} --architecture amd64 --flavor upstream --confirm - zarf package create packages/whisper --set=IMAGE_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} --architecture arm64 --flavor upstream --confirm - - zarf package publish zarf-package-whisper-amd64-${{ steps.get_version.outputs.LFAI_VERSION }}.tar.zst oci://ghcr.io/defenseunicorns/packages/uds/leapfrogai - zarf package publish zarf-package-whisper-arm64-${{ steps.get_version.outputs.LFAI_VERSION }}.tar.zst oci://ghcr.io/defenseunicorns/packages/uds/leapfrogai - - docker image prune -af - rm zarf-package-whisper-*.tar.zst + - name: Release LeapfrogAI ${{ steps.get_version.outputs.LFAI_VERSION }} + uses: ./.github/actions/release + with: + releaseTag: ${{ steps.get_version.outputs.LFAI_VERSION }} + registry1Username: ${{ secrets.IRON_BANK_ROBOT_USERNAME }} + registry1Password: ${{ secrets.IRON_BANK_ROBOT_PASSWORD }} + ghToken: ${{ secrets.GITHUB_TOKEN }} + chainguardIdentity: ${{ secrets.CHAINGUARD_IDENTITY }} diff --git a/.github/workflows/scorecard.yaml b/.github/workflows/scorecard.yaml index 3454adae3..527469efd 100644 --- a/.github/workflows/scorecard.yaml +++ b/.github/workflows/scorecard.yaml @@ -23,7 +23,7 @@ jobs: steps: - name: "Checkout code" - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 with: persist-credentials: false diff --git a/.github/workflows/secret-scan.yaml b/.github/workflows/secret-scan.yaml index 4270bbcca..9e15c4693 100644 --- a/.github/workflows/secret-scan.yaml +++ b/.github/workflows/secret-scan.yaml @@ -7,7 +7,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 
# v4.1.1 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 with: fetch-depth: 0 - name: Secret Scanning diff --git a/.github/workflows/uds-lint.yaml b/.github/workflows/uds-lint.yaml index 8f2e6834c..168a43818 100644 --- a/.github/workflows/uds-lint.yaml +++ b/.github/workflows/uds-lint.yaml @@ -22,7 +22,7 @@ jobs: steps: - name: Checkout Repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Set up Python uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 @@ -33,7 +33,7 @@ jobs: run: pip install check-jsonschema==0.28.0 - name: Download UDS Bundle Schema - run: curl -o uds.schema.json https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.14.0/uds.schema.json + run: curl -o uds.schema.json https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/uds.schema.json - name: Validate uds-bundle.yaml (dev) if: always() @@ -46,3 +46,11 @@ jobs: run: | check-jsonschema bundles/latest/gpu/uds-bundle.yaml --schemafile uds.schema.json check-jsonschema bundles/latest/cpu/uds-bundle.yaml --schemafile uds.schema.json + + - name: Download UDS Tasks Schema + run: curl -o tasks.schema.json https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/tasks.schema.json + + - name: Validate tasks.yaml + if: always() + run: | + check-jsonschema tasks.yaml --schemafile tasks.schema.json diff --git a/.github/workflows/ui-test.yaml b/.github/workflows/ui-test.yaml index 24414ad52..19370a35b 100644 --- a/.github/workflows/ui-test.yaml +++ b/.github/workflows/ui-test.yaml @@ -23,7 +23,7 @@ jobs: steps: - name: Checkout Repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - uses: actions/setup-node@0a44ba7841725637a19e28fa30b79a866c81b0a6 # v4.0.4 with: diff --git a/.github/workflows/e2e-registry1-weekly.yaml b/.github/workflows/weekly-registry1-flavor-test.yaml similarity index 55% rename from .github/workflows/e2e-registry1-weekly.yaml rename to .github/workflows/weekly-registry1-flavor-test.yaml index 65f4c5897..f7b583546 100644 --- a/.github/workflows/e2e-registry1-weekly.yaml +++ b/.github/workflows/weekly-registry1-flavor-test.yaml @@ -1,8 +1,8 @@ -name: e2e-registry1-weekly +name: weekly-registry1-flavor-test on: schedule: - - cron: "0 0 * * 6" # Run every Sunday at 12 AM EST + - cron: "0 8 * * 0" # Run every Sunday at 12 AM PST workflow_dispatch: # trigger manually as needed pull_request: types: @@ -12,11 +12,11 @@ on: - ready_for_review # don't run on draft PRs - milestoned # allows us to trigger on bot PRs paths: - - .github/workflows/e2e-registry1-weekly.yaml + - .github/workflows/weekly-registry1-flavor-test.yaml - bundles/latest/** concurrency: - group: e2e-registry1-weekly-${{ github.ref }} + group: weekly-registry1-flavor-test-${{ github.ref }} cancel-in-progress: true defaults: @@ -24,67 +24,98 @@ defaults: shell: bash jobs: - test-flavors: + registry1-flavor-test: runs-on: ai-ubuntu-big-boy-8-core - name: e2e_registry1_weekly + name: weekly_registry1_flavor_test if: ${{ !github.event.pull_request.draft }} permissions: contents: read - packages: write + packages: read id-token: write # This is needed for OIDC federation. 
steps: - - name: Checkout Repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + # Checkout main just to see the latest release in the release-please manifest + - name: Checkout Repo (main) + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 with: - # x-release-please-start-version - ref: "caf4f9c3093a55a003b49fcbf05c03221be6a232" # 0.12.2 w/ integration tests turned-on - # x-release-please-end + ref: main - - name: Setup Python - uses: ./.github/actions/python + - name: Get Latest Release Version + id: get_version + run: | + LFAI_VERSION=$(jq -r '.["."]' .github/.release-please-manifest.json) + echo "LFAI_VERSION=$LFAI_VERSION" >> $GITHUB_OUTPUT - - name: Install API and SDK Dev Dependencies - run : | - make install + ################ + # LATEST RELEASE + ################ + + - name: Checkout Repo + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + with: + fetch-tags: true + ref: v${{ steps.get_version.outputs.LFAI_VERSION }} - - name: Setup UDS Cluster - uses: ./.github/actions/uds-cluster + - name: Setup UDS Environment + uses: defenseunicorns/uds-common/.github/actions/setup@24c8a2a48eeb33773b76b3587c489cb17496c9e0 # v0.12.0 with: registry1Username: ${{ secrets.IRON_BANK_ROBOT_USERNAME }} registry1Password: ${{ secrets.IRON_BANK_ROBOT_PASSWORD }} ghToken: ${{ secrets.GITHUB_TOKEN }} - udsCliVersion: 0.14.0 + chainguardIdentity: ${{ secrets.CHAINGUARD_IDENTITY }} - - name: Create UDS Cluster - shell: bash + - name: Setup Python + uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c #v5.0.0 + with: + python-version-file: "pyproject.toml" + + - name: Install Python Dependencies + run: pip install ".[dev]" "src/leapfrogai_api" "src/leapfrogai_sdk" --no-cache-dir + + - name: Mutation of the Zarf Packages run: | - UDS_CONFIG=.github/config/uds-config.yaml make create-uds-cpu-cluster + uds zarf tools yq -i ' + .components[].images[0] |= sub(":v[0-9\.]+$", ":v${{ steps.get_version.outputs.LFAI_VERSION }}") + ' packages/api/zarf.yaml + uds zarf tools yq -i '.api.image.tag = "v${{ steps.get_version.outputs.LFAI_VERSION }}"' packages/api/values/registry1-values.yaml - - name: Setup Playwright + - name: Print the Modified Zarf Packages run: | - npm --prefix src/leapfrogai_ui ci - npx --prefix src/leapfrogai_ui playwright install + cat packages/api/zarf.yaml + cat packages/api/values/registry1-values.yaml - - name: Create Registry1 Packages + - name: Create Registry1 Zarf Packages run: | - LOCAL_VERSION=registry1 FLAVOR=registry1 make build-api + uds zarf package create packages/api --set image_version="${{ steps.get_version.outputs.LFAI_VERSION }}" --flavor registry1 -a amd64 --confirm # Mutate UDS bundle definition to use Registry1 packages - - name: Mutation to Registry1 Bundle - # TODO: fix bundle path + # Mutate non-Registry1 packages to be the current tagged version + - name: Mutation of the UDS Bundle run: | - uds zarf tools yq -i '.packages[1] |= del(.repository)' bundles/latest/cpu/uds-bundle.yaml - uds zarf tools yq -i '.packages[1] |= .ref = "registry1"' bundles/latest/cpu/uds-bundle.yaml - uds zarf tools yq -i '.packages[1] |= .path = "../../../packages/api"' bundles/latest/cpu/uds-bundle.yaml uds zarf tools yq -i '.metadata.version = "registry1"' bundles/latest/cpu/uds-bundle.yaml - - name: Create and Deploy Bundle + uds zarf tools yq -i '.packages[].ref |= sub("^[^ ]+-upstream$", "${{ steps.get_version.outputs.LFAI_VERSION }}-upstream")' bundles/latest/cpu/uds-bundle.yaml + + uds zarf tools yq -i 
'.packages[1] |= del(.repository)' bundles/latest/cpu/uds-bundle.yaml + uds zarf tools yq -i '.packages[1] |= .ref = "${{ steps.get_version.outputs.LFAI_VERSION }}"' bundles/latest/cpu/uds-bundle.yaml + uds zarf tools yq -i '.packages[1] |= .path = "../../../"' bundles/latest/cpu/uds-bundle.yaml + + - name: Print the Modified UDS Bundle + run: | + cat bundles/latest/cpu/uds-config.yaml + cat bundles/latest/cpu/uds-bundle.yaml + + - name: Create UDS Cluster + shell: bash + run: | + UDS_CONFIG=.github/config/uds-config.yaml make create-uds-cpu-cluster + + - name: Create and Deploy Registry1 Bundle run: | cd bundles/latest/cpu uds create . --confirm && \ - uds deploy uds-bundle-leapfrogai-amd64-registry1.tar.zst --confirm --no-progress && \ + uds deploy uds-bundle-leapfrogai-amd64-registry1.tar.zst --confirm --no-progress --log-level debug && \ rm -rf uds-bundle-leapfrogai-amd64-registry1.tar.zst && \ docker system prune -af @@ -107,32 +138,19 @@ jobs: echo "ANON_KEY is set: ${{ steps.generate_secrets.outputs.ANON_KEY != '' }}" echo "SERVICE_KEY is set: ${{ steps.generate_secrets.outputs.SERVICE_KEY != '' }}" - - name: Run Integration Tests - env: - SUPABASE_ANON_KEY: ${{ steps.generate_secrets.outputs.ANON_KEY }} - SUPABASE_PASS: ${{ steps.generate_secrets.outputs.FAKE_PASSWORD }} - SUPABASE_EMAIL: integration@uds.dev - SUPABASE_URL: https://supabase-kong.uds.dev - # Turn off NIAH tests that are not applicable for integration testing using the Repeater model - LFAI_RUN_NIAH_TESTS: "false" - run: | - uds zarf connect --name=llama-cpp-python-model --namespace=leapfrogai --local-port=50051 --remote-port=50051 & - while ! nc -z localhost 50051; do sleep 1; done - - make test-user-pipeline - env $(cat .env | xargs) python -m pytest -v -s tests/integration/api - # Backends - name: Run Backend E2E Tests env: ANON_KEY: ${{ steps.generate_secrets.outputs.ANON_KEY }} SERVICE_KEY: ${{ steps.generate_secrets.outputs.SERVICE_KEY }} + LEAPFROGAI_MODEL: llama-cpp-python run: | - python -m pytest ./tests/e2e/test_llama.py -vv - python -m pytest ./tests/e2e/test_text_embeddings.py -vv - python -m pytest ./tests/e2e/test_whisper.py -vv - python -m pytest ./tests/e2e/test_supabase.py -vv - python -m pytest ./tests/e2e/test_api.py -vv + python -m pytest -vvv -s ./tests/e2e + + - name: Setup Playwright + run: | + npm --prefix src/leapfrogai_ui ci + npx --prefix src/leapfrogai_ui playwright install - name: Run Playwright E2E Tests env: @@ -156,3 +174,12 @@ jobs: name: playwright-report path: src/leapfrogai_ui/e2e-report/ retention-days: 30 + + - name: Get Cluster Debug Information + id: debug + if: ${{ !cancelled() }} + uses: defenseunicorns/uds-common/.github/actions/debug-output@e3008473beab00b12a94f9fcc7340124338d5c08 # v0.13.1 + + - name: Get Cluster Debug Information + if: ${{ !cancelled() && steps.debug.conclusion == 'success' }} + uses: defenseunicorns/uds-common/.github/actions/save-logs@e3008473beab00b12a94f9fcc7340124338d5c08 # v0.13.1 diff --git a/.github/workflows/zarf-lint.yaml b/.github/workflows/zarf-lint.yaml index 2abf681b8..1a3e232de 100644 --- a/.github/workflows/zarf-lint.yaml +++ b/.github/workflows/zarf-lint.yaml @@ -22,7 +22,7 @@ jobs: steps: - name: Checkout Repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Set up Python uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 @@ -31,7 +31,7 @@ jobs: - name: Download Zarf Package Schema # TODO: renovate setup 
- run: curl -o zarf.schema.json https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.14.0/zarf.schema.json + run: curl -o zarf.schema.json https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/zarf.schema.json - name: Install jsonschema run: pip install check-jsonschema==0.28.0 diff --git a/.gitignore b/.gitignore index 645bd6ff5..d0c8a20f3 100644 --- a/.gitignore +++ b/.gitignore @@ -34,6 +34,7 @@ node_modules package.json package-lock.json **/*.schema.json +reports # local model and tokenizer files *.bin diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6caadd6c8..693b07a28 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -102,7 +102,7 @@ repos: if [ -f "$(git rev-parse --show-toplevel)/$FILE" ]; then echo "$FILE already exists in the root of the git project, skipping download." else - curl -o "$FILE" https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.14.0/zarf.schema.json + curl -o "$FILE" https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/zarf.schema.json fi' language: system @@ -125,7 +125,7 @@ repos: if [ -f "$(git rev-parse --show-toplevel)/$FILE" ]; then echo "$FILE already exists in the root of the git project, skipping download." else - curl -o "$FILE" https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.14.0/uds.schema.json + curl -o "$FILE" https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/uds.schema.json fi' language: system @@ -137,3 +137,26 @@ repos: files: "uds-bundle.yaml" types: [yaml] args: ["--schemafile", "uds-v0.14.0.schema.json"] + + # UDS TASKS CHECK + - repo: local + hooks: + - id: download-schema + name: "Download UDS Tasks Schema" + entry: | + bash -c 'FILE="tasks-v0.14.0.schema.json" + if [ -f "$(git rev-parse --show-toplevel)/$FILE" ]; then + echo "$FILE already exists in the root of the git project, skipping download." + else + curl -o "$FILE" https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/tasks.schema.json + fi' + language: system + + - repo: https://github.com/python-jsonschema/check-jsonschema + rev: 0.14.0 + hooks: + - id: check-jsonschema + name: "Validate UDS Bundles Against Schema" + files: "tasks.yaml" + types: [yaml] + args: ["--schemafile", "tasks-v0.14.0.schema.json"] diff --git a/Makefile b/Makefile index bf8afb315..da9266246 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,10 @@ ARCH ?= amd64 +FLAVOR ?= upstream REG_PORT ?= 5000 REG_NAME ?= registry LOCAL_VERSION ?= $(shell git rev-parse --short HEAD) DOCKER_FLAGS := ZARF_FLAGS := -FLAVOR := upstream SILENT_DOCKER_FLAGS := --quiet SILENT_ZARF_FLAGS := --no-progress -l warn --no-color MAX_JOBS := 4 @@ -55,24 +55,34 @@ build-supabase: local-registry docker-supabase docker-api: local-registry sdk-wheel @echo $(DOCKER_FLAGS) @echo $(ZARF_FLAGS) -ifeq ($(FLAVOR),upstream) + ## Build the API image (and tag it for the local registry) docker build ${DOCKER_FLAGS} --platform=linux/${ARCH} --build-arg LOCAL_VERSION=${LOCAL_VERSION} -t ghcr.io/defenseunicorns/leapfrogai/leapfrogai-api:${LOCAL_VERSION} -f packages/api/Dockerfile . docker tag ghcr.io/defenseunicorns/leapfrogai/leapfrogai-api:${LOCAL_VERSION} localhost:${REG_PORT}/defenseunicorns/leapfrogai/leapfrogai-api:${LOCAL_VERSION} -endif + ## Build the migration container for this version of the API docker build ${DOCKER_FLAGS} --platform=linux/${ARCH} -t ghcr.io/defenseunicorns/leapfrogai/api-migrations:${LOCAL_VERSION} -f Dockerfile.migrations --build-arg="MIGRATIONS_DIR=packages/api/supabase/migrations" . 
docker tag ghcr.io/defenseunicorns/leapfrogai/api-migrations:${LOCAL_VERSION} localhost:${REG_PORT}/defenseunicorns/leapfrogai/api-migrations:${LOCAL_VERSION} -build-api: local-registry docker-api ## Build the leapfrogai_api container and Zarf package +## If registry1, don't locally Docker-build anything +ifeq ($(FLAVOR),upstream) + DOCKER_TARGETS := local-registry docker-api +else + DOCKER_TARGETS := +endif + +build-api: $(DOCKER_TARGETS) ## Build the leapfrogai_api container and Zarf package + ## Only push to local registry and build if this is an upstream-flavored package ifeq ($(FLAVOR),upstream) ## Push the images to the local registry (Zarf is super slow if the image is only in the local daemon) docker push ${DOCKER_FLAGS} localhost:${REG_PORT}/defenseunicorns/leapfrogai/leapfrogai-api:${LOCAL_VERSION} -endif docker push ${DOCKER_FLAGS} localhost:${REG_PORT}/defenseunicorns/leapfrogai/api-migrations:${LOCAL_VERSION} - ## Build the Zarf package uds zarf package create packages/api --flavor ${FLAVOR} -a ${ARCH} -o packages/api --registry-override=ghcr.io=localhost:${REG_PORT} --insecure --set IMAGE_VERSION=${LOCAL_VERSION} ${ZARF_FLAGS} --confirm +else + ## Build the registry1 Zarf package + ZARF_CONFIG=packages/api/zarf-config.yaml uds zarf package create packages/api --flavor ${FLAVOR} -a ${ARCH} -o packages/api ${ZARF_FLAGS} --confirm +endif docker-ui: ## Build the UI image (and tag it for the local registry) @@ -113,7 +123,7 @@ build-vllm: local-registry docker-vllm ## Build the vllm container and Zarf pack docker push ${DOCKER_FLAGS} localhost:${REG_PORT}/defenseunicorns/leapfrogai/vllm:${LOCAL_VERSION} ## Build the Zarf package - uds zarf package create packages/vllm --flavor ${FLAVOR} -a ${ARCH} -o packages/vllm --registry-override=ghcr.io=localhost:${REG_PORT} --insecure --set IMAGE_VERSION=${LOCAL_VERSION} ${ZARF_FLAGS} --confirm + ZARF_CONFIG=packages/vllm/zarf-config.yaml uds zarf package create packages/vllm --flavor ${FLAVOR} -a ${ARCH} -o packages/vllm --registry-override=ghcr.io=localhost:${REG_PORT} --insecure --set IMAGE_VERSION=${LOCAL_VERSION} ${ZARF_FLAGS} --confirm docker-text-embeddings: sdk-wheel ## Build the image (and tag it for the local registry) @@ -253,7 +263,7 @@ silent-deploy-llama-cpp-python-package: silent-deploy-vllm-package: @echo "Starting VLLM deployment..." 
@mkdir -p .logs - @uds zarf package deploy packages/vllm/zarf-package-vllm-${ARCH}-${LOCAL_VERSION}.tar.zst ${ZARF_FLAGS} --confirm > .logs/deploy-vllm.log 2>&1 + @ZARF_CONFIG=packages/vllm/zarf-config.yaml uds zarf package deploy packages/vllm/zarf-package-vllm-${ARCH}-${LOCAL_VERSION}.tar.zst ${ZARF_FLAGS} --confirm > .logs/deploy-vllm.log 2>&1 @echo "VLLM deployment completed" silent-deploy-text-embeddings-package: diff --git a/README.md b/README.md index 7c09b075b..2429da763 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,9 @@ ![LeapfrogAI](https://github.com/defenseunicorns/leapfrogai/raw/main/docs/imgs/leapfrogai.png) [![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/defenseunicorns/leapfrogai/badge)](https://api.securityscorecards.dev/projects/github.com/defenseunicorns/leapfrogai) +[![Nightly Snapshot Tests](https://github.com/defenseunicorns/leapfrogai/actions/workflows/nightly-snapshot-release.yaml/badge.svg?branch=main)](https://github.com/defenseunicorns/leapfrogai/actions/workflows/nightly-snapshot-release.yaml) +[![Nightly Made for UDS Test](https://github.com/defenseunicorns/leapfrogai/actions/workflows/nightly-uds-badge-verification.yaml/badge.svg?branch=main)](https://github.com/defenseunicorns/leapfrogai/actions/workflows/nightly-uds-badge-verification.yaml) +[![Weekly Registry1 Test](https://github.com/defenseunicorns/leapfrogai/actions/workflows/weekly-registry1-flavor-test.yaml/badge.svg?branch=main)](https://github.com/defenseunicorns/leapfrogai/actions/workflows/weekly-registry1-flavor-test.yaml) ## Table of Contents diff --git a/adr/0004-rag-eval-framework.md b/adr/0004-rag-eval-toolset.md similarity index 100% rename from adr/0004-rag-eval-framework.md rename to adr/0004-rag-eval-toolset.md diff --git a/adr/0007-rag-eval-framework.md b/adr/0007-rag-eval-framework.md new file mode 100644 index 000000000..a1be25465 --- /dev/null +++ b/adr/0007-rag-eval-framework.md @@ -0,0 +1,233 @@ +# LeapfrogAI RAG Evaluation Framework MVP + +## Table of Contents + +- [LeapfrogAI RAG Evaluation Framework MVP](#leapfrogai-rag-evaluation-framework-mvp) + - [Table of Contents](#table-of-contents) + - [Status](#status) + - [Context](#context) + - [Decisions and Rationale](#decisions-and-rationale) + - [Tools](#tools) + - [Datasets](#datasets) + - [Models to Evaluate](#models-to-evaluate) + - [LLM-as-Judge / LLMs-as-Jury](#llm-as-judge--llms-as-jury) + - [Metrics / Evaluations](#metrics--evaluations) + - [Execution / Delivery](#execution--delivery) + - [Model Card](#model-card) + - [Related ADRs](#related-adrs) + - [References](#references) + +## Status + +APPROVED + +## Context + +LeapfrogAI uses RAG to provide context-aware responses to users who have specific data they need to reference. In order to make sure RAG is operating at the levels we need it to, we need to get measurable feedback from our RAG pipeline to make it better. We also need a standard to show to mission heroes that we are in fact operating at that level. We do this with RAG-focused evaluations. Additionally, utilizing evaluations as a whole and developing a standard approach will allow customizations of RAG and its components (for various deployment scenarios) to be better tested and evaluated against. This ADR documents all of the decisions and lessons learned for enabling a full-scale RAG evaluations pipeline MVP. + +## Decisions and Rationale + +This section covers all of the decision points that needed to be made along side an explanation of how those decisions were made. 
Each section covers a different aspect of the RAG evaluations framework. + +### Tools +
+ Details + + #### Decision + The primary toolset for architecting RAG evaluations will be **[DeepEval](https://docs.confident-ai.com/)**. + #### Rationale + Please see the [RAG Evaluations Toolset](/adr/0004-rag-eval-toolset.md) ADR for an in-depth discussion of why DeepEval was chosen over other alternatives. +
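To make the selection concrete, below is a minimal sketch of how a single RAG test case might be scored with DeepEval. The question, answer, and retrieved-context strings are placeholders, and the default judge configuration is an assumption rather than the project's final setup.

```python
from deepeval import evaluate
from deepeval.metrics import ContextualRecallMetric
from deepeval.test_case import LLMTestCase

# Placeholder test case: in practice these fields would come from LFAI_RAG_qa_v1
# and from the LeapfrogAI API's actual response and retrieved chunks.
test_case = LLMTestCase(
    input="What does the policy say about data retention?",
    actual_output="Records must be retained for three years.",
    expected_output="The policy requires a three-year retention period.",
    retrieval_context=["Section 4.2: all records are retained for three years."],
)

# Scores how much of the expected output is supported by the retrieved context.
metric = ContextualRecallMetric(threshold=0.7)
evaluate(test_cases=[test_case], metrics=[metric])
```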
+ +### Datasets +
+ Details + + #### Decision + To handle RAG evaluations, two types of datasets were determined to be needed: + - Question/Answer (QA) + - Needle in a Haystack (NIAH) + + A QA dataset should contain a set of [test cases](https://docs.confident-ai.com/docs/evaluation-test-cases) that have: + - Questions, which will be prompted to the LLM + - Ground truth answers, which will be used to compare against the generated answer by the LLM + - Context, which will contain the correct piece of source documentation that supports the true answer + - The full source documentation from which the context is derived + + A dataset for [NIAH Testing](https://arize.com/blog-course/the-needle-in-a-haystack-test-evaluating-the-performance-of-llm-rag-systems/) should contain: + - A series of irrelevant texts of varying context length that have one point of information hidden within + + To support these needs, two datasets were created: + - [LFAI_RAG_qa_v1](https://huggingface.co/datasets/defenseunicorns/LFAI_RAG_qa_v1) + - [LFAI_RAG_niah_v1](https://huggingface.co/datasets/defenseunicorns/LFAI_RAG_niah_v1) + + These two datasets will be used as the basis for MVP LeapfrogAI RAG evaluations that require data sources. + + Advanced versions of these datasets will be needed after MVP status as LeapfrogAI baseline performance grows. If baseline LeapfrogAI can pass all tests and score top marks on all metrics for these tests, then the tests lose their ability to assist in tracking growth over time. + + An advanced QA dataset differs in the following ways: + - More documents to use as the basis for questions. This provides a larger pool that RAG has to perform retrieval on and provides more opportunities for question types + - Narrow the scope of the types of documents used. By keeping the topics of each document more similar to each other, this makes retrieval a more difficult task + + An advanced NIAH dataset has the following: + - A collection of documents (the haystack) where one document contains a target piece of information (the needle) hidden somewhere within + - The documents should be of the same topic (or in other words, be semantically similar) so it's not obvious which document has the right information + - The needle itself should also be topically related to the rest of the documents but identifiable as unique information (i.e this information should be not obvious and only exists in one location out of all the documents) + + #### Rationale + + These datasets were created because it filled a gap in the openly available datasets that could have been used. For example, in QA datasets, there did not exist any dataset that had all **4** components listed above. Many had the questions, answers, and context, but none also included the source documents in a readily accessible manner. Therefore, the fastest and most effective course of action was to generate a QA dataset from source documentation using the [DeepEval Synthesizer](https://docs.confident-ai.com/docs/evaluation-datasets-synthetic-data). The documentation that was used to create the QA dataset was chosen to be both representative of deployment needs (by including some DoD specific documentation) and a variety of topics (including technical documents and financial reports). + + As for the NIAH dataset, there was a similar "incompleteness" problem that was observed. 
While other iterations of NIAH datasets are more readily available than QA datasets, some [datasets](https://huggingface.co/datasets/nanotron/simple_needle_in_a_hay_stack) had haystacks constructed of short, repeating sentences, which does not reflect what a realistic deployment context looks like. Other implementations mirrored the original [NIAH experiment](https://x.com/GregKamradt/status/1722386725635580292?lang=en) using [Paul Graham essays](https://paulgraham.com/articles.html), but did not release their specific datasets. Therefore, it made sense to quickly generate a dataset that uses the same Paul Graham essays as context, while inserting individual "needles" at certain context lengths to create a custom dataset. LFAI_RAG_niah_v1 includes context lengths from 512 to 128k characters. +
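As an illustration of the dataset construction described above (a simplified sketch, not the actual generation script), a haystack sample can be produced by trimming the essay text to a target context length and splicing the needle in at a chosen depth:

```python
def build_niah_sample(haystack: str, needle: str, context_length: int, depth: float) -> str:
    """Trim the haystack to roughly `context_length` characters and insert the
    needle at a relative depth (0.0 = start, 1.0 = end) on a sentence boundary."""
    trimmed = haystack[: max(context_length - len(needle) - 1, 0)]
    insert_at = int(len(trimmed) * depth)
    # back up to the nearest sentence boundary so the needle is not spliced mid-sentence
    boundary = trimmed.rfind(". ", 0, insert_at)
    if boundary != -1:
        insert_at = boundary + 2
    return trimmed[:insert_at] + needle + " " + trimmed[insert_at:]

# Placeholder inputs: the real dataset uses concatenated Paul Graham essays as the
# haystack and a unique, identifiable fact as the needle.
haystack_text = "The quick brown fox jumps over the lazy dog. " * 200
needle_text = "Doug's favorite food is a banana sandwich."
sample = build_niah_sample(haystack_text, needle_text, context_length=4096, depth=0.5)
```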
+ +### Models to Evaluate +
+ Details + + #### Decision + + The three models that will initially be evaluated are going to be: + + - [SynthIA-7B](https://huggingface.co/TheBloke/SynthIA-7B-v2.0-GPTQ) (the initial default model for LeapfrogAI) + - [Hermes 2 Pro](https://huggingface.co/defenseunicorns/Hermes-2-Pro-Mistral-7B-4bit-32g-GPTQ) (Defense Unicorns quantization) + - [Llama3.1-8B](https://huggingface.co/unsloth/Meta-Llama-3.1-8B-bnb-4bit) (using a 4 bit quantization) + + GPT-4o will also be used as a point of comparison in the results. + + #### Rationale + Three models were chosen to evaluate against initially in order to balance the scale between complexity and variety. There are endless variations of models that could be evaluated against, but these ones were chosen with specific reasons in mind. + - **SynthIA-7B**: This model has been the default backbone of LeapfrogAI since the beginning and (at the time of writing this ADR) is still the default model deployment choice. It is a 4 bit QPTQ quantization, so it is small enough to load on edge deployments. It is also compatible with both backend deployment options: llama-cpp-python and vllm. As it is still the default model choice, it should be evaluated on to see how it performs as time has gone on. + - **Hermes 2 Pro**: This model is a fine-tune of the Mistral-7b-Instruct model using the [OpenHermes-2.5](https://huggingface.co/datasets/teknium/OpenHermes-2.5) dataset. Hermes 2 Pro also includes [Hermes Function Calling](https://github.com/NousResearch/Hermes-Function-Calling). This particular model is a 4 bit GPTQ quantization on the [VMWare Open Instruct](https://huggingface.co/datasets/vmware/open-instruct) dataset that was generated by Defense Unicorns. Hermes 2 Pro advances on Mistral 7b with excellent general task and conversation capabilities and enhanced function calling and generation of JSON structured outputs. This model also meets the requirements of being small enough to load in edge deployment scenarios. + - **Llama3.1-8B**: This model has been shown to be an exemplary addition to the small model space [(Model Card)](https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/MODEL_CARD.md). With additional language capabilities (trained on 8 languages), the Llama3.1 family of models offers high performance under a variety of scenarios. The model that will be evaluated against is a 4 bit bnb quanitzation of LLama3.1-8B. This quantization again allows for smaller deployment scenarios and makes a more relevant comparison point to the models already in use within LeapfrogAI. + +All of the above models have similar vRAM requirements (able to be run on < 16Gb of vRAM), similar parameter count (7-8 billion parameters), and the same quantization level (4-bit). By balancing these factors, we can verify that each of these models can be swapped out for another and the system requirements do not need to change. This will assist in being able to provide comparisons that are different by as few variables as possible. + +As time goes on, additional models will be considered and added as comparison points. + +
+ +### LLM-as-Judge / LLMs-as-Jury +
+ Details + + #### Decision + + For the RAG Evals MVP, [Claude 3.5 Sonnet](https://www.anthropic.com/news/claude-3-5-sonnet) by Anthropic will be used as a single LLM-as-Judge. + + #### Rationale + + There are two points to rationalize: the model choice and the decision to use a single judge. + + In order to reach an MVP product, a single LLM judge will be utilized for the evaluations that require it. This will be the first stage so that the evaluation framework can begin receiving results. As progress is made, additional LLM-based judges will be incorporated to develop an LLM-jury styled approach. For context, please see the following [paper](https://arxiv.org/pdf/2404.18796). + + Claude 3.5 Sonnet was chosen as the first judge due to its high levels of [performance](https://artificialanalysis.ai/models/claude-35-sonnet), which is crucial when utilizing an LLM judge. As shown in its [model card](https://www-cdn.anthropic.com/fed9cc193a14b84131812372d8d5857f8f304c52/Model_Card_Claude_3_Addendum.pdf), Claude 3.5 Sonnet outperforms other large models on various evaluation benchmarks. These benchmarks include: + - MMLU (general multitask reasoning) + - DROP (reading comprehension) + - BIG-Bench Hard (mixed task evaluations) + - Needle in a Haystack recall (for understanding long contexts) + - XSTest (for testing rejection of harmful requests) + + By utilizing a model that outperforms other similarly large models on all of these tasks, we can have confidence that we are using the most capable LLM-as-judge model. + + Additionally, Claude 3.5 Sonnet exists outside the family of models that will be evaluated, which has been shown to be effective in comparison to using judges from the same model family due to [self-enhancement bias](https://arxiv.org/pdf/2306.05685). +
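A rough sketch of how a non-default judge could be plugged into the evaluation metrics is shown below. It assumes DeepEval's `DeepEvalBaseLLM` wrapper interface and the Anthropic Python client, and the model ID shown is an example rather than a pinned project choice.

```python
import anthropic
from deepeval.models.base_model import DeepEvalBaseLLM

class ClaudeJudge(DeepEvalBaseLLM):
    """Wraps the Anthropic API so DeepEval metrics can use Claude 3.5 Sonnet as the judge."""

    def __init__(self, model_name: str = "claude-3-5-sonnet-20240620"):
        self.model_name = model_name

    def load_model(self):
        # reads ANTHROPIC_API_KEY from the environment
        return anthropic.Anthropic()

    def generate(self, prompt: str) -> str:
        response = self.load_model().messages.create(
            model=self.model_name,
            max_tokens=1024,
            messages=[{"role": "user", "content": prompt}],
        )
        return response.content[0].text

    async def a_generate(self, prompt: str) -> str:
        return self.generate(prompt)

    def get_model_name(self) -> str:
        return self.model_name

# Metrics accept the wrapper via their `model` argument, e.g.:
# FaithfulnessMetric(model=ClaudeJudge())
```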
+ +### Metrics / Evaluations +
+ Details + + #### Decision + + The LeapfrogAI RAG evaluation framework will utilize the following evaluations: + + LLM-as-a-judge metrics to use: + - [Contextual Recall](https://docs.confident-ai.com/docs/metrics-contextual-recall) (for evaluating retrieval) + - [Answer Correctness](https://docs.confident-ai.com/docs/metrics-llm-evals) (for evaluating generation) + - [Faithfulness](https://docs.confident-ai.com/docs/metrics-faithfulness) (for evaluating generation) + + Non-LLM-enabled evaluations: + - Needle in a Haystack (for evaluating retrieval and generation) + - Annotation Relevancy (for evaluating retrieval) + + Standard LLM benchmarks: + - [HumanEval](https://docs.confident-ai.com/docs/benchmarks-human-eval) (for evaluating code generation) + - [MMLU](https://docs.confident-ai.com/docs/benchmarks-mmlu) (for evaluating reasoning across multiple subjects; generation only) + + Performance Metrics: + - Total Execution Runtime + + #### Rationale + + These metrics were chosen to balance the explainability/understandability of non-LLM based evaluations and the flexibility/scalability of LLM-as-judge evaluations. + - Contextual Recall: evaluates the extent to which the context retrieved by RAG corresponds to an expected output + - Answer Correctness: evaluates if an answer generated by an LLM is accurate when compared to the question asked and its context + - Faithfulness: evaluates whether an answer generated by an LLM factually aligns with the context provided + - Needle in a Haystack (retrieval): determines if a needle of information is correctly retrieved from the vector store by RAG + - Needle in a Haystack (response): determines if a needle of information is correctly given in the final response of the LLM in a RAG pipeline + - HumanEval: Evaluates an LLM's code generation abilities (not RAG-enabled, but useful as an established baseline to compare against) + - MMLU: Evaluates an LLM's ability to reason on multiple task topics using multiple choice questions (not RAG-enabled, but useful as an established baseline to compare against) + - Annotation Relevancy: A custom metric that measures how often documents that have nothing to do with the question are cited in the annotations. Higher is better + + Established LLM benchmarks (MMLU and HumanEval) are included in this MVP evaluation framework despite not requiring information from a retrieval system. It's important that this framework have a few generation-only metrics to be better at diagnosing whether issues in performance are happening due to RAG or the model. The other metrics included in this MVP evaluate either the retrieval stage on its own or the information-assisted generation. If the metrics evaluated on the information-assisted generation (e.g Faithfulness or NIAH response) are scoring low, it is difficult to parse out whether or not the low score is caused by the information retrieval, the generation itself, or both. Having these benchmarks provides a way to validate whether or not the generation works as expected, indicating a potential problem with the retrieval. These benchmarks are also standard, and therefore used across many LLMs. Therefore, these values can be used when comparing what performance is expected of these models and what is being observed in LeapfrogAI. These benchmarks can assist in diagnosing problems with both quantization (which often don't have these benchmarks) and implementation differences. 
+ + While these metrics are being utilized first to balance the value gained against the time to implement, additional evaluation metrics will be added soon after MVP status. Potential options include: + - RAG retrieval Hit Rate: a non-LLM metric that evaluates how often a retrieved context matches the expected context for a question/answer scenario + - Performance metrics: non-LLM metrics that measure performance targets such as runtime and compute (CPU and GPU) usage (requires a standardized deployment context) +
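Of the non-LLM metrics above, Annotation Relevancy is custom to LeapfrogAI. One way the described ratio could be computed is sketched below; this is an interpretation of the metric's description, not the shipped implementation.

```python
def annotation_relevancy(cited_doc_ids: list[str], relevant_doc_ids: set[str]) -> float:
    """Fraction of cited documents that are actually relevant to the question.
    1.0 means every annotation points at a relevant document; higher is better."""
    if not cited_doc_ids:
        return 1.0  # nothing cited, so nothing irrelevant was cited
    relevant_citations = sum(1 for doc_id in cited_doc_ids if doc_id in relevant_doc_ids)
    return relevant_citations / len(cited_doc_ids)

score = annotation_relevancy(
    cited_doc_ids=["doc_a", "doc_b", "doc_b", "doc_z"],  # documents cited in the response annotations
    relevant_doc_ids={"doc_a", "doc_b"},                 # documents that actually contain the answer
)
# 3 of 4 citations point at relevant documents -> score of 0.75
```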
+ +### Execution / Delivery +
+ Details + + #### Decision + For MVP status, we will be running the evaluation framework in one-off instances utilizing the `leapfrogai_evals` module. This module contains the runners for the current evaluations and measures the metrics that have been established thus far. + + #### Rationale + In order to start getting feedback from evaluations, we simply need to get the results in whatever form we can. Since there is not an established cadence for how often evals will be run (a determination for post MVP), the storage of said evals does not need to be consistent at this time. + + The next steps for the execution and delivery of evals will likely be the following: + - Using the `leapfrogai_evals` module, evaluations will be run at a regular cadence in a Github workflow so that we have a standardized way of running evaluations that we can compare against. + - These evaluation results will be stored as artifacts in GitHub so that performance can be tracked over time across version releases. + +
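The exact runner interface of `leapfrogai_evals` is not specified here, but the one-off execution and artifact storage described above might look roughly like the following hypothetical sketch, where the runner function is a stand-in for a real evaluation:

```python
import json
from datetime import datetime, timezone

def run_contextual_recall() -> float:
    """Placeholder runner: the real module would execute the metric against a
    deployed LeapfrogAI instance and return an averaged score."""
    return 0.0

RUNNERS = {"contextual_recall": run_contextual_recall}

results = {
    "timestamp": datetime.now(timezone.utc).isoformat(),
    "scores": {name: runner() for name, runner in RUNNERS.items()},
}

# one JSON artifact per run, suitable for upload and comparison across version releases
with open("eval-results.json", "w") as f:
    json.dump(results, f, indent=2)
```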
+ +### Model Card +
+ Details + + #### Decision + + The model card will ultimately exist in a few forms: + + - A tabular representation that shows for a given model (or hyperparameter configuration) as a row, the columns consist of all of the scored metrics that were applied to that configuration. + - **Assumption**: A deployed instance of LeapfrogAI will likely always accompany UDS runtime. The evaluation results for a deployment will live in a table under its corresponding UDS runtime page. + - The evaluation outputs themselves will eventually be provided in `json` format for easier ingestion into observability tools or other additional frameworks. + - This will likely become more relevant after MVP status. + - This assumption will need to be vetted by the UDS team and therefore may have to be adjusted in the future. + + A model card report will consist of the table of evaluation metrics as well as a written summary of what the metrics mean, how they relate to specific performance considerations, as well as model recommendations. Therefore, this report can be generalized for a wide audience, but will need to be customized for a given potential deployment scenario. A metrics table may look something like this: + ![Screenshot from 2024-09-18 18-03-18](https://github.com/user-attachments/assets/479f385b-1d09-4842-b1f0-e2d8992b0b3d) + + #### Rationale + + The needs of the model card will likely evolve over time as the needs of delivering evaluations changes. This can be observed in three potential stages: + - Near-term: evaluations benefit the product team to help identify new model choices for new defaults, diagnose implementation bugs, and evaluate upgrades to the RAG pipeline. + - Data format needed: raw numbers, potentially in tabular format for ease of ingesting + - Mid-term: evaluations on default model options for mission heroes are part of the delivery process. These recommendations are provided to assist mission heroes in selecting the models they want in their deployments. + - Data format needed: same as near-term, but a higher emphasis on the report will be necessary + - Long-term: evaluations are ingrained within all LeapfrogAI deployments to diagnose potential runtime issues and to evaluate multiple model options directly within the cluster + - Data format needed: evaluations will need to be directly tied into other metrics-measuring tools, such as prometheus, to integrate directly into UDS runtime. + + By providing an iterable approach to delivering evaluation results, the model card's use-case will be able to evolve over time to scale to meet the needs of the product team, delivery team, and mission heroes. + +
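As a sketch of the tabular representation, per-model metric scores could be rendered into a markdown table as shown below; the model names and numbers are illustrative placeholders, not real evaluation results.

```python
def model_card_table(scores_by_model: dict[str, dict[str, float]]) -> str:
    """Render one row per model configuration and one column per metric."""
    metrics = sorted({m for scores in scores_by_model.values() for m in scores})
    header = "| Model | " + " | ".join(metrics) + " |"
    divider = "|" + "---|" * (len(metrics) + 1)
    rows = [
        "| " + model + " | "
        + " | ".join(f"{scores.get(m, float('nan')):.2f}" for m in metrics) + " |"
        for model, scores in scores_by_model.items()
    ]
    return "\n".join([header, divider, *rows])

print(model_card_table({
    "SynthIA-7B": {"contextual_recall": 0.71, "faithfulness": 0.84},   # illustrative numbers only
    "Hermes 2 Pro": {"contextual_recall": 0.78, "faithfulness": 0.88},  # illustrative numbers only
}))
```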
+ +## Related ADRs +This ADR was influenced by the [RAG Evaluations Toolset](/adr/0004-rag-eval-toolset.md) ADR. + +## References diff --git a/bundles/dev/cpu/uds-bundle.yaml b/bundles/dev/cpu/uds-bundle.yaml index 0df6fd4d9..5e24c4eaa 100644 --- a/bundles/dev/cpu/uds-bundle.yaml +++ b/bundles/dev/cpu/uds-bundle.yaml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.14.0/uds.schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/uds.schema.json kind: UDSBundle metadata: diff --git a/bundles/dev/gpu/uds-bundle.yaml b/bundles/dev/gpu/uds-bundle.yaml index c6205f0c4..3ad029f99 100644 --- a/bundles/dev/gpu/uds-bundle.yaml +++ b/bundles/dev/gpu/uds-bundle.yaml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.14.0/uds.schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/uds.schema.json kind: UDSBundle metadata: diff --git a/bundles/dev/gpu/uds-config.yaml b/bundles/dev/gpu/uds-config.yaml index 9ad6cfdb4..1ef7a2634 100644 --- a/bundles/dev/gpu/uds-config.yaml +++ b/bundles/dev/gpu/uds-config.yaml @@ -9,8 +9,31 @@ variables: gpu_limit: 0 # runs on CPU until GPU limit is increased vllm: - gpu_limit: 1 # if <1, vllm won't work, VLLM is GPU only - #tensor_parallel_size: 1 # TODO: reintroduce when vllm changes get pulled in + trust_remote_code: "True" + tensor_parallel_size: "1" + enforce_eager: "False" + gpu_memory_utilization: "0.90" + worker_use_ray: "True" + engine_use_ray: "True" + quantization: "None" + load_format: "auto" + # LeapfrogAI SDK runtime configuration (usually influenced by config.yaml in development) + max_context_length: "32768" + stop_tokens: ", <|im_end|>, <|endoftext|>" + prompt_format_chat_system: "SYSTEM: {}\n" + prompt_format_chat_user: "USER: {}\n" + prompt_format_chat_assistant: "ASSISTANT: {}\n" + temperature: "0.1" + top_p: "1.0" + top_k: "0" + repetition_penalty: "1.0" + max_new_tokens: "8192" + # Pod deployment configuration + gpu_limit: "1" + gpu_runtime: "nvidia" + pvc_size: "15Gi" + pvc_access_mode: "ReadWriteOnce" + pvc_storage_class: "local-path" supabase: domain: "uds.dev" diff --git a/bundles/latest/cpu/uds-bundle.yaml b/bundles/latest/cpu/uds-bundle.yaml index 747645ae3..23170f504 100644 --- a/bundles/latest/cpu/uds-bundle.yaml +++ b/bundles/latest/cpu/uds-bundle.yaml @@ -1,38 +1,38 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.14.0/uds.schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/uds.schema.json kind: UDSBundle metadata: name: leapfrogai description: A UDS bundle for deploying LeapfrogAI - version: 0.12.2-upstream + version: 0.13.1-upstream packages: # Supabase backend for the UI and API to interface with Postgresql - name: supabase repository: ghcr.io/defenseunicorns/packages/uds/leapfrogai/supabase - ref: 0.12.2-upstream + ref: 0.13.1-upstream # API - name: leapfrogai-api repository: ghcr.io/defenseunicorns/packages/uds/leapfrogai/leapfrogai-api - ref: 0.12.2-upstream + ref: 0.13.1-upstream # Chat Model - name: llama-cpp-python repository: ghcr.io/defenseunicorns/packages/uds/leapfrogai/llama-cpp-python - ref: 0.12.2-upstream + ref: 0.13.1-upstream # Text Embeddings Model - name: text-embeddings repository: ghcr.io/defenseunicorns/packages/uds/leapfrogai/text-embeddings - ref: 0.12.2-upstream + ref: 
0.13.1-upstream # Transcription Model - name: whisper repository: ghcr.io/defenseunicorns/packages/uds/leapfrogai/whisper - ref: 0.12.2-upstream + ref: 0.13.1-upstream # UI - name: leapfrogai-ui repository: ghcr.io/defenseunicorns/packages/uds/leapfrogai/leapfrogai-ui - ref: 0.12.2-upstream + ref: 0.13.1-upstream diff --git a/bundles/latest/gpu/uds-bundle.yaml b/bundles/latest/gpu/uds-bundle.yaml index 3867749a4..39b0acf79 100644 --- a/bundles/latest/gpu/uds-bundle.yaml +++ b/bundles/latest/gpu/uds-bundle.yaml @@ -1,38 +1,38 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.14.0/uds.schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/uds.schema.json kind: UDSBundle metadata: name: leapfrogai description: A UDS bundle for deploying LeapfrogAI - version: 0.12.2-upstream + version: 0.13.1-upstream packages: # Supabase backend for the UI and API to interface with Postgresql - name: supabase repository: ghcr.io/defenseunicorns/packages/uds/leapfrogai/supabase - ref: 0.12.2-upstream + ref: 0.13.1-upstream # OpenAI-like API - name: leapfrogai-api repository: ghcr.io/defenseunicorns/packages/uds/leapfrogai/leapfrogai-api - ref: 0.12.2-upstream + ref: 0.13.1-upstream # Model for generic chat and summarization - name: vllm repository: ghcr.io/defenseunicorns/packages/uds/leapfrogai/vllm - ref: 0.12.2-upstream + ref: 0.13.1-upstream # Model for providing vector embeddings for text - name: text-embeddings repository: ghcr.io/defenseunicorns/packages/uds/leapfrogai/text-embeddings - ref: 0.12.2-upstream + ref: 0.13.1-upstream # Model for converting audio to text - name: whisper repository: ghcr.io/defenseunicorns/packages/uds/leapfrogai/whisper - ref: 0.12.2-upstream + ref: 0.13.1-upstream # UI - name: leapfrogai-ui repository: ghcr.io/defenseunicorns/packages/uds/leapfrogai/leapfrogai-ui - ref: 0.12.2-upstream + ref: 0.13.1-upstream diff --git a/bundles/latest/gpu/uds-config.yaml b/bundles/latest/gpu/uds-config.yaml index 9ad6cfdb4..1ef7a2634 100644 --- a/bundles/latest/gpu/uds-config.yaml +++ b/bundles/latest/gpu/uds-config.yaml @@ -9,8 +9,31 @@ variables: gpu_limit: 0 # runs on CPU until GPU limit is increased vllm: - gpu_limit: 1 # if <1, vllm won't work, VLLM is GPU only - #tensor_parallel_size: 1 # TODO: reintroduce when vllm changes get pulled in + trust_remote_code: "True" + tensor_parallel_size: "1" + enforce_eager: "False" + gpu_memory_utilization: "0.90" + worker_use_ray: "True" + engine_use_ray: "True" + quantization: "None" + load_format: "auto" + # LeapfrogAI SDK runtime configuration (usually influenced by config.yaml in development) + max_context_length: "32768" + stop_tokens: ", <|im_end|>, <|endoftext|>" + prompt_format_chat_system: "SYSTEM: {}\n" + prompt_format_chat_user: "USER: {}\n" + prompt_format_chat_assistant: "ASSISTANT: {}\n" + temperature: "0.1" + top_p: "1.0" + top_k: "0" + repetition_penalty: "1.0" + max_new_tokens: "8192" + # Pod deployment configuration + gpu_limit: "1" + gpu_runtime: "nvidia" + pvc_size: "15Gi" + pvc_access_mode: "ReadWriteOnce" + pvc_storage_class: "local-path" supabase: domain: "uds.dev" diff --git a/docs/DEVELOPMENT.md b/docs/DEVELOPMENT.md index 897bfaf5d..98343ef7f 100644 --- a/docs/DEVELOPMENT.md +++ b/docs/DEVELOPMENT.md @@ -13,20 +13,20 @@ Please first see the pre-requisites listed on the LeapfrogAI documentation websi It is **_HIGHLY RECOMMENDED_** that PyEnv be installed on your machine, and a new virtual environment is created for every 
new development branch. -Follow the installation instructions outlined in the [pyenv](https://github.com/pyenv/pyenv?tab=readme-ov-file#installation) repository to install Python 3.11.6: +Follow the installation instructions outlined in the [pyenv](https://github.com/pyenv/pyenv?tab=readme-ov-file#installation) repository to install Python 3.11.9: ```bash # install the correct python version - pyenv install 3.11.6 + pyenv install 3.11.9 # create a new virtual environment named "leapfrogai" - pyenv virtualenv 3.11.6 leapfrogai + pyenv virtualenv 3.11.9 leapfrogai # activate the virtual environment pyenv activate leapfrogai ``` -If your installation process completes successfully but indicates missing packages such as `sqlite3`, execute the following command to install the required packages then proceed with the reinstallation of Python 3.11.6: +If your installation process completes successfully but indicates missing packages such as `sqlite3`, execute the following command to install the required packages then proceed with the reinstallation of Python 3.11.9: ```bash sudo apt-get install build-essential zlib1g-dev libffi-dev \ @@ -62,6 +62,52 @@ Many of the directories and sub-directories within this project contain Make tar Please refer to each Makefile for more arguments and details on what each target does and is dependent on. +## UDS Tasks + +UDS tasks use the UDS CLI runner, and are defined in the root `tasks.yaml` file. + +Currently, the only tasks within the file are for checking the progress of the LeapfrogAI towards the `Made for UDS` packaging standards. To run the task verification task you must have a [UDS Kubernetes cluster](../packages/k3d-gpu/README.md) and LeapfrogAI (GPU or CPU) deployed. After deploying both major capabilities, you can execute the following: + +```bash +uds run nightly-uds-badge-verification --no-progress +``` + +You should get an output similar to this, depending on how many components of LeapfrogAI are actually deployed: + +```bash + • Running "Create Reports Directory" + + ✔ Completed "Create Reports Directory" + + • Running "Run UDS Badge Verification Task" + + ✔ Completed "Run UDS Badge Verification Task" + + • Running "Clean Up Final Report" +----------------------------- +Package: leapfrogai-api + +❌ Errors: 4 +⚠️ Warnings: 3 + +❌ Error Descriptions: + - Endpoint leapfrogai-api.uds.dev is returning 404 + - Not all applicable network policies are using selectors + - Not all applicable network policies are using ports + - No monitors defined + +⚠️ Warning Descriptions: + - Version is not consistent across flavors and package + - Network policies with 'remoteGenerated: Anywhere' are present, review needed + - No SSO configuration found, review needed +----------------------------- +UDS Capability Issues + +❌ Error Descriptions: + - Not all pods have the istio sidecar +----------------------------- +``` + ## Environment Variables Be wary of `*config*.yaml` or `.env*` files that are in individual components of the stack. The component's README will usually tell the developer when to fill them out or supply environment variables to a script. 
@@ -81,6 +127,7 @@ uds zarf tools registry prune --confirm # create and deploy the new package # FLAVOR can be upstream (default) or registry1 - see README for availability details +# See individual sub-directories for any flavor-specific instructions (e.g., packages/api/README.md) LOCAL_VERSION=dev FLAVOR=upstream REGISTRY_PORT=5000 ARCH=amd64 make build-api LOCAL_VERSION=dev FLAVOR=upstream REGISTRY_PORT=5000 ARCH=amd64 make deploy-api ``` @@ -107,6 +154,7 @@ uds zarf package deploy zarf-package-*.tar.zst --confirm ```bash # FLAVOR can be upstream (default) or registry1 - see README for availability details + # See individual sub-directories for any flavor-specific instructions (e.g., packages/api/README.md) LOCAL_VERSION=dev FLAVOR=upstream ARCH=amd64 make build-cpu # ui, api, llama-cpp-python, text-embeddings, whisper, supabase # OR LOCAL_VERSION=dev FLAVOR=upstream ARCH=amd64 make build-gpu # ui, api, vllm, text-embeddings, whisper, supabase @@ -120,6 +168,7 @@ uds zarf package deploy zarf-package-*.tar.zst --confirm ```bash # FLAVOR can be upstream (default) or registry1 - see README for availability details + # See individual sub-directories for any flavor-specific instructions (e.g., packages/api/README.md) LOCAL_VERSION=dev FLAVOR=upstream ARCH=amd64 make build-ui LOCAL_VERSION=dev FLAVOR=upstream ARCH=amd64 make build-api LOCAL_VERSION=dev FLAVOR=upstream ARCH=amd64 make build-supabase @@ -154,7 +203,7 @@ Although not provided in the example UDS bundle manifests found in this reposito - name: leapfrogai-api repository: ghcr.io/defenseunicorns/packages/leapfrogai/leapfrogai-api # x-release-please-start-version - ref: 0.12.2 + ref: 0.13.1 # x-release-please-end # THE BELOW LINES WERE ADDED FOR DEMONSTRATION PURPOSES @@ -188,6 +237,7 @@ To demonstrate what this would look like for an Apple Silicon Mac: ```bash # FLAVOR can be upstream (default) or registry1 - see README for availability details +# See individual sub-directories for any flavor-specific instructions (e.g., packages/api/README.md) REG_PORT=5001 ARCH=arm64 LOCAL_VERSION=dev FLAVOR=upstream make build-cpu ``` @@ -195,6 +245,7 @@ To demonstrate what this would look like for an older Intel Mac: ```bash # FLAVOR can be upstream (default) or registry1 - see README for availability details +# See individual sub-directories for any flavor-specific instructions (e.g., packages/api/README.md) REG_PORT=5001 ARCH=arm64 LOCAL_VERSION=dev FLAVOR=upstream make build-cpu ``` diff --git a/mk-clean.mk b/mk-clean.mk index ff7e8c61d..4ca00ae89 100644 --- a/mk-clean.mk +++ b/mk-clean.mk @@ -15,8 +15,8 @@ clean-artifacts: # Zarf packages, UDS bundles, Python build artifacts, etc. 
clean-cache: -rm -rf ./**/__pycache__ ./**/*/__pycache__ ./**/**/*/__pycache__ - -rm -rf ./**/*/.ruff_cache ./**/.ruff_cache - -rm -rf ./**/.pytest_cache ./**/*/.pytest_cache + -rm -rf ./.ruff_cache ./**/*/.ruff_cache ./**/.ruff_cache + -rm -rf ./.pytest_cache ./**/.pytest_cache ./**/*/.pytest_cache -rm -rf ./.mypy_cache clean-env: diff --git a/packages/api/README.md b/packages/api/README.md index aa2b34690..2d68d67f8 100644 --- a/packages/api/README.md +++ b/packages/api/README.md @@ -27,6 +27,13 @@ make build-api LOCAL_VERSION=dev FLAVOR=upstream uds zarf package deploy packages/api/zarf-package-leapfrogai-api-*-dev.tar.zst --confirm ``` +For other package flavors, use the following example: + +```bash +make build-api FLAVOR=registry1 +uds zarf package deploy packages/api/zarf-package-leapfrogai-api-*-dev.tar.zst --confirm +``` + ### Local Development See the [source code documentation](../../src/leapfrogai_api/README.md) for running the API from the source code for local Python environment development. diff --git a/packages/api/chart/templates/istio-admin.yaml b/packages/api/chart/templates/istio-admin.yaml new file mode 100644 index 000000000..c369e8786 --- /dev/null +++ b/packages/api/chart/templates/istio-admin.yaml @@ -0,0 +1,24 @@ +{{- if .Capabilities.APIVersions.Has "security.istio.io/v1beta1" }} +apiVersion: security.istio.io/v1beta1 +kind: AuthorizationPolicy +metadata: + name: api-block-metrics-access-from-public-gateway + namespace: {{ .Release.Namespace }} +spec: + selector: + matchLabels: + {{- include "chart.selectorLabels" . | nindent 6 }} + action: DENY + rules: + - to: + - operation: + ports: + - "8080" + paths: + - /metrics* + from: + - source: + notNamespaces: + - istio-admin-gateway + - monitoring +{{- end }} diff --git a/packages/api/chart/templates/uds-package.yaml b/packages/api/chart/templates/uds-package.yaml index a6a83dea8..17220788d 100644 --- a/packages/api/chart/templates/uds-package.yaml +++ b/packages/api/chart/templates/uds-package.yaml @@ -7,6 +7,11 @@ metadata: labels: {{- include "chart.labels" . | nindent 4 }} spec: + monitor: + - portName: http + targetPort: {{ .Values.api.service.port }} + selector: + {{- include "chart.selectorLabels" . | nindent 8 }} network: expose: - service: {{ include "chart.fullname" . 
}} diff --git a/packages/api/chart/values.yaml b/packages/api/chart/values.yaml index 65b397e46..4c217ba8a 100644 --- a/packages/api/chart/values.yaml +++ b/packages/api/chart/values.yaml @@ -25,6 +25,8 @@ api: value: "*.toml" - name: DEFAULT_EMBEDDINGS_MODEL value: "text-embeddings" + - name: DEV + value: "false" - name: PORT value: "8080" - name: SUPABASE_URL diff --git a/packages/api/common/zarf.yaml b/packages/api/common/zarf.yaml index 08f52f60a..3462103d2 100644 --- a/packages/api/common/zarf.yaml +++ b/packages/api/common/zarf.yaml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.14.0/zarf.schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/zarf.schema.json kind: ZarfPackageConfig metadata: diff --git a/packages/api/values/registry1-values.yaml b/packages/api/values/registry1-values.yaml index d269c6415..91f92b168 100644 --- a/packages/api/values/registry1-values.yaml +++ b/packages/api/values/registry1-values.yaml @@ -1,9 +1,7 @@ api: image: repository: "registry1.dso.mil/ironbank/opensource/defenseunicorns/leapfrogai/api" - # x-release-please-start-version - tag: v0.12.2 - # x-release-please-end + tag: v###ZARF_CONST_IMAGE_VERSION### expose: "###ZARF_VAR_EXPOSE_API###" @@ -16,6 +14,8 @@ api: value: "*.toml" - name: DEFAULT_EMBEDDINGS_MODEL value: "###ZARF_VAR_DEFAULT_EMBEDDINGS_MODEL###" + - name: DEV + value: "###ZARF_VAR_DEV###" - name: PORT value: "8080" - name: SUPABASE_URL diff --git a/packages/api/values/upstream-values.yaml b/packages/api/values/upstream-values.yaml index 6d867260e..ef2dcdad9 100644 --- a/packages/api/values/upstream-values.yaml +++ b/packages/api/values/upstream-values.yaml @@ -14,6 +14,8 @@ api: value: "*.toml" - name: DEFAULT_EMBEDDINGS_MODEL value: "###ZARF_VAR_DEFAULT_EMBEDDINGS_MODEL###" + - name: DEV + value: "###ZARF_VAR_DEV###" - name: PORT value: "8080" - name: SUPABASE_URL diff --git a/packages/api/zarf-config.yaml b/packages/api/zarf-config.yaml new file mode 100644 index 000000000..475ac2d48 --- /dev/null +++ b/packages/api/zarf-config.yaml @@ -0,0 +1,6 @@ +package: + create: + set: + # x-release-please-start-version + image_version: "0.13.1" + # x-release-please-end diff --git a/packages/api/zarf.yaml b/packages/api/zarf.yaml index 4fa6c59f2..10a183e9c 100644 --- a/packages/api/zarf.yaml +++ b/packages/api/zarf.yaml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.14.0/zarf.schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/zarf.schema.json kind: ZarfPackageConfig metadata: @@ -16,6 +16,9 @@ variables: description: "Flag to expose the OpenAPI schema for debugging." - name: DEFAULT_EMBEDDINGS_MODEL default: "text-embeddings" + - name: DEV + default: "false" + description: "Flag to enable development endpoints." 
components: - name: leapfrogai-api @@ -47,7 +50,7 @@ components: valuesFiles: - "values/registry1-values.yaml" images: - - "registry1.dso.mil/ironbank/opensource/defenseunicorns/leapfrogai/api:v0.12.2" + - "registry1.dso.mil/ironbank/opensource/defenseunicorns/leapfrogai/api:v###ZARF_PKG_TMPL_IMAGE_VERSION###" # TODO: replace with Ironbank image once hardened: registry1.dso.mil/ironbank/opensource/defenseunicorns/leapfrogai/api/migrations - "ghcr.io/defenseunicorns/leapfrogai/api-migrations:###ZARF_PKG_TMPL_IMAGE_VERSION###" - "registry1.dso.mil/ironbank/kiwigrid/k8s-sidecar:1.23.3" diff --git a/packages/llama-cpp-python/zarf.yaml b/packages/llama-cpp-python/zarf.yaml index 2320e5a26..49ac98f34 100644 --- a/packages/llama-cpp-python/zarf.yaml +++ b/packages/llama-cpp-python/zarf.yaml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.14.0/zarf.schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/zarf.schema.json kind: ZarfPackageConfig metadata: diff --git a/packages/repeater/zarf.yaml b/packages/repeater/zarf.yaml index 0e1f76378..38d6090f2 100644 --- a/packages/repeater/zarf.yaml +++ b/packages/repeater/zarf.yaml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.14.0/zarf.schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/zarf.schema.json kind: ZarfPackageConfig metadata: diff --git a/packages/supabase/zarf.yaml b/packages/supabase/zarf.yaml index 3c6d5c4f7..44ea46c1c 100644 --- a/packages/supabase/zarf.yaml +++ b/packages/supabase/zarf.yaml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.14.0/zarf.schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/zarf.schema.json kind: ZarfPackageConfig metadata: name: supabase diff --git a/packages/text-embeddings/zarf.yaml b/packages/text-embeddings/zarf.yaml index d11d50ff5..fc270d48a 100644 --- a/packages/text-embeddings/zarf.yaml +++ b/packages/text-embeddings/zarf.yaml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.14.0/zarf.schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/zarf.schema.json kind: ZarfPackageConfig metadata: diff --git a/packages/ui/chart/templates/ui/service.yaml b/packages/ui/chart/templates/ui/service.yaml index 15243e806..2cb919567 100644 --- a/packages/ui/chart/templates/ui/service.yaml +++ b/packages/ui/chart/templates/ui/service.yaml @@ -18,11 +18,3 @@ spec: protocol: TCP port: {{ .Values.service.port }} targetPort: {{ .Values.service.port }} ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: {{ include "chart.serviceAccountName" . }} - namespace: {{ .Release.Namespace | default "leapfrogai" }} - labels: - {{- include "chart.labels" . 
| nindent 4 }} diff --git a/packages/ui/zarf.yaml b/packages/ui/zarf.yaml index 14de3c89d..7a0741a5e 100644 --- a/packages/ui/zarf.yaml +++ b/packages/ui/zarf.yaml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.14.0/zarf.schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/zarf.schema.json kind: ZarfPackageConfig metadata: diff --git a/packages/vllm/.env.example b/packages/vllm/.env.example index 1e3a00170..0a995e234 100644 --- a/packages/vllm/.env.example +++ b/packages/vllm/.env.example @@ -1,13 +1,12 @@ -export LAI_HF_HUB_ENABLE_HF_TRANSFER="1" -export LAI_REPO_ID="TheBloke/Synthia-7B-v2.0-GPTQ" -export LAI_REVISION="gptq-4bit-32g-actorder_True" -export LAI_QUANTIZATION="gptq" -export LAI_TENSOR_PARALLEL_SIZE=1 -export LAI_MODEL_SOURCE=".model/" -export LAI_MAX_CONTEXT_LENGTH=32768 -export LAI_STOP_TOKENS='["","<|endoftext|>","<|im_end|>"]' -export LAI_PROMPT_FORMAT_CHAT_SYSTEM="SYSTEM: {}\n" -export LAI_PROMPT_FORMAT_CHAT_ASSISTANT="ASSISTANT: {}\n" -export LAI_PROMPT_FORMAT_CHAT_USER="USER: {}\n" -export LAI_PROMPT_FORMAT_DEFAULTS_TOP_P=1.0 -export LAI_PROMPT_FORMAT_DEFAULTS_TOP_K=0 \ No newline at end of file +LFAI_REPO_ID="TheBloke/SynthIA-7B-v2.0-GPTQ" +LFAI_REVISION="gptq-4bit-32g-actorder_True" + +VLLM_TENSOR_PARALLEL_SIZE=1 +VLLM_TRUST_REMOTE_CODE=True +VLLM_MAX_CONTEXT_LENGTH=32768 +VLLM_ENFORCE_EAGER=False +VLLM_GPU_MEMORY_UTILIZATION=0.90 +VLLM_WORKER_USE_RAY=True +VLLM_ENGINE_USE_RAY=True +VLLM_QUANTIZATION=None +VLLM_LOAD_FORMAT=auto diff --git a/packages/vllm/Dockerfile b/packages/vllm/Dockerfile index 8676f5eda..f53088ead 100755 --- a/packages/vllm/Dockerfile +++ b/packages/vllm/Dockerfile @@ -6,8 +6,9 @@ FROM nvidia/cuda:12.2.2-devel-ubuntu22.04 AS builder # set SDK location # set the pyenv and Python versions ARG SDK_DEST=src/leapfrogai_sdk/build \ - PYTHON_VERSION=3.11.6 \ - PYENV_GIT_TAG=v2.4.8 + PYTHON_VERSION=3.11.9 \ + PYENV_GIT_TAG=v2.4.8\ + COMPONENT_DIRECTORY="packages/vllm" # use root user for deps installation and nonroot user creation USER root @@ -41,7 +42,7 @@ USER nonroot # copy-in SDK from sdk stage and vllm source code from host WORKDIR /home/leapfrogai COPY --from=sdk --chown=nonroot:nonroot /leapfrogai/${SDK_DEST} ./${SDK_DEST} -COPY --chown=nonroot:nonroot packages/vllm packages/vllm +COPY --chown=nonroot:nonroot ${COMPONENT_DIRECTORY} packages/vllm # create virtual environment for light-weight portability and minimal libraries RUN curl https://pyenv.run | bash && \ @@ -54,10 +55,10 @@ RUN curl https://pyenv.run | bash && \ ENV PYENV_ROOT="/home/nonroot/.pyenv" \ PATH="/home/nonroot/.pyenv/bin:$PATH" -# Install Python 3.11.6, set it as global, and create a venv +# Install Python, set it as global, and create a venv RUN . 
~/.bashrc && \ - PYTHON_CONFIGURE_OPTS="--enable-shared" pyenv install 3.11.6 && \ - pyenv global 3.11.6 && \ + PYTHON_CONFIGURE_OPTS="--enable-shared" pyenv install 3.11.9 && \ + pyenv global ${PYTHON_VERSION} && \ pyenv exec python -m venv .venv # set path to venv python @@ -67,26 +68,15 @@ RUN rm -f packages/vllm/build/*.whl && \ python -m pip wheel packages/vllm -w packages/vllm/build --find-links=${SDK_DEST} && \ pip install packages/vllm/build/lfai_vllm*.whl --no-index --find-links=packages/vllm/build/ +################# +# FINAL CONTAINER +################# + FROM nvidia/cuda:12.2.2-runtime-ubuntu22.04 # set SDK location ARG SDK_DEST=src/leapfrogai_sdk/build -# model-specific arguments -ARG ARG HF_HUB_ENABLE_HF_TRANSFER="1" \ - REPO_ID="TheBloke/Synthia-7B-v2.0-GPTQ" \ - REVISION="gptq-4bit-32g-actorder_True" \ - MODEL_SOURCE="/data/.model/" \ - MAX_CONTEXT_LENGTH=32768 \ - STOP_TOKENS='[""]' \ - PROMPT_FORMAT_CHAT_SYSTEM="SYSTEM: {}\n" \ - PROMPT_FORMAT_CHAT_USER="USER: {}\n" \ - PROMPT_FORMAT_CHAT_ASSISTANT="ASSISTANT: {}\n" \ - PROMPT_FORMAT_DEFAULTS_TOP_P=1.0 \ - PROMPT_FORMAT_DEFAULTS_TOP_K=0 \ - TENSOR_PARALLEL_SIZE=1 \ - QUANTIZATION="gptq" - # setup nonroot user and permissions USER root RUN groupadd -g 65532 vglusers && \ @@ -101,24 +91,10 @@ COPY --from=sdk --chown=nonroot:nonroot /leapfrogai/${SDK_DEST} ./${SDK_DEST} COPY --from=builder --chown=nonroot:nonroot /home/leapfrogai/.venv /home/leapfrogai/.venv COPY --from=builder --chown=nonroot:nonroot /home/leapfrogai/packages/vllm/src /home/leapfrogai/packages/vllm/src # copy-in python binaries -COPY --from=builder --chown=nonroot:nonroot /home/nonroot/.pyenv/versions/3.11.6/ /home/nonroot/.pyenv/versions/3.11.6/ - -# load ARG values into env variables for pickup by confz -ENV LAI_HF_HUB_ENABLE_HF_TRANSFER=${HF_HUB_ENABLE_HF_TRANSFER} \ - LAI_REPO_ID=${REPO_ID} \ - LAI_REVISION=${REVISION} \ - LAI_MODEL_SOURCE=${MODEL_SOURCE} \ - LAI_MAX_CONTEXT_LENGTH=${MAX_CONTEXT_LENGTH} \ - LAI_STOP_TOKENS=${STOP_TOKENS} \ - LAI_PROMPT_FORMAT_CHAT_SYSTEM=${PROMPT_FORMAT_CHAT_SYSTEM} \ - LAI_PROMPT_FORMAT_CHAT_USER=${PROMPT_FORMAT_CHAT_USER} \ - LAI_PROMPT_FORMAT_CHAT_ASSISTANT=${PROMPT_FORMAT_CHAT_ASSISTANT} \ - LAI_PROMPT_FORMAT_DEFAULTS_TOP_P=${PROMPT_FORMAT_DEFAULTS_TOP_P} \ - LAI_PROMPT_FORMAT_DEFAULTS_TOP_K=${PROMPT_FORMAT_DEFAULTS_TOP_K} \ - LAI_TENSOR_PARALLEL_SIZE=${TENSOR_PARALLEL_SIZE} \ - LAI_QUANTIZATION=${QUANTIZATION} \ - # remove vLLM callback to stats server - VLLM_NO_USAGE_STATS=1 +COPY --from=builder --chown=nonroot:nonroot /home/nonroot/.pyenv/versions/${PYTHON_VERSION}/ /home/nonroot/.pyenv/versions/${PYTHON_VERSION}/ + +# remove vLLM callback to stats server +ENV VLLM_NO_USAGE_STATS=1 ENV PATH="/home/leapfrogai/.venv/bin:$PATH" diff --git a/packages/vllm/Makefile b/packages/vllm/Makefile index 98e8b29db..c764a78f2 100644 --- a/packages/vllm/Makefile +++ b/packages/vllm/Makefile @@ -1,6 +1,27 @@ +ARCH ?= amd64 +LOCAL_VERSION ?= $(shell git rev-parse --short HEAD) +DOCKER_FLAGS := + install: python -m pip install ../../src/leapfrogai_sdk python -m pip install -e ".[dev]" -dev: - python -m leapfrogai_sdk.cli --app-dir=src/ main:Model +download: + @env $$(cat .env | xargs) python src/model_download.py + +dev: download + @env $$(cat .env | xargs) python -m leapfrogai_sdk.cli --app-dir=src/ main:Model + +docker: download + docker build ${DOCKER_FLAGS} \ + --platform=linux/${ARCH} \ + --build-arg LOCAL_VERSION=${LOCAL_VERSION} \ + --build-arg COMPONENT_DIRECTORY="./" \ + -t 
ghcr.io/defenseunicorns/leapfrogai/vllm:${LOCAL_VERSION} \ + -f ./Dockerfile . + + docker run -it --rm \ + --env-file ./.env \ + -v $(PWD)/config.yaml:/home/leapfrogai/config.yaml \ + -v $(PWD)/.model:/home/leapfrogai/.model \ + ghcr.io/defenseunicorns/leapfrogai/vllm:${LOCAL_VERSION} diff --git a/packages/vllm/README.md b/packages/vllm/README.md index a55238cfd..5bc7a052f 100644 --- a/packages/vllm/README.md +++ b/packages/vllm/README.md @@ -16,13 +16,21 @@ See the LeapfrogAI documentation website for [system requirements](https://docs. The default model that comes with this backend in this repository's officially released images is a [4-bit quantization of the Synthia-7b model](https://huggingface.co/TheBloke/SynthIA-7B-v2.0-GPTQ). -You can optionally specify different models or quantization types using the following Docker build arguments: +All of the commands in this sub-section are executed within this `packages/vllm` sub-directory. -- `--build-arg HF_HUB_ENABLE_HF_TRANSFER="1"`: Enable or disable HuggingFace Hub transfer (default: 1) -- `--build-arg REPO_ID="TheBloke/Synthia-7B-v2.0-GPTQ"`: HuggingFace repository ID for the model -- `--build-arg REVISION="gptq-4bit-32g-actorder_True"`: Revision or commit hash for the model -- `--build-arg QUANTIZATION="gptq"`: Quantization type (e.g., gptq, awq, or empty for un-quantized) -- `--build-arg TENSOR_PARALLEL_SIZE="1"`: The number of gpus to spread the tensor processing across +Optionally, you can specify a different model during Zarf creation: + +```bash +uds zarf package create --confirm --set MODEL_REPO_ID=defenseunicorns/Hermes-2-Pro-Mistral-7B-4bit-32g --set MODEL_REVISION=main +``` + +If you decide to use a different model, there will likely be a need to change generation and engine runtime configurations, please see the [Zarf Package Config](./zarf-config.yaml) and the [values override file](./values/upstream-values.yaml) for details on what runtime parameters can be modified. These parameters are model-specific, and can be found in the HuggingFace model cards and/or configuration files (e.g., prompt templates). + +For example, during Zarf deployment, you can override the Zarf Package Config defaults by doing the following: + +```bash +uds zarf package deploy zarf-package-vllm-amd64-dev.tar.zst --confirm --set ENFORCE_EAGER=True +``` ### Deployment @@ -39,11 +47,26 @@ uds zarf package deploy packages/vllm/zarf-package-vllm-*-dev.tar.zst --confirm ### Local Development -To run the vllm backend locally: +In local development the [config.yaml](./config.yaml) and [.env.example](./.env.example) must be modified if the model has changed away from the default. The LeapfrogAI SDK picks up the `config.yaml` automatically, and the `.env` must be sourced into the Python environment. > [!IMPORTANT] > Execute the following commands from this sub-directory +Create a `.env` file based on the [`.env.example`](./.env.example): + +```bash +cp .env.example .env +source .env +``` + +As necessary, modify the existing [`config.yaml`](./config.yaml): + +```bash +vim config.yaml +``` + +To run the vllm backend locally: + ```bash # Install dev and runtime dependencies make install @@ -54,3 +77,19 @@ python src/model_download.py # Start the model backend make dev ``` + +#### Local Docker Container + +To run the Docker container, use the following Makefile commands. `LOCAL_VERSION` must be consistent across the two Make commands. 
+ +In the root of the LeapfrogAI repository: + +```bash +LOCAL_VERSION=dev make sdk-wheel +``` + +In the root of this vLLM sub-directory: + +```bash +LOCAL_VERSION=dev make docker +``` diff --git a/packages/vllm/chart/templates/deployment.yaml b/packages/vllm/chart/templates/deployment.yaml index 7b88cc137..3f8aa0540 100644 --- a/packages/vllm/chart/templates/deployment.yaml +++ b/packages/vllm/chart/templates/deployment.yaml @@ -36,7 +36,7 @@ spec: [ "sh", "-c", - 'while [ ! -f /data/.model/###ZARF_DATA_INJECTION_MARKER### ]; do echo "waiting for zarf data sync" && sleep 1; done; echo "we are done waiting!"', + 'while [ ! -f ###ZARF_CONST_MODEL_PATH###/###ZARF_DATA_INJECTION_MARKER### ]; do echo "waiting for zarf data sync" && sleep 1; done; echo "we are done waiting!"', ] resources: {{- toYaml .Values.modelInjectionContainer.resources | nindent 12 }} @@ -46,6 +46,9 @@ spec: - name: leapfrogai-pv-storage persistentVolumeClaim: claimName: lfai-{{ .Values.nameOverride }}-pv-claim + - name: leapfrogai-sdk-configmap + configMap: + name: "{{ .Values.nameOverride }}-sdk-configmap" securityContext: {{- toYaml .Values.podSecurityContext | nindent 8 }} containers: @@ -58,6 +61,9 @@ spec: env: {{- toYaml . | nindent 12 }} {{- end }} + envFrom: + - configMapRef: + name: "{{ .Values.nameOverride }}-engine-configmap" ports: - name: http containerPort: {{ .Values.service.port }} @@ -67,6 +73,10 @@ spec: volumeMounts: - name: leapfrogai-pv-storage mountPath: "/data" + - name: leapfrogai-sdk-configmap + mountPath: "/home/leapfrogai/config.yaml" + subPath: "config.yaml" + readOnly: true {{- with .Values.nodeSelector }} nodeSelector: {{- toYaml . | nindent 8 }} diff --git a/packages/vllm/chart/templates/leapfrogai-sdk-configmap.yaml b/packages/vllm/chart/templates/leapfrogai-sdk-configmap.yaml new file mode 100644 index 000000000..cdc08be5e --- /dev/null +++ b/packages/vllm/chart/templates/leapfrogai-sdk-configmap.yaml @@ -0,0 +1,37 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: "{{ .Values.nameOverride }}-sdk-configmap" + namespace: {{ .Release.Namespace | default "leapfrogai" }} +data: + config.yaml: | + model: + source: {{ .Values.leapfrogaiConfig.model.source | quote }} + max_context_length: {{ .Values.leapfrogaiConfig.maxContextLength | quote }} + stop_tokens: + {{- $stopTokens := .Values.leapfrogaiConfig.stopTokens }} + {{- range $stopToken := splitList ", " .Values.leapfrogaiConfig.stopTokens }} + - {{ printf "%s" $stopToken }} + {{- end }} + prompt_format: + {{- with .Values.leapfrogaiConfig.promptFormat.chat }} + chat: + {{- if .system }} + system: {{ .system | quote }} + {{- end }} + {{- if .assistant }} + assistant: {{ .assistant | quote }} + {{- end }} + {{- if .user }} + user: {{ .user | quote }} + {{- end }} + {{- if .function }} + function: {{ .function | quote }} + {{- end }} + {{- end }} + defaults: + temperature: {{ .Values.leapfrogaiConfig.defaults.temperature | quote }} + top_p: {{ .Values.leapfrogaiConfig.defaults.topP | quote }} + top_k: {{ .Values.leapfrogaiConfig.defaults.topK | quote }} + repetition_penalty: {{ .Values.leapfrogaiConfig.defaults.repetitionPenalty | quote }} + max_new_tokens: {{ .Values.leapfrogaiConfig.defaults.maxNewTokens | quote }} diff --git a/packages/vllm/chart/templates/vllm-engine-configmap.yaml b/packages/vllm/chart/templates/vllm-engine-configmap.yaml new file mode 100644 index 000000000..5ac82b42c --- /dev/null +++ b/packages/vllm/chart/templates/vllm-engine-configmap.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: "{{ 
.Values.nameOverride }}-engine-configmap" + namespace: {{ .Release.Namespace | default "leapfrogai" }} +data: + VLLM_TRUST_REMOTE_CODE: "{{ .Values.vllmConfig.trustRemoteCode }}" + VLLM_TENSOR_PARALLEL_SIZE: "{{ .Values.vllmConfig.tensorParallelSize }}" + VLLM_ENFORCE_EAGER: "{{ .Values.vllmConfig.enforceEager }}" + VLLM_GPU_MEMORY_UTILIZATION: "{{ .Values.vllmConfig.gpuMemoryUtilization }}" + VLLM_WORKER_USE_RAY: "{{ .Values.vllmConfig.workerUseRay }}" + VLLM_ENGINE_USE_RAY: "{{ .Values.vllmConfig.engineUseRay }}" + VLLM_QUANTIZATION: "{{ .Values.vllmConfig.quantization }}" + VLLM_LOAD_FORMAT: "{{ .Values.vllmConfig.loadFormat }}" diff --git a/packages/vllm/chart/values.yaml b/packages/vllm/chart/values.yaml index 0f7fe9911..0209a8b34 100644 --- a/packages/vllm/chart/values.yaml +++ b/packages/vllm/chart/values.yaml @@ -13,6 +13,33 @@ image: nameOverride: "vllm" fullnameOverride: "" +leapfrogaiConfig: + model: + source: "/data/.model/" + maxContextLength: "32768" + stopTokens: ", <|im_end|>, <|endoftext|>" + promptFormat: + chat: + system: "SYSTEM: {}\n" + assistant: "ASSISTANT: {}\n" + user: "USER: {}\n" + defaults: + temperature: "0.1" + topP: "1.0" + topK: "0" + repetitionPenalty: "1.0" + maxNewTokens: "8192" + +vllmConfig: + trustRemoteCode: "True" + tensorParallelSize: "1" + enforceEager: "False" + gpuMemoryUtilization: "0.90" + workerUseRay: "True" + engineUseRay: "True" + quantization: "None" + loadFormat: "auto" + env: - name: LFAI_LOG_LEVEL value: "INFO" @@ -41,7 +68,7 @@ resources: limits: cpu: 0 memory: 0 - nvidia.com/gpu: 0 + nvidia.com/gpu: 1 requests: cpu: 0 memory: 0 diff --git a/packages/vllm/config.yaml b/packages/vllm/config.yaml new file mode 100644 index 000000000..22210a74b --- /dev/null +++ b/packages/vllm/config.yaml @@ -0,0 +1,17 @@ +model: + source: ".model/" +max_context_length: 32768 +stop_tokens: + - "<|im_end|>" + - "<|endoftext|>" + - "" +prompt_format: + chat: + system: "SYSTEM: {}\n" + assistant: "ASSISTANT: {}\n" + user: "USER: {}\n" +defaults: + top_p: 1.0 + top_k: 0 + repetition_penalty: 1.0 + max_new_tokens: 8192 diff --git a/packages/vllm/pyproject.toml b/packages/vllm/pyproject.toml index 4d7955708..24b1363e6 100644 --- a/packages/vllm/pyproject.toml +++ b/packages/vllm/pyproject.toml @@ -8,7 +8,7 @@ version = "0.13.1" dependencies = [ "pydantic == 2.8.2", - "vllm == 0.4.2", + "vllm == 0.4.3", "python-dotenv == 1.0.1", "aiostream ==0.6.2", "leapfrogai-sdk", diff --git a/packages/vllm/src/config.py b/packages/vllm/src/config.py index debca4ba3..c13af5521 100644 --- a/packages/vllm/src/config.py +++ b/packages/vllm/src/config.py @@ -5,10 +5,6 @@ class ConfigOptions(BaseConfig): - quantization: Literal[None, "awq", "gptq", "squeezellm"] = Field( - default=None, - description="Type of quantization, for un-quantized models omit this field", - ) tensor_parallel_size: int = Field( default=1, title="GPU Utilization Count", @@ -16,39 +12,105 @@ class ConfigOptions(BaseConfig): "This must be divisible to the number of attention heads in the model", examples=[1, 2, 3], ) + quantization: Literal[ + "aqlm", + "bitsandbytes", + "awq", + "deepspeedfp", + "fp8", + "marlin", + "gptq_marlin_24", + "gptq_marlin", + "gptq", + "squeezellm", + "sparseml", + "None", + "", + ] = Field( + title="quantization", + description="Quantization type of the model" + "Force GPTQ instead of GPTQ_Marlin by explicitly providing `gptq` as value.", + examples=["awq", "fp8", "gptq_marlin", "gptq", "squeezellm", "None"], + ) + load_format: Literal["auto", "safetensors", "npz", "pt", 
"bitsandbytes"] = Field( + title="quantization", + description="Load format for the type model and files", + examples=["auto", "safetensors", "npz", "pt", "bitsandbytes"], + ) + enforce_eager: bool = Field( + title="Enable Eager Mode", + description="Enable eager mode to start token generation immediately after prompt processing." + "Potentially reduces initial latency at the cost of slightly higher memory usage." + "Should be set to False in production environments with higher GPU memory.", + examples=[True, False], + ) + gpu_memory_utilization: float = Field( + title="GPU Memory Limit", + description="Maximum amount of GPU vRAM allocated to the vLLM engine and worker(s)", + examples=[0.50, 0.80, 0.90], + ) + engine_use_ray: bool = Field( + title="Use Ray for Engine", + description="If True, uses Ray for managing the execution engine. Allows for distributed inferencing in multi-node situations.", + examples=[True, False], + ) + worker_use_ray: bool = Field( + title="Use Ray for Worker", + description="If True, uses Ray for distributed worker management. Allows for distributed inferencing in multi-node situations.", + examples=[True, False], + ) + trust_remote_code: bool = Field( + title="Trust Downloaded Model Code", + description="Whether to trust inferencing code downloaded as part of the model download." + "Please review the Python code in the .model/ directory before trusting custom model code.", + examples=[True, False], + ) class DownloadOptions(BaseConfig): - hf_hub_enable_hf_transfer: Literal["0", "1"] = Field( - description="Option (0 - Disable, 1 - Enable) for faster transfers, tradeoff stability for faster speeds" - ) repo_id: str = Field( - description="HuggingFace repo id", + description="The HuggingFace git repository ID", examples=[ - "TheBloke/Synthia-7B-v2.0-GPTQ", - "migtissera/Synthia-MoE-v3-Mixtral-8x7B", - "microsoft/phi-2", + "defenseunicorns/Hermes-2-Pro-Mistral-7B-4bit-32g", + "justinthelaw/Phi-3-mini-128k-instruct-4bit-128g", ], ) revision: str = Field( - description="The model branch to use", + description="The HuggingFace repository git branch to use", examples=["main", "gptq-4bit-64g-actorder_True"], ) +# vLLM specific runtime configuration options class AppConfig(BaseConfig): backend_options: ConfigOptions + CONFIG_SOURCES = [ + EnvSource( + allow_all=True, + prefix="VLLM_", + remap={ + "tensor_parallel_size": "backend_options.tensor_parallel_size", + "trust_remote_code": "backend_options.trust_remote_code", + "enforce_eager": "backend_options.enforce_eager", + "quantization": "backend_options.quantization", + "gpu_memory_utilization": "backend_options.gpu_memory_utilization", + "worker_use_ray": "backend_options.worker_use_ray", + "engine_use_ray": "backend_options.engine_use_ray", + "load_format": "backend_options.load_format", + }, + ) + ] + + +class DownloadConfig(BaseConfig): download_options: Optional[DownloadOptions] CONFIG_SOURCES = [ EnvSource( allow_all=True, - prefix="LAI_", + prefix="LFAI_", remap={ - "hf_hub_enable_hf_transfer": "download_options.hf_hub_enable_hf_transfer", "repo_id": "download_options.repo_id", "revision": "download_options.revision", - "quantization": "backend_options.quantization", - "tensor_parallel_size": "backend_options.tensor_parallel_size", }, ) ] diff --git a/packages/vllm/src/main.py b/packages/vllm/src/main.py index 6a530e4f0..67d36d178 100644 --- a/packages/vllm/src/main.py +++ b/packages/vllm/src/main.py @@ -1,15 +1,12 @@ import asyncio -import json import logging import os import queue import random -import sys 
import threading import time from typing import Any, Dict, AsyncGenerator -from confz import EnvSource from dotenv import load_dotenv from vllm import SamplingParams from vllm.engine.arg_utils import AsyncEngineArgs @@ -18,15 +15,8 @@ from vllm.utils import random_uuid from config import AppConfig -from leapfrogai_sdk import ( - BackendConfig, - ChatCompletionRequest, - CompletionRequest, -) -from leapfrogai_sdk.llm import ( - GenerationConfig, - LLM, -) +from leapfrogai_sdk import BackendConfig +from leapfrogai_sdk.llm import GenerationConfig, LLM load_dotenv() @@ -84,60 +74,6 @@ def remove_iterator(self, async_iterable): pass # If the iterable is not found, ignore the error -def get_backend_configs(): - # Manually load env var as ConfZ does not handle complex types (list) - stop_tokens: str | None = os.getenv("LAI_STOP_TOKENS") - if stop_tokens: - processed_stop_tokens = json.loads(stop_tokens) - else: - processed_stop_tokens = [] - del os.environ["LAI_STOP_TOKENS"] - - env_source = EnvSource( - allow_all=True, - prefix="LAI_", - remap={ - "model_source": "model.source", - "max_context_length": "max_context_length", - "stop_tokens": "stop_tokens", - "prompt_format_chat_system": "prompt_format.chat.system", - "prompt_format_chat_assistant": "prompt_format.chat.assistant", - "prompt_format_chat_user": "prompt_format.chat.user", - "prompt_format_defaults_top_p": "prompt_format.defaults.top_p", - "prompt_format_defaults_top_k": "prompt_format.defaults.top_k", - }, - ) - - BackendConfig.CONFIG_SOURCES = env_source - # Initialize an immutable config from env variables without stop_tokens list - backend_configs: BackendConfig = BackendConfig() - # Updates "processed_stop_tokens" without triggering Pydantic validation errors - backend_configs.model_copy(update={"stop_tokens": processed_stop_tokens}) - - return backend_configs - - -def get_config_from_request(request: ChatCompletionRequest | CompletionRequest): - return GenerationConfig( - max_new_tokens=request.max_new_tokens, - temperature=request.temperature, - top_k=request.top_k, - top_p=request.top_p, - do_sample=request.do_sample, - n=request.n, - stop=list(request.stop), - repetition_penalty=request.repetition_penalty, - presence_penalty=request.presence_penalty, - best_of=str(request.best_of), - logit_bias=request.logit_bias, - return_full_text=request.return_full_text, - truncate=request.truncate, - typical_p=request.typical_p, - watermark=request.watermark, - seed=request.seed, - ) - - @LLM class Model: """Implements an LLM model with concurrent output generation and management.""" @@ -152,19 +88,26 @@ def __init__(self): _thread = threading.Thread(target=asyncio.run, args=(self.iterate_outputs(),)) _thread.start() - self.backend_config = get_backend_configs() - self.model = self.backend_config.model.source + quantization = ( + None + if AppConfig().backend_options.quantization in ["", "None"] + else AppConfig().backend_options.quantization + ) + self.engine_args = AsyncEngineArgs( - engine_use_ray=True, - model=self.model, - trust_remote_code=False, - quantization=AppConfig().backend_options.quantization, - max_seq_len_to_capture=self.backend_config.max_context_length, - max_model_len=self.backend_config.max_context_length, - dtype="auto", - worker_use_ray=True, - gpu_memory_utilization=0.90, + # Taken from the LFAI SDK general LLM configuration + model=BackendConfig().model.source, + max_seq_len_to_capture=BackendConfig().max_context_length, + max_model_len=BackendConfig().max_context_length, + # Taken from the vLLM-specific 
configuration + enforce_eager=AppConfig().backend_options.enforce_eager, + quantization=quantization, + load_format=AppConfig().backend_options.load_format, tensor_parallel_size=AppConfig().backend_options.tensor_parallel_size, + engine_use_ray=AppConfig().backend_options.engine_use_ray, + worker_use_ray=AppConfig().backend_options.worker_use_ray, + gpu_memory_utilization=AppConfig().backend_options.gpu_memory_utilization, + trust_remote_code=AppConfig().backend_options.trust_remote_code, ) self.engine = AsyncLLMEngine.from_engine_args(self.engine_args) print(self.engine_args) @@ -228,18 +171,39 @@ async def create_response( """Initiate a response generation for the given prompt and configuration, adding the result to the iterator pool.""" - sampling_params = SamplingParams( - temperature=config.temperature, - # Clamp top_p value to prevent float errors - top_p=clamp(config.top_p, 0.0 + sys.float_info.epsilon, 1.0), - # Restrict top_k to valid values, -1 disables top_k - top_k=config.top_k if config.top_k >= 1 else -1, - stop=self.backend_config.stop_tokens, - max_tokens=config.max_new_tokens, - skip_special_tokens=False, - ) + # Collect LeapfrogAI SDK-defined parameters not aligned with vLLM SamplingParams + params = { + "max_tokens": getattr(config, "max_new_tokens"), + } + + # Collect LeapfrogAI SDK-defined parameters directly aligned with vLLM SamplingParams + aligned_params = [ + "temperature", + "top_p", + "top_k", + "stop", + "n", + "repetition_penalty", + "presence_penalty", + "best_of", + "logit_bias", + "return_full_text", + "truncate", + "typical_p", + "seed", + ] + + # Add only the parameters that exist in the request + # vLLM will provide defaults for the rest, if not specified + for param in aligned_params: + if param in config: + params[param] = config[param] + + # Pass the collected params to vLLM SamplingParams + sampling_params = SamplingParams(**params) + logger.info(f"Begin generation for request {request_id}") - logger.debug(f"{request_id} sampling_paramms: {sampling_params}") + logger.debug(f"{request_id} sampling_params: {sampling_params}") # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. 
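For reference, the parameter-forwarding pattern introduced in `create_response` above can be reduced to a small standalone sketch. This is not the PR's code: `FakeRequest` and `build_sampling_kwargs` are hypothetical stand-ins for the LeapfrogAI SDK's `GenerationConfig` and the backend's collection loop, only a handful of fields are shown, and a `None` check stands in for the membership test used in the actual change. The idea is the same: forward only the parameters the request actually sets so the inference engine supplies its own defaults for the rest.

```python
# Illustrative sketch of request-to-engine parameter forwarding (assumed names).
from dataclasses import dataclass
from typing import Optional


@dataclass
class FakeRequest:
    max_new_tokens: int = 256
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    top_k: Optional[int] = None
    stop: Optional[list] = None


def build_sampling_kwargs(request: FakeRequest) -> dict:
    # Parameters whose names differ between the SDK request and the engine
    kwargs = {"max_tokens": request.max_new_tokens}

    # Parameters whose names align one-to-one; copy them only when provided
    for name in ("temperature", "top_p", "top_k", "stop"):
        value = getattr(request, name)
        if value is not None:
            kwargs[name] = value
    return kwargs


if __name__ == "__main__":
    # The resulting dict is what would be splatted into the engine's
    # sampling-parameters constructor (vllm.SamplingParams in this backend).
    print(build_sampling_kwargs(FakeRequest(temperature=0.1, top_k=0)))
```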
@@ -284,8 +248,12 @@ async def generate( request_id ): result = "" - if not self.is_queue_empty(request_id): - result = self.delta_queue_by_id.get(request_id).get() + + # Ensure that the queue is not None and contains items before calling .get() + cur_queue = self.delta_queue_by_id.get(request_id) + if cur_queue is not None and not cur_queue.empty(): + result = cur_queue.get() + yield result logger.info(f"Finished request {request_id}") diff --git a/packages/vllm/src/model_download.py b/packages/vllm/src/model_download.py index 29f88942c..b87b6a61e 100644 --- a/packages/vllm/src/model_download.py +++ b/packages/vllm/src/model_download.py @@ -1,18 +1,17 @@ import os from huggingface_hub import snapshot_download -from config import AppConfig +from config import DownloadConfig -REPO_ID = AppConfig().download_options.repo_id -REVISION = AppConfig().download_options.revision -os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = ( - AppConfig().download_options.hf_hub_enable_hf_transfer -) +REPO_ID = DownloadConfig().download_options.repo_id +REVISION = DownloadConfig().download_options.revision + +# enable hf_transfer to max-out model download bandwidth +os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1" print(f"Downloading model from {REPO_ID} at revision {REVISION}...") snapshot_download( repo_id=REPO_ID, local_dir=".model", - local_dir_use_symlinks=False, revision=REVISION, ) diff --git a/packages/vllm/values/upstream-values.yaml b/packages/vllm/values/upstream-values.yaml index 0fe581bdd..e74ebec4a 100644 --- a/packages/vllm/values/upstream-values.yaml +++ b/packages/vllm/values/upstream-values.yaml @@ -2,12 +2,55 @@ image: repository: "ghcr.io/defenseunicorns/leapfrogai/vllm" tag: "###ZARF_CONST_IMAGE_VERSION###" +nameOverride: "###ZARF_CONST_NAME_OVERRIDE###" + +leapfrogaiConfig: + model: + source: "###ZARF_CONST_MODEL_PATH###" + maxContextLength: "###ZARF_VAR_MAX_CONTEXT_LENGTH###" + stopTokens: "###ZARF_VAR_STOP_TOKENS###" + promptFormat: + chat: + system: "###ZARF_VAR_PROMPT_FORMAT_CHAT_SYSTEM###" + assistant: "###ZARF_VAR_PROMPT_FORMAT_CHAT_ASSISTANT###" + user: "###ZARF_VAR_PROMPT_FORMAT_CHAT_USER###" + defaults: + temperature: "###ZARF_VAR_TEMPERATURE###" + topP: "###ZARF_VAR_TOP_P###" + topK: "###ZARF_VAR_TOP_K###" + repetitionPenalty: "###ZARF_VAR_REPETITION_PENALTY###" + maxNewTokens: "###ZARF_VAR_MAX_NEW_TOKENS###" + + +vllmConfig: + trustRemoteCode: "###ZARF_VAR_TRUST_REMOTE_CODE###" + tensorParallelSize: "###ZARF_VAR_TENSOR_PARALLEL_SIZE###" + enforceEager: "###ZARF_VAR_ENFORCE_EAGER###" + gpuMemoryUtilization: "###ZARF_VAR_GPU_MEMORY_UTILIZATION###" + workerUseRay: "###ZARF_VAR_WORKER_USE_RAY###" + engineUseRay: "###ZARF_VAR_ENGINE_USE_RAY###" + quantization: "###ZARF_VAR_QUANTIZATION###" + loadFormat: "###ZARF_VAR_LOAD_FORMAT###" + +env: + - name: LFAI_LOG_LEVEL + value: "INFO" + gpu: runtimeClassName: "###ZARF_VAR_GPU_RUNTIME###" resources: + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
limits: + cpu: 0 + memory: 0 nvidia.com/gpu: "###ZARF_VAR_GPU_LIMIT###" + requests: + cpu: 0 + memory: 0 persistence: size: "###ZARF_VAR_PVC_SIZE###" diff --git a/packages/vllm/zarf-config.yaml b/packages/vllm/zarf-config.yaml new file mode 100644 index 000000000..5f032eecb --- /dev/null +++ b/packages/vllm/zarf-config.yaml @@ -0,0 +1,39 @@ +package: + create: + set: + # x-release-please-start-version + image_version: "0.13.0" + # x-release-please-end + + model_repo_id: "TheBloke/Synthia-7B-v2.0-GPTQ" + model_revision: "gptq-4bit-32g-actorder_True" + model_path: "/data/.model/" + name_override: "vllm" + deploy: + set: + # vLLM runtime configuration (usually influenced by .env in local development) + trust_remote_code: "True" + tensor_parallel_size: "1" + enforce_eager: "False" + gpu_memory_utilization: "0.90" + worker_use_ray: "True" + engine_use_ray: "True" + quantization: "None" + load_format: "auto" + # LeapfrogAI SDK runtime configuration (usually influenced by config.yaml in development) + max_context_length: "32768" + stop_tokens: ", <|im_end|>, <|endoftext|>" + prompt_format_chat_system: "SYSTEM: {}\n" + prompt_format_chat_user: "USER: {}\n" + prompt_format_chat_assistant: "ASSISTANT: {}\n" + temperature: "0.1" + top_p: "1.0" + top_k: "0" + repetition_penalty: "1.0" + max_new_tokens: "8192" + # Pod deployment configuration + gpu_limit: "1" + gpu_runtime: "nvidia" + pvc_size: "15Gi" + pvc_access_mode: "ReadWriteOnce" + pvc_storage_class: "local-path" diff --git a/packages/vllm/zarf.yaml b/packages/vllm/zarf.yaml index ed88c2f18..f87564e36 100644 --- a/packages/vllm/zarf.yaml +++ b/packages/vllm/zarf.yaml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.14.0/zarf.schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/zarf.schema.json kind: ZarfPackageConfig metadata: name: vllm @@ -9,27 +9,86 @@ metadata: constants: - name: IMAGE_VERSION value: "###ZARF_PKG_TMPL_IMAGE_VERSION###" + - name: MODEL_REPO_ID + description: "The HuggingFace repository ID" + value: "###ZARF_PKG_TMPL_MODEL_REPO_ID###" + - name: MODEL_REVISION + description: "The HuggingFace git branch or commit hash" + value: "###ZARF_PKG_TMPL_MODEL_REVISION###" + - name: MODEL_PATH + description: "Defines the location of the Zarf Injected model files in the vLLM container" + value: "###ZARF_PKG_TMPL_MODEL_PATH###" + - name: NAME_OVERRIDE + description: "Provide an override for the name of the deployment (e.g., the model name)" + value: "###ZARF_PKG_TMPL_NAME_OVERRIDE###" variables: + # vLLM runtime configuration (usually influenced by .env in local development) + - name: TRUST_REMOTE_CODE + description: "If True, allows the execution of code within the model files directory" + pattern: "^(True|False)$" + - name: TENSOR_PARALLEL_SIZE + description: "The number of tensor parallelism splits, typically used for model parallelism across GPUs" + pattern: "^[1-9][0-9]*$" + - name: ENFORCE_EAGER + description: "If set to True, enforces eager execution mode instead of lazy execution, impacting performance" + pattern: "^(True|False)$" + - name: GPU_MEMORY_UTILIZATION + description: "The fraction of GPU memory to be utilized, expressed as a decimal value between 0.01 and 0.99" + pattern: ^0\.(0[1-9]|[1-9][0-9])$ + - name: WORKER_USE_RAY + description: "If True, uses Ray for distributed worker management" + pattern: "^(True|False)$" + - name: ENGINE_USE_RAY + description: "If True, uses Ray for managing the execution 
engine" + pattern: "^(True|False)$" + - name: QUANTIZATION + description: "If None, allows vLLM to automatically detect via model files and configuration" + - name: LOAD_FORMAT + description: "If auto, allows vLLM to automatically detect via model files and configuration" + # LeapfrogAI SDK runtime configuration (usually influenced by config.yaml in development) + - name: MAX_CONTEXT_LENGTH + description: "The maximum number of tokens the model can process in a single input before the inferencing engine's overflow strategy is used" + pattern: "^[1-9][0-9]*$" + - name: STOP_TOKENS + description: "A set of special tokens that signal the model to stop producing further output, delimited using a comma and space" + pattern: ^(<[^,]+>\s*,\s*)*<[^,]+>\s*$ + - name: PROMPT_FORMAT_CHAT_SYSTEM + description: "Prompt template format for the LeapfrogAI SDK to consume and wrap" + - name: PROMPT_FORMAT_CHAT_USER + description: "Prompt template format for the LeapfrogAI SDK to consume and wrap" + - name: PROMPT_FORMAT_CHAT_ASSISTANT + description: "Prompt template format for the LeapfrogAI SDK to consume and wrap" + - name: TEMPERATURE + description: "Controls the randomness of the model's output" + pattern: ^(0(\.\d+)?|1(\.0+)?)$ + - name: TOP_P + description: "The cumulative probability threshold for token sampling, where 1.0 represents no restriction" + pattern: ^(0(\.\d+)?|1(\.0+)?)$ + - name: TOP_K + description: "The number of top-K tokens to consider during sampling, where 0 disables top-K sampling" + pattern: ^\d+$ + - name: REPETITION_PENALTY + description: "The penalty value for repetition in generation" + pattern: ^(0(\.\d+)?|1(\.0+)?)$ + - name: MAX_NEW_TOKENS + description: "Maximum new tokens to generate" + pattern: ^\d+$ + # Pod deployment configuration - name: GPU_LIMIT - description: The GPU limit for the model inferencing. Must be 1 or more. - default: "1" + description: "The GPU limit for the model inferencing. Must be 1 or more." pattern: "^[1-9][0-9]*$" - name: GPU_RUNTIME - description: The GPU runtime name for the model inferencing. - default: "nvidia" + description: "The GPU runtime name for the model inferencing." pattern: "^(nvidia)?$" - name: PVC_SIZE - description: Size of the PVC used for model storage. - default: "15Gi" + description: "Size of the PVC used for model storage." pattern: "^[0-9]+[a-zA-Z]+$" - name: PVC_ACCESS_MODE - description: Access mode of the PVC used for model storage. - default: "ReadWriteOnce" + description: "Access mode of the PVC used for model storage." pattern: "^(ReadWriteOnce|ReadOnlyMany|ReadWriteMany)$" - name: PVC_STORAGE_CLASS - description: Storage class of the PVC used for model storage. - default: "local-path" + description: "Storage class of the PVC used for model storage." 
components: - name: vllm-model @@ -37,33 +96,33 @@ components: only: flavor: upstream charts: - - name: vllm-model + - name: "###ZARF_PKG_TMPL_NAME_OVERRIDE###-model" namespace: leapfrogai localPath: chart - releaseName: vllm-model + releaseName: "###ZARF_PKG_TMPL_NAME_OVERRIDE###-model" # x-release-please-start-version version: 0.13.1 # x-release-please-end valuesFiles: - "values/upstream-values.yaml" images: - - ghcr.io/defenseunicorns/leapfrogai/vllm:###ZARF_PKG_TMPL_IMAGE_VERSION### - - cgr.dev/chainguard/bash:latest + - "ghcr.io/defenseunicorns/leapfrogai/vllm:###ZARF_PKG_TMPL_IMAGE_VERSION###" + - "cgr.dev/chainguard/bash:latest" dataInjections: - - source: .model/ + # location where locally downloaded model files are located + - source: ".model/" target: - namespace: leapfrogai - selector: app=lfai-vllm - container: data-loader - path: /data/.model + namespace: "leapfrogai" + selector: "app=lfai-###ZARF_PKG_TMPL_NAME_OVERRIDE###" + container: "data-loader" + # location in the container for injection of the model files + path: "###ZARF_PKG_TMPL_MODEL_PATH###" compress: true actions: onCreate: before: # NOTE: This assumes python is installed and in $PATH and 'huggingface_hub[cli,hf_transfer]' has been installed - - cmd: python src/model_download.py + - cmd: "python src/model_download.py" env: - - LAI_REPO_ID=TheBloke/Synthia-7B-v2.0-GPTQ - - LAI_REVISION=gptq-4bit-32g-actorder_True - - LAI_QUANTIZATION=gptq - - LAI_HF_HUB_ENABLE_HF_TRANSFER=1 + - LFAI_REPO_ID=###ZARF_PKG_TMPL_MODEL_REPO_ID### + - LFAI_REVISION=###ZARF_PKG_TMPL_MODEL_REVISION### diff --git a/packages/whisper/Dockerfile b/packages/whisper/Dockerfile index b3bed054a..a5513e9fa 100644 --- a/packages/whisper/Dockerfile +++ b/packages/whisper/Dockerfile @@ -37,8 +37,8 @@ COPY --from=builder /leapfrogai/.venv/ /leapfrogai/.venv/ # set the path to the cuda 11.8 dependencies ENV LD_LIBRARY_PATH \ - /leapfrogai/.venv/lib64/python3.11/site-packages/nvidia/cublas/lib:\ - /leapfrogai/.venv/lib64/python3.11/site-packages/nvidia/cudnn/lib +/leapfrogai/.venv/lib64/python3.11/site-packages/nvidia/cublas/lib:\ +/leapfrogai/.venv/lib64/python3.11/site-packages/nvidia/cudnn/lib COPY packages/whisper/main.py . diff --git a/packages/whisper/zarf.yaml b/packages/whisper/zarf.yaml index cc53f36b6..06ef87cce 100644 --- a/packages/whisper/zarf.yaml +++ b/packages/whisper/zarf.yaml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.14.0/zarf.schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/zarf.schema.json kind: ZarfPackageConfig metadata: diff --git a/src/leapfrogai_api/README.md b/src/leapfrogai_api/README.md index eec4dd0c6..214c986a9 100644 --- a/src/leapfrogai_api/README.md +++ b/src/leapfrogai_api/README.md @@ -56,3 +56,72 @@ See the ["Access" section of the DEVELOPMENT.md](../../docs/DEVELOPMENT.md#acces ### Tests See the [tests directory documentation](../../tests/README.md) for more details. + +### Reranking Configuration + +The LeapfrogAI API includes a Retrieval Augmented Generation (RAG) pipeline for enhanced question answering. This section details how to configure its reranking options. All RAG configurations are managed through the `/leapfrogai/v1/rag/configure` API endpoint. + +#### 1. Enabling/Disabling Reranking + +Reranking improves the accuracy and relevance of RAG responses. 
You can enable or disable it using the `enable_reranking` parameter: + +* **Enable Reranking:** Send a PATCH request to `/leapfrogai/v1/rag/configure` with the following JSON payload: + +```json +{ + "enable_reranking": true +} +``` + +* **Disable Reranking:** Send a PATCH request with: + +```json +{ + "enable_reranking": false +} +``` + +#### 2. Selecting a Reranking Model + +Multiple reranking models are supported, each offering different performance characteristics. Choose your preferred model using the `ranking_model` parameter. Ensure you've installed any necessary Python dependencies for your chosen model (see the [rerankers library documentation](https://github.com/AnswerDotAI/rerankers) on dependencies). + +* **Supported Models:** The system supports several models, including (but not limited to) `flashrank`, `rankllm`, `cross-encoder`, and `colbert`. Refer to the [rerankers library documentation](https://github.com/AnswerDotAI/rerankers) for a complete list and details on their capabilities. + +* **Model Selection:** Use a PATCH request to `/leapfrogai/v1/rag/configure` with the desired model: + +```json +{ + "enable_reranking": true, // Reranking must be enabled + "ranking_model": "rankllm" // Or another supported model +} +``` + +#### 3. Adjusting the Number of Results Before Reranking (`rag_top_k_when_reranking`) + +This parameter sets the number of top results retrieved from the vector database *before* the reranking process begins. A higher value increases the diversity of candidates considered for reranking but also increases processing time. A lower value can lead to missing relevant results if not carefully chosen. This setting is only relevant when reranking is enabled. + +* **Configuration:** Use a PATCH request to `/leapfrogai/v1/rag/configure` to set this value: + +```json +{ + "enable_reranking": true, + "ranking_model": "flashrank", + "rag_top_k_when_reranking": 150 // Adjust this value as needed +} +``` + +#### 4. Retrieving the Current RAG Configuration + +To check the current RAG configuration (including reranking status, model, and `rag_top_k_when_reranking`), send a GET request to `/leapfrogai/v1/rag/configure`. The response will be a JSON object containing all the current settings. + +#### 5. Example Configuration Flow + +1. **Initial Setup:** Start with reranking enabled using the default `flashrank` model and a `rag_top_k_when_reranking` value of 100. + +2. **Experiment with Models:** Test different reranking models (`rankllm`, `colbert`, etc.) by changing the `ranking_model` parameter and observing the impact on response quality. Adjust `rag_top_k_when_reranking` as needed to find the optimal balance between diversity and performance. + +3. **Fine-tuning:** Once you identify a suitable model, fine-tune the `rag_top_k_when_reranking` parameter for optimal performance. Monitor response times and quality to determine the best setting. + +4. **Disabling Reranking:** If needed, disable reranking by setting `"enable_reranking": false`. + +Remember to always consult the [rerankers library documentation](https://github.com/AnswerDotAI/rerankers) for information on supported models and their specific requirements. The API documentation provides further details on request formats and potential error responses. 
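Putting the pieces above together, a minimal client-side sketch of the configuration flow might look like the following. The base URL, token handling, and the use of `requests` are illustrative assumptions rather than anything prescribed by this repository; the endpoint path and JSON fields come from the section above.

```python
# Minimal sketch: toggle reranking and read back the active RAG configuration.
import os
import requests

API = os.environ.get("LEAPFROGAI_API_URL", "http://localhost:8080")  # assumed
HEADERS = {"Authorization": f"Bearer {os.environ['LEAPFROGAI_API_KEY']}"}  # assumed

# Enable reranking with a specific model and a wider pre-rerank candidate pool
requests.patch(
    f"{API}/leapfrogai/v1/rag/configure",
    json={
        "enable_reranking": True,
        "ranking_model": "flashrank",
        "rag_top_k_when_reranking": 150,
    },
    headers=HEADERS,
    timeout=30,
).raise_for_status()

# Retrieve the currently active RAG configuration
current = requests.get(
    f"{API}/leapfrogai/v1/rag/configure", headers=HEADERS, timeout=30
)
current.raise_for_status()
print(current.json())
```

Note that, later in this changeset, the RAG configuration router is only mounted when the `DEV` environment variable is set, so these endpoints are intended for development-time tuning.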
diff --git a/src/leapfrogai_api/backend/composer.py b/src/leapfrogai_api/backend/composer.py index b95e957a3..424e6c6d0 100644 --- a/src/leapfrogai_api/backend/composer.py +++ b/src/leapfrogai_api/backend/composer.py @@ -78,12 +78,25 @@ async def create_chat_messages( thread: Thread, additional_instructions: str | None, tool_resources: BetaThreadToolResources | None = None, - ) -> tuple[list[ChatMessage], list[str]]: + ) -> tuple[list[ChatMessage], SearchResponse]: + """Create chat message list for consumption by the LLM backend. + + Args: + request (RunCreateParamsRequest): The request object. + session (Session): The database session. + thread (Thread): The thread object. + additional_instructions (str | None): Additional instructions. + tool_resources (BetaThreadToolResources | None): The tool resources. + + Returns: + tuple[list[ChatMessage], SearchResponse]: The chat messages and any RAG responses. + """ # Get existing messages thread_messages: list[Message] = await self.list_messages(thread.id, session) + rag_responses: SearchResponse = SearchResponse(data=[]) if len(thread_messages) == 0: - return [], [] + return [], rag_responses def sort_by_created_at(msg: Message): return msg.created_at @@ -125,7 +138,6 @@ def sort_by_created_at(msg: Message): chat_messages.extend(chat_thread_messages) # 4 - The RAG results are appended behind the user's query - file_ids: set[str] = set() if request.can_use_rag(tool_resources) and chat_thread_messages: rag_message: str = "Here are relevant docs needed to reply:\n" @@ -138,22 +150,22 @@ def sort_by_created_at(msg: Message): vector_store_ids: list[str] = cast(list[str], file_search.vector_store_ids) for vector_store_id in vector_store_ids: - rag_responses: SearchResponse = await query_service.query_rag( + rag_responses = await query_service.query_rag( query=query_message.content_as_str(), vector_store_id=vector_store_id, ) + # Insert the RAG response messages just before the user's query for rag_response in rag_responses.data: - file_ids.add(rag_response.file_id) response_with_instructions: str = f"{rag_response.content}" rag_message += f"{response_with_instructions}\n" chat_messages.insert( len(chat_messages) - 1, # Insert right before the user message ChatMessage(role="user", content=rag_message), - ) # TODO: Should this go in user or something else like function? 
+ ) - return chat_messages, list(file_ids) + return chat_messages, rag_responses async def generate_message_for_thread( self, @@ -182,7 +194,7 @@ async def generate_message_for_thread( else: tool_resources = None - chat_messages, file_ids = await self.create_chat_messages( + chat_messages, rag_responses = await self.create_chat_messages( request, session, thread, additional_instructions, tool_resources ) @@ -204,13 +216,15 @@ async def generate_message_for_thread( choice: ChatChoice = cast(ChatChoice, chat_response.choices[0]) - message = from_text_to_message(choice.message.content_as_str(), file_ids) + message: Message = from_text_to_message( + text=choice.message.content_as_str(), search_responses=rag_responses + ) create_message_request = CreateMessageRequest( role=message.role, content=message.content, attachments=message.attachments, - metadata=message.metadata.__dict__ if message.metadata else None, + metadata=vars(message.metadata), ) await create_message_request.create_message( @@ -249,7 +263,7 @@ async def stream_generate_message_for_thread( else: tool_resources = None - chat_messages, file_ids = await self.create_chat_messages( + chat_messages, rag_responses = await self.create_chat_messages( request, session, thread, additional_instructions, tool_resources ) @@ -274,13 +288,15 @@ async def stream_generate_message_for_thread( yield "\n\n" # Create an empty message - new_message: Message = from_text_to_message("", []) + new_message: Message = from_text_to_message( + text="", search_responses=SearchResponse(data=[]) + ) create_message_request = CreateMessageRequest( role=new_message.role, content=new_message.content, attachments=new_message.attachments, - metadata=new_message.metadata.__dict__ if new_message.metadata else None, + metadata=vars(new_message.metadata), ) new_message = await create_message_request.create_message( @@ -319,7 +335,9 @@ async def stream_generate_message_for_thread( yield "\n\n" index += 1 - new_message.content = from_text_to_message(response, file_ids).content + new_message.content = from_text_to_message( + text=response, search_responses=rag_responses + ).content new_message.created_at = int(time.time()) crud_message = CRUDMessage(db=session) diff --git a/src/leapfrogai_api/backend/converters.py b/src/leapfrogai_api/backend/converters.py index 8d31b23ba..1fbb844a2 100644 --- a/src/leapfrogai_api/backend/converters.py +++ b/src/leapfrogai_api/backend/converters.py @@ -4,6 +4,7 @@ from openai.types.beta import AssistantStreamEvent from openai.types.beta.assistant_stream_event import ThreadMessageDelta from openai.types.beta.threads.file_citation_annotation import FileCitation +from openai.types.beta.threads.file_path_annotation import FilePathAnnotation from openai.types.beta.threads import ( MessageContentPartParam, MessageContent, @@ -17,6 +18,9 @@ FileCitationAnnotation, ) +from leapfrogai_api.typedef.vectorstores.search_types import SearchResponse +from leapfrogai_api.typedef.common import MetadataObject + def from_assistant_stream_event_to_str(stream_event: AssistantStreamEvent): return f"event: {stream_event.event}\ndata: {stream_event.data.model_dump_json()}" @@ -44,24 +48,41 @@ def from_content_param_to_content( ) -def from_text_to_message(text: str, file_ids: list[str]) -> Message: - all_file_ids: str = "" +def from_text_to_message(text: str, search_responses: SearchResponse | None) -> Message: + """Loads text and RAG search responses into a Message object - for file_id in file_ids: - all_file_ids += f" [{file_id}]" + Args: + text: The text to load 
into the message + search_responses: The RAG search responses to load into the message - message_content: TextContentBlock = TextContentBlock( - text=Text( - annotations=[ + Returns: + The OpenAI compliant Message object + """ + + all_file_ids: str = "" + all_vector_ids: list[str] = [] + annotations: list[FileCitationAnnotation | FilePathAnnotation] = [] + + if search_responses: + for search_response in search_responses.data: + all_file_ids += f"[{search_response.file_id}]" + all_vector_ids.append(search_response.id) + file_name = search_response.metadata.get("source", "source") + annotations.append( FileCitationAnnotation( - text=f"[{file_id}]", - file_citation=FileCitation(file_id=file_id, quote=""), + text=f"【4:0†{file_name}】", # TODO: What should these numbers be? https://github.com/defenseunicorns/leapfrogai/issues/1110 + file_citation=FileCitation( + file_id=search_response.file_id, quote=search_response.content + ), start_index=0, end_index=0, type="file_citation", ) - for file_id in file_ids - ], + ) + + message_content: TextContentBlock = TextContentBlock( + text=Text( + annotations=annotations, value=text + all_file_ids, ), type="text", @@ -75,7 +96,9 @@ def from_text_to_message(text: str, file_ids: list[str]) -> Message: thread_id="", content=[message_content], role="assistant", - metadata=None, + metadata=MetadataObject( + vector_ids=all_vector_ids.__str__(), + ), ) return new_message diff --git a/src/leapfrogai_api/backend/grpc_client.py b/src/leapfrogai_api/backend/grpc_client.py index f9082fdc2..9d18d2951 100644 --- a/src/leapfrogai_api/backend/grpc_client.py +++ b/src/leapfrogai_api/backend/grpc_client.py @@ -63,7 +63,7 @@ async def completion(model: Model, request: lfai.CompletionRequest): CompletionChoice( index=0, text=response.choices[0].text, - finish_reason=finish_reason_enum.to_string(), + finish_reason=finish_reason_enum.to_finish_reason(), logprobs=None, ) ], @@ -122,7 +122,7 @@ async def chat_completion(model: Model, request: lfai.ChatCompletionRequest): ).lower(), content=response.choices[0].chat_item.content, ), - finish_reason=finish_reason_enum.to_string(), + finish_reason=finish_reason_enum.to_finish_reason(), ) ], usage=Usage( diff --git a/src/leapfrogai_api/backend/helpers.py b/src/leapfrogai_api/backend/helpers.py index 65a2fd0b5..005111601 100644 --- a/src/leapfrogai_api/backend/helpers.py +++ b/src/leapfrogai_api/backend/helpers.py @@ -39,7 +39,7 @@ async def recv_completion( index=0, text=c.choices[0].text, logprobs=None, - finish_reason=finish_reason_enum.to_string(), + finish_reason=finish_reason_enum.to_finish_reason(), ) ], usage=Usage( @@ -77,7 +77,7 @@ async def recv_chat( delta=ChatDelta( role="assistant", content=c.choices[0].chat_item.content ), - finish_reason=finish_reason_enum.to_string(), + finish_reason=finish_reason_enum.to_finish_reason(), ) ], usage=Usage( diff --git a/src/leapfrogai_api/backend/rag/index.py b/src/leapfrogai_api/backend/rag/index.py index 764a65975..4c5d22470 100644 --- a/src/leapfrogai_api/backend/rag/index.py +++ b/src/leapfrogai_api/backend/rag/index.py @@ -81,6 +81,8 @@ async def index_file(self, vector_store_id: str, file_id: str) -> VectorStoreFil temp_file.write(file_bytes) temp_file.seek(0) documents = await load_file(temp_file.name) + for document in documents: + document.metadata["source"] = file_object.filename chunks = await split(documents) if len(chunks) == 0: diff --git a/src/leapfrogai_api/backend/rag/query.py b/src/leapfrogai_api/backend/rag/query.py index e5e0decce..bd0ae9bf6 100644 --- 
a/src/leapfrogai_api/backend/rag/query.py +++ b/src/leapfrogai_api/backend/rag/query.py @@ -1,11 +1,15 @@ """Service for querying the RAG model.""" +from rerankers.results import RankedResults from supabase import AClient as AsyncClient from langchain_core.embeddings import Embeddings from leapfrogai_api.backend.rag.leapfrogai_embeddings import LeapfrogAIEmbeddings from leapfrogai_api.data.crud_vector_content import CRUDVectorContent -from leapfrogai_api.typedef.vectorstores.search_types import SearchResponse +from leapfrogai_api.typedef.rag.rag_types import ConfigurationSingleton +from leapfrogai_api.typedef.vectorstores.search_types import SearchResponse, SearchItem from leapfrogai_api.backend.constants import TOP_K +from leapfrogai_api.utils.logging_tools import logger +from rerankers import Reranker # Allows for overwriting type of embeddings that will be instantiated embeddings_type: type[Embeddings] | type[LeapfrogAIEmbeddings] | None = ( @@ -22,7 +26,10 @@ def __init__(self, db: AsyncClient) -> None: self.embeddings = embeddings_type() async def query_rag( - self, query: str, vector_store_id: str, k: int = TOP_K + self, + query: str, + vector_store_id: str, + k: int = TOP_K, ) -> SearchResponse: """ Query the Vector Store. @@ -36,11 +43,70 @@ async def query_rag( SearchResponse: The search response from the vector store. """ + logger.debug("Beginning RAG query...") + # 1. Embed query vector = await self.embeddings.aembed_query(query) # 2. Perform similarity search + _k: int = k + if ConfigurationSingleton.get_instance().enable_reranking: + """Use the user specified top-k value unless reranking. + When reranking, use the reranking top-k value to get the initial results. + Then filter the list down later to just the k that the user has requested after reranking.""" + _k = ConfigurationSingleton.get_instance().rag_top_k_when_reranking + crud_vector_content = CRUDVectorContent(db=self.db) - return await crud_vector_content.similarity_search( - query=vector, vector_store_id=vector_store_id, k=k + results = await crud_vector_content.similarity_search( + query=vector, vector_store_id=vector_store_id, k=_k ) + + # 3. Rerank results + if ( + ConfigurationSingleton.get_instance().enable_reranking + and len(results.data) > 0 + ): + ranker = Reranker(ConfigurationSingleton.get_instance().ranking_model) + ranked_results: RankedResults = ranker.rank( + query=query, + docs=[result.content for result in results.data], + doc_ids=[result.id for result in results.data], + ) + results = rerank_search_response(results, ranked_results) + # Narrow down the results to the top-k value specified by the user + results.data = results.data[0:k] + + logger.debug("Ending RAG query...") + + return results + + +def rerank_search_response( + original_response: SearchResponse, ranked_results: RankedResults +) -> SearchResponse: + """ + Reorder the SearchResponse based on reranked results. + + Args: + original_response (SearchResponse): The original search response. + ranked_results (List[str]): List of ranked content strings. + + Returns: + SearchResponse: A new SearchResponse with reordered items. 
+ """ + # Create a mapping of id to original SearchItem + content_to_item = {item.id: item for item in original_response.data} + + # Create new SearchItems based on reranked results + ranked_items = [] + for content in ranked_results.results: + if content.document.doc_id in content_to_item: + item: SearchItem = content_to_item[content.document.doc_id] + item.rank = content.rank + item.score = content.score + ranked_items.append(item) + + ranked_response = SearchResponse(data=ranked_items) + + # Create a new SearchResponse with reranked items + return ranked_response diff --git a/src/leapfrogai_api/data/crud_vector_content.py b/src/leapfrogai_api/data/crud_vector_content.py index 18c87a18a..d53118986 100644 --- a/src/leapfrogai_api/data/crud_vector_content.py +++ b/src/leapfrogai_api/data/crud_vector_content.py @@ -1,20 +1,11 @@ """CRUD Operations for VectorStore.""" -from pydantic import BaseModel from supabase import AClient as AsyncClient from leapfrogai_api.data.crud_base import get_user_id import ast from leapfrogai_api.typedef.vectorstores import SearchItem, SearchResponse from leapfrogai_api.backend.constants import TOP_K - - -class Vector(BaseModel): - id: str = "" - vector_store_id: str - file_id: str - content: str - metadata: dict - embedding: list[float] +from leapfrogai_api.typedef.vectorstores import Vector class CRUDVectorContent: @@ -65,6 +56,30 @@ async def add_vectors(self, object_: list[Vector]) -> list[Vector]: except Exception as e: raise e + async def get_vector(self, vector_id: str) -> Vector: + """Get a vector by its ID.""" + data, _count = ( + await self.db.table(self.table_name) + .select("*") + .eq("id", vector_id) + .single() + .execute() + ) + + _, response = data + + if isinstance(response["embedding"], str): + response["embedding"] = self.string_to_float_list(response["embedding"]) + + return Vector( + id=response["id"], + vector_store_id=response["vector_store_id"], + file_id=response["file_id"], + content=response["content"], + metadata=response["metadata"], + embedding=response["embedding"], + ) + async def delete_vectors(self, vector_store_id: str, file_id: str) -> bool: """Delete a vector store file by its ID.""" data, _count = ( diff --git a/src/leapfrogai_api/main.py b/src/leapfrogai_api/main.py index 85822f7f3..108ccd51e 100644 --- a/src/leapfrogai_api/main.py +++ b/src/leapfrogai_api/main.py @@ -8,12 +8,13 @@ from fastapi import FastAPI from fastapi.exception_handlers import request_validation_exception_handler from fastapi.exceptions import RequestValidationError - +from fastapi.responses import RedirectResponse from leapfrogai_api.routers.base import router as base_router from leapfrogai_api.routers.leapfrogai import auth from leapfrogai_api.routers.leapfrogai import models as lfai_models from leapfrogai_api.routers.leapfrogai import vector_stores as lfai_vector_stores from leapfrogai_api.routers.leapfrogai import count as lfai_token_count +from leapfrogai_api.routers.leapfrogai import rag as lfai_rag from leapfrogai_api.routers.openai import ( assistants, audio, @@ -29,6 +30,7 @@ vector_stores, ) from leapfrogai_api.utils import get_model_config +from prometheus_fastapi_instrumentator import Instrumentator logging.basicConfig( level=os.getenv("LFAI_LOG_LEVEL", logging.INFO), @@ -61,6 +63,21 @@ async def lifespan(app: FastAPI): app = FastAPI(lifespan=lifespan) +@app.get("/", include_in_schema=False) +async def root(): + """Intercepts the root path and redirects to the API documentation.""" + return RedirectResponse(url="/docs") + + +Instrumentator( 
+ excluded_handlers=["/healthz", "/metrics"], + should_group_status_codes=False, +).instrument(app).expose( + app, + include_in_schema=False, +) + + @app.exception_handler(RequestValidationError) async def validation_exception_handler(request, exc): logger.error(f"The client sent invalid data!: {exc}") @@ -81,6 +98,8 @@ async def validation_exception_handler(request, exc): app.include_router(messages.router) app.include_router(runs_steps.router) app.include_router(lfai_vector_stores.router) +if os.environ.get("DEV"): + app.include_router(lfai_rag.router) app.include_router(lfai_token_count.router) app.include_router(lfai_models.router) # This should be at the bottom to prevent it preempting more specific runs endpoints diff --git a/src/leapfrogai_api/pyproject.toml b/src/leapfrogai_api/pyproject.toml index 01ae651b0..6779c3dbd 100644 --- a/src/leapfrogai_api/pyproject.toml +++ b/src/leapfrogai_api/pyproject.toml @@ -26,6 +26,8 @@ dependencies = [ "postgrest==0.16.11", # required by supabase, bug when using previous versions "openpyxl == 3.1.5", "psutil == 6.0.0", + "prometheus-fastapi-instrumentator == 7.0.0", + "rerankers[flashrank] == 0.5.3" ] requires-python = "~=3.11" diff --git a/src/leapfrogai_api/routers/leapfrogai/rag.py b/src/leapfrogai_api/routers/leapfrogai/rag.py new file mode 100644 index 000000000..3b61b616e --- /dev/null +++ b/src/leapfrogai_api/routers/leapfrogai/rag.py @@ -0,0 +1,56 @@ +"""LeapfrogAI endpoints for RAG.""" + +from fastapi import APIRouter +from leapfrogai_api.typedef.rag.rag_types import ( + ConfigurationSingleton, + ConfigurationPayload, +) +from leapfrogai_api.routers.supabase_session import Session +from leapfrogai_api.utils.logging_tools import logger + +router = APIRouter(prefix="/leapfrogai/v1/rag", tags=["leapfrogai/rag"]) + + +@router.patch("/configure") +async def configure(session: Session, configuration: ConfigurationPayload) -> None: + """ + Configures the RAG settings at runtime. + + Args: + session (Session): The database session. + configuration (Configuration): The configuration to update. + """ + + # We set the class variable to update the configuration globally + ConfigurationSingleton._instance = ConfigurationSingleton.get_instance().copy( + update=configuration.dict(exclude_none=True) + ) + + +@router.get("/configure") +async def get_configuration(session: Session) -> ConfigurationPayload: + """ + Retrieves the current RAG configuration. + + Args: + session (Session): The database session. + + Returns: + Configuration: The current RAG configuration. 
+ """ + + instance = ConfigurationSingleton.get_instance() + + # Create a new dictionary with only the relevant attributes + config_dict = { + key: value + for key, value in instance.__dict__.items() + if not key.startswith("_") # Exclude private attributes + } + + # Create a new ConfigurationPayload instance with the filtered dictionary + new_configuration = ConfigurationPayload(**config_dict) + + logger.info(f"The current configuration has been set to {new_configuration}") + + return new_configuration diff --git a/src/leapfrogai_api/routers/leapfrogai/vector_stores.py b/src/leapfrogai_api/routers/leapfrogai/vector_stores.py index cd2899925..5251440c1 100644 --- a/src/leapfrogai_api/routers/leapfrogai/vector_stores.py +++ b/src/leapfrogai_api/routers/leapfrogai/vector_stores.py @@ -4,6 +4,7 @@ from leapfrogai_api.backend.rag.query import QueryService from leapfrogai_api.typedef.vectorstores import SearchResponse from leapfrogai_api.routers.supabase_session import Session +from leapfrogai_api.data.crud_vector_content import CRUDVectorContent, Vector from leapfrogai_api.backend.constants import TOP_K router = APIRouter( @@ -32,7 +33,26 @@ async def search( """ query_service = QueryService(db=session) return await query_service.query_rag( - query=query, - vector_store_id=vector_store_id, - k=k, + query=query, vector_store_id=vector_store_id, k=k ) + + +@router.get("/vector/{vector_id}") +async def get_vector( + session: Session, + vector_id: str, +) -> Vector: + """ + Get a specfic vector by its ID. + + Args: + session (Session): The database session. + vector_id (str): The ID of the vector. + + Returns: + Vector: The vector object. + """ + crud_vector_content = CRUDVectorContent(db=session) + vector = await crud_vector_content.get_vector(vector_id=vector_id) + + return vector diff --git a/src/leapfrogai_api/typedef/__init__.py b/src/leapfrogai_api/typedef/__init__.py index d65f47391..6e8c30d7b 100644 --- a/src/leapfrogai_api/typedef/__init__.py +++ b/src/leapfrogai_api/typedef/__init__.py @@ -1 +1,4 @@ -from .common import Usage as Usage +from .common import ( + Usage as Usage, + MetadataObject as MetadataObject, +) diff --git a/src/leapfrogai_api/typedef/assistants/assistant_types.py b/src/leapfrogai_api/typedef/assistants/assistant_types.py index 168a0e357..a59fb8f8d 100644 --- a/src/leapfrogai_api/typedef/assistants/assistant_types.py +++ b/src/leapfrogai_api/typedef/assistants/assistant_types.py @@ -27,14 +27,13 @@ logger = logging.getLogger(__name__) -class CreateAssistantRequest(BaseModel): - """Request object for creating an assistant.""" +class BaseAssistantRequest(BaseModel): + """ + Base Request object for creating or modifying an assistant. + This class should not be used directly. Use CreateAssistantRequest or ModifyAssistantRequest instead. + Model field is required for CreateAssistantRequest, but optional for ModifyAssistantRequest. + """ - model: str = Field( - default="llama-cpp-python", - examples=["llama-cpp-python"], - description="The model to be used by the assistant. 
Default is 'llama-cpp-python'.", - ) name: str | None = Field( default=None, examples=["Froggy Assistant"], @@ -202,11 +201,24 @@ async def attach_existing_vector_store_from_id(): self.tool_resources.file_search.vector_stores = None -class ModifyAssistantRequest(CreateAssistantRequest): +class CreateAssistantRequest(BaseAssistantRequest): + """Request object for creating an assistant.""" + + model: str = Field( + default="llama-cpp-python", + examples=["llama-cpp-python"], + description="The model to be used by the assistant. Default is 'llama-cpp-python'.", + ) + + +class ModifyAssistantRequest(BaseAssistantRequest): """Request object for modifying an assistant.""" - # Inherits all fields from CreateAssistantRequest - # All fields are optional for modification + model: str | None = Field( + default=None, + examples=["llama-cpp-python", None], + description="The model to be used by the assistant. Default is 'llama-cpp-python'.", + ) class ListAssistantsResponse(BaseModel): diff --git a/src/leapfrogai_api/typedef/common.py b/src/leapfrogai_api/typedef/common.py index 879dc0855..f00b2c4ed 100644 --- a/src/leapfrogai_api/typedef/common.py +++ b/src/leapfrogai_api/typedef/common.py @@ -2,6 +2,17 @@ from leapfrogai_api.backend.constants import DEFAULT_MAX_COMPLETION_TOKENS +class MetadataObject: + """A metadata object that can be serialized back to a dict.""" + + def __init__(self, **kwargs): + for key, value in kwargs.items(): + setattr(self, key, value) + + def __getattr__(self, key): + return self.__dict__.get(key) + + class Usage(BaseModel): """Usage object.""" diff --git a/src/leapfrogai_api/typedef/completion/completion_types.py b/src/leapfrogai_api/typedef/completion/completion_types.py index 9a5cdad95..f92d91f28 100644 --- a/src/leapfrogai_api/typedef/completion/completion_types.py +++ b/src/leapfrogai_api/typedef/completion/completion_types.py @@ -7,15 +7,48 @@ class FinishReason(Enum): - NONE = 0 # Maps to "None" - STOP = 1 # Maps to "stop" - LENGTH = 2 # Maps to "length" + NONE = 0 + STOP = 1 + LENGTH = 2 - def to_string(self) -> str | None: + def to_finish_reason(self) -> str | None: + """ + Convert the enum member to its corresponding finish reason string. + + Returns: + str | None: The finish reason as a lowercase string if it is not NONE; otherwise, None. + """ if self == FinishReason.NONE: return None return self.name.lower() + @classmethod + def _missing_(cls, value): + """ + Handle missing values when creating an enum instance. + + This method is called when a value passed to the enum constructor does not match any existing enum members. + It provides custom logic to map input values to enum members or raises an error if the value is invalid. + + Args: + value: The value that was not found among the enum members. + + Returns: + FinishReason: The corresponding enum member after applying custom mapping. + + Raises: + ValueError: If the value cannot be mapped to any enum member. 
+ """ + # Handle custom value mappings + if value is None or value == "None": + return cls.NONE + elif value == "stop": + return cls.STOP + elif value == "length": + return cls.LENGTH + else: + raise ValueError(f"Invalid FinishReason value: {value}") + class CompletionChoice(BaseModel): """Choice object for completion.""" diff --git a/src/leapfrogai_api/typedef/rag/__init__.py b/src/leapfrogai_api/typedef/rag/__init__.py new file mode 100644 index 000000000..65c2e26cd --- /dev/null +++ b/src/leapfrogai_api/typedef/rag/__init__.py @@ -0,0 +1,3 @@ +from .rag_types import ( + ConfigurationSingleton as ConfigurationSingleton, +) diff --git a/src/leapfrogai_api/typedef/rag/rag_types.py b/src/leapfrogai_api/typedef/rag/rag_types.py new file mode 100644 index 000000000..17fe6601c --- /dev/null +++ b/src/leapfrogai_api/typedef/rag/rag_types.py @@ -0,0 +1,40 @@ +from typing import Optional + +from pydantic import BaseModel, Field + + +class ConfigurationSingleton: + """Singleton manager for ConfigurationPayload.""" + + _instance = None + + @classmethod + def get_instance(cls): + if cls._instance is None: + cls._instance = ConfigurationPayload() + cls._instance.enable_reranking = True + cls._instance.rag_top_k_when_reranking = 100 + cls._instance.ranking_model = "flashrank" + return cls._instance + + +class ConfigurationPayload(BaseModel): + """Response for RAG configuration.""" + + enable_reranking: Optional[bool] = Field( + default=None, + examples=[True, False], + description="Enables reranking for RAG queries", + ) + # More model info can be found here: + # https://github.com/AnswerDotAI/rerankers?tab=readme-ov-file + # https://pypi.org/project/rerankers/ + ranking_model: Optional[str] = Field( + default=None, + description="What model to use for reranking. Some options may require additional python dependencies.", + examples=["flashrank", "rankllm", "cross-encoder", "colbert"], + ) + rag_top_k_when_reranking: Optional[int] = Field( + default=None, + description="The top-k results returned from the RAG call before reranking", + ) diff --git a/src/leapfrogai_api/typedef/vectorstores/__init__.py b/src/leapfrogai_api/typedef/vectorstores/__init__.py index 1491a9767..dde3c2860 100644 --- a/src/leapfrogai_api/typedef/vectorstores/__init__.py +++ b/src/leapfrogai_api/typedef/vectorstores/__init__.py @@ -7,6 +7,7 @@ ListVectorStoresResponse as ListVectorStoresResponse, ) from .search_types import ( + Vector as Vector, SearchItem as SearchItem, SearchResponse as SearchResponse, ) diff --git a/src/leapfrogai_api/typedef/vectorstores/search_types.py b/src/leapfrogai_api/typedef/vectorstores/search_types.py index 76abb0822..ea69df1fe 100644 --- a/src/leapfrogai_api/typedef/vectorstores/search_types.py +++ b/src/leapfrogai_api/typedef/vectorstores/search_types.py @@ -1,6 +1,17 @@ +from typing import Optional + from pydantic import BaseModel, Field +class Vector(BaseModel): + id: str = "" + vector_store_id: str + file_id: str + content: str + metadata: dict + embedding: list[float] + + class SearchItem(BaseModel): """Object representing a single item in a search result.""" @@ -16,6 +27,14 @@ class SearchItem(BaseModel): similarity: float = Field( ..., description="Similarity score of this item to the query." 
) + rank: Optional[int] = Field( + default=None, + description="The rank of this search item after ranking has occurred.", + ) + score: Optional[float] = Field( + default=None, + description="The score of this search item after ranking has occurred.", + ) class SearchResponse(BaseModel): diff --git a/src/leapfrogai_api/utils/logging_tools.py b/src/leapfrogai_api/utils/logging_tools.py new file mode 100644 index 000000000..aa2448288 --- /dev/null +++ b/src/leapfrogai_api/utils/logging_tools.py @@ -0,0 +1,12 @@ +import os +import logging +from dotenv import load_dotenv + +load_dotenv() + +logging.basicConfig( + level=os.getenv("LFAI_LOG_LEVEL", logging.INFO), + format="%(name)s: %(asctime)s | %(levelname)s | %(filename)s:%(lineno)s >>> %(message)s", +) + +logger = logging.getLogger(__name__) diff --git a/src/leapfrogai_evals/pyproject.toml b/src/leapfrogai_evals/pyproject.toml index 1974da81a..9726c51c0 100644 --- a/src/leapfrogai_evals/pyproject.toml +++ b/src/leapfrogai_evals/pyproject.toml @@ -8,7 +8,7 @@ version = "0.13.1" dependencies = [ "deepeval == 1.3.0", - "openai == 1.42.0", + "openai == 1.45.0", "tqdm == 4.66.5", "python-dotenv == 1.0.1", "seaborn == 0.13.2", @@ -16,7 +16,8 @@ dependencies = [ "huggingface-hub == 0.24.6", "anthropic ==0.34.2", "instructor ==1.4.3", - "pyPDF2 == 3.0.1" + "pyPDF2 == 3.0.1", + "python-dotenv == 1.0.1" ] requires-python = "~=3.11" readme = "README.md" diff --git a/src/leapfrogai_ui/src/app.css b/src/leapfrogai_ui/src/app.css index b1f6ef61b..1afa8ffdd 100644 --- a/src/leapfrogai_ui/src/app.css +++ b/src/leapfrogai_ui/src/app.css @@ -9,6 +9,17 @@ scrollbar-color: #4b5563 #1f2937; } +/* Override TailwindCSS default Preflight styles for lists in messages */ +#message-content-container { + ul { + margin: revert; + padding: revert; + li { + list-style: square; + } + } +} + /*TODO - can we get rid of some of these?*/ @layer utilities { .content { diff --git a/src/leapfrogai_ui/src/app.d.ts b/src/leapfrogai_ui/src/app.d.ts index d493910cc..f19b0b155 100644 --- a/src/leapfrogai_ui/src/app.d.ts +++ b/src/leapfrogai_ui/src/app.d.ts @@ -23,7 +23,6 @@ declare global { profile?: Profile; threads?: LFThread[]; assistants?: LFAssistant[]; - assistant?: LFAssistant; files?: FileObject[]; keys?: APIKeyRow[]; } diff --git a/src/leapfrogai_ui/src/lib/components/AssistantAvatar.svelte b/src/leapfrogai_ui/src/lib/components/AssistantAvatar.svelte index ceca70148..a5e6d8105 100644 --- a/src/leapfrogai_ui/src/lib/components/AssistantAvatar.svelte +++ b/src/leapfrogai_ui/src/lib/components/AssistantAvatar.svelte @@ -33,8 +33,7 @@ ignoreLocation: true }; - $: fileNotUploaded = !$form.avatarFile; // if on upload tab, you must upload a file to enable save - + $: fileNotUploaded = !$form.avatar && !$form.avatarFile; // if on upload tab, you must upload a file to enable save $: avatarToShow = $form.avatarFile ? 
URL.createObjectURL($form.avatarFile) : $form.avatar; $: fileTooBig = $form.avatarFile?.size > MAX_AVATAR_SIZE; @@ -66,9 +65,7 @@ modalOpen = false; $form.avatar = originalAvatar; tempPictogram = selectedPictogramName; // reset to original pictogram - if ($form.avatar) { - $form.avatarFile = $form.avatar; // reset to original file - } else { + if (!$form.avatar) { clearFileInput(); } fileUploaderRef.value = ''; // Reset the file input value to ensure input event detection @@ -102,7 +99,7 @@ } } else { // pictogram tab - selectedPictogramName = tempPictogram; // TODO - can we remove this line + selectedPictogramName = tempPictogram; $form.pictogram = tempPictogram; $form.avatar = ''; // remove saved avatar clearFileInput(); @@ -197,8 +194,6 @@ > Upload from computer - - {#if hideUploader} @@ -222,7 +217,9 @@ - + { @@ -236,5 +233,6 @@ name="avatarFile" class="sr-only" /> - + + diff --git a/src/leapfrogai_ui/src/lib/components/AssistantCard.svelte b/src/leapfrogai_ui/src/lib/components/AssistantCard.svelte index ceabb4098..dfa88a3e4 100644 --- a/src/leapfrogai_ui/src/lib/components/AssistantCard.svelte +++ b/src/leapfrogai_ui/src/lib/components/AssistantCard.svelte @@ -1,10 +1,10 @@ diff --git a/src/leapfrogai_ui/src/lib/components/AssistantFileSelect.svelte b/src/leapfrogai_ui/src/lib/components/AssistantFileSelect.svelte index 74d0f8ba9..6cf1ab3e5 100644 --- a/src/leapfrogai_ui/src/lib/components/AssistantFileSelect.svelte +++ b/src/leapfrogai_ui/src/lib/components/AssistantFileSelect.svelte @@ -2,7 +2,7 @@ import { fade } from 'svelte/transition'; import { filesStore } from '$stores'; import type { FilesForm } from '$lib/types/files'; - import { ACCEPTED_FILE_TYPES, STANDARD_FADE_DURATION } from '$constants'; + import { ACCEPTED_DOC_TYPES, STANDARD_FADE_DURATION } from '$constants'; import AssistantFileDropdown from '$components/AssistantFileDropdown.svelte'; import FileUploaderItem from '$components/FileUploaderItem.svelte'; @@ -17,7 +17,7 @@ .filter((id) => $filesStore.selectedAssistantFileIds.includes(id)); - +
{#each filteredStoreFiles as file} diff --git a/src/leapfrogai_ui/src/lib/components/AssistantFileSelect.test.ts b/src/leapfrogai_ui/src/lib/components/AssistantFileSelect.test.ts index 6bb15f2ae..61c3efed9 100644 --- a/src/leapfrogai_ui/src/lib/components/AssistantFileSelect.test.ts +++ b/src/leapfrogai_ui/src/lib/components/AssistantFileSelect.test.ts @@ -4,14 +4,14 @@ import AssistantFileSelect from '$components/AssistantFileSelect.svelte'; import { superValidate } from 'sveltekit-superforms'; import { yup } from 'sveltekit-superforms/adapters'; import { filesSchema } from '$schemas/files'; -import type { FileRow } from '$lib/types/files'; +import type { LFFileObject } from '$lib/types/files'; import { getUnixSeconds } from '$helpers/dates'; import userEvent from '@testing-library/user-event'; const filesForm = await superValidate({}, yup(filesSchema), { errors: false }); describe('AssistantFileSelect', () => { - const mockFiles: FileRow[] = [ + const mockFiles: LFFileObject[] = [ { id: '1', filename: 'file1.pdf', status: 'complete', created_at: getUnixSeconds(new Date()) }, { id: '2', filename: 'file2.pdf', status: 'error', created_at: getUnixSeconds(new Date()) }, { id: '3', filename: 'file3.txt', status: 'uploading', created_at: getUnixSeconds(new Date()) } diff --git a/src/leapfrogai_ui/src/lib/components/AssistantForm.svelte b/src/leapfrogai_ui/src/lib/components/AssistantForm.svelte index 815e009b2..8e7c97a5a 100644 --- a/src/leapfrogai_ui/src/lib/components/AssistantForm.svelte +++ b/src/leapfrogai_ui/src/lib/components/AssistantForm.svelte @@ -6,11 +6,11 @@ } from '$lib/constants'; import { superForm } from 'sveltekit-superforms'; import { page } from '$app/stores'; - import { beforeNavigate, goto, invalidate } from '$app/navigation'; + import { beforeNavigate, goto } from '$app/navigation'; import { Button, Modal, P } from 'flowbite-svelte'; import Slider from '$components/Slider.svelte'; import { yup } from 'sveltekit-superforms/adapters'; - import { filesStore, toastStore, uiStore } from '$stores'; + import { assistantsStore, filesStore, toastStore, uiStore } from '$stores'; import { assistantInputSchema, editAssistantInputSchema } from '$lib/schemas/assistants'; import type { NavigationTarget } from '@sveltejs/kit'; import { onMount } from 'svelte'; @@ -25,6 +25,10 @@ let bypassCancelWarning = false; + $: assistant = $assistantsStore.assistants.find( + (assistant) => assistant.id === $page.params.assistantId + ); + const { form, errors, enhance, submitting, isTainted, delayed } = superForm(data.form, { invalidateAll: false, validators: yup(isEditMode ? editAssistantInputSchema : assistantInputSchema), @@ -55,8 +59,12 @@ } bypassCancelWarning = true; - await invalidate('lf:assistants'); - goto(result.data.redirectUrl); + if (isEditMode) { + assistantsStore.updateAssistant(result.data.assistant); + } else { + assistantsStore.addAssistant(result.data.assistant); + } + await goto(result.data.redirectUrl); } else if (result.type === 'failure') { // 400 errors will show errors for the respective fields, do not show toast if (result.status !== 400) { @@ -174,7 +182,7 @@
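
Editor's note (not part of the patch): the AssistantForm.svelte hunk above replaces `invalidate('lf:assistants')` with direct writes to the new `assistantsStore`, so a successful create or edit updates the client-side cache without re-running the layout load. A condensed sketch of that pattern, assuming a hypothetical `onAssistantSaved` handler name (only the store methods and import paths are taken from this patch):

```ts
import { assistantsStore } from '$stores';
import { goto } from '$app/navigation';
import type { LFAssistant } from '$lib/types/assistants';

// Hypothetical success handler, illustrative only: instead of re-running the
// load function via invalidate('lf:assistants'), the assistant returned by the
// form action is written straight into the client-side store before navigating.
async function onAssistantSaved(
  assistant: LFAssistant,
  isEditMode: boolean,
  redirectUrl: string
) {
  if (isEditMode) {
    assistantsStore.updateAssistant(assistant); // replace the matching entry by id
  } else {
    assistantsStore.addAssistant(assistant); // append the newly created assistant
  }
  await goto(redirectUrl);
}
```

The trade-off is that the store becomes the source of truth between navigations, which is why the form above derives `assistant` reactively from `$assistantsStore.assistants` keyed on `$page.params.assistantId` rather than from page data.
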
diff --git a/src/leapfrogai_ui/src/lib/components/AssistantProgressToast.test.ts b/src/leapfrogai_ui/src/lib/components/AssistantProgressToast.test.ts index fb21bd849..fc1d5c5e4 100644 --- a/src/leapfrogai_ui/src/lib/components/AssistantProgressToast.test.ts +++ b/src/leapfrogai_ui/src/lib/components/AssistantProgressToast.test.ts @@ -10,7 +10,7 @@ import AssistantProgressToast from '$components/AssistantProgressToast.svelte'; import { render, screen } from '@testing-library/svelte'; import filesStore from '$stores/filesStore'; import { getFakeFiles } from '$testUtils/fakeData'; -import { convertFileObjectToFileRows } from '$helpers/fileHelpers'; +import { convertFileObjectToLFFileObject } from '$helpers/fileHelpers'; import { delay } from 'msw'; import { vi } from 'vitest'; import { toastStore } from '$stores'; @@ -27,7 +27,7 @@ describe('AssistantProgressToast', () => { fileIds: files.map((file) => file.id), vectorStoreId: '123' }; - filesStore.setFiles(convertFileObjectToFileRows(files)); + filesStore.setFiles(convertFileObjectToLFFileObject(files)); const timeout = 10; //10ms render(AssistantProgressToast, { timeout, toast }); //10ms timeout diff --git a/src/leapfrogai_ui/src/lib/components/ChatFileUpload.svelte b/src/leapfrogai_ui/src/lib/components/ChatFileUpload.svelte index e01575ce2..73356ee1a 100644 --- a/src/leapfrogai_ui/src/lib/components/ChatFileUpload.svelte +++ b/src/leapfrogai_ui/src/lib/components/ChatFileUpload.svelte @@ -1,7 +1,7 @@
diff --git a/src/leapfrogai_ui/src/lib/components/modals/ConfirmFilesDeleteModal.svelte b/src/leapfrogai_ui/src/lib/components/modals/ConfirmFilesDeleteModal.svelte index d80d93147..d581f83cd 100644 --- a/src/leapfrogai_ui/src/lib/components/modals/ConfirmFilesDeleteModal.svelte +++ b/src/leapfrogai_ui/src/lib/components/modals/ConfirmFilesDeleteModal.svelte @@ -3,7 +3,6 @@ import type { Assistant } from 'openai/resources/beta/assistants'; import { filesStore, toastStore } from '$stores'; import { ExclamationCircleOutline } from 'flowbite-svelte-icons'; - import { invalidate } from '$app/navigation'; import { createEventDispatcher } from 'svelte'; import vectorStatusStore from '$stores/vectorStatusStore'; @@ -12,6 +11,8 @@ export let deleting: boolean; export let affectedAssistants: Assistant[]; + $: isMultipleFiles = $filesStore.selectedFileManagementFileIds.length > 1; + const dispatch = createEventDispatcher(); const handleCancel = () => { @@ -20,34 +21,43 @@ affectedAssistantsLoading = false; }; + const handleDeleteError = () => { + toastStore.addToast({ + kind: 'error', + title: `Error Deleting ${isMultipleFiles ? 'Files' : 'File'}` + }); + }; + const handleConfirmedDelete = async () => { - const isMultipleFiles = $filesStore.selectedFileManagementFileIds.length > 1; deleting = true; - const res = await fetch('/api/files/delete', { - method: 'DELETE', - body: JSON.stringify({ ids: $filesStore.selectedFileManagementFileIds }), - headers: { - 'Content-Type': 'application/json' - } - }); - open = false; - await invalidate('lf:files'); - if (res.ok) { - toastStore.addToast({ - kind: 'success', - title: `${isMultipleFiles ? 'Files' : 'File'} Deleted` - }); - } else { - toastStore.addToast({ - kind: 'error', - title: `Error Deleting ${isMultipleFiles ? 'Files' : 'File'}` + try { + const res = await fetch('/api/files/delete', { + method: 'DELETE', + body: JSON.stringify({ ids: $filesStore.selectedFileManagementFileIds }), + headers: { + 'Content-Type': 'application/json' + } }); - } - vectorStatusStore.removeFiles($filesStore.selectedFileManagementFileIds); - filesStore.setSelectedFileManagementFileIds([]); + if (res.ok) { + open = false; + for (const id of $filesStore.selectedFileManagementFileIds) { + filesStore.removeFile(id); + } + vectorStatusStore.removeFiles($filesStore.selectedFileManagementFileIds); + filesStore.setSelectedFileManagementFileIds([]); + toastStore.addToast({ + kind: 'success', + title: `${isMultipleFiles ? 'Files' : 'File'} Deleted` + }); + dispatch('delete'); + } else { + handleDeleteError(); + } + } catch { + handleDeleteError(); + } deleting = false; - dispatch('delete'); }; $: fileNames = $filesStore.files diff --git a/src/leapfrogai_ui/src/lib/components/modals/DeleteApiKeyModal.svelte b/src/leapfrogai_ui/src/lib/components/modals/DeleteApiKeyModal.svelte index 58b0d9d58..c0c7083a8 100644 --- a/src/leapfrogai_ui/src/lib/components/modals/DeleteApiKeyModal.svelte +++ b/src/leapfrogai_ui/src/lib/components/modals/DeleteApiKeyModal.svelte @@ -10,10 +10,12 @@ export let selectedRowIds: string[]; export let deleting: boolean; + $: isMultiple = selectedRowIds.length > 1; + const dispatch = createEventDispatcher(); - $: keyNames = $page.data.keys - ? $page.data.keys + $: keyNames = $page.data.apiKeys + ? $page.data.apiKeys .map((key) => { if (selectedRowIds.includes(key.id)) return key.name; }) @@ -25,27 +27,35 @@ confirmDeleteModalOpen = false; }; + const handleDeleteError = () => { + toastStore.addToast({ + kind: 'error', + title: `Error Deleting ${isMultiple ? 
'Keys' : 'Key'}` + }); + }; + const handleDelete = async () => { deleting = true; - const isMultiple = selectedRowIds.length > 1; - const res = await fetch('/api/api-keys/delete', { - body: JSON.stringify({ ids: selectedRowIds }), - method: 'DELETE' - }); - dispatch('delete', selectedRowIds); - deleting = false; - if (res.ok) { - toastStore.addToast({ - kind: 'success', - title: `${isMultiple ? 'Keys' : 'Key'} Deleted` - }); - } else { - toastStore.addToast({ - kind: 'error', - title: `Error Deleting ${isMultiple ? 'Keys' : 'Key'}` + try { + const res = await fetch('/api/api-keys/delete', { + body: JSON.stringify({ ids: selectedRowIds }), + method: 'DELETE' }); + if (res.ok) { + dispatch('delete', selectedRowIds); + toastStore.addToast({ + kind: 'success', + title: `${isMultiple ? 'Keys' : 'Key'} Deleted` + }); + await invalidate('lf:api-keys'); + } else { + handleDeleteError(); + } + } catch { + handleDeleteError(); } - await invalidate('lf:api-keys'); + + deleting = false; }; diff --git a/src/leapfrogai_ui/src/lib/constants/errors.ts b/src/leapfrogai_ui/src/lib/constants/errors.ts index e26224e6f..a34bd5906 100644 --- a/src/leapfrogai_ui/src/lib/constants/errors.ts +++ b/src/leapfrogai_ui/src/lib/constants/errors.ts @@ -1,2 +1,4 @@ export const FILE_CONTEXT_TOO_LARGE_ERROR_MSG = 'Error: Upload fewer or smaller files'; export const ERROR_UPLOADING_FILE_MSG = 'Error uploading file'; +export const ASSISTANT_ERROR_MSG = + "I'm sorry but I've experienced an error. Please try again, or contact support."; diff --git a/src/leapfrogai_ui/src/lib/constants/index.ts b/src/leapfrogai_ui/src/lib/constants/index.ts index 5ad6cac6d..08e813bf0 100644 --- a/src/leapfrogai_ui/src/lib/constants/index.ts +++ b/src/leapfrogai_ui/src/lib/constants/index.ts @@ -52,7 +52,7 @@ export const ACCEPTED_AUDIO_FILE_TYPES = [ '.webm' ]; -export const ACCEPTED_FILE_TYPES = [ +export const ACCEPTED_DOC_TYPES = [ '.pdf', '.txt', '.text', @@ -62,7 +62,10 @@ export const ACCEPTED_FILE_TYPES = [ '.pptx', '.doc', '.docx', - '.csv', + '.csv' +]; +export const ACCEPTED_DOC_AND_AUDIO_FILE_TYPES = [ + ...ACCEPTED_DOC_TYPES, ...ACCEPTED_AUDIO_FILE_TYPES ]; @@ -108,7 +111,7 @@ export const NO_FILE_ERROR_TEXT = 'Please upload an image or select a pictogram' export const AVATAR_FILE_SIZE_ERROR_TEXT = `File must be less than ${MAX_AVATAR_SIZE / 1000000} MB`; export const FILE_SIZE_ERROR_TEXT = `File must be less than ${MAX_FILE_SIZE / 1000000} MB`; export const AUDIO_FILE_SIZE_ERROR_TEXT = `Audio file must be less than ${MAX_AUDIO_FILE_SIZE / 1000000} MB`; -export const INVALID_FILE_TYPE_ERROR_TEXT = `Invalid file type, accepted types are: ${ACCEPTED_FILE_TYPES.join(', ')}`; +export const INVALID_FILE_TYPE_ERROR_TEXT = `Invalid file type, accepted types are: ${ACCEPTED_DOC_AND_AUDIO_FILE_TYPES.join(', ')}`; export const INVALID_AUDIO_FILE_TYPE_ERROR_TEXT = `Invalid file type, accepted types are: ${ACCEPTED_AUDIO_FILE_TYPES.join(', ')}`; export const NO_SELECTED_ASSISTANT_ID = 'noSelectedAssistantId'; diff --git a/src/leapfrogai_ui/src/lib/constants/toastMessages.ts b/src/leapfrogai_ui/src/lib/constants/toastMessages.ts index e431348a5..5bcadadc8 100644 --- a/src/leapfrogai_ui/src/lib/constants/toastMessages.ts +++ b/src/leapfrogai_ui/src/lib/constants/toastMessages.ts @@ -19,7 +19,7 @@ export const ERROR_GETTING_ASSISTANT_MSG_TOAST = ( ): ToastData => ({ kind: 'error', title: 'Error', - subtitle: 'Error getting Assistant Response', + subtitle: 'Error getting assistant response', ...override }); diff --git 
a/src/leapfrogai_ui/src/lib/helpers/chatHelpers.ts b/src/leapfrogai_ui/src/lib/helpers/chatHelpers.ts index 72db4dd58..ef5961ea5 100644 --- a/src/leapfrogai_ui/src/lib/helpers/chatHelpers.ts +++ b/src/leapfrogai_ui/src/lib/helpers/chatHelpers.ts @@ -263,3 +263,11 @@ export const getCitations = (message: OpenAIMessage, files: FileObject[]) => { } return []; }; + +export const refetchThread = async (threadId: string) => { + const res = await fetch(`/api/threads/${threadId}`); + if (res.ok) { + const thread = await res.json(); + threadsStore.updateThread(thread); + } +}; diff --git a/src/leapfrogai_ui/src/lib/helpers/fileHelpers.ts b/src/leapfrogai_ui/src/lib/helpers/fileHelpers.ts index a0cd0fc5b..b6d229336 100644 --- a/src/leapfrogai_ui/src/lib/helpers/fileHelpers.ts +++ b/src/leapfrogai_ui/src/lib/helpers/fileHelpers.ts @@ -1,11 +1,10 @@ -import type { FileMetadata, FileRow } from '$lib/types/files'; +import type { FileMetadata, LFFileObject } from '$lib/types/files'; import type { FileObject } from 'openai/resources/files'; import { FILE_CONTEXT_TOO_LARGE_ERROR_MSG } from '$constants/errors'; -export const convertFileObjectToFileRows = (files: FileObject[]): FileRow[] => +export const convertFileObjectToLFFileObject = (files: FileObject[]): LFFileObject[] => files.map((file) => ({ - id: file.id, - filename: file.filename, + ...file, created_at: file.created_at * 1000, status: 'hide' })); diff --git a/src/leapfrogai_ui/src/lib/mocks/file-mocks.ts b/src/leapfrogai_ui/src/lib/mocks/file-mocks.ts index f4ff4460f..88fa6d566 100644 --- a/src/leapfrogai_ui/src/lib/mocks/file-mocks.ts +++ b/src/leapfrogai_ui/src/lib/mocks/file-mocks.ts @@ -78,7 +78,7 @@ export const mockConvertFileErrorNoId = () => { export const mockDeleteCheck = (assistantsToReturn: LFAssistant[]) => { server.use( - http.post('/api/files/delete-check', async () => { + http.post('/api/files/delete/check', async () => { await delay(100); return HttpResponse.json(assistantsToReturn); }) diff --git a/src/leapfrogai_ui/src/lib/stores/assistantsStore.ts b/src/leapfrogai_ui/src/lib/stores/assistantsStore.ts new file mode 100644 index 000000000..b0356c576 --- /dev/null +++ b/src/leapfrogai_ui/src/lib/stores/assistantsStore.ts @@ -0,0 +1,57 @@ +import { writable } from 'svelte/store'; +import type { LFAssistant } from '$lib/types/assistants'; +import { NO_SELECTED_ASSISTANT_ID } from '$constants'; + +type AssistantsStore = { + assistants: LFAssistant[]; + selectedAssistantId?: string; +}; + +const defaultValues: AssistantsStore = { + assistants: [], + selectedAssistantId: NO_SELECTED_ASSISTANT_ID +}; +const createAssistantsStore = () => { + const { subscribe, set, update } = writable({ ...defaultValues }); + + return { + subscribe, + set, + update, + setAssistants: (newAssistants: LFAssistant[]) => { + update((old) => ({ ...old, assistants: newAssistants })); + }, + setSelectedAssistantId: (selectedAssistantId: string) => { + update((old) => { + return { ...old, selectedAssistantId }; + }); + }, + addAssistant: (newAssistant: LFAssistant) => { + update((old) => ({ ...old, assistants: [...old.assistants, newAssistant] })); + }, + removeAssistant: (id: string) => { + update((old) => { + const updatedAssistants = [...old.assistants]; + const assistantIndex = updatedAssistants.findIndex((assistant) => assistant.id === id); + if (assistantIndex > -1) { + updatedAssistants.splice(assistantIndex, 1); + } + return { ...old, assistants: updatedAssistants }; + }); + }, + updateAssistant: (newAssistant: LFAssistant) => { + update((old) => { + 
const updatedAssistants = [...old.assistants]; + const assistantIndex = updatedAssistants.findIndex( + (assistant) => assistant.id === newAssistant.id + ); + if (assistantIndex > -1) { + updatedAssistants[assistantIndex] = newAssistant; + } + return { ...old, assistants: updatedAssistants }; + }); + } + }; +}; +const assistantsStore = createAssistantsStore(); +export default assistantsStore; diff --git a/src/leapfrogai_ui/src/lib/stores/filesStore.ts b/src/leapfrogai_ui/src/lib/stores/filesStore.ts index c6ba33db8..5e0eeea19 100644 --- a/src/leapfrogai_ui/src/lib/stores/filesStore.ts +++ b/src/leapfrogai_ui/src/lib/stores/filesStore.ts @@ -1,14 +1,16 @@ import { derived, writable } from 'svelte/store'; import type { FileObject } from 'openai/resources/files'; -import type { FileRow } from '$lib/types/files'; +import type { LFFileObject, PendingOrErrorFile } from '$lib/types/files'; import { toastStore } from '$stores/index'; +import { getUnixSeconds } from '$helpers/dates'; type FilesStore = { - files: FileRow[]; + files: LFFileObject[]; selectedFileManagementFileIds: string[]; selectedAssistantFileIds: string[]; uploading: boolean; - pendingUploads: FileRow[]; + pendingUploads: PendingOrErrorFile[]; + needsUpdate?: boolean; }; const defaultValues: FilesStore = { @@ -16,7 +18,8 @@ const defaultValues: FilesStore = { selectedFileManagementFileIds: [], selectedAssistantFileIds: [], uploading: false, - pendingUploads: [] + pendingUploads: [], + needsUpdate: false }; const createFilesStore = () => { @@ -27,16 +30,32 @@ const createFilesStore = () => { set, update, setUploading: (status: boolean) => update((old) => ({ ...old, uploading: status })), - - setFiles: (newFiles: FileRow[]) => { + removeFile: (id: string) => { + update((old) => { + const updatedFiles = [...old.files]; + const fileIndex = updatedFiles.findIndex((file) => file.id === id); + if (fileIndex > -1) { + updatedFiles.splice(fileIndex, 1); + } + return { ...old, files: updatedFiles }; + }); + }, + setFiles: (newFiles: LFFileObject[]) => { update((old) => ({ ...old, files: [...newFiles] })); }, - setPendingUploads: (newFiles: FileRow[]) => { + setPendingUploads: (newFiles: LFFileObject[]) => { update((old) => ({ ...old, pendingUploads: [...newFiles] })); }, setSelectedFileManagementFileIds: (newIds: string[]) => { update((old) => ({ ...old, selectedFileManagementFileIds: newIds })); }, + setNeedsUpdate: (status: boolean) => { + update((old) => ({ ...old, needsUpdate: status })); + }, + fetchFiles: async () => { + const files = await fetch('/api/files').then((res) => res.json()); + update((old) => ({ ...old, files, needsUpdate: false })); + }, addSelectedFileManagementFileId: (id: string) => { update((old) => ({ ...old, @@ -66,7 +85,7 @@ const createFilesStore = () => { }, addUploadingFiles: (files: File[], { autoSelectUploadedFiles = false } = {}) => { update((old) => { - const newFiles: FileRow[] = []; + const newFiles: Pick[] = []; const newFileIds: string[] = []; for (const file of files) { const id = `${file.name}-${new Date()}`; // temp id @@ -74,7 +93,7 @@ const createFilesStore = () => { id, filename: file.name, status: 'uploading', - created_at: null + created_at: getUnixSeconds(new Date()) }); newFileIds.push(id); } @@ -87,16 +106,14 @@ const createFilesStore = () => { }; }); }, - updateWithUploadErrors: (newFiles: Array) => { + updateWithUploadErrors: (newFiles: Array) => { update((old) => { - const failedRows: FileRow[] = []; + const failedRows: LFFileObject[] = []; for (const file of newFiles) { if (file.status === 
'error') { - const row: FileRow = { - id: file.id, - filename: file.filename, - created_at: file.created_at, + const row: LFFileObject = { + ...file, status: 'error' }; @@ -126,15 +143,13 @@ const createFilesStore = () => { }; }); }, - updateWithUploadSuccess: (newFiles: Array) => { + updateWithUploadSuccess: (newFiles: Array) => { update((old) => { const successRows = [...old.files]; for (const file of newFiles) { - const row: FileRow = { - id: file.id, - filename: file.filename, - created_at: file.created_at, + const row: LFFileObject = { + ...file, status: 'complete' }; diff --git a/src/leapfrogai_ui/src/lib/stores/index.ts b/src/leapfrogai_ui/src/lib/stores/index.ts index 90cac2ebd..66da975b0 100644 --- a/src/leapfrogai_ui/src/lib/stores/index.ts +++ b/src/leapfrogai_ui/src/lib/stores/index.ts @@ -2,3 +2,4 @@ export { default as threadsStore } from './threads'; export { default as toastStore } from './toast'; export { default as uiStore } from './ui'; export { default as filesStore } from './filesStore'; +export { default as assistantsStore } from './assistantsStore'; diff --git a/src/leapfrogai_ui/src/lib/stores/threads.ts b/src/leapfrogai_ui/src/lib/stores/threads.ts index 0b9738fbb..a79c66f1a 100644 --- a/src/leapfrogai_ui/src/lib/stores/threads.ts +++ b/src/leapfrogai_ui/src/lib/stores/threads.ts @@ -1,6 +1,6 @@ import { writable } from 'svelte/store'; -import { MAX_LABEL_SIZE, NO_SELECTED_ASSISTANT_ID } from '$lib/constants'; -import { goto, invalidate } from '$app/navigation'; +import { MAX_LABEL_SIZE } from '$lib/constants'; +import { goto } from '$app/navigation'; import { error } from '@sveltejs/kit'; import { type Message as VercelAIMessage } from '@ai-sdk/svelte'; import { toastStore } from '$stores'; @@ -12,7 +12,6 @@ import type { Message } from 'ai'; type ThreadsStore = { threads: LFThread[]; - selectedAssistantId: string; sendingBlocked: boolean; lastVisitedThreadId: string; streamingMessage: VercelAIMessage | null; @@ -20,7 +19,6 @@ type ThreadsStore = { const defaultValues: ThreadsStore = { threads: [], - selectedAssistantId: NO_SELECTED_ASSISTANT_ID, sendingBlocked: false, lastVisitedThreadId: '', streamingMessage: null @@ -97,11 +95,6 @@ const createThreadsStore = () => { setLastVisitedThreadId: (id: string) => { update((old) => ({ ...old, lastVisitedThreadId: id })); }, - setSelectedAssistantId: (selectedAssistantId: string) => { - update((old) => { - return { ...old, selectedAssistantId }; - }); - }, // Important - this method has a built in delay to ensure next user message has a different timestamp when setting to false (unblocking) setSendingBlocked: async (status: boolean) => { if (!status && process.env.NODE_ENV !== 'test') { @@ -303,7 +296,6 @@ const createThreadsStore = () => { title: 'Error', subtitle: `Error deleting message.` }); - await invalidate('lf:threads'); } }, updateThreadLabel: async (id: string, newLabel: string) => { diff --git a/src/leapfrogai_ui/src/lib/types/files.d.ts b/src/leapfrogai_ui/src/lib/types/files.d.ts index 599260041..17355cd32 100644 --- a/src/leapfrogai_ui/src/lib/types/files.d.ts +++ b/src/leapfrogai_ui/src/lib/types/files.d.ts @@ -1,16 +1,16 @@ import type { SuperValidated } from 'sveltekit-superforms'; +import type { FileObject } from 'openai/resources/files'; export type FileUploadStatus = 'uploading' | 'complete' | 'error' | 'hide'; export type VectorStatus = 'in_progress' | 'completed' | 'cancelled' | 'failed'; -export type FileRow = { - id: string; - filename: string; - created_at: number | null; +export type 
LFFileObject = Omit & { status: FileUploadStatus; }; +export type PendingOrErrorFile = Pick; + // This type is taken from SuperValidated, leaving the any export type FilesForm = SuperValidated< { files?: (File | null | undefined)[] | undefined }, diff --git a/src/leapfrogai_ui/src/routes/api/api-keys/delete/+server.ts b/src/leapfrogai_ui/src/routes/api/api-keys/delete/+server.ts index 785c289ac..eacdd3b2d 100644 --- a/src/leapfrogai_ui/src/routes/api/api-keys/delete/+server.ts +++ b/src/leapfrogai_ui/src/routes/api/api-keys/delete/+server.ts @@ -10,7 +10,6 @@ export const DELETE: RequestHandler = async ({ request, locals: { session } }) = if (!session) { error(401, 'Unauthorized'); } - let requestData: { ids: string }; // Validate request body diff --git a/src/leapfrogai_ui/src/routes/api/chat/assistants/+server.ts b/src/leapfrogai_ui/src/routes/api/chat/assistants/+server.ts index b5152cc50..20558f455 100644 --- a/src/leapfrogai_ui/src/routes/api/chat/assistants/+server.ts +++ b/src/leapfrogai_ui/src/routes/api/chat/assistants/+server.ts @@ -46,7 +46,6 @@ export const POST: RequestHandler = async ({ request, locals: { session } }) => throw new Error('assistant_id is not set'); })() }); - // forward run status would stream message deltas let runResult = await forwardStream(runStream); diff --git a/src/leapfrogai_ui/src/routes/api/files/delete/+server.ts b/src/leapfrogai_ui/src/routes/api/files/delete/+server.ts index 935195842..e8942d8da 100644 --- a/src/leapfrogai_ui/src/routes/api/files/delete/+server.ts +++ b/src/leapfrogai_ui/src/routes/api/files/delete/+server.ts @@ -8,7 +8,6 @@ export const DELETE: RequestHandler = async ({ request, locals: { session } }) = error(401, 'Unauthorized'); } let requestData: { ids: string[] }; - // Validate request body try { requestData = await request.json(); diff --git a/src/leapfrogai_ui/src/routes/api/files/delete-check/+server.ts b/src/leapfrogai_ui/src/routes/api/files/delete/check/+server.ts similarity index 100% rename from src/leapfrogai_ui/src/routes/api/files/delete-check/+server.ts rename to src/leapfrogai_ui/src/routes/api/files/delete/check/+server.ts diff --git a/src/leapfrogai_ui/src/routes/api/files/delete-check/server.test.ts b/src/leapfrogai_ui/src/routes/api/files/delete/check/server.test.ts similarity index 86% rename from src/leapfrogai_ui/src/routes/api/files/delete-check/server.test.ts rename to src/leapfrogai_ui/src/routes/api/files/delete/check/server.test.ts index 1f6bb19bc..f78b142e9 100644 --- a/src/leapfrogai_ui/src/routes/api/files/delete-check/server.test.ts +++ b/src/leapfrogai_ui/src/routes/api/files/delete/check/server.test.ts @@ -1,5 +1,5 @@ import { POST } from './+server'; -import { mockOpenAI } from '../../../../../vitest-setup'; +import { mockOpenAI } from '../../../../../../vitest-setup'; import { getFakeAssistant, getFakeFiles, @@ -7,11 +7,11 @@ import { getFakeVectorStoreFile } from '$testUtils/fakeData'; import type { RequestEvent } from '@sveltejs/kit'; -import type { RouteParams } from '../../../../../.svelte-kit/types/src/routes/api/messages/new/$types'; +import type { RouteParams } from './$types'; import { getLocalsMock } from '$lib/mocks/misc'; const validMessageBody = { fileIds: ['file1', 'file2'] }; -describe('/api/files/delete-check', () => { +describe('/api/files/delete/check', () => { it('returns a 401 when there is no session', async () => { const request = new Request('http://thisurlhasnoeffect', { method: 'POST', @@ -22,7 +22,7 @@ describe('/api/files/delete-check', () => { POST({ request, locals: 
getLocalsMock({ nullSession: true }) - } as RequestEvent) + } as RequestEvent) ).rejects.toMatchObject({ status: 401 }); @@ -39,7 +39,7 @@ describe('/api/files/delete-check', () => { POST({ request, locals: getLocalsMock() - } as RequestEvent) + } as RequestEvent) ).rejects.toMatchObject({ status: 400 }); @@ -54,7 +54,7 @@ describe('/api/files/delete-check', () => { POST({ request, locals: getLocalsMock() - } as RequestEvent) + } as RequestEvent) ).rejects.toMatchObject({ status: 400 }); @@ -69,7 +69,7 @@ describe('/api/files/delete-check', () => { POST({ request, locals: getLocalsMock() - } as RequestEvent) + } as RequestEvent) ).rejects.toMatchObject({ status: 400 }); @@ -84,7 +84,7 @@ describe('/api/files/delete-check', () => { POST({ request, locals: getLocalsMock() - } as RequestEvent) + } as RequestEvent) ).rejects.toMatchObject({ status: 400 }); @@ -137,7 +137,7 @@ describe('/api/files/delete-check', () => { const res = await POST({ request, locals: getLocalsMock() - } as RequestEvent); + } as RequestEvent); const resData = await res.json(); expect(res.status).toEqual(200); @@ -153,7 +153,7 @@ describe('/api/files/delete-check', () => { const res2 = await POST({ request: request2, locals: getLocalsMock() - } as RequestEvent); + } as RequestEvent); const resData2 = await res2.json(); expect(res2.status).toEqual(200); @@ -173,7 +173,7 @@ describe('/api/files/delete-check', () => { POST({ request, locals: getLocalsMock() - } as RequestEvent) + } as RequestEvent) ).rejects.toMatchObject({ status: 500 }); diff --git a/src/leapfrogai_ui/src/routes/api/helpers.ts b/src/leapfrogai_ui/src/routes/api/helpers.ts new file mode 100644 index 000000000..c64bfe611 --- /dev/null +++ b/src/leapfrogai_ui/src/routes/api/helpers.ts @@ -0,0 +1,18 @@ +import type { LFThread } from '$lib/types/threads'; +import { getOpenAiClient } from '$lib/server/constants'; +import type { LFMessage } from '$lib/types/messages'; + +export const getThreadWithMessages = async ( + thread_id: string, + access_token: string +): Promise => { + const openai = getOpenAiClient(access_token); + const thread = (await openai.beta.threads.retrieve(thread_id)) as LFThread; + if (!thread) { + return null; + } + const messagesPage = await openai.beta.threads.messages.list(thread.id); + const messages = messagesPage.data as LFMessage[]; + messages.sort((a, b) => a.created_at - b.created_at); + return { ...thread, messages: messages }; +}; diff --git a/src/leapfrogai_ui/src/routes/api/threads/+server.ts b/src/leapfrogai_ui/src/routes/api/threads/+server.ts new file mode 100644 index 000000000..8158bab7a --- /dev/null +++ b/src/leapfrogai_ui/src/routes/api/threads/+server.ts @@ -0,0 +1,45 @@ +import type { RequestHandler } from './$types'; +import { error, json } from '@sveltejs/kit'; +import type { Profile } from '$lib/types/profile'; +import type { LFThread } from '$lib/types/threads'; +import { getThreadWithMessages } from '../helpers'; + +export const GET: RequestHandler = async ({ locals: { session, supabase, user } }) => { + if (!session) { + error(401, 'Unauthorized'); + } + + const { data: profile, error: profileError } = await supabase + .from('profiles') + .select(`*`) + .eq('id', user?.id) + .returns() + .single(); + + if (profileError) { + console.error( + `error getting user profile for user_id: ${user?.id}. 
${JSON.stringify(profileError)}` + ); + error(500, 'Internal Error'); + } + + const threads: LFThread[] = []; + if (profile?.thread_ids && profile?.thread_ids.length > 0) { + try { + const threadPromises = profile.thread_ids.map((thread_id) => + getThreadWithMessages(thread_id, session.access_token) + ); + const results = await Promise.allSettled(threadPromises); + results.forEach((result) => { + if (result.status === 'fulfilled' && result.value) { + threads.push(result.value); + } + }); + } catch (e) { + console.error(`Error fetching threads: ${e}`); + return json([]); + } + } + + return json(threads); +}; diff --git a/src/leapfrogai_ui/src/routes/api/threads/[thread_id]/+server.ts b/src/leapfrogai_ui/src/routes/api/threads/[thread_id]/+server.ts index 0a4a29f76..5c0c9f769 100644 --- a/src/leapfrogai_ui/src/routes/api/threads/[thread_id]/+server.ts +++ b/src/leapfrogai_ui/src/routes/api/threads/[thread_id]/+server.ts @@ -1,23 +1,6 @@ import type { RequestHandler } from './$types'; import { error, json } from '@sveltejs/kit'; -import { getOpenAiClient } from '$lib/server/constants'; -import type { LFThread } from '$lib/types/threads'; -import type { LFMessage } from '$lib/types/messages'; - -const getThreadWithMessages = async ( - thread_id: string, - access_token: string -): Promise => { - const openai = getOpenAiClient(access_token); - const thread = (await openai.beta.threads.retrieve(thread_id)) as LFThread; - if (!thread) { - return null; - } - const messagesPage = await openai.beta.threads.messages.list(thread.id); - const messages = messagesPage.data as LFMessage[]; - messages.sort((a, b) => a.created_at - b.created_at); - return { ...thread, messages: messages }; -}; +import { getThreadWithMessages } from '../../helpers'; export const GET: RequestHandler = async ({ params, locals: { session } }) => { if (!session) { diff --git a/src/leapfrogai_ui/src/routes/api/threads/server.test.ts b/src/leapfrogai_ui/src/routes/api/threads/server.test.ts new file mode 100644 index 000000000..34c7dade9 --- /dev/null +++ b/src/leapfrogai_ui/src/routes/api/threads/server.test.ts @@ -0,0 +1,125 @@ +import { GET } from './+server'; +import { getLocalsMock } from '$lib/mocks/misc'; +import type { RequestEvent } from '@sveltejs/kit'; +import type { RouteParams } from './$types'; +import { + selectSingleReturnsMockError, + supabaseFromMockWrapper, + supabaseSelectSingleByIdMock +} from '$lib/mocks/supabase-mocks'; +import { getFakeThread } from '$testUtils/fakeData'; +import { mockOpenAI } from '../../../../vitest-setup'; +import * as apiHelpers from '../helpers'; + +const request = new Request('http://thisurlhasnoeffect', { + method: 'GET' +}); + +const thread1 = getFakeThread({ numMessages: 1 }); +const thread2 = getFakeThread({ numMessages: 2 }); +const fakeProfile = { thread_ids: [thread1.id, thread2.id] }; + +describe('/api/threads', () => { + it('returns a 401 when there is no session', async () => { + await expect( + GET({ + request, + locals: getLocalsMock({ nullSession: true }) + } as RequestEvent) + ).rejects.toMatchObject({ + status: 401 + }); + }); + it("returns a user's threads", async () => { + const thread1WithoutMessages = { ...thread1, messages: undefined }; + const thread2WithoutMessages = { ...thread2, messages: undefined }; + + mockOpenAI.setThreads([thread1WithoutMessages, thread2WithoutMessages]); + mockOpenAI.setMessages([...(thread1.messages || []), ...(thread2.messages || [])]); + + const res = await GET({ + request, + locals: getLocalsMock({ + supabase: 
supabaseFromMockWrapper({ + ...supabaseSelectSingleByIdMock(fakeProfile) + }) + }) + } as RequestEvent); + + expect(res.status).toEqual(200); + const resJson = await res.json(); + // Note - our fake threads already have messages attached, we are checking here that the + // API fetched the messages and added them to the threads since real threads don't have messages + expect(resJson[0].id).toEqual(thread1.id); + expect(resJson[0].messages).toEqual(thread1.messages); + expect(resJson[1].id).toEqual(thread2.id); + expect(resJson[1].messages).toEqual(thread2.messages); + }); + it('still returns threads that were successfully retrieved when there is an error getting a thread', async () => { + mockOpenAI.setThreads([thread2]); + mockOpenAI.setError('retrieveThread'); // fail the first thread fetching + const res = await GET({ + request, + locals: getLocalsMock({ + supabase: supabaseFromMockWrapper({ + ...supabaseSelectSingleByIdMock(fakeProfile) + }) + }) + } as RequestEvent); + + expect(res.status).toEqual(200); + const resJson = await res.json(); + expect(resJson[0].id).toEqual(thread2.id); + }); + it('still returns threads that were successfully retrieved when there is an error getting messages for a thread', async () => { + mockOpenAI.setThreads([thread1, thread2]); + mockOpenAI.setError('listMessages'); // fail the first thread's message fetching + const res = await GET({ + request, + locals: getLocalsMock({ + supabase: supabaseFromMockWrapper({ + ...supabaseSelectSingleByIdMock(fakeProfile) + }) + }) + } as RequestEvent); + + expect(res.status).toEqual(200); + const resJson = await res.json(); + expect(resJson[0].id).toEqual(thread2.id); + }); + it('returns an empty array if there is an unhandled error fetching threads', async () => { + vi.spyOn(apiHelpers, 'getThreadWithMessages').mockImplementationOnce(() => { + throw new Error('fake error'); + }); + const consoleSpy = vi.spyOn(console, 'error'); + + const res = await GET({ + request, + locals: getLocalsMock({ + supabase: supabaseFromMockWrapper({ + ...supabaseSelectSingleByIdMock(fakeProfile) + }) + }) + } as RequestEvent); + + expect(res.status).toEqual(200); + const resJson = await res.json(); + expect(resJson).toEqual([]); + // ensure we hit the correct catch block/error case with this test + expect(consoleSpy).toHaveBeenCalledWith('Error fetching threads: Error: fake error'); + }); + it("returns a 500 is an error getting the user's profile", async () => { + await expect( + GET({ + request, + locals: getLocalsMock({ + supabase: supabaseFromMockWrapper({ + ...selectSingleReturnsMockError() + }) + }) + } as RequestEvent) + ).rejects.toMatchObject({ + status: 500 + }); + }); +}); diff --git a/src/leapfrogai_ui/src/routes/chat/(dashboard)/[[thread_id]]/+page.svelte b/src/leapfrogai_ui/src/routes/chat/(dashboard)/[[thread_id]]/+page.svelte index f082615c5..113883c5a 100644 --- a/src/leapfrogai_ui/src/routes/chat/(dashboard)/[[thread_id]]/+page.svelte +++ b/src/leapfrogai_ui/src/routes/chat/(dashboard)/[[thread_id]]/+page.svelte @@ -3,7 +3,7 @@ import { LFTextArea, PoweredByDU } from '$components'; import { Hr, ToolbarButton } from 'flowbite-svelte'; import { onMount, tick } from 'svelte'; - import { threadsStore, toastStore } from '$stores'; + import { assistantsStore, threadsStore, toastStore } from '$stores'; import { type Message as VercelAIMessage, useAssistant, useChat } from '@ai-sdk/svelte'; import { page } from '$app/stores'; import Message from '$components/Message.svelte'; @@ -13,6 +13,7 @@ import { twMerge } from 
'tailwind-merge'; import { isRunAssistantMessage, + refetchThread, resetMessages, saveMessage, stopThenSave @@ -29,35 +30,32 @@ import ChatFileUploadForm from '$components/ChatFileUpload.svelte'; import FileChatActions from '$components/FileChatActions.svelte'; import LFCarousel from '$components/LFCarousel.svelte'; + import { ASSISTANT_ERROR_MSG } from '$constants/errors'; + import { delay } from 'msw'; + import type { LFThread } from '$lib/types/threads'; export let data; /** LOCAL VARS **/ let lengthInvalid: boolean; // bound to child LFTextArea - let assistantsList: Array<{ id: string; text: string }>; let uploadingFiles = false; let attachedFiles: LFFile[] = []; // the actual files uploaded let attachedFileMetadata: FileMetadata[] = []; // metadata about the files uploaded, e.g. upload status, extracted text, etc... + let activeThread: LFThread | undefined = undefined; /** END LOCAL VARS **/ /** REACTIVE STATE **/ $: componentHasMounted = false; - $: $page.params.thread_id, threadsStore.setLastVisitedThreadId($page.params.thread_id); - $: $page.params.thread_id, - resetMessages({ - activeThread: data.thread, - setChatMessages, - setAssistantMessages - }); - - $: activeThreadMessages = - $threadsStore.threads.find((thread) => thread.id === $page.params.thread_id)?.messages || []; + $: activeThread = $threadsStore.threads.find( + (thread: LFThread) => thread.id === $page.params.thread_id + ); + $: $page.params.thread_id, handleThreadChange(); $: messageStreaming = $isLoading || $status === 'in_progress'; $: latestChatMessage = $chatMessages[$chatMessages.length - 1]; $: latestAssistantMessage = $assistantMessages[$assistantMessages.length - 1]; $: assistantMode = - $threadsStore.selectedAssistantId !== NO_SELECTED_ASSISTANT_ID && - $threadsStore.selectedAssistantId !== 'manage-assistants'; + $assistantsStore.selectedAssistantId !== NO_SELECTED_ASSISTANT_ID && + $assistantsStore.selectedAssistantId !== 'manage-assistants'; $: if (messageStreaming) threadsStore.setSendingBlocked(true); @@ -76,8 +74,30 @@ resetFiles(); // attachment of files w/assistants disabled } + $: if ($assistantError) handleAssistantResponseError(); + /** END REACTIVE STATE **/ + const handleThreadChange = () => { + if ($page.params.thread_id) { + if (activeThread) { + threadsStore.setLastVisitedThreadId(activeThread.id); + resetMessages({ + activeThread, + setChatMessages, + setAssistantMessages + }); + } + } else { + threadsStore.setLastVisitedThreadId(''); + resetMessages({ + activeThread, + setChatMessages, + setAssistantMessages + }); + } + }; + const resetFiles = () => { uploadingFiles = false; attachedFileMetadata = []; @@ -100,13 +120,13 @@ ); const message = await messageRes.json(); // store the assistant id on the user msg to know it's associated with an assistant - message.metadata.assistant_id = $threadsStore.selectedAssistantId; + message.metadata.assistant_id = $assistantsStore.selectedAssistantId; await threadsStore.addMessageToStore(message); } else if (latestAssistantMessage?.role !== 'user') { // Streamed assistant responses don't contain an assistant_id, so we add it here // and also add a createdAt date if not present if (!latestAssistantMessage.assistant_id) { - latestAssistantMessage.assistant_id = $threadsStore.selectedAssistantId; + latestAssistantMessage.assistant_id = $assistantsStore.selectedAssistantId; } if (!latestAssistantMessage.createdAt) @@ -121,16 +141,62 @@ const handleCompletedAssistantResponse = async () => { if (componentHasMounted && $status === 'awaiting_message') { - const 
assistantResponseId = $assistantMessages[$assistantMessages.length - 1].id;
+      if ($assistantError) return;
+      if (latestAssistantMessage.role === 'user') {
+        await handleAssistantResponseError();
+        return;
+      }
+
+      const assistantResponseId = latestAssistantMessage.id;
       const messageRes = await fetch(
         `/api/messages?thread_id=${$page.params.thread_id}&message_id=${assistantResponseId}`
       );
+      if (!messageRes.ok) {
+        //useAssistants onError hook will handle this
+        return;
+      }
+
       const message = await messageRes.json();
-      await threadsStore.addMessageToStore(message);
-      threadsStore.setStreamingMessage(null);
+      if (message && !getMessageText(message)) {
+        // error with response(empty response)/timeout
+        await handleAssistantResponseError();
+      } else {
+        await threadsStore.addMessageToStore(message);
+        threadsStore.setStreamingMessage(null);
+      }
     }
   };
+  const createAssistantErrorResponse = async () => {
+    await delay(1000); // ensure error response timestamp is after user's msg
+    const newMessage = await saveMessage({
+      thread_id: data.thread.id,
+      content: ASSISTANT_ERROR_MSG,
+      role: 'assistant',
+      metadata: {
+        assistant_id: latestAssistantMessage.assistant_id || $threadsStore.selectedAssistantId
+      }
+    });
+
+    await threadsStore.addMessageToStore(newMessage);
+  };
+
+  const handleAssistantResponseError = async () => {
+    await refetchThread($page.params.thread_id); // if there was an error in the stream, we need to re-fetch to get the user's msg from the db
+    toastStore.addToast({
+      ...ERROR_GETTING_ASSISTANT_MSG_TOAST()
+    });
+    if (latestAssistantMessage.role === 'assistant') {
+      await threadsStore.deleteMessage($page.params.thread_id, latestAssistantMessage.id);
+      threadsStore.removeMessageFromStore($page.params.thread_id, latestAssistantMessage.id);
+      $assistantMessages = [...$assistantMessages.splice(-1)];
+    }
+    await createAssistantErrorResponse();
+
+    threadsStore.setStreamingMessage(null);
+    await threadsStore.setSendingBlocked(false);
+  };
+
   /** useChat - streams messages with the /api/chat route**/
   const {
     input: chatInput,
@@ -144,10 +210,10 @@
     // Handle completed AI Responses
     onFinish: async (message: VercelAIMessage) => {
       try {
-        if (data.thread?.id) {
+        if (activeThread?.id) {
           // Save with API to db
           const newMessage = await saveMessage({
-            thread_id: data.thread.id,
+            thread_id: activeThread.id,
             content: getMessageText(message),
             role: 'assistant'
           });
@@ -180,24 +246,16 @@
     submitMessage: submitAssistantMessage,
     stop: assistantStop,
     setMessages: setAssistantMessages,
-    append: assistantAppend
+    append: assistantAppend,
+    error: assistantError
   } = useAssistant({
     api: '/api/chat/assistants',
-    threadId: data.thread?.id,
-    onError: async (e) => {
-      // ignore this error b/c it is expected on cancel
-      if (e.message !== 'BodyStreamBuffer was aborted') {
-        toastStore.addToast({
-          ...ERROR_GETTING_ASSISTANT_MSG_TOAST()
-        });
-      }
-      await threadsStore.setSendingBlocked(false);
-    }
+    threadId: activeThread?.id
   });

   const sendAssistantMessage = async (e: SubmitEvent | KeyboardEvent) => {
     await threadsStore.setSendingBlocked(true);
-    if (data.thread?.id) {
+    if (activeThread?.id) {
       // assistant mode
       $assistantInput = $chatInput;
       $chatInput = ''; // clear chat input
@@ -206,8 +264,8 @@
         // submit to AI (/api/chat/assistants)
         data: {
           message: $chatInput,
-          assistantId: $threadsStore.selectedAssistantId,
-          threadId: data.thread.id
+          assistantId: $assistantsStore.selectedAssistantId,
+          threadId: activeThread.id
         }
       });
       $assistantInput = '';
@@ -218,13 +276,13 @@
   const sendChatMessage = async (e: SubmitEvent | KeyboardEvent) => {
     try {
       await threadsStore.setSendingBlocked(true);
-      if (data.thread?.id) {
+      if (activeThread?.id) {
         let extractedFilesTextString = JSON.stringify(attachedFileMetadata);

         if (attachedFileMetadata.length > 0) {
           // Save the text of the document as its own message before sending actual question
           const contextMsg = await saveMessage({
-            thread_id: data.thread.id,
+            thread_id: activeThread.id,
             content: `${FILE_UPLOAD_PROMPT}: ${extractedFilesTextString}`,
             role: 'user',
             metadata: {
@@ -237,7 +295,7 @@

         // Save with API
         const newMessage = await saveMessage({
-          thread_id: data.thread.id,
+          thread_id: activeThread.id,
           content: $chatInput,
           role: 'user',
           ...(attachedFileMetadata.length > 0
@@ -270,11 +328,11 @@

   // setSendingBlocked (when called with the value 'false') automatically handles this delay
   const onSubmit = async (e: SubmitEvent | KeyboardEvent) => {
     e.preventDefault();
-    if (($isLoading || $status === 'in_progress') && data.thread?.id) {
+    if (($isLoading || $status === 'in_progress') && activeThread?.id) {
       const isAssistantChat = $status === 'in_progress'; // message still sending
       await stopThenSave({
-        activeThreadId: data.thread.id,
+        activeThreadId: activeThread.id,
         messages: isAssistantChat ? $assistantMessages : $chatMessages,
         status: $status,
         isLoading: $isLoading || false,
@@ -285,7 +343,7 @@
       return;
     } else {
       if (sendDisabled) return;
-      if (!data.thread?.id) {
+      if (!activeThread?.id) {
         // create new thread
         await threadsStore.newThread($chatInput);
         await tick(); // allow store to update
@@ -305,19 +363,13 @@

   onMount(async () => {
     componentHasMounted = true;
-    assistantsList = [...(data.assistants || [])].map((assistant) => ({
-      id: assistant.id,
-      text: assistant.name || 'unknown'
-    }));
-    assistantsList.unshift({ id: NO_SELECTED_ASSISTANT_ID, text: 'Select assistant...' }); // add dropdown item for no assistant selected
-    assistantsList.unshift({ id: `manage-assistants`, text: 'Manage assistants' }); // add dropdown item for manage assistants button
   });

   beforeNavigate(async () => {
-    if (($isLoading || $status === 'in_progress') && data.thread?.id) {
+    if (($isLoading || $status === 'in_progress') && activeThread?.id) {
       const isAssistantChat = $status === 'in_progress';
       await stopThenSave({
-        activeThreadId: data.thread.id,
+        activeThreadId: activeThread.id,
         messages: isAssistantChat ? $assistantMessages : $chatMessages,
         status: $status,
         isLoading: $isLoading || false,
@@ -331,19 +383,21 @@
-    {#each activeThreadMessages as message, index (message.id)}
-      {#if message.metadata?.hideMessage !== 'true'}
-
-      {/if}
-    {/each}
+    {#if activeThread}
+      {#each activeThread.messages as message, index (message.id)}
+        {#if message.metadata?.hideMessage !== 'true'}
+
+        {/if}
+      {/each}
+    {/if}
     {#if $threadsStore.streamingMessage}
@@ -352,7 +406,7 @@

- +
{
-  const promises = [fetch('/api/assistants'), fetch('/api/files')];
-
-  if (params.thread_id) promises.push(fetch(`/api/threads/${params.thread_id}`));
-
-  const promiseResponses = await Promise.all(promises);
-
-  const assistants = await promiseResponses[0].json();
-  const files = await promiseResponses[1].json();
-
-  let thread: LFThread | undefined = undefined;
-  if (params.thread_id) {
-    thread = await promiseResponses[2].json();
-  }
-
-  if (browser) {
-    if (thread) {
-      // update store with latest thread fetched by page data
-      threadsStore.updateThread(thread);
-    }
-  }
-
-  return { thread, assistants, files };
-};
diff --git a/src/leapfrogai_ui/src/routes/chat/(dashboard)/[[thread_id]]/chatpage.test.ts b/src/leapfrogai_ui/src/routes/chat/(dashboard)/[[thread_id]]/chatpage.test.ts
index 0a3cefa37..21857b0e8 100644
--- a/src/leapfrogai_ui/src/routes/chat/(dashboard)/[[thread_id]]/chatpage.test.ts
+++ b/src/leapfrogai_ui/src/routes/chat/(dashboard)/[[thread_id]]/chatpage.test.ts
@@ -17,7 +17,6 @@ import {
   mockNewMessageError
 } from '$lib/mocks/chat-mocks';
 import { getMessageText } from '$helpers/threads';
-import { load } from './+page';
 import { mockOpenAI } from '../../../../../vitest-setup';

 import { ERROR_GETTING_AI_RESPONSE_TOAST, ERROR_SAVING_MSG_TOAST } from '$constants/toastMessages';
@@ -27,7 +26,6 @@
 import type { LFAssistant } from '$lib/types/assistants';
 import { delay } from '$helpers/chatHelpers';
 import { mockGetFiles } from '$lib/mocks/file-mocks';
 import { threadsStore } from '$stores';
-import { NO_SELECTED_ASSISTANT_ID } from '$constants';
 type LayoutServerLoad = {
   threads: LFThread[];
@@ -60,17 +58,9 @@ describe('when there is an active thread selected', () => {
     mockOpenAI.setMessages(allMessages);
     mockOpenAI.setFiles(files);

-    // @ts-expect-error: full mocking of load function params not necessary and is overcomplicated
-    data = await load({
-      fetch: global.fetch,
-      depends: vi.fn(),
-      params: { thread_id: fakeThreads[0].id }
-    });
-
     threadsStore.set({
       threads: fakeThreads,
       lastVisitedThreadId: fakeThreads[0].id,
-      selectedAssistantId: NO_SELECTED_ASSISTANT_ID,
       sendingBlocked: false,
       streamingMessage: null
     });
diff --git a/src/leapfrogai_ui/src/routes/chat/(dashboard)/[[thread_id]]/chatpage_no_thread.test.ts b/src/leapfrogai_ui/src/routes/chat/(dashboard)/[[thread_id]]/chatpage_no_thread.test.ts
index 71242a2b2..6ec9995cb 100644
--- a/src/leapfrogai_ui/src/routes/chat/(dashboard)/[[thread_id]]/chatpage_no_thread.test.ts
+++ b/src/leapfrogai_ui/src/routes/chat/(dashboard)/[[thread_id]]/chatpage_no_thread.test.ts
@@ -8,7 +8,7 @@ import {
   mockNewMessage,
   mockNewThreadError
 } from '$lib/mocks/chat-mocks';
-import { load } from './+page';
+
 import { mockOpenAI } from '../../../../../vitest-setup';
 import ChatPageWithToast from './ChatPageWithToast.test.svelte';
 import type { LFThread } from '$lib/types/threads';
@@ -34,13 +34,6 @@ describe('when there is NO active thread selected', () => {
     mockOpenAI.setThreads(fakeThreads);
     mockOpenAI.setMessages(allMessages);
     mockOpenAI.setFiles(files);
-
-    // @ts-expect-error: full mocking of load function params not necessary and is overcomplicated
-    data = await load({
-      params: {},
-      fetch: global.fetch,
-      depends: vi.fn()
-    });
   });

   afterAll(() => {
diff --git a/src/leapfrogai_ui/src/routes/chat/(settings)/api-keys/+page.server.ts b/src/leapfrogai_ui/src/routes/chat/(settings)/api-keys/+page.server.ts
index 1cc33e4e8..ae0ec066c 100644
--- a/src/leapfrogai_ui/src/routes/chat/(settings)/api-keys/+page.server.ts
+++ b/src/leapfrogai_ui/src/routes/chat/(settings)/api-keys/+page.server.ts
@@ -30,7 +30,6 @@ export const load: PageServerLoad = async ({ depends, locals: { session } }) =>
   if (!res.ok) {
     return error(500, { message: 'Error fetching API keys' });
   }
-  keys = (await res.json()) as APIKeyRow[];

   // convert from seconds to milliseconds
   keys.forEach((key) => {
diff --git a/src/leapfrogai_ui/src/routes/chat/(settings)/api-keys/+page.svelte b/src/leapfrogai_ui/src/routes/chat/(settings)/api-keys/+page.svelte
index e854a8e6f..413cf8e23 100644
--- a/src/leapfrogai_ui/src/routes/chat/(settings)/api-keys/+page.svelte
+++ b/src/leapfrogai_ui/src/routes/chat/(settings)/api-keys/+page.svelte
@@ -137,7 +137,11 @@
{#if editMode} -
+
{#if deleting} {#if deleting}