diff --git a/.github/actions/release/action.yaml b/.github/actions/release/action.yaml new file mode 100644 index 000000000..38157f59e --- /dev/null +++ b/.github/actions/release/action.yaml @@ -0,0 +1,174 @@ +name: release +description: "Cut a release of all LeapfrogAI artifacts" + +inputs: + releaseTag: + description: The release tag to be published, cannot be left empty + required: true + subRepository: + description: The sub-repository to publish the artifacts to + required: false + default: /uds/ + registry1Username: + description: Registry1 Username + registry1Password: + description: Registry1 Password + ghToken: + description: GitHub Token + chainguardIdentity: + description: Chainguard login identity + +runs: + using: composite + + steps: + - name: Setup UDS Environment + uses: defenseunicorns/uds-common/.github/actions/setup@e3008473beab00b12a94f9fcc7340124338d5c08 # v0.13.1 + with: + registry1Username: ${{ inputs.registry1Username }} + registry1Password: ${{ inputs.registry1Password }} + ghToken: ${{ inputs.ghToken }} + chainguardIdentity: ${{ inputs.chainguardIdentity }} + + - name: Set up QEMU + uses: docker/setup-qemu-action@68827325e0b33c7199eb31dd4e31fbe9023e06e3 # v3.0.0 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@0d103c3126aa41d772a8362f6aa67afac040f80c # v3.1.0 + + - name: Setup Python + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 + with: + python-version-file: "pyproject.toml" + + - name: Install Dev Dependencies + shell: bash + run: | + python -m pip install ".[dev]" ".[dev-vllm]" ".[dev-whisper]" + + - name: Build and Publish K3d GPU + shell: bash + run: | + cd packages/k3d-gpu + docker build \ + --platform linux/amd64 \ + -t ghcr.io/defenseunicorns/leapfrogai/k3d-gpu:${{ inputs.releaseTag }} . + docker push ghcr.io/defenseunicorns/leapfrogai/k3d-gpu:${{ inputs.releaseTag }} + cd ../.. + + - name: Download Python Wheels and Publish Builder Image + shell: bash + run: | + docker buildx build --platform amd64,arm64 -t ghcr.io/defenseunicorns/leapfrogai/leapfrogai-sdk:${{ inputs.releaseTag }} --push -f src/leapfrogai_sdk/Dockerfile . + + - name: Install Zarf + uses: defenseunicorns/setup-zarf@10e539efed02f75ec39eb8823e22a5c795f492ae #v1.0.1 + + - name: Build and Publish API + shell: bash + run: | + docker buildx build --platform amd64,arm64 --build-arg LOCAL_VERSION=${{ inputs.releaseTag }} -t ghcr.io/defenseunicorns/leapfrogai/leapfrogai-api:${{ inputs.releaseTag }} --push -f packages/api/Dockerfile . + docker buildx build --platform amd64,arm64 -t ghcr.io/defenseunicorns/leapfrogai/api-migrations:${{ inputs.releaseTag }} --push -f Dockerfile.migrations --build-arg="MIGRATIONS_DIR=packages/api/supabase/migrations" . 
+ + zarf package create packages/api --set=IMAGE_VERSION=${{ inputs.releaseTag }} --architecture amd64 --flavor upstream --confirm + zarf package create packages/api --set=IMAGE_VERSION=${{ inputs.releaseTag }} --architecture arm64 --flavor upstream --confirm + + zarf package publish zarf-package-leapfrogai-api-amd64-${{ inputs.releaseTag }}.tar.zst oci://ghcr.io/defenseunicorns/packages${{ inputs.subRepository }}leapfrogai + zarf package publish zarf-package-leapfrogai-api-arm64-${{ inputs.releaseTag }}.tar.zst oci://ghcr.io/defenseunicorns/packages${{ inputs.subRepository }}leapfrogai + + docker image prune -af + rm zarf-package-leapfrogai-api-*.tar.zst + + - name: Build and Publish UI + shell: bash + run: | + docker buildx build --platform amd64,arm64 -t ghcr.io/defenseunicorns/leapfrogai/leapfrogai-ui:${{ inputs.releaseTag }} --push src/leapfrogai_ui + docker buildx build --platform amd64,arm64 -t ghcr.io/defenseunicorns/leapfrogai/ui-migrations:${{ inputs.releaseTag }} --push -f Dockerfile.migrations --build-arg="MIGRATIONS_DIR=src/leapfrogai_ui/supabase/migrations" . + + zarf package create packages/ui --set=IMAGE_VERSION=${{ inputs.releaseTag }} --architecture amd64 --flavor upstream --confirm + zarf package create packages/ui --set=IMAGE_VERSION=${{ inputs.releaseTag }} --architecture arm64 --flavor upstream --confirm + + zarf package publish zarf-package-leapfrogai-ui-amd64-${{ inputs.releaseTag }}.tar.zst oci://ghcr.io/defenseunicorns/packages${{ inputs.subRepository }}leapfrogai + zarf package publish zarf-package-leapfrogai-ui-arm64-${{ inputs.releaseTag }}.tar.zst oci://ghcr.io/defenseunicorns/packages${{ inputs.subRepository }}leapfrogai + + docker image prune -af + rm zarf-package-leapfrogai-ui-*.tar.zst + + - name: Build and Publish Supabase + shell: bash + run: | + docker buildx build --platform amd64,arm64 -t ghcr.io/defenseunicorns/leapfrogai/supabase-migrations:${{ inputs.releaseTag }} --push -f Dockerfile.migrations --build-arg="MIGRATIONS_DIR=packages/supabase/migrations" . + + zarf package create packages/supabase --set=IMAGE_VERSION=${{ inputs.releaseTag }} --architecture amd64 --flavor upstream --confirm + zarf package create packages/supabase --set=IMAGE_VERSION=${{ inputs.releaseTag }} --architecture arm64 --flavor upstream --confirm + + zarf package publish zarf-package-supabase-amd64-${{ inputs.releaseTag }}.tar.zst oci://ghcr.io/defenseunicorns/packages${{ inputs.subRepository }}leapfrogai + zarf package publish zarf-package-supabase-arm64-${{ inputs.releaseTag }}.tar.zst oci://ghcr.io/defenseunicorns/packages${{ inputs.subRepository }}leapfrogai + + - name: Build and Publish Repeater + shell: bash + run: | + docker buildx build --platform amd64,arm64 --build-arg LOCAL_VERSION=${{ inputs.releaseTag }} -t ghcr.io/defenseunicorns/leapfrogai/repeater:${{ inputs.releaseTag }} --push -f packages/repeater/Dockerfile . 
+ + zarf package create packages/repeater --set=IMAGE_VERSION=${{ inputs.releaseTag }} --architecture amd64 --flavor upstream --confirm + zarf package create packages/repeater --set=IMAGE_VERSION=${{ inputs.releaseTag }} --architecture arm64 --flavor upstream --confirm + + zarf package publish zarf-package-repeater-amd64-${{ inputs.releaseTag }}.tar.zst oci://ghcr.io/defenseunicorns/packages${{ inputs.subRepository }}leapfrogai + zarf package publish zarf-package-repeater-arm64-${{ inputs.releaseTag }}.tar.zst oci://ghcr.io/defenseunicorns/packages${{ inputs.subRepository }}leapfrogai + + docker image prune -af + rm zarf-package-repeater-*.tar.zst + + - name: Build and Publish LLaMA-CPP-Python + shell: bash + run: | + docker buildx build --platform amd64,arm64 --build-arg LOCAL_VERSION=${{ inputs.releaseTag }} -t ghcr.io/defenseunicorns/leapfrogai/llama-cpp-python:${{ inputs.releaseTag }} --push -f packages/llama-cpp-python/Dockerfile . + + zarf package create packages/llama-cpp-python --set=IMAGE_VERSION=${{ inputs.releaseTag }} --architecture amd64 --flavor upstream --confirm + zarf package create packages/llama-cpp-python --set=IMAGE_VERSION=${{ inputs.releaseTag }} --architecture arm64 --flavor upstream --confirm + + zarf package publish zarf-package-llama-cpp-python-amd64-${{ inputs.releaseTag }}.tar.zst oci://ghcr.io/defenseunicorns/packages${{ inputs.subRepository }}leapfrogai + zarf package publish zarf-package-llama-cpp-python-arm64-${{ inputs.releaseTag }}.tar.zst oci://ghcr.io/defenseunicorns/packages${{ inputs.subRepository }}leapfrogai + + docker image prune -af + rm zarf-package-llama-*.tar.zst + + - name: Build and Publish vLLM + shell: bash + run: | + docker buildx build --build-arg LOCAL_VERSION=${{ inputs.releaseTag }} -t ghcr.io/defenseunicorns/leapfrogai/vllm:${{ inputs.releaseTag }} --push -f packages/vllm/Dockerfile . + + ZARF_CONFIG=packages/vllm/zarf-config.yaml zarf package create packages/vllm --set=IMAGE_VERSION=${{ inputs.releaseTag }} --flavor upstream --confirm + + zarf package publish zarf-package-vllm-amd64-${{ inputs.releaseTag }}.tar.zst oci://ghcr.io/defenseunicorns/packages${{ inputs.subRepository }}leapfrogai + + docker image prune -af + rm zarf-package-vllm-*.tar.zst + + - name: Build and Publish Text-Embeddings + shell: bash + run: | + docker buildx build --platform amd64,arm64 --build-arg LOCAL_VERSION=${{ inputs.releaseTag }} -t ghcr.io/defenseunicorns/leapfrogai/text-embeddings:${{ inputs.releaseTag }} --push -f packages/text-embeddings/Dockerfile . + + zarf package create packages/text-embeddings --set=IMAGE_VERSION=${{ inputs.releaseTag }} --architecture amd64 --flavor upstream --confirm + zarf package create packages/text-embeddings --set=IMAGE_VERSION=${{ inputs.releaseTag }} --architecture arm64 --flavor upstream --confirm + + zarf package publish zarf-package-text-embeddings-amd64-${{ inputs.releaseTag }}.tar.zst oci://ghcr.io/defenseunicorns/packages${{ inputs.subRepository }}leapfrogai + zarf package publish zarf-package-text-embeddings-arm64-${{ inputs.releaseTag }}.tar.zst oci://ghcr.io/defenseunicorns/packages${{ inputs.subRepository }}leapfrogai + + docker image prune -af + rm zarf-package-text-embeddings-*.tar.zst + + - name: Build and Publish Whisper + shell: bash + run: | + docker buildx build --platform amd64,arm64 --build-arg LOCAL_VERSION=${{ inputs.releaseTag }} -t ghcr.io/defenseunicorns/leapfrogai/whisper:${{ inputs.releaseTag }} --push -f packages/whisper/Dockerfile . 
+ + zarf package create packages/whisper --set=IMAGE_VERSION=${{ inputs.releaseTag }} --architecture amd64 --flavor upstream --confirm + zarf package create packages/whisper --set=IMAGE_VERSION=${{ inputs.releaseTag }} --architecture arm64 --flavor upstream --confirm + + zarf package publish zarf-package-whisper-amd64-${{ inputs.releaseTag }}.tar.zst oci://ghcr.io/defenseunicorns/packages${{ inputs.subRepository }}leapfrogai + zarf package publish zarf-package-whisper-arm64-${{ inputs.releaseTag }}.tar.zst oci://ghcr.io/defenseunicorns/packages${{ inputs.subRepository }}leapfrogai + + docker image prune -af + rm zarf-package-whisper-*.tar.zst diff --git a/.github/actions/uds-cluster/action.yaml b/.github/actions/uds-cluster/action.yaml index 34620025b..97396397b 100644 --- a/.github/actions/uds-cluster/action.yaml +++ b/.github/actions/uds-cluster/action.yaml @@ -8,6 +8,8 @@ inputs: description: Registry1 Password ghToken: description: GitHub Token + chainguardIdentity: + description: Chainguard login identity runs: using: composite @@ -18,10 +20,10 @@ runs: registry1Username: ${{ inputs.registry1Username }} registry1Password: ${{ inputs.registry1Password }} ghToken: ${{ inputs.ghToken }} - udsCliVersion: 0.14.0 + chainguardIdentity: ${{ inputs.chainguardIdentity }} - name: Checkout Repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Create UDS Cluster shell: bash diff --git a/.github/release-please-config.json b/.github/release-please-config.json index 6cf564cdb..8f475204d 100644 --- a/.github/release-please-config.json +++ b/.github/release-please-config.json @@ -26,6 +26,11 @@ "path": "**/zarf.yaml", "glob": true }, + { + "type": "generic", + "path": "**/zarf-config.yaml", + "glob": true + }, { "type": "generic", "path": "**/uds-bundle.yaml", diff --git a/.github/scripts/uds_verification_report.py b/.github/scripts/uds_verification_report.py new file mode 100755 index 000000000..0e4d4e8fe --- /dev/null +++ b/.github/scripts/uds_verification_report.py @@ -0,0 +1,180 @@ +#!/usr/bin/env python3 + +import os +import re + + +def remove_ansi_escape_sequences(text): + ansi_escape = re.compile(r"\x1B\[[0-?]*[ -/]*[@-~]") + return ansi_escape.sub("", text) + + +# Capabilities that affect the entire capability, not just a single package +def uds_capability_wide_errors(text: str) -> bool: + if "Not all pods have the istio sidecar" in text: + return True + return False + + +# CI environment variable enables GitHub annotations +def print_package_info( + package_name, + failures_count, + errors_count, + warnings_count, + failure_descriptions, + error_descriptions, + warning_descriptions, + uds_capability_wide_errors_count, +): + if uds_capability_wide_errors_count >= 1: + errors_count -= uds_capability_wide_errors_count + if package_name: + print("-----------------------------") + if os.getenv("CI") == "true": + print(f"::group::{package_name}") + print(f"Package: {package_name}\n") + if failures_count > 0: + if os.getenv("CI") == "true": + print("::error::", end="") + print(f"⛔ Failures: {failures_count}") + else: + if errors_count > 0: + if os.getenv("CI") == "true": + print("::error::", end="") + print(f"❌ Errors: {errors_count}") + if warnings_count > 0: + if os.getenv("CI") == "true": + print("::warning::", end="") + print(f"⚠️ Warnings: {warnings_count}") + if failures_count > 0: + print("\n⛔ Failure Descriptions:") + for desc in failure_descriptions: + print(f" - {desc}") + else: + if 
errors_count > 0: + print("\n❌ Error Descriptions:") + for desc in error_descriptions: + print(f" - {desc}") + if warnings_count > 0: + print("\n⚠️ Warning Descriptions:") + for desc in warning_descriptions: + print(f" - {desc}") + if os.getenv("CI") == "true": + print("::endgroup::") + + +def main(): + # Read data from the specified file instead of stdin + file_path = os.path.join( + os.getenv("GITHUB_WORKSPACE", ""), "reports/intermediate-report.txt" + ) + with open(file_path, mode="r", encoding="utf-8", errors="ignore") as file: + data = file.read() + # Remove ANSI escape sequences + clean_data = remove_ansi_escape_sequences(data) + # Initialize variables + package_name = "" + failures_count = 0 + errors_count = 0 + warnings_count = 0 + uds_capability_wide_errors_count = 0 + failure_descriptions = [] + error_descriptions = [] + warning_descriptions = [] + uds_capability_wide_error_descriptions = [] + previous_package_name = None + + # Process each line + for line in clean_data.splitlines(): + # Remove leading and trailing whitespace + line = line.strip() + + # Match and extract the package name + match = re.match(r"^ℹ️\s+Package\s+Name:\s+(.*)$", line) + if match: + # Print the previous package's info before starting a new one + if previous_package_name is not None: + print_package_info( + previous_package_name, + failures_count, + errors_count, + warnings_count, + failure_descriptions, + error_descriptions, + warning_descriptions, + uds_capability_wide_errors_count, + ) + # Reset variables for the new package + package_name = match.group(1) + failures_count = 0 + errors_count = 0 + warnings_count = 0 + failure_descriptions = [] + error_descriptions = [] + warning_descriptions = [] + previous_package_name = package_name + continue + + if uds_capability_wide_errors(line): + uds_capability_wide_errors_count = 1 + uds_capability_wide_error_descriptions = [ + "Not all pods have the istio sidecar" + ] + continue + else: + # Match and extract counts for failures, errors, and warnings + match = re.match(r"^(❌|⚠️|⛔)\s+(\d+)\s+([a-z]+)\s+found$", line) + if match: + count = int(match.group(2)) + type_ = match.group(3) + if type_ == "errors": + errors_count = count + elif type_ == "warnings": + warnings_count = count + elif type_ == "failures": + failures_count = count + continue + + # Match and collect issue descriptions + match = re.match(r"^(❌|⚠️|⛔)\s+(.*)$", line) + if match: + emoji = match.group(1) + description = match.group(2) + if emoji == "❌": + error_descriptions.append(description) + elif emoji == "⚠️": + warning_descriptions.append(description) + elif emoji == "⛔": + failure_descriptions.append(description) + continue + + # Print the last package's information + if previous_package_name is not None: + print_package_info( + previous_package_name, + failures_count, + errors_count, + warnings_count, + failure_descriptions, + error_descriptions, + warning_descriptions, + uds_capability_wide_errors_count, + ) + if uds_capability_wide_errors_count >= 1: + print("-----------------------------") + if os.getenv("CI") == "true": + print("::group::UDS Capability-Wide Issues") + print("::error::", end="") + print("UDS Capability Issues") + print("\n❌ Error Descriptions:") + for desc in uds_capability_wide_error_descriptions: + print(f" - {desc}") + if os.getenv("CI") == "true": + print("::endgroup::") + + +if __name__ == "__main__": + main() + # Print the final ending separator + print("-----------------------------") diff --git a/.github/workflows/commit-lint.yaml 
b/.github/workflows/commit-lint.yaml index 3d8cd67ff..9c4b1c8b6 100644 --- a/.github/workflows/commit-lint.yaml +++ b/.github/workflows/commit-lint.yaml @@ -22,7 +22,7 @@ jobs: steps: - name: Checkout - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 with: fetch-depth: 0 diff --git a/.github/workflows/docker-lint.yaml b/.github/workflows/docker-lint.yaml index c46e5557d..98f2124d0 100644 --- a/.github/workflows/docker-lint.yaml +++ b/.github/workflows/docker-lint.yaml @@ -31,7 +31,7 @@ jobs: password: ${{ secrets.GITHUB_TOKEN }} - name: Checkout Repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - uses: hadolint/hadolint-action@54c9adbab1582c2ef04b2016b760714a4bfde3cf # v3.1.0 with: diff --git a/.github/workflows/e2e-llama-cpp-python.yaml b/.github/workflows/e2e-llama-cpp-python.yaml index e3d573bba..e116616e7 100644 --- a/.github/workflows/e2e-llama-cpp-python.yaml +++ b/.github/workflows/e2e-llama-cpp-python.yaml @@ -32,6 +32,7 @@ on: # Ignore local development files - "!.pre-commit-config.yaml" + - "!tasks.yaml" # Ignore non e2e tests changes - "!tests/pytest/**" @@ -56,9 +57,14 @@ jobs: runs-on: ai-ubuntu-big-boy-8-core if: ${{ !github.event.pull_request.draft }} + permissions: + contents: read + packages: read + id-token: write # This is needed for OIDC federation. + steps: - name: Checkout Repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Setup Python uses: ./.github/actions/python @@ -69,6 +75,7 @@ jobs: registry1Username: ${{ secrets.IRON_BANK_ROBOT_USERNAME }} registry1Password: ${{ secrets.IRON_BANK_ROBOT_PASSWORD }} ghToken: ${{ secrets.GITHUB_TOKEN }} + chainguardIdentity: ${{ secrets.CHAINGUARD_IDENTITY }} - name: Setup API and Supabase uses: ./.github/actions/lfai-core diff --git a/.github/workflows/e2e-playwright.yaml b/.github/workflows/e2e-playwright.yaml index 7200155fe..3b6464dbd 100644 --- a/.github/workflows/e2e-playwright.yaml +++ b/.github/workflows/e2e-playwright.yaml @@ -34,6 +34,7 @@ on: # Ignore local development files - "!.pre-commit-config.yaml" + - "!tasks.yaml" # Ignore non e2e tests changes - "!tests/pytest/**" @@ -57,9 +58,14 @@ jobs: runs-on: ai-ubuntu-big-boy-8-core if: ${{ !github.event.pull_request.draft }} + permissions: + contents: read + packages: read + id-token: write # This is needed for OIDC federation. 
+ steps: - name: Checkout Repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Setup Node uses: actions/setup-node@0a44ba7841725637a19e28fa30b79a866c81b0a6 # v4.0.4 @@ -82,6 +88,7 @@ jobs: registry1Username: ${{ secrets.IRON_BANK_ROBOT_USERNAME }} registry1Password: ${{ secrets.IRON_BANK_ROBOT_PASSWORD }} ghToken: ${{ secrets.GITHUB_TOKEN }} + chainguardIdentity: ${{ secrets.CHAINGUARD_IDENTITY }} - name: Create Test User run: | @@ -120,7 +127,7 @@ jobs: - name: UI/API/Supabase E2E Playwright Tests run: | cp src/leapfrogai_ui/.env.example src/leapfrogai_ui/.env - rm src/leapfrogai_ui/tests/global.teardown.ts + rm src/leapfrogai_ui/tests/global.teardown.ts mkdir -p src/leapfrogai_ui/playwright/.auth SERVICE_ROLE_KEY=$(uds zarf tools kubectl get secret -n leapfrogai supabase-bootstrap-jwt -o jsonpath={.data.service-key} | base64 -d) echo "::add-mask::$SERVICE_ROLE_KEY" diff --git a/.github/workflows/e2e-text-backend-full-cpu.yaml b/.github/workflows/e2e-text-backend-full-cpu.yaml index 6e8507ae3..bca3364b4 100644 --- a/.github/workflows/e2e-text-backend-full-cpu.yaml +++ b/.github/workflows/e2e-text-backend-full-cpu.yaml @@ -32,6 +32,7 @@ on: # Ignore local development files - "!.pre-commit-config.yaml" + - "!tasks.yaml" # Ignore non e2e tests changes - "!tests/pytest/**" @@ -57,9 +58,14 @@ jobs: runs-on: ai-ubuntu-big-boy-8-core if: ${{ !github.event.pull_request.draft }} + permissions: + contents: read + packages: read + id-token: write # This is needed for OIDC federation. + steps: - name: Checkout Repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Setup Python uses: ./.github/actions/python @@ -69,6 +75,8 @@ jobs: with: registry1Username: ${{ secrets.IRON_BANK_ROBOT_USERNAME }} registry1Password: ${{ secrets.IRON_BANK_ROBOT_PASSWORD }} + ghToken: ${{ secrets.GITHUB_TOKEN }} + chainguardIdentity: ${{ secrets.CHAINGUARD_IDENTITY }} - name: Setup LFAI-API and Supabase uses: ./.github/actions/lfai-core @@ -97,5 +105,7 @@ jobs: # Test ########## - name: Test Text Backend + env: + LEAPFROGAI_MODEL: llama-cpp-python run: | python -m pytest ./tests/e2e/test_text_backend_full.py -v diff --git a/.github/workflows/e2e-text-embeddings.yaml b/.github/workflows/e2e-text-embeddings.yaml index 20f7eb97a..c61f77fcd 100644 --- a/.github/workflows/e2e-text-embeddings.yaml +++ b/.github/workflows/e2e-text-embeddings.yaml @@ -32,6 +32,7 @@ on: # Ignore local development files - "!.pre-commit-config.yaml" + - "!tasks.yaml" # Ignore non e2e tests changes - "!tests/pytest/**" @@ -58,9 +59,14 @@ jobs: runs-on: ai-ubuntu-big-boy-8-core if: ${{ !github.event.pull_request.draft }} + permissions: + contents: read + packages: read + id-token: write # This is needed for OIDC federation. 
+ steps: - name: Checkout Repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Setup Python uses: ./.github/actions/python @@ -71,6 +77,7 @@ jobs: registry1Username: ${{ secrets.IRON_BANK_ROBOT_USERNAME }} registry1Password: ${{ secrets.IRON_BANK_ROBOT_PASSWORD }} ghToken: ${{ secrets.GITHUB_TOKEN }} + chainguardIdentity: ${{ secrets.CHAINGUARD_IDENTITY }} - name: Setup LFAI-API and Supabase uses: ./.github/actions/lfai-core diff --git a/.github/workflows/e2e-vllm.yaml b/.github/workflows/e2e-vllm.yaml index 07e9f046f..585e8b6a8 100644 --- a/.github/workflows/e2e-vllm.yaml +++ b/.github/workflows/e2e-vllm.yaml @@ -32,6 +32,7 @@ on: # Ignore local development files - "!.pre-commit-config.yaml" + - "!tasks.yaml" # Ignore non e2e tests changes - "!tests/pytest/**" @@ -58,9 +59,14 @@ jobs: runs-on: ai-ubuntu-big-boy-8-core if: ${{ !github.event.pull_request.draft }} + permissions: + contents: read + packages: read + id-token: write # This is needed for OIDC federation. + steps: - name: Checkout Repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Setup Python uses: ./.github/actions/python @@ -73,7 +79,7 @@ jobs: registry1Username: ${{ secrets.IRON_BANK_ROBOT_USERNAME }} registry1Password: ${{ secrets.IRON_BANK_ROBOT_PASSWORD }} ghToken: ${{ secrets.GITHUB_TOKEN }} - udsCliVersion: 0.14.0 + chainguardIdentity: ${{ secrets.CHAINGUARD_IDENTITY }} ########## # vLLM @@ -82,4 +88,4 @@ jobs: ########## - name: Build vLLM run: | - make build-vllm LOCAL_VERSION=e2e-test + make build-vllm LOCAL_VERSION=e2e-test ZARF_CONFIG=packages/vllm/zarf-config.yaml diff --git a/.github/workflows/e2e-whisper.yaml b/.github/workflows/e2e-whisper.yaml index dee2cf45a..a4620b89a 100644 --- a/.github/workflows/e2e-whisper.yaml +++ b/.github/workflows/e2e-whisper.yaml @@ -32,6 +32,7 @@ on: # Ignore local development files - "!.pre-commit-config.yaml" + - "!tasks.yaml" # Ignore non e2e tests changes - "!tests/pytest/**" @@ -56,9 +57,14 @@ jobs: runs-on: ai-ubuntu-big-boy-8-core if: ${{ !github.event.pull_request.draft }} + permissions: + contents: read + packages: read + id-token: write # This is needed for OIDC federation. 
+ steps: - name: Checkout Repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Setup Python uses: ./.github/actions/python @@ -71,6 +77,7 @@ jobs: registry1Username: ${{ secrets.IRON_BANK_ROBOT_USERNAME }} registry1Password: ${{ secrets.IRON_BANK_ROBOT_PASSWORD }} ghToken: ${{ secrets.GITHUB_TOKEN }} + chainguardIdentity: ${{ secrets.CHAINGUARD_IDENTITY }} - name: Setup LFAI-API and Supabase uses: ./.github/actions/lfai-core diff --git a/.github/workflows/helm-lint.yaml b/.github/workflows/helm-lint.yaml index 14bd6b107..b5f085944 100644 --- a/.github/workflows/helm-lint.yaml +++ b/.github/workflows/helm-lint.yaml @@ -31,7 +31,7 @@ jobs: password: ${{ secrets.GITHUB_TOKEN }} - name: Checkout Repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Setup Helm uses: azure/setup-helm@fe7b79cd5ee1e45176fcad797de68ecaf3ca4814 # v4.2.0 diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index 7f51aec04..c7e43d2bf 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -16,7 +16,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout Repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Validate Lint uses: chartboost/ruff-action@e18ae971ccee1b2d7bbef113930f00c670b78da4 # v1.0.0 diff --git a/.github/workflows/markdown-lint.yaml b/.github/workflows/markdown-lint.yaml index 45fea49ce..b34888167 100644 --- a/.github/workflows/markdown-lint.yaml +++ b/.github/workflows/markdown-lint.yaml @@ -32,7 +32,7 @@ jobs: steps: - name: Checkout Repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - uses: DavidAnson/markdownlint-cli2-action@db43aef879112c3119a410d69f66701e0d530809 # v17.0.0 with: diff --git a/.github/workflows/nightly-snapshot-release.yaml b/.github/workflows/nightly-snapshot-release.yaml new file mode 100644 index 000000000..82936a339 --- /dev/null +++ b/.github/workflows/nightly-snapshot-release.yaml @@ -0,0 +1,212 @@ +name: nightly-snapshot-release + +on: + schedule: + - cron: "0 8 * * *" # Runs daily at 12 AM PST + workflow_dispatch: # trigger manually as needed + pull_request: + types: + - opened # default trigger + - reopened # default trigger + - synchronize # default trigger + - ready_for_review # don't run on draft PRs + - milestoned # allows us to trigger on bot PRs + paths: + - .github/workflows/nightly-snapshot-release.yaml + +concurrency: + group: nightly-snapshot-release-${{ github.ref }} + cancel-in-progress: true + +defaults: + run: + shell: bash + +env: + SNAPSHOT_VERSION: snapshot-latest + SNAPSHOT_SUB_REPOSITORY: /uds/snapshots/ + +permissions: + contents: read + packages: write + id-token: write # This is needed for OIDC federation. 
+ +jobs: + snapshot-release: + runs-on: ai-ubuntu-big-boy-8-core + name: nightly_snapshot_release + if: ${{ !github.event.pull_request.draft }} + + steps: + - name: Checkout Repo + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + + - name: Release LeapfrogAI ${{ env.SNAPSHOT_VERSION }} + uses: ./.github/actions/release + with: + releaseTag: ${{ env.SNAPSHOT_VERSION }} + subRepository: ${{ env.SNAPSHOT_SUB_REPOSITORY }} + registry1Username: ${{ secrets.IRON_BANK_ROBOT_USERNAME }} + registry1Password: ${{ secrets.IRON_BANK_ROBOT_PASSWORD }} + ghToken: ${{ secrets.GITHUB_TOKEN }} + chainguardIdentity: ${{ secrets.CHAINGUARD_IDENTITY }} + + test-snapshot: + runs-on: ai-ubuntu-big-boy-8-core + name: nightly_test_snapshot + if: ${{ !github.event.pull_request.draft }} + needs: snapshot-release + + permissions: + contents: read + packages: write + id-token: write # This is needed for OIDC federation. + + steps: + # Checkout main just to see the latest release in the release-please manifest + - name: Checkout Repo (main) + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + with: + ref: main + + - name: Get Latest Release Version + id: get_version + run: | + LFAI_VERSION=$(jq -r '.["."]' .github/.release-please-manifest.json) + echo "LFAI_VERSION=$LFAI_VERSION" >> $GITHUB_OUTPUT + + ################ + # LATEST RELEASE + ################ + + # Checkout the latest release in the release-please manifest + - name: Checkout Repo (v${{ steps.get_version.outputs.LFAI_VERSION }}) + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + with: + ref: v${{ steps.get_version.outputs.LFAI_VERSION }} + + - name: Setup UDS Cluster (v${{ steps.get_version.outputs.LFAI_VERSION }}) + uses: ./.github/actions/uds-cluster + with: + registry1Username: ${{ secrets.IRON_BANK_ROBOT_USERNAME }} + registry1Password: ${{ secrets.IRON_BANK_ROBOT_PASSWORD }} + ghToken: ${{ secrets.GITHUB_TOKEN }} + chainguardIdentity: ${{ secrets.CHAINGUARD_IDENTITY }} + + # This is needed due to delay in tagged releases versus the version refs within the UDS bundles + - name: Mutation of the UDS Bundle (v${{ steps.get_version.outputs.LFAI_VERSION }}) + run: | + uds zarf tools yq -i '.metadata.version = "v${{ steps.get_version.outputs.LFAI_VERSION }}"' bundles/latest/cpu/uds-bundle.yaml + + uds zarf tools yq -i '.packages[].ref |= sub("^[^ ]+-upstream$", "${{ steps.get_version.outputs.LFAI_VERSION }}-upstream")' bundles/latest/cpu/uds-bundle.yaml + + - name: Create and Deploy UDS Bundle (v${{ steps.get_version.outputs.LFAI_VERSION }}) + run: | + cd bundles/latest/cpu + uds create . 
--confirm && \ + uds deploy uds-bundle-leapfrogai-amd64-v${{ steps.get_version.outputs.LFAI_VERSION }}.tar.zst --confirm --no-progress && \ + rm -rf uds-bundle-leapfrogai-amd64-v${{ steps.get_version.outputs.LFAI_VERSION }}.tar.zst && \ + docker system prune -af + + ################# + # MAIN (SNAPSHOT) + ################# + + - name: Checkout Repo (main) + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + with: + ref: main + + - name: Print the Commit SHA (main) + run: | + COMMIT_SHA=$(git rev-parse HEAD) + echo "The latest commit on the main branch is: $COMMIT_SHA" + + - name: Setup Python (main) + uses: ./.github/actions/python + + # Set UDS CPU bundle refs and repositories to snapshot-latest + - name: Mutation of the UDS Bundle (main) + run: | + uds zarf tools yq -i '.metadata.version = "${{ env.SNAPSHOT_VERSION }}"' bundles/latest/cpu/uds-bundle.yaml + + uds zarf tools yq -i '.packages[].ref |= sub("^[^ ]+-upstream$", "${{ env.SNAPSHOT_VERSION }}-upstream")' bundles/latest/cpu/uds-bundle.yaml + + uds zarf tools yq -i '.packages[].repository |= sub("/uds/", "/uds/snapshots/")' bundles/latest/cpu/uds-bundle.yaml + + - name: Create and Deploy UDS Bundle (main) + run: | + cd bundles/latest/cpu + uds create . --confirm && \ + uds deploy uds-bundle-leapfrogai-amd64-${{ env.SNAPSHOT_VERSION }}.tar.zst --confirm --no-progress && \ + rm -rf uds-bundle-leapfrogai-amd64-${{ env.SNAPSHOT_VERSION }}.tar.zst && \ + docker system prune -af + + ######### + # TESTING + ######### + + - name: Generate Secrets + id: generate_secrets + run: | + ANON_KEY=$(uds zarf tools kubectl get secret supabase-bootstrap-jwt -n leapfrogai -o jsonpath='{.data.anon-key}' | base64 -d) + echo "::add-mask::$ANON_KEY" + echo "ANON_KEY=$ANON_KEY" >> $GITHUB_OUTPUT + FAKE_PASSWORD=$(cat <(openssl rand -base64 32 | tr -dc 'a-zA-Z0-9!@#$%^&*()_+-=[]{}|;:,.<>?' 
| head -c 20) <(echo '!@1Aa') | fold -w1 | shuf | tr -d '\n') + echo "::add-mask::$FAKE_PASSWORD" + echo "FAKE_PASSWORD=$FAKE_PASSWORD" >> $GITHUB_OUTPUT + SERVICE_KEY=$(uds zarf tools kubectl get secret -n leapfrogai supabase-bootstrap-jwt -o jsonpath={.data.service-key} | base64 -d) + echo "::add-mask::$SERVICE_KEY" + echo "SERVICE_KEY=$SERVICE_KEY" >> $GITHUB_OUTPUT + + - name: Verify Secrets + run: | + echo "FAKE_PASSWORD is set: ${{ steps.generate_secrets.outputs.FAKE_PASSWORD != '' }}" + echo "ANON_KEY is set: ${{ steps.generate_secrets.outputs.ANON_KEY != '' }}" + echo "SERVICE_KEY is set: ${{ steps.generate_secrets.outputs.SERVICE_KEY != '' }}" + + # Backends + - name: Run Backend E2E Tests + env: + ANON_KEY: ${{ steps.generate_secrets.outputs.ANON_KEY }} + SERVICE_KEY: ${{ steps.generate_secrets.outputs.SERVICE_KEY }} + LEAPFROGAI_MODEL: llama-cpp-python + run: | + python -m pytest -vvv -s ./tests/e2e + + - name: Setup Playwright + run: | + npm --prefix src/leapfrogai_ui ci + npx --prefix src/leapfrogai_ui playwright install + + - name: Run Playwright E2E Tests + env: + SERVICE_ROLE_KEY: ${{ steps.generate_secrets.outputs.SERVICE_KEY }} + FAKE_E2E_USER_PASSWORD: ${{ steps.generate_secrets.outputs.FAKE_PASSWORD }} + ANON_KEY: ${{ steps.generate_secrets.outputs.ANON_KEY }} + run: | + chmod +x ./.github/scripts/createUser.sh + ./.github/scripts/createUser.sh + + cp src/leapfrogai_ui/.env.example src/leapfrogai_ui/.env + mkdir -p playwright/auth + touch playwright/auth.user.json + + SERVICE_ROLE_KEY=$SERVICE_ROLE_KEY TEST_ENV=CI USERNAME=doug PASSWORD=$FAKE_E2E_USER_PASSWORD PUBLIC_SUPABASE_ANON_KEY=$ANON_KEY DEFAULT_MODEL=llama-cpp-python npm --prefix src/leapfrogai_ui run test:integration:ci + + - name: Archive Playwright Report + uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6 + if: ${{ !cancelled() }} + with: + name: playwright-report + path: src/leapfrogai_ui/e2e-report/ + retention-days: 30 + + - name: Get Cluster Debug Information + id: debug + if: ${{ !cancelled() }} + uses: defenseunicorns/uds-common/.github/actions/debug-output@e3008473beab00b12a94f9fcc7340124338d5c08 # v0.13.1 + + - name: Get Cluster Debug Information + if: ${{ !cancelled() && steps.debug.conclusion == 'success' }} + uses: defenseunicorns/uds-common/.github/actions/save-logs@e3008473beab00b12a94f9fcc7340124338d5c08 # v0.13.1 diff --git a/.github/workflows/nightly-uds-badge-verification.yaml b/.github/workflows/nightly-uds-badge-verification.yaml new file mode 100644 index 000000000..d500b9693 --- /dev/null +++ b/.github/workflows/nightly-uds-badge-verification.yaml @@ -0,0 +1,94 @@ +name: nightly-uds-badge-verification + +on: + schedule: + - cron: "0 11 * * *" # Runs daily at 3 AM PST + workflow_dispatch: # trigger manually as needed + pull_request: + paths: + - .github/workflows/nightly-uds-badge-verification.yaml + - tasks.yaml + +concurrency: + group: nightly-uds-badge-verification-${{ github.ref }} + cancel-in-progress: true + +defaults: + run: + shell: bash + +env: + SNAPSHOT_VERSION: snapshot-latest + +permissions: + contents: read + packages: read + id-token: write # This is needed for OIDC federation. 
+ +jobs: + uds-badge-verification: + runs-on: ai-ubuntu-big-boy-8-core + name: nightly_uds_badge_verification + + steps: + - name: Checkout Repo + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + with: + ref: main + + - name: Setup UDS Cluster + uses: ./.github/actions/uds-cluster + with: + registry1Username: ${{ secrets.IRON_BANK_ROBOT_USERNAME }} + registry1Password: ${{ secrets.IRON_BANK_ROBOT_PASSWORD }} + ghToken: ${{ secrets.GITHUB_TOKEN }} + chainguardIdentity: ${{ secrets.CHAINGUARD_IDENTITY }} + + - name: Print the Commit SHA + run: | + COMMIT_SHA=$(git rev-parse HEAD) + echo "The latest commit on the main branch is: $COMMIT_SHA" + + # Set UDS CPU bundle refs and repositories to snapshot-latest + - name: Mutation of the UDS Bundle + run: | + uds zarf tools yq -i '.metadata.version = "${{ env.SNAPSHOT_VERSION }}"' bundles/latest/cpu/uds-bundle.yaml + + uds zarf tools yq -i '.packages[].ref |= sub("^[^ ]+-upstream$", "${{ env.SNAPSHOT_VERSION }}-upstream")' bundles/latest/cpu/uds-bundle.yaml + + uds zarf tools yq -i '.packages[].repository |= sub("/uds/", "/uds/snapshots/")' bundles/latest/cpu/uds-bundle.yaml + + - name: Create and Deploy UDS Bundle (${{ env.SNAPSHOT_VERSION }}) + run: | + cd bundles/latest/cpu + uds create . --confirm && \ + uds deploy uds-bundle-leapfrogai-amd64-${{ env.SNAPSHOT_VERSION }}.tar.zst --confirm --no-progress && \ + rm -rf uds-bundle-leapfrogai-amd64-${{ env.SNAPSHOT_VERSION }}.tar.zst && \ + docker system prune -af + + # Workaround for handling emojis in the upstream badge verification UDS task + - name: Set Locale to UTF-8 + run: | + sudo apt-get update + sudo apt-get install -y locales + sudo locale-gen en_US.UTF-8 + export LANG=en_US.UTF-8 + export LANGUAGE=en_US:en + export LC_ALL=en_US.UTF-8 + + # Setup Python for the report cleaning script in the next step + - name: Set up Python + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 + with: + python-version-file: "pyproject.toml" + + - name: Run UDS Badge Verification Task + run: | + uds run nightly-uds-badge-verification --no-progress + + - name: Archive UDS Badge Verification Report + uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6 + with: + name: uds-badge-verification-report + path: reports + retention-days: 7 diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index 93d0f0832..f906032a3 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -31,7 +31,10 @@ on: - "!packages/ui/**" # Declare default permissions as read only. -permissions: read-all +permissions: + contents: read + packages: read + id-token: write # This is needed for OIDC federation. 
concurrency: group: pytest-integration-${{ github.ref }} @@ -43,7 +46,7 @@ jobs: steps: - name: Checkout Repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Setup Python uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 @@ -64,6 +67,7 @@ jobs: run: make test-api-unit env: LFAI_RUN_REPEATER_TESTS: true + DEV: true integration: runs-on: ai-ubuntu-big-boy-8-core @@ -74,7 +78,7 @@ jobs: steps: - name: Checkout Repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Setup Python uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 @@ -97,6 +101,7 @@ jobs: registry1Username: ${{ secrets.IRON_BANK_ROBOT_USERNAME }} registry1Password: ${{ secrets.IRON_BANK_ROBOT_PASSWORD }} ghToken: ${{ secrets.GITHUB_TOKEN }} + chainguardIdentity: ${{ secrets.CHAINGUARD_IDENTITY }} - name: Setup API and Supabase uses: ./.github/actions/lfai-core diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 298f361e6..58336ef0a 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -1,12 +1,11 @@ name: Publish Release Artifacts -on: - workflow_call +on: workflow_call permissions: contents: read packages: write - + id-token: write # This is needed for OIDC federation. jobs: build-and-publish-artifacts: @@ -14,25 +13,7 @@ jobs: steps: - name: Checkout Repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - - - name: Login to GitHub Container Registry - uses: docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d # v3.0.0 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Set up QEMU - uses: docker/setup-qemu-action@68827325e0b33c7199eb31dd4e31fbe9023e06e3 # v3.0.0 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@0d103c3126aa41d772a8362f6aa67afac040f80c # v3.1.0 - - - name: Setup Python - uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 - with: - python-version-file: "pyproject.toml" + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Get Version id: get_version @@ -40,123 +21,11 @@ jobs: LFAI_VERSION=$(jq -r '.["."]' .github/.release-please-manifest.json) echo "LFAI_VERSION=$LFAI_VERSION" >> $GITHUB_OUTPUT - - name: Instal Python Deps - run: | - python -m pip install ".[dev,dev-whisper,dev-vllm]" - - - name: Build and Publish k3d-gpu image - run: | - cd packages/k3d-gpu - docker build \ - --platform linux/amd64 \ - -t ghcr.io/defenseunicorns/leapfrogai/k3d-gpu:${{ steps.get_version.outputs.LFAI_VERSION }} . - docker push ghcr.io/defenseunicorns/leapfrogai/k3d-gpu:${{ steps.get_version.outputs.LFAI_VERSION }} - cd ../.. - - - name: Download Python Wheels and Publish Builder Image - run: | - docker buildx build --platform amd64,arm64 -t ghcr.io/defenseunicorns/leapfrogai/leapfrogai-sdk:${{ steps.get_version.outputs.LFAI_VERSION }} --push -f src/leapfrogai_sdk/Dockerfile . 
- - - name: Install Zarf - uses: defenseunicorns/setup-zarf@10e539efed02f75ec39eb8823e22a5c795f492ae #v1.0.1 - - - name: Build and Publish API - run: | - docker buildx build --platform amd64,arm64 --build-arg LOCAL_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} -t ghcr.io/defenseunicorns/leapfrogai/leapfrogai-api:${{ steps.get_version.outputs.LFAI_VERSION }} --push -f packages/api/Dockerfile . - docker buildx build --platform amd64,arm64 -t ghcr.io/defenseunicorns/leapfrogai/api-migrations:${{ steps.get_version.outputs.LFAI_VERSION }} --push -f Dockerfile.migrations --build-arg="MIGRATIONS_DIR=packages/api/supabase/migrations" . - - zarf package create packages/api --set=IMAGE_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} --architecture amd64 --flavor upstream --confirm - zarf package create packages/api --set=IMAGE_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} --architecture arm64 --flavor upstream --confirm - - zarf package publish zarf-package-leapfrogai-api-amd64-${{ steps.get_version.outputs.LFAI_VERSION }}.tar.zst oci://ghcr.io/defenseunicorns/packages/uds/leapfrogai - zarf package publish zarf-package-leapfrogai-api-arm64-${{ steps.get_version.outputs.LFAI_VERSION }}.tar.zst oci://ghcr.io/defenseunicorns/packages/uds/leapfrogai - - docker image prune -af - rm zarf-package-leapfrogai-api-*.tar.zst - - - name: Build and Publish UI - run: | - docker buildx build --platform amd64,arm64 -t ghcr.io/defenseunicorns/leapfrogai/leapfrogai-ui:${{ steps.get_version.outputs.LFAI_VERSION }} --push src/leapfrogai_ui - docker buildx build --platform amd64,arm64 -t ghcr.io/defenseunicorns/leapfrogai/ui-migrations:${{ steps.get_version.outputs.LFAI_VERSION }} --push -f Dockerfile.migrations --build-arg="MIGRATIONS_DIR=src/leapfrogai_ui/supabase/migrations" . - - zarf package create packages/ui --set=IMAGE_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} --architecture amd64 --flavor upstream --confirm - zarf package create packages/ui --set=IMAGE_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} --architecture arm64 --flavor upstream --confirm - - zarf package publish zarf-package-leapfrogai-ui-amd64-${{ steps.get_version.outputs.LFAI_VERSION }}.tar.zst oci://ghcr.io/defenseunicorns/packages/uds/leapfrogai - zarf package publish zarf-package-leapfrogai-ui-arm64-${{ steps.get_version.outputs.LFAI_VERSION }}.tar.zst oci://ghcr.io/defenseunicorns/packages/uds/leapfrogai - - docker image prune -af - rm zarf-package-leapfrogai-ui-*.tar.zst - - - name: Build and Publish Supabase - run: | - docker buildx build --platform amd64,arm64 -t ghcr.io/defenseunicorns/leapfrogai/supabase-migrations:${{ steps.get_version.outputs.LFAI_VERSION }} --push -f Dockerfile.migrations --build-arg="MIGRATIONS_DIR=packages/supabase/migrations" . 
- - zarf package create packages/supabase --set=IMAGE_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} --architecture amd64 --flavor upstream --confirm - zarf package create packages/supabase --set=IMAGE_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} --architecture arm64 --flavor upstream --confirm - - zarf package publish zarf-package-supabase-amd64-${{ steps.get_version.outputs.LFAI_VERSION }}.tar.zst oci://ghcr.io/defenseunicorns/packages/uds/leapfrogai - zarf package publish zarf-package-supabase-arm64-${{ steps.get_version.outputs.LFAI_VERSION }}.tar.zst oci://ghcr.io/defenseunicorns/packages/uds/leapfrogai - - - name: Build and Publish repeater - run: | - docker buildx build --platform amd64,arm64 --build-arg LOCAL_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} -t ghcr.io/defenseunicorns/leapfrogai/repeater:${{ steps.get_version.outputs.LFAI_VERSION }} --push -f packages/repeater/Dockerfile . - - zarf package create packages/repeater --set=IMAGE_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} --architecture amd64 --flavor upstream --confirm - zarf package create packages/repeater --set=IMAGE_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} --architecture arm64 --flavor upstream --confirm - - zarf package publish zarf-package-repeater-amd64-${{ steps.get_version.outputs.LFAI_VERSION }}.tar.zst oci://ghcr.io/defenseunicorns/packages/uds/leapfrogai - zarf package publish zarf-package-repeater-arm64-${{ steps.get_version.outputs.LFAI_VERSION }}.tar.zst oci://ghcr.io/defenseunicorns/packages/uds/leapfrogai - - docker image prune -af - rm zarf-package-repeater-*.tar.zst - - - name: Build and Publish llama - run: | - docker buildx build --platform amd64,arm64 --build-arg LOCAL_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} -t ghcr.io/defenseunicorns/leapfrogai/llama-cpp-python:${{ steps.get_version.outputs.LFAI_VERSION }} --push -f packages/llama-cpp-python/Dockerfile . - - zarf package create packages/llama-cpp-python --set=IMAGE_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} --architecture amd64 --flavor upstream --confirm - zarf package create packages/llama-cpp-python --set=IMAGE_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} --architecture arm64 --flavor upstream --confirm - - zarf package publish zarf-package-llama-cpp-python-amd64-${{ steps.get_version.outputs.LFAI_VERSION }}.tar.zst oci://ghcr.io/defenseunicorns/packages/uds/leapfrogai - zarf package publish zarf-package-llama-cpp-python-arm64-${{ steps.get_version.outputs.LFAI_VERSION }}.tar.zst oci://ghcr.io/defenseunicorns/packages/uds/leapfrogai - - docker image prune -af - rm zarf-package-llama-*.tar.zst - - - name: Build and Publish vLLM - run: | - docker buildx build --build-arg LOCAL_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} -t ghcr.io/defenseunicorns/leapfrogai/vllm:${{ steps.get_version.outputs.LFAI_VERSION }} --push -f packages/vllm/Dockerfile . 
- - zarf package create packages/vllm --set=IMAGE_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} --flavor upstream --confirm - - zarf package publish zarf-package-vllm-amd64-${{ steps.get_version.outputs.LFAI_VERSION }}.tar.zst oci://ghcr.io/defenseunicorns/packages/uds/leapfrogai - - docker image prune -af - rm zarf-package-vllm-*.tar.zst - - - name: Build and Publish Text-Embeddings - run: | - docker buildx build --platform amd64,arm64 --build-arg LOCAL_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} -t ghcr.io/defenseunicorns/leapfrogai/text-embeddings:${{ steps.get_version.outputs.LFAI_VERSION }} --push -f packages/text-embeddings/Dockerfile . - - zarf package create packages/text-embeddings --set=IMAGE_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} --architecture amd64 --flavor upstream --confirm - zarf package create packages/text-embeddings --set=IMAGE_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} --architecture arm64 --flavor upstream --confirm - - zarf package publish zarf-package-text-embeddings-amd64-${{ steps.get_version.outputs.LFAI_VERSION }}.tar.zst oci://ghcr.io/defenseunicorns/packages/uds/leapfrogai - zarf package publish zarf-package-text-embeddings-arm64-${{ steps.get_version.outputs.LFAI_VERSION }}.tar.zst oci://ghcr.io/defenseunicorns/packages/uds/leapfrogai - - docker image prune -af - rm zarf-package-text-embeddings-*.tar.zst - - - name: Build and Publish whisper - run: | - docker buildx build --platform amd64,arm64 --build-arg LOCAL_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} -t ghcr.io/defenseunicorns/leapfrogai/whisper:${{ steps.get_version.outputs.LFAI_VERSION }} --push -f packages/whisper/Dockerfile . - - zarf package create packages/whisper --set=IMAGE_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} --architecture amd64 --flavor upstream --confirm - zarf package create packages/whisper --set=IMAGE_VERSION=${{ steps.get_version.outputs.LFAI_VERSION }} --architecture arm64 --flavor upstream --confirm - - zarf package publish zarf-package-whisper-amd64-${{ steps.get_version.outputs.LFAI_VERSION }}.tar.zst oci://ghcr.io/defenseunicorns/packages/uds/leapfrogai - zarf package publish zarf-package-whisper-arm64-${{ steps.get_version.outputs.LFAI_VERSION }}.tar.zst oci://ghcr.io/defenseunicorns/packages/uds/leapfrogai - - docker image prune -af - rm zarf-package-whisper-*.tar.zst + - name: Release LeapfrogAI ${{ steps.get_version.outputs.LFAI_VERSION }} + uses: ./.github/actions/release + with: + releaseTag: ${{ steps.get_version.outputs.LFAI_VERSION }} + registry1Username: ${{ secrets.IRON_BANK_ROBOT_USERNAME }} + registry1Password: ${{ secrets.IRON_BANK_ROBOT_PASSWORD }} + ghToken: ${{ secrets.GITHUB_TOKEN }} + chainguardIdentity: ${{ secrets.CHAINGUARD_IDENTITY }} diff --git a/.github/workflows/scorecard.yaml b/.github/workflows/scorecard.yaml index 3454adae3..527469efd 100644 --- a/.github/workflows/scorecard.yaml +++ b/.github/workflows/scorecard.yaml @@ -23,7 +23,7 @@ jobs: steps: - name: "Checkout code" - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 with: persist-credentials: false diff --git a/.github/workflows/secret-scan.yaml b/.github/workflows/secret-scan.yaml index 4270bbcca..9e15c4693 100644 --- a/.github/workflows/secret-scan.yaml +++ b/.github/workflows/secret-scan.yaml @@ -7,7 +7,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 
# v4.1.1 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 with: fetch-depth: 0 - name: Secret Scanning diff --git a/.github/workflows/uds-lint.yaml b/.github/workflows/uds-lint.yaml index 8f2e6834c..168a43818 100644 --- a/.github/workflows/uds-lint.yaml +++ b/.github/workflows/uds-lint.yaml @@ -22,7 +22,7 @@ jobs: steps: - name: Checkout Repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Set up Python uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 @@ -33,7 +33,7 @@ jobs: run: pip install check-jsonschema==0.28.0 - name: Download UDS Bundle Schema - run: curl -o uds.schema.json https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.14.0/uds.schema.json + run: curl -o uds.schema.json https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/uds.schema.json - name: Validate uds-bundle.yaml (dev) if: always() @@ -46,3 +46,11 @@ jobs: run: | check-jsonschema bundles/latest/gpu/uds-bundle.yaml --schemafile uds.schema.json check-jsonschema bundles/latest/cpu/uds-bundle.yaml --schemafile uds.schema.json + + - name: Download UDS Tasks Schema + run: curl -o tasks.schema.json https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/tasks.schema.json + + - name: Validate tasks.yaml + if: always() + run: | + check-jsonschema tasks.yaml --schemafile tasks.schema.json diff --git a/.github/workflows/ui-test.yaml b/.github/workflows/ui-test.yaml index 24414ad52..19370a35b 100644 --- a/.github/workflows/ui-test.yaml +++ b/.github/workflows/ui-test.yaml @@ -23,7 +23,7 @@ jobs: steps: - name: Checkout Repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - uses: actions/setup-node@0a44ba7841725637a19e28fa30b79a866c81b0a6 # v4.0.4 with: diff --git a/.github/workflows/e2e-registry1-weekly.yaml b/.github/workflows/weekly-registry1-flavor-test.yaml similarity index 55% rename from .github/workflows/e2e-registry1-weekly.yaml rename to .github/workflows/weekly-registry1-flavor-test.yaml index 65f4c5897..f7b583546 100644 --- a/.github/workflows/e2e-registry1-weekly.yaml +++ b/.github/workflows/weekly-registry1-flavor-test.yaml @@ -1,8 +1,8 @@ -name: e2e-registry1-weekly +name: weekly-registry1-flavor-test on: schedule: - - cron: "0 0 * * 6" # Run every Sunday at 12 AM EST + - cron: "0 8 * * 0" # Run every Sunday at 12 AM PST workflow_dispatch: # trigger manually as needed pull_request: types: @@ -12,11 +12,11 @@ on: - ready_for_review # don't run on draft PRs - milestoned # allows us to trigger on bot PRs paths: - - .github/workflows/e2e-registry1-weekly.yaml + - .github/workflows/weekly-registry1-flavor-test.yaml - bundles/latest/** concurrency: - group: e2e-registry1-weekly-${{ github.ref }} + group: weekly-registry1-flavor-test-${{ github.ref }} cancel-in-progress: true defaults: @@ -24,67 +24,98 @@ defaults: shell: bash jobs: - test-flavors: + registry1-flavor-test: runs-on: ai-ubuntu-big-boy-8-core - name: e2e_registry1_weekly + name: weekly_registry1_flavor_test if: ${{ !github.event.pull_request.draft }} permissions: contents: read - packages: write + packages: read id-token: write # This is needed for OIDC federation. 
steps: - - name: Checkout Repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + # Checkout main just to see the latest release in the release-please manifest + - name: Checkout Repo (main) + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 with: - # x-release-please-start-version - ref: "caf4f9c3093a55a003b49fcbf05c03221be6a232" # 0.12.2 w/ integration tests turned-on - # x-release-please-end + ref: main - - name: Setup Python - uses: ./.github/actions/python + - name: Get Latest Release Version + id: get_version + run: | + LFAI_VERSION=$(jq -r '.["."]' .github/.release-please-manifest.json) + echo "LFAI_VERSION=$LFAI_VERSION" >> $GITHUB_OUTPUT - - name: Install API and SDK Dev Dependencies - run : | - make install + ################ + # LATEST RELEASE + ################ + + - name: Checkout Repo + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + with: + fetch-tags: true + ref: v${{ steps.get_version.outputs.LFAI_VERSION }} - - name: Setup UDS Cluster - uses: ./.github/actions/uds-cluster + - name: Setup UDS Environment + uses: defenseunicorns/uds-common/.github/actions/setup@24c8a2a48eeb33773b76b3587c489cb17496c9e0 # v0.12.0 with: registry1Username: ${{ secrets.IRON_BANK_ROBOT_USERNAME }} registry1Password: ${{ secrets.IRON_BANK_ROBOT_PASSWORD }} ghToken: ${{ secrets.GITHUB_TOKEN }} - udsCliVersion: 0.14.0 + chainguardIdentity: ${{ secrets.CHAINGUARD_IDENTITY }} - - name: Create UDS Cluster - shell: bash + - name: Setup Python + uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c #v5.0.0 + with: + python-version-file: "pyproject.toml" + + - name: Install Python Dependencies + run: pip install ".[dev]" "src/leapfrogai_api" "src/leapfrogai_sdk" --no-cache-dir + + - name: Mutation of the Zarf Packages run: | - UDS_CONFIG=.github/config/uds-config.yaml make create-uds-cpu-cluster + uds zarf tools yq -i ' + .components[].images[0] |= sub(":v[0-9\.]+$", ":v${{ steps.get_version.outputs.LFAI_VERSION }}") + ' packages/api/zarf.yaml + uds zarf tools yq -i '.api.image.tag = "v${{ steps.get_version.outputs.LFAI_VERSION }}"' packages/api/values/registry1-values.yaml - - name: Setup Playwright + - name: Print the Modified Zarf Packages run: | - npm --prefix src/leapfrogai_ui ci - npx --prefix src/leapfrogai_ui playwright install + cat packages/api/zarf.yaml + cat packages/api/values/registry1-values.yaml - - name: Create Registry1 Packages + - name: Create Registry1 Zarf Packages run: | - LOCAL_VERSION=registry1 FLAVOR=registry1 make build-api + uds zarf package create packages/api --set image_version="${{ steps.get_version.outputs.LFAI_VERSION }}" --flavor registry1 -a amd64 --confirm # Mutate UDS bundle definition to use Registry1 packages - - name: Mutation to Registry1 Bundle - # TODO: fix bundle path + # Mutate non-Registry1 packages to be the current tagged version + - name: Mutation of the UDS Bundle run: | - uds zarf tools yq -i '.packages[1] |= del(.repository)' bundles/latest/cpu/uds-bundle.yaml - uds zarf tools yq -i '.packages[1] |= .ref = "registry1"' bundles/latest/cpu/uds-bundle.yaml - uds zarf tools yq -i '.packages[1] |= .path = "../../../packages/api"' bundles/latest/cpu/uds-bundle.yaml uds zarf tools yq -i '.metadata.version = "registry1"' bundles/latest/cpu/uds-bundle.yaml - - name: Create and Deploy Bundle + uds zarf tools yq -i '.packages[].ref |= sub("^[^ ]+-upstream$", "${{ steps.get_version.outputs.LFAI_VERSION }}-upstream")' bundles/latest/cpu/uds-bundle.yaml + + uds zarf tools yq -i 
'.packages[1] |= del(.repository)' bundles/latest/cpu/uds-bundle.yaml + uds zarf tools yq -i '.packages[1] |= .ref = "${{ steps.get_version.outputs.LFAI_VERSION }}"' bundles/latest/cpu/uds-bundle.yaml + uds zarf tools yq -i '.packages[1] |= .path = "../../../"' bundles/latest/cpu/uds-bundle.yaml + + - name: Print the Modified UDS Bundle + run: | + cat bundles/latest/cpu/uds-config.yaml + cat bundles/latest/cpu/uds-bundle.yaml + + - name: Create UDS Cluster + shell: bash + run: | + UDS_CONFIG=.github/config/uds-config.yaml make create-uds-cpu-cluster + + - name: Create and Deploy Registry1 Bundle run: | cd bundles/latest/cpu uds create . --confirm && \ - uds deploy uds-bundle-leapfrogai-amd64-registry1.tar.zst --confirm --no-progress && \ + uds deploy uds-bundle-leapfrogai-amd64-registry1.tar.zst --confirm --no-progress --log-level debug && \ rm -rf uds-bundle-leapfrogai-amd64-registry1.tar.zst && \ docker system prune -af @@ -107,32 +138,19 @@ jobs: echo "ANON_KEY is set: ${{ steps.generate_secrets.outputs.ANON_KEY != '' }}" echo "SERVICE_KEY is set: ${{ steps.generate_secrets.outputs.SERVICE_KEY != '' }}" - - name: Run Integration Tests - env: - SUPABASE_ANON_KEY: ${{ steps.generate_secrets.outputs.ANON_KEY }} - SUPABASE_PASS: ${{ steps.generate_secrets.outputs.FAKE_PASSWORD }} - SUPABASE_EMAIL: integration@uds.dev - SUPABASE_URL: https://supabase-kong.uds.dev - # Turn off NIAH tests that are not applicable for integration testing using the Repeater model - LFAI_RUN_NIAH_TESTS: "false" - run: | - uds zarf connect --name=llama-cpp-python-model --namespace=leapfrogai --local-port=50051 --remote-port=50051 & - while ! nc -z localhost 50051; do sleep 1; done - - make test-user-pipeline - env $(cat .env | xargs) python -m pytest -v -s tests/integration/api - # Backends - name: Run Backend E2E Tests env: ANON_KEY: ${{ steps.generate_secrets.outputs.ANON_KEY }} SERVICE_KEY: ${{ steps.generate_secrets.outputs.SERVICE_KEY }} + LEAPFROGAI_MODEL: llama-cpp-python run: | - python -m pytest ./tests/e2e/test_llama.py -vv - python -m pytest ./tests/e2e/test_text_embeddings.py -vv - python -m pytest ./tests/e2e/test_whisper.py -vv - python -m pytest ./tests/e2e/test_supabase.py -vv - python -m pytest ./tests/e2e/test_api.py -vv + python -m pytest -vvv -s ./tests/e2e + + - name: Setup Playwright + run: | + npm --prefix src/leapfrogai_ui ci + npx --prefix src/leapfrogai_ui playwright install - name: Run Playwright E2E Tests env: @@ -156,3 +174,12 @@ jobs: name: playwright-report path: src/leapfrogai_ui/e2e-report/ retention-days: 30 + + - name: Get Cluster Debug Information + id: debug + if: ${{ !cancelled() }} + uses: defenseunicorns/uds-common/.github/actions/debug-output@e3008473beab00b12a94f9fcc7340124338d5c08 # v0.13.1 + + - name: Get Cluster Debug Information + if: ${{ !cancelled() && steps.debug.conclusion == 'success' }} + uses: defenseunicorns/uds-common/.github/actions/save-logs@e3008473beab00b12a94f9fcc7340124338d5c08 # v0.13.1 diff --git a/.github/workflows/zarf-lint.yaml b/.github/workflows/zarf-lint.yaml index 2abf681b8..1a3e232de 100644 --- a/.github/workflows/zarf-lint.yaml +++ b/.github/workflows/zarf-lint.yaml @@ -22,7 +22,7 @@ jobs: steps: - name: Checkout Repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Set up Python uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 @@ -31,7 +31,7 @@ jobs: - name: Download Zarf Package Schema # TODO: renovate setup 
- run: curl -o zarf.schema.json https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.14.0/zarf.schema.json + run: curl -o zarf.schema.json https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/zarf.schema.json - name: Install jsonschema run: pip install check-jsonschema==0.28.0 diff --git a/.gitignore b/.gitignore index 645bd6ff5..d0c8a20f3 100644 --- a/.gitignore +++ b/.gitignore @@ -34,6 +34,7 @@ node_modules package.json package-lock.json **/*.schema.json +reports # local model and tokenizer files *.bin diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6caadd6c8..693b07a28 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -102,7 +102,7 @@ repos: if [ -f "$(git rev-parse --show-toplevel)/$FILE" ]; then echo "$FILE already exists in the root of the git project, skipping download." else - curl -o "$FILE" https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.14.0/zarf.schema.json + curl -o "$FILE" https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/zarf.schema.json fi' language: system @@ -125,7 +125,7 @@ repos: if [ -f "$(git rev-parse --show-toplevel)/$FILE" ]; then echo "$FILE already exists in the root of the git project, skipping download." else - curl -o "$FILE" https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.14.0/uds.schema.json + curl -o "$FILE" https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/uds.schema.json fi' language: system @@ -137,3 +137,26 @@ repos: files: "uds-bundle.yaml" types: [yaml] args: ["--schemafile", "uds-v0.14.0.schema.json"] + + # UDS TASKS CHECK + - repo: local + hooks: + - id: download-schema + name: "Download UDS Tasks Schema" + entry: | + bash -c 'FILE="tasks-v0.14.0.schema.json" + if [ -f "$(git rev-parse --show-toplevel)/$FILE" ]; then + echo "$FILE already exists in the root of the git project, skipping download." + else + curl -o "$FILE" https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/tasks.schema.json + fi' + language: system + + - repo: https://github.com/python-jsonschema/check-jsonschema + rev: 0.14.0 + hooks: + - id: check-jsonschema + name: "Validate UDS Bundles Against Schema" + files: "tasks.yaml" + types: [yaml] + args: ["--schemafile", "tasks-v0.14.0.schema.json"] diff --git a/Makefile b/Makefile index bf8afb315..da9266246 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,10 @@ ARCH ?= amd64 +FLAVOR ?= upstream REG_PORT ?= 5000 REG_NAME ?= registry LOCAL_VERSION ?= $(shell git rev-parse --short HEAD) DOCKER_FLAGS := ZARF_FLAGS := -FLAVOR := upstream SILENT_DOCKER_FLAGS := --quiet SILENT_ZARF_FLAGS := --no-progress -l warn --no-color MAX_JOBS := 4 @@ -55,24 +55,34 @@ build-supabase: local-registry docker-supabase docker-api: local-registry sdk-wheel @echo $(DOCKER_FLAGS) @echo $(ZARF_FLAGS) -ifeq ($(FLAVOR),upstream) + ## Build the API image (and tag it for the local registry) docker build ${DOCKER_FLAGS} --platform=linux/${ARCH} --build-arg LOCAL_VERSION=${LOCAL_VERSION} -t ghcr.io/defenseunicorns/leapfrogai/leapfrogai-api:${LOCAL_VERSION} -f packages/api/Dockerfile . docker tag ghcr.io/defenseunicorns/leapfrogai/leapfrogai-api:${LOCAL_VERSION} localhost:${REG_PORT}/defenseunicorns/leapfrogai/leapfrogai-api:${LOCAL_VERSION} -endif + ## Build the migration container for this version of the API docker build ${DOCKER_FLAGS} --platform=linux/${ARCH} -t ghcr.io/defenseunicorns/leapfrogai/api-migrations:${LOCAL_VERSION} -f Dockerfile.migrations --build-arg="MIGRATIONS_DIR=packages/api/supabase/migrations" . 
docker tag ghcr.io/defenseunicorns/leapfrogai/api-migrations:${LOCAL_VERSION} localhost:${REG_PORT}/defenseunicorns/leapfrogai/api-migrations:${LOCAL_VERSION} -build-api: local-registry docker-api ## Build the leapfrogai_api container and Zarf package +## If registry1, don't locally Docker-build anything +ifeq ($(FLAVOR),upstream) + DOCKER_TARGETS := local-registry docker-api +else + DOCKER_TARGETS := +endif + +build-api: $(DOCKER_TARGETS) ## Build the leapfrogai_api container and Zarf package + ## Only push to local registry and build if this is an upstream-flavored package ifeq ($(FLAVOR),upstream) ## Push the images to the local registry (Zarf is super slow if the image is only in the local daemon) docker push ${DOCKER_FLAGS} localhost:${REG_PORT}/defenseunicorns/leapfrogai/leapfrogai-api:${LOCAL_VERSION} -endif docker push ${DOCKER_FLAGS} localhost:${REG_PORT}/defenseunicorns/leapfrogai/api-migrations:${LOCAL_VERSION} - ## Build the Zarf package uds zarf package create packages/api --flavor ${FLAVOR} -a ${ARCH} -o packages/api --registry-override=ghcr.io=localhost:${REG_PORT} --insecure --set IMAGE_VERSION=${LOCAL_VERSION} ${ZARF_FLAGS} --confirm +else + ## Build the registry1 Zarf package + ZARF_CONFIG=packages/api/zarf-config.yaml uds zarf package create packages/api --flavor ${FLAVOR} -a ${ARCH} -o packages/api ${ZARF_FLAGS} --confirm +endif docker-ui: ## Build the UI image (and tag it for the local registry) @@ -113,7 +123,7 @@ build-vllm: local-registry docker-vllm ## Build the vllm container and Zarf pack docker push ${DOCKER_FLAGS} localhost:${REG_PORT}/defenseunicorns/leapfrogai/vllm:${LOCAL_VERSION} ## Build the Zarf package - uds zarf package create packages/vllm --flavor ${FLAVOR} -a ${ARCH} -o packages/vllm --registry-override=ghcr.io=localhost:${REG_PORT} --insecure --set IMAGE_VERSION=${LOCAL_VERSION} ${ZARF_FLAGS} --confirm + ZARF_CONFIG=packages/vllm/zarf-config.yaml uds zarf package create packages/vllm --flavor ${FLAVOR} -a ${ARCH} -o packages/vllm --registry-override=ghcr.io=localhost:${REG_PORT} --insecure --set IMAGE_VERSION=${LOCAL_VERSION} ${ZARF_FLAGS} --confirm docker-text-embeddings: sdk-wheel ## Build the image (and tag it for the local registry) @@ -253,7 +263,7 @@ silent-deploy-llama-cpp-python-package: silent-deploy-vllm-package: @echo "Starting VLLM deployment..." 
@mkdir -p .logs - @uds zarf package deploy packages/vllm/zarf-package-vllm-${ARCH}-${LOCAL_VERSION}.tar.zst ${ZARF_FLAGS} --confirm > .logs/deploy-vllm.log 2>&1 + @ZARF_CONFIG=packages/vllm/zarf-config.yaml uds zarf package deploy packages/vllm/zarf-package-vllm-${ARCH}-${LOCAL_VERSION}.tar.zst ${ZARF_FLAGS} --confirm > .logs/deploy-vllm.log 2>&1 @echo "VLLM deployment completed" silent-deploy-text-embeddings-package: diff --git a/README.md b/README.md index 7c09b075b..2429da763 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,9 @@ ![LeapfrogAI](https://github.com/defenseunicorns/leapfrogai/raw/main/docs/imgs/leapfrogai.png) [![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/defenseunicorns/leapfrogai/badge)](https://api.securityscorecards.dev/projects/github.com/defenseunicorns/leapfrogai) +[![Nightly Snapshot Tests](https://github.com/defenseunicorns/leapfrogai/actions/workflows/nightly-snapshot-release.yaml/badge.svg?branch=main)](https://github.com/defenseunicorns/leapfrogai/actions/workflows/nightly-snapshot-release.yaml) +[![Nightly Made for UDS Test](https://github.com/defenseunicorns/leapfrogai/actions/workflows/nightly-uds-badge-verification.yaml/badge.svg?branch=main)](https://github.com/defenseunicorns/leapfrogai/actions/workflows/nightly-uds-badge-verification.yaml) +[![Weekly Registry1 Test](https://github.com/defenseunicorns/leapfrogai/actions/workflows/weekly-registry1-flavor-test.yaml/badge.svg?branch=main)](https://github.com/defenseunicorns/leapfrogai/actions/workflows/weekly-registry1-flavor-test.yaml) ## Table of Contents diff --git a/adr/0004-rag-eval-framework.md b/adr/0004-rag-eval-toolset.md similarity index 100% rename from adr/0004-rag-eval-framework.md rename to adr/0004-rag-eval-toolset.md diff --git a/adr/0007-rag-eval-framework.md b/adr/0007-rag-eval-framework.md new file mode 100644 index 000000000..a1be25465 --- /dev/null +++ b/adr/0007-rag-eval-framework.md @@ -0,0 +1,233 @@ +# LeapfrogAI RAG Evaluation Framework MVP + +## Table of Contents + +- [LeapfrogAI RAG Evaluation Framework MVP](#leapfrogai-rag-evaluation-framework-mvp) + - [Table of Contents](#table-of-contents) + - [Status](#status) + - [Context](#context) + - [Decisions and Rationale](#decisions-and-rationale) + - [Tools](#tools) + - [Datasets](#datasets) + - [Models to Evaluate](#models-to-evaluate) + - [LLM-as-Judge / LLMs-as-Jury](#llm-as-judge--llms-as-jury) + - [Metrics / Evaluations](#metrics--evaluations) + - [Execution / Delivery](#execution--delivery) + - [Model Card](#model-card) + - [Related ADRs](#related-adrs) + - [References](#references) + +## Status + +APPROVED + +## Context + +LeapfrogAI uses RAG to provide context-aware responses to users who have specific data they need to reference. In order to make sure RAG is operating at the levels we need it to, we need to get measurable feedback from our RAG pipeline to make it better. We also need a standard to show to mission heroes that we are in fact operating at that level. We do this with RAG-focused evaluations. Additionally, utilizing evaluations as a whole and developing a standard approach will allow customizations of RAG and its components (for various deployment scenarios) to be better tested and evaluated against. This ADR documents all of the decisions and lessons learned for enabling a full-scale RAG evaluations pipeline MVP. + +## Decisions and Rationale + +This section covers all of the decision points that needed to be made along side an explanation of how those decisions were made. 
Each section covers a different aspect of the RAG evaluations framework. + +### Tools +
+ Details + + #### Decision + The primary toolset for architecting RAG evaluations will be **[DeepEval](https://docs.confident-ai.com/)**. + #### Rationale + Please see the [RAG Evaluations Toolset](/adr/0004-rag-eval-toolset.md) ADR for an in-depth discussion of why DeepEval was chosen over other alternatives. +
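To make the selection concrete, below is a minimal sketch of how a single RAG test case might be scored with DeepEval. The question, answer, and retrieved-context strings are placeholders, and the default judge configuration is an assumption rather than the project's final setup.

```python
from deepeval import evaluate
from deepeval.metrics import ContextualRecallMetric
from deepeval.test_case import LLMTestCase

# Placeholder test case: in practice these fields would come from LFAI_RAG_qa_v1
# and from the LeapfrogAI API's actual response and retrieved chunks.
test_case = LLMTestCase(
    input="What does the policy say about data retention?",
    actual_output="Records must be retained for three years.",
    expected_output="The policy requires a three-year retention period.",
    retrieval_context=["Section 4.2: all records are retained for three years."],
)

# Scores how much of the expected output is supported by the retrieved context.
metric = ContextualRecallMetric(threshold=0.7)
evaluate(test_cases=[test_case], metrics=[metric])
```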
+ +### Datasets +
+ Details + + #### Decision + To handle RAG evaluations, two types of datasets were determined to be needed: + - Question/Answer (QA) + - Needle in a Haystack (NIAH) + + A QA dataset should contain a set of [test cases](https://docs.confident-ai.com/docs/evaluation-test-cases) that have: + - Questions, which will be prompted to the LLM + - Ground truth answers, which will be used to compare against the generated answer by the LLM + - Context, which will contain the correct piece of source documentation that supports the true answer + - The full source documentation from which the context is derived + + A dataset for [NIAH Testing](https://arize.com/blog-course/the-needle-in-a-haystack-test-evaluating-the-performance-of-llm-rag-systems/) should contain: + - A series of irrelevant texts of varying context length that have one point of information hidden within + + To support these needs, two datasets were created: + - [LFAI_RAG_qa_v1](https://huggingface.co/datasets/defenseunicorns/LFAI_RAG_qa_v1) + - [LFAI_RAG_niah_v1](https://huggingface.co/datasets/defenseunicorns/LFAI_RAG_niah_v1) + + These two datasets will be used as the basis for MVP LeapfrogAI RAG evaluations that require data sources. + + Advanced versions of these datasets will be needed after MVP status as LeapfrogAI baseline performance grows. If baseline LeapfrogAI can pass all tests and score top marks on all metrics for these tests, then the tests lose their ability to assist in tracking growth over time. + + An advanced QA dataset differs in the following ways: + - More documents to use as the basis for questions. This provides a larger pool that RAG has to perform retrieval on and provides more opportunities for question types + - Narrow the scope of the types of documents used. By keeping the topics of each document more similar to each other, this makes retrieval a more difficult task + + An advanced NIAH dataset has the following: + - A collection of documents (the haystack) where one document contains a target piece of information (the needle) hidden somewhere within + - The documents should be of the same topic (or in other words, be semantically similar) so it's not obvious which document has the right information + - The needle itself should also be topically related to the rest of the documents but identifiable as unique information (i.e this information should be not obvious and only exists in one location out of all the documents) + + #### Rationale + + These datasets were created because it filled a gap in the openly available datasets that could have been used. For example, in QA datasets, there did not exist any dataset that had all **4** components listed above. Many had the questions, answers, and context, but none also included the source documents in a readily accessible manner. Therefore, the fastest and most effective course of action was to generate a QA dataset from source documentation using the [DeepEval Synthesizer](https://docs.confident-ai.com/docs/evaluation-datasets-synthetic-data). The documentation that was used to create the QA dataset was chosen to be both representative of deployment needs (by including some DoD specific documentation) and a variety of topics (including technical documents and financial reports). + + As for the NIAH dataset, there was a similar "incompleteness" problem that was observed. 
While other iterations of NIAH datasets are more readily available than QA datasets, some [datasets](https://huggingface.co/datasets/nanotron/simple_needle_in_a_hay_stack) had haystacks constructed of short, repeating sentences, which does not reflect what a realistic deployment context looks like. Other implementations mirrored the original [NIAH experiment](https://x.com/GregKamradt/status/1722386725635580292?lang=en) using [Paul Graham essays](https://paulgraham.com/articles.html), but did not release their specific datasets. Therefore, it made sense to quickly generate a dataset that uses the same Paul Graham essays as context, while inserting individual "needles" at certain context lengths to create a custom dataset. LFAI_RAG_niah_v1 includes context lengths from 512 to 128k characters. +
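As an illustration of the dataset construction described above (a simplified sketch, not the actual generation script), a haystack sample can be produced by trimming the essay text to a target context length and splicing the needle in at a chosen depth:

```python
def build_niah_sample(haystack: str, needle: str, context_length: int, depth: float) -> str:
    """Trim the haystack to roughly `context_length` characters and insert the
    needle at a relative depth (0.0 = start, 1.0 = end) on a sentence boundary."""
    trimmed = haystack[: max(context_length - len(needle) - 1, 0)]
    insert_at = int(len(trimmed) * depth)
    # back up to the nearest sentence boundary so the needle is not spliced mid-sentence
    boundary = trimmed.rfind(". ", 0, insert_at)
    if boundary != -1:
        insert_at = boundary + 2
    return trimmed[:insert_at] + needle + " " + trimmed[insert_at:]

# Placeholder inputs: the real dataset uses concatenated Paul Graham essays as the
# haystack and a unique, identifiable fact as the needle.
haystack_text = "The quick brown fox jumps over the lazy dog. " * 200
needle_text = "Doug's favorite food is a banana sandwich."
sample = build_niah_sample(haystack_text, needle_text, context_length=4096, depth=0.5)
```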
+ +### Models to Evaluate +
+ Details + + #### Decision + + The three models that will initially be evaluated are going to be: + + - [SynthIA-7B](https://huggingface.co/TheBloke/SynthIA-7B-v2.0-GPTQ) (the initial default model for LeapfrogAI) + - [Hermes 2 Pro](https://huggingface.co/defenseunicorns/Hermes-2-Pro-Mistral-7B-4bit-32g-GPTQ) (Defense Unicorns quantization) + - [Llama3.1-8B](https://huggingface.co/unsloth/Meta-Llama-3.1-8B-bnb-4bit) (using a 4 bit quantization) + + GPT-4o will also be used as a point of comparison in the results. + + #### Rationale + Three models were chosen to evaluate against initially in order to balance the scale between complexity and variety. There are endless variations of models that could be evaluated against, but these ones were chosen with specific reasons in mind. + - **SynthIA-7B**: This model has been the default backbone of LeapfrogAI since the beginning and (at the time of writing this ADR) is still the default model deployment choice. It is a 4 bit QPTQ quantization, so it is small enough to load on edge deployments. It is also compatible with both backend deployment options: llama-cpp-python and vllm. As it is still the default model choice, it should be evaluated on to see how it performs as time has gone on. + - **Hermes 2 Pro**: This model is a fine-tune of the Mistral-7b-Instruct model using the [OpenHermes-2.5](https://huggingface.co/datasets/teknium/OpenHermes-2.5) dataset. Hermes 2 Pro also includes [Hermes Function Calling](https://github.com/NousResearch/Hermes-Function-Calling). This particular model is a 4 bit GPTQ quantization on the [VMWare Open Instruct](https://huggingface.co/datasets/vmware/open-instruct) dataset that was generated by Defense Unicorns. Hermes 2 Pro advances on Mistral 7b with excellent general task and conversation capabilities and enhanced function calling and generation of JSON structured outputs. This model also meets the requirements of being small enough to load in edge deployment scenarios. + - **Llama3.1-8B**: This model has been shown to be an exemplary addition to the small model space [(Model Card)](https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/MODEL_CARD.md). With additional language capabilities (trained on 8 languages), the Llama3.1 family of models offers high performance under a variety of scenarios. The model that will be evaluated against is a 4 bit bnb quanitzation of LLama3.1-8B. This quantization again allows for smaller deployment scenarios and makes a more relevant comparison point to the models already in use within LeapfrogAI. + +All of the above models have similar vRAM requirements (able to be run on < 16Gb of vRAM), similar parameter count (7-8 billion parameters), and the same quantization level (4-bit). By balancing these factors, we can verify that each of these models can be swapped out for another and the system requirements do not need to change. This will assist in being able to provide comparisons that are different by as few variables as possible. + +As time goes on, additional models will be considered and added as comparison points. + +
+ +### LLM-as-Judge / LLMs-as-Jury +
+ Details + + #### Decision + + For the RAG Evals MVP, [Claude 3.5 Sonnet](https://www.anthropic.com/news/claude-3-5-sonnet) by Anthropic will be used as a single LLM-as-Judge. + + #### Rationale + + There are two points to rationalize: the model choice and the decision to use a single judge. + + In order to reach an MVP product, a single LLM judge will be utilized for the evaluations that require it. This will be the first stage so that the evaluation framework can begin receiving results. As progress is made, additional LLM-based judges will be incorporated to develop an LLM-jury styled approach. For context, please see the following [paper](https://arxiv.org/pdf/2404.18796). + + Claude 3.5 Sonnet was chosen as the first judge due to its high levels of [performance](https://artificialanalysis.ai/models/claude-35-sonnet), which is crucial when utilizing an LLM judge. As shown in its [model card](https://www-cdn.anthropic.com/fed9cc193a14b84131812372d8d5857f8f304c52/Model_Card_Claude_3_Addendum.pdf), Claude 3.5 Sonnet outperforms other large models on various evaluation benchmarks. These benchmarks include: + - MMLU (general multitask reasoning) + - DROP (reading comprehension) + - BIG-Bench Hard (mixed task evaluations) + - Needle in a Haystack recall (for understanding long contexts) + - XSTest (for testing rejection of harmful requests) + + By utilizing a model that outperforms other similarly large models on all of these tasks, we can have confidence that we are using the most capable LLM-as-judge model. + + Additionally, Claude 3.5 Sonnet exists outside the family of models that will be evaluated, which has been shown to be effective in comparison to using judges from the same model family due to [self-enhancement bias](https://arxiv.org/pdf/2306.05685). +
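A rough sketch of how a non-default judge could be plugged into the evaluation metrics is shown below. It assumes DeepEval's `DeepEvalBaseLLM` wrapper interface and the Anthropic Python client, and the model ID shown is an example rather than a pinned project choice.

```python
import anthropic
from deepeval.models.base_model import DeepEvalBaseLLM

class ClaudeJudge(DeepEvalBaseLLM):
    """Wraps the Anthropic API so DeepEval metrics can use Claude 3.5 Sonnet as the judge."""

    def __init__(self, model_name: str = "claude-3-5-sonnet-20240620"):
        self.model_name = model_name

    def load_model(self):
        # reads ANTHROPIC_API_KEY from the environment
        return anthropic.Anthropic()

    def generate(self, prompt: str) -> str:
        response = self.load_model().messages.create(
            model=self.model_name,
            max_tokens=1024,
            messages=[{"role": "user", "content": prompt}],
        )
        return response.content[0].text

    async def a_generate(self, prompt: str) -> str:
        return self.generate(prompt)

    def get_model_name(self) -> str:
        return self.model_name

# Metrics accept the wrapper via their `model` argument, e.g.:
# FaithfulnessMetric(model=ClaudeJudge())
```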
+ +### Metrics / Evaluations +
+ Details + + #### Decision + + The LeapfrogAI RAG evaluation framework will utilize the following evaluations: + + LLM-as-a-judge metrics to use: + - [Contextual Recall](https://docs.confident-ai.com/docs/metrics-contextual-recall) (for evaluating retrieval) + - [Answer Correctness](https://docs.confident-ai.com/docs/metrics-llm-evals) (for evaluating generation) + - [Faithfulness](https://docs.confident-ai.com/docs/metrics-faithfulness) (for evaluating generation) + + Non-LLM-enabled evaluations: + - Needle in a Haystack (for evaluating retrieval and generation) + - Annotation Relevancy (for evaluating retrieval) + + Standard LLM benchmarks: + - [HumanEval](https://docs.confident-ai.com/docs/benchmarks-human-eval) (for evaluating code generation) + - [MMLU](https://docs.confident-ai.com/docs/benchmarks-mmlu) (for evaluating reasoning across multiple subjects; generation only) + + Performance Metrics: + - Total Execution Runtime + + #### Rationale + + These metrics were chosen to balance the explainability/understandability of non-LLM based evaluations and the flexibility/scalability of LLM-as-judge evaluations. + - Contextual Recall: evaluates the extent to which the context retrieved by RAG corresponds to an expected output + - Answer Correctness: evaluates if an answer generated by an LLM is accurate when compared to the question asked and its context + - Faithfulness: evaluates whether an answer generated by an LLM factually aligns with the context provided + - Needle in a Haystack (retrieval): determines if a needle of information is correctly retrieved from the vector store by RAG + - Needle in a Haystack (response): determines if a needle of information is correctly given in the final response of the LLM in a RAG pipeline + - HumanEval: Evaluates an LLM's code generation abilities (not RAG-enabled, but useful as an established baseline to compare against) + - MMLU: Evaluates an LLM's ability to reason on multiple task topics using multiple choice questions (not RAG-enabled, but useful as an established baseline to compare against) + - Annotation Relevancy: A custom metric that measures how often documents that have nothing to do with the question are cited in the annotations. Higher is better + + Established LLM benchmarks (MMLU and HumanEval) are included in this MVP evaluation framework despite not requiring information from a retrieval system. It's important that this framework have a few generation-only metrics to be better at diagnosing whether issues in performance are happening due to RAG or the model. The other metrics included in this MVP evaluate either the retrieval stage on its own or the information-assisted generation. If the metrics evaluated on the information-assisted generation (e.g Faithfulness or NIAH response) are scoring low, it is difficult to parse out whether or not the low score is caused by the information retrieval, the generation itself, or both. Having these benchmarks provides a way to validate whether or not the generation works as expected, indicating a potential problem with the retrieval. These benchmarks are also standard, and therefore used across many LLMs. Therefore, these values can be used when comparing what performance is expected of these models and what is being observed in LeapfrogAI. These benchmarks can assist in diagnosing problems with both quantization (which often don't have these benchmarks) and implementation differences. 
+ + While these metrics are being utilized first to balance the value gained against the time to implement, additional evaluation metrics will be added soon after MVP status. Potential options include: + - RAG retrieval Hit Rate: a non-LLM metric that evaluates how often a retrieved context matches the expected context for a question/answer scenario + - Performance metrics: non-LLM metrics that measure performance targets such as runtime and compute (CPU and GPU) usage (requires a standardized deployment context) +
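Of the non-LLM metrics above, Annotation Relevancy is custom to LeapfrogAI. One way the described ratio could be computed is sketched below; this is an interpretation of the metric's description, not the shipped implementation.

```python
def annotation_relevancy(cited_doc_ids: list[str], relevant_doc_ids: set[str]) -> float:
    """Fraction of cited documents that are actually relevant to the question.
    1.0 means every annotation points at a relevant document; higher is better."""
    if not cited_doc_ids:
        return 1.0  # nothing cited, so nothing irrelevant was cited
    relevant_citations = sum(1 for doc_id in cited_doc_ids if doc_id in relevant_doc_ids)
    return relevant_citations / len(cited_doc_ids)

score = annotation_relevancy(
    cited_doc_ids=["doc_a", "doc_b", "doc_b", "doc_z"],  # documents cited in the response annotations
    relevant_doc_ids={"doc_a", "doc_b"},                 # documents that actually contain the answer
)
# 3 of 4 citations point at relevant documents -> score of 0.75
```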
+ +### Execution / Delivery +
+ Details + + #### Decision + For MVP status, we will be running the evaluation framework in one-off instances utilizing the `leapfrogai_evals` module. This module contains the runners for the current evaluations and measures the metrics that have been established thus far. + + #### Rationale + In order to start getting feedback from evaluations, we simply need to get the results in whatever form we can. Since there is not an established cadence for how often evals will be run (a determination for post MVP), the storage of said evals does not need to be consistent at this time. + + The next steps for the execution and delivery of evals will likely be the following: + - Using the `leapfrogai_evals` module, evaluations will be run at a regular cadence in a Github workflow so that we have a standardized way of running evaluations that we can compare against. + - These evaluation results will be stored as artifacts in GitHub so that performance can be tracked over time across version releases. + +
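The exact runner interface of `leapfrogai_evals` is not specified here, but the one-off execution and artifact storage described above might look roughly like the following hypothetical sketch, where the runner function is a stand-in for a real evaluation:

```python
import json
from datetime import datetime, timezone

def run_contextual_recall() -> float:
    """Placeholder runner: the real module would execute the metric against a
    deployed LeapfrogAI instance and return an averaged score."""
    return 0.0

RUNNERS = {"contextual_recall": run_contextual_recall}

results = {
    "timestamp": datetime.now(timezone.utc).isoformat(),
    "scores": {name: runner() for name, runner in RUNNERS.items()},
}

# one JSON artifact per run, suitable for upload and comparison across version releases
with open("eval-results.json", "w") as f:
    json.dump(results, f, indent=2)
```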
+ +### Model Card +
+ Details + + #### Decision + + The model card will ultimately exist in a few forms: + + - A tabular representation that shows for a given model (or hyperparameter configuration) as a row, the columns consist of all of the scored metrics that were applied to that configuration. + - **Assumption**: A deployed instance of LeapfrogAI will likely always accompany UDS runtime. The evaluation results for a deployment will live in a table under its corresponding UDS runtime page. + - The evaluation outputs themselves will eventually be provided in `json` format for easier ingestion into observability tools or other additional frameworks. + - This will likely become more relevant after MVP status. + - This assumption will need to be vetted by the UDS team and therefore may have to be adjusted in the future. + + A model card report will consist of the table of evaluation metrics as well as a written summary of what the metrics mean, how they relate to specific performance considerations, as well as model recommendations. Therefore, this report can be generalized for a wide audience, but will need to be customized for a given potential deployment scenario. A metrics table may look something like this: + ![Screenshot from 2024-09-18 18-03-18](https://github.com/user-attachments/assets/479f385b-1d09-4842-b1f0-e2d8992b0b3d) + + #### Rationale + + The needs of the model card will likely evolve over time as the needs of delivering evaluations changes. This can be observed in three potential stages: + - Near-term: evaluations benefit the product team to help identify new model choices for new defaults, diagnose implementation bugs, and evaluate upgrades to the RAG pipeline. + - Data format needed: raw numbers, potentially in tabular format for ease of ingesting + - Mid-term: evaluations on default model options for mission heroes are part of the delivery process. These recommendations are provided to assist mission heroes in selecting the models they want in their deployments. + - Data format needed: same as near-term, but a higher emphasis on the report will be necessary + - Long-term: evaluations are ingrained within all LeapfrogAI deployments to diagnose potential runtime issues and to evaluate multiple model options directly within the cluster + - Data format needed: evaluations will need to be directly tied into other metrics-measuring tools, such as prometheus, to integrate directly into UDS runtime. + + By providing an iterable approach to delivering evaluation results, the model card's use-case will be able to evolve over time to scale to meet the needs of the product team, delivery team, and mission heroes. + +
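As a sketch of the tabular representation, per-model metric scores could be rendered into a markdown table as shown below; the model names and numbers are illustrative placeholders, not real evaluation results.

```python
def model_card_table(scores_by_model: dict[str, dict[str, float]]) -> str:
    """Render one row per model configuration and one column per metric."""
    metrics = sorted({m for scores in scores_by_model.values() for m in scores})
    header = "| Model | " + " | ".join(metrics) + " |"
    divider = "|" + "---|" * (len(metrics) + 1)
    rows = [
        "| " + model + " | "
        + " | ".join(f"{scores.get(m, float('nan')):.2f}" for m in metrics) + " |"
        for model, scores in scores_by_model.items()
    ]
    return "\n".join([header, divider, *rows])

print(model_card_table({
    "SynthIA-7B": {"contextual_recall": 0.71, "faithfulness": 0.84},   # illustrative numbers only
    "Hermes 2 Pro": {"contextual_recall": 0.78, "faithfulness": 0.88},  # illustrative numbers only
}))
```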
+ +## Related ADRs +This ADR was influenced by the [RAG Evaluations Toolset](/adr/0004-rag-eval-toolset.md) ADR. + +## References diff --git a/bundles/dev/cpu/uds-bundle.yaml b/bundles/dev/cpu/uds-bundle.yaml index 0df6fd4d9..5e24c4eaa 100644 --- a/bundles/dev/cpu/uds-bundle.yaml +++ b/bundles/dev/cpu/uds-bundle.yaml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.14.0/uds.schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/uds.schema.json kind: UDSBundle metadata: diff --git a/bundles/dev/gpu/uds-bundle.yaml b/bundles/dev/gpu/uds-bundle.yaml index c6205f0c4..3ad029f99 100644 --- a/bundles/dev/gpu/uds-bundle.yaml +++ b/bundles/dev/gpu/uds-bundle.yaml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.14.0/uds.schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/uds.schema.json kind: UDSBundle metadata: diff --git a/bundles/dev/gpu/uds-config.yaml b/bundles/dev/gpu/uds-config.yaml index 9ad6cfdb4..1ef7a2634 100644 --- a/bundles/dev/gpu/uds-config.yaml +++ b/bundles/dev/gpu/uds-config.yaml @@ -9,8 +9,31 @@ variables: gpu_limit: 0 # runs on CPU until GPU limit is increased vllm: - gpu_limit: 1 # if <1, vllm won't work, VLLM is GPU only - #tensor_parallel_size: 1 # TODO: reintroduce when vllm changes get pulled in + trust_remote_code: "True" + tensor_parallel_size: "1" + enforce_eager: "False" + gpu_memory_utilization: "0.90" + worker_use_ray: "True" + engine_use_ray: "True" + quantization: "None" + load_format: "auto" + # LeapfrogAI SDK runtime configuration (usually influenced by config.yaml in development) + max_context_length: "32768" + stop_tokens: ", <|im_end|>, <|endoftext|>" + prompt_format_chat_system: "SYSTEM: {}\n" + prompt_format_chat_user: "USER: {}\n" + prompt_format_chat_assistant: "ASSISTANT: {}\n" + temperature: "0.1" + top_p: "1.0" + top_k: "0" + repetition_penalty: "1.0" + max_new_tokens: "8192" + # Pod deployment configuration + gpu_limit: "1" + gpu_runtime: "nvidia" + pvc_size: "15Gi" + pvc_access_mode: "ReadWriteOnce" + pvc_storage_class: "local-path" supabase: domain: "uds.dev" diff --git a/bundles/latest/cpu/uds-bundle.yaml b/bundles/latest/cpu/uds-bundle.yaml index 747645ae3..23170f504 100644 --- a/bundles/latest/cpu/uds-bundle.yaml +++ b/bundles/latest/cpu/uds-bundle.yaml @@ -1,38 +1,38 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.14.0/uds.schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/uds.schema.json kind: UDSBundle metadata: name: leapfrogai description: A UDS bundle for deploying LeapfrogAI - version: 0.12.2-upstream + version: 0.13.1-upstream packages: # Supabase backend for the UI and API to interface with Postgresql - name: supabase repository: ghcr.io/defenseunicorns/packages/uds/leapfrogai/supabase - ref: 0.12.2-upstream + ref: 0.13.1-upstream # API - name: leapfrogai-api repository: ghcr.io/defenseunicorns/packages/uds/leapfrogai/leapfrogai-api - ref: 0.12.2-upstream + ref: 0.13.1-upstream # Chat Model - name: llama-cpp-python repository: ghcr.io/defenseunicorns/packages/uds/leapfrogai/llama-cpp-python - ref: 0.12.2-upstream + ref: 0.13.1-upstream # Text Embeddings Model - name: text-embeddings repository: ghcr.io/defenseunicorns/packages/uds/leapfrogai/text-embeddings - ref: 0.12.2-upstream + ref: 
0.13.1-upstream # Transcription Model - name: whisper repository: ghcr.io/defenseunicorns/packages/uds/leapfrogai/whisper - ref: 0.12.2-upstream + ref: 0.13.1-upstream # UI - name: leapfrogai-ui repository: ghcr.io/defenseunicorns/packages/uds/leapfrogai/leapfrogai-ui - ref: 0.12.2-upstream + ref: 0.13.1-upstream diff --git a/bundles/latest/gpu/uds-bundle.yaml b/bundles/latest/gpu/uds-bundle.yaml index 3867749a4..39b0acf79 100644 --- a/bundles/latest/gpu/uds-bundle.yaml +++ b/bundles/latest/gpu/uds-bundle.yaml @@ -1,38 +1,38 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.14.0/uds.schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/uds.schema.json kind: UDSBundle metadata: name: leapfrogai description: A UDS bundle for deploying LeapfrogAI - version: 0.12.2-upstream + version: 0.13.1-upstream packages: # Supabase backend for the UI and API to interface with Postgresql - name: supabase repository: ghcr.io/defenseunicorns/packages/uds/leapfrogai/supabase - ref: 0.12.2-upstream + ref: 0.13.1-upstream # OpenAI-like API - name: leapfrogai-api repository: ghcr.io/defenseunicorns/packages/uds/leapfrogai/leapfrogai-api - ref: 0.12.2-upstream + ref: 0.13.1-upstream # Model for generic chat and summarization - name: vllm repository: ghcr.io/defenseunicorns/packages/uds/leapfrogai/vllm - ref: 0.12.2-upstream + ref: 0.13.1-upstream # Model for providing vector embeddings for text - name: text-embeddings repository: ghcr.io/defenseunicorns/packages/uds/leapfrogai/text-embeddings - ref: 0.12.2-upstream + ref: 0.13.1-upstream # Model for converting audio to text - name: whisper repository: ghcr.io/defenseunicorns/packages/uds/leapfrogai/whisper - ref: 0.12.2-upstream + ref: 0.13.1-upstream # UI - name: leapfrogai-ui repository: ghcr.io/defenseunicorns/packages/uds/leapfrogai/leapfrogai-ui - ref: 0.12.2-upstream + ref: 0.13.1-upstream diff --git a/bundles/latest/gpu/uds-config.yaml b/bundles/latest/gpu/uds-config.yaml index 9ad6cfdb4..1ef7a2634 100644 --- a/bundles/latest/gpu/uds-config.yaml +++ b/bundles/latest/gpu/uds-config.yaml @@ -9,8 +9,31 @@ variables: gpu_limit: 0 # runs on CPU until GPU limit is increased vllm: - gpu_limit: 1 # if <1, vllm won't work, VLLM is GPU only - #tensor_parallel_size: 1 # TODO: reintroduce when vllm changes get pulled in + trust_remote_code: "True" + tensor_parallel_size: "1" + enforce_eager: "False" + gpu_memory_utilization: "0.90" + worker_use_ray: "True" + engine_use_ray: "True" + quantization: "None" + load_format: "auto" + # LeapfrogAI SDK runtime configuration (usually influenced by config.yaml in development) + max_context_length: "32768" + stop_tokens: ", <|im_end|>, <|endoftext|>" + prompt_format_chat_system: "SYSTEM: {}\n" + prompt_format_chat_user: "USER: {}\n" + prompt_format_chat_assistant: "ASSISTANT: {}\n" + temperature: "0.1" + top_p: "1.0" + top_k: "0" + repetition_penalty: "1.0" + max_new_tokens: "8192" + # Pod deployment configuration + gpu_limit: "1" + gpu_runtime: "nvidia" + pvc_size: "15Gi" + pvc_access_mode: "ReadWriteOnce" + pvc_storage_class: "local-path" supabase: domain: "uds.dev" diff --git a/docs/DEVELOPMENT.md b/docs/DEVELOPMENT.md index 897bfaf5d..98343ef7f 100644 --- a/docs/DEVELOPMENT.md +++ b/docs/DEVELOPMENT.md @@ -13,20 +13,20 @@ Please first see the pre-requisites listed on the LeapfrogAI documentation websi It is **_HIGHLY RECOMMENDED_** that PyEnv be installed on your machine, and a new virtual environment is created for every 
new development branch. -Follow the installation instructions outlined in the [pyenv](https://github.com/pyenv/pyenv?tab=readme-ov-file#installation) repository to install Python 3.11.6: +Follow the installation instructions outlined in the [pyenv](https://github.com/pyenv/pyenv?tab=readme-ov-file#installation) repository to install Python 3.11.9: ```bash # install the correct python version - pyenv install 3.11.6 + pyenv install 3.11.9 # create a new virtual environment named "leapfrogai" - pyenv virtualenv 3.11.6 leapfrogai + pyenv virtualenv 3.11.9 leapfrogai # activate the virtual environment pyenv activate leapfrogai ``` -If your installation process completes successfully but indicates missing packages such as `sqlite3`, execute the following command to install the required packages then proceed with the reinstallation of Python 3.11.6: +If your installation process completes successfully but indicates missing packages such as `sqlite3`, execute the following command to install the required packages then proceed with the reinstallation of Python 3.11.9: ```bash sudo apt-get install build-essential zlib1g-dev libffi-dev \ @@ -62,6 +62,52 @@ Many of the directories and sub-directories within this project contain Make tar Please refer to each Makefile for more arguments and details on what each target does and is dependent on. +## UDS Tasks + +UDS tasks use the UDS CLI runner, and are defined in the root `tasks.yaml` file. + +Currently, the only tasks within the file are for checking the progress of the LeapfrogAI towards the `Made for UDS` packaging standards. To run the task verification task you must have a [UDS Kubernetes cluster](../packages/k3d-gpu/README.md) and LeapfrogAI (GPU or CPU) deployed. After deploying both major capabilities, you can execute the following: + +```bash +uds run nightly-uds-badge-verification --no-progress +``` + +You should get an output similar to this, depending on how many components of LeapfrogAI are actually deployed: + +```bash + • Running "Create Reports Directory" + + ✔ Completed "Create Reports Directory" + + • Running "Run UDS Badge Verification Task" + + ✔ Completed "Run UDS Badge Verification Task" + + • Running "Clean Up Final Report" +----------------------------- +Package: leapfrogai-api + +❌ Errors: 4 +⚠️ Warnings: 3 + +❌ Error Descriptions: + - Endpoint leapfrogai-api.uds.dev is returning 404 + - Not all applicable network policies are using selectors + - Not all applicable network policies are using ports + - No monitors defined + +⚠️ Warning Descriptions: + - Version is not consistent across flavors and package + - Network policies with 'remoteGenerated: Anywhere' are present, review needed + - No SSO configuration found, review needed +----------------------------- +UDS Capability Issues + +❌ Error Descriptions: + - Not all pods have the istio sidecar +----------------------------- +``` + ## Environment Variables Be wary of `*config*.yaml` or `.env*` files that are in individual components of the stack. The component's README will usually tell the developer when to fill them out or supply environment variables to a script. 
@@ -81,6 +127,7 @@ uds zarf tools registry prune --confirm # create and deploy the new package # FLAVOR can be upstream (default) or registry1 - see README for availability details +# See individual sub-directories for any flavor-specific instructions (e.g., packages/api/README.md) LOCAL_VERSION=dev FLAVOR=upstream REGISTRY_PORT=5000 ARCH=amd64 make build-api LOCAL_VERSION=dev FLAVOR=upstream REGISTRY_PORT=5000 ARCH=amd64 make deploy-api ``` @@ -107,6 +154,7 @@ uds zarf package deploy zarf-package-*.tar.zst --confirm ```bash # FLAVOR can be upstream (default) or registry1 - see README for availability details + # See individual sub-directories for any flavor-specific instructions (e.g., packages/api/README.md) LOCAL_VERSION=dev FLAVOR=upstream ARCH=amd64 make build-cpu # ui, api, llama-cpp-python, text-embeddings, whisper, supabase # OR LOCAL_VERSION=dev FLAVOR=upstream ARCH=amd64 make build-gpu # ui, api, vllm, text-embeddings, whisper, supabase @@ -120,6 +168,7 @@ uds zarf package deploy zarf-package-*.tar.zst --confirm ```bash # FLAVOR can be upstream (default) or registry1 - see README for availability details + # See individual sub-directories for any flavor-specific instructions (e.g., packages/api/README.md) LOCAL_VERSION=dev FLAVOR=upstream ARCH=amd64 make build-ui LOCAL_VERSION=dev FLAVOR=upstream ARCH=amd64 make build-api LOCAL_VERSION=dev FLAVOR=upstream ARCH=amd64 make build-supabase @@ -154,7 +203,7 @@ Although not provided in the example UDS bundle manifests found in this reposito - name: leapfrogai-api repository: ghcr.io/defenseunicorns/packages/leapfrogai/leapfrogai-api # x-release-please-start-version - ref: 0.12.2 + ref: 0.13.1 # x-release-please-end # THE BELOW LINES WERE ADDED FOR DEMONSTRATION PURPOSES @@ -188,6 +237,7 @@ To demonstrate what this would look like for an Apple Silicon Mac: ```bash # FLAVOR can be upstream (default) or registry1 - see README for availability details +# See individual sub-directories for any flavor-specific instructions (e.g., packages/api/README.md) REG_PORT=5001 ARCH=arm64 LOCAL_VERSION=dev FLAVOR=upstream make build-cpu ``` @@ -195,6 +245,7 @@ To demonstrate what this would look like for an older Intel Mac: ```bash # FLAVOR can be upstream (default) or registry1 - see README for availability details +# See individual sub-directories for any flavor-specific instructions (e.g., packages/api/README.md) REG_PORT=5001 ARCH=arm64 LOCAL_VERSION=dev FLAVOR=upstream make build-cpu ``` diff --git a/mk-clean.mk b/mk-clean.mk index ff7e8c61d..4ca00ae89 100644 --- a/mk-clean.mk +++ b/mk-clean.mk @@ -15,8 +15,8 @@ clean-artifacts: # Zarf packages, UDS bundles, Python build artifacts, etc. 
clean-cache: -rm -rf ./**/__pycache__ ./**/*/__pycache__ ./**/**/*/__pycache__ - -rm -rf ./**/*/.ruff_cache ./**/.ruff_cache - -rm -rf ./**/.pytest_cache ./**/*/.pytest_cache + -rm -rf ./.ruff_cache ./**/*/.ruff_cache ./**/.ruff_cache + -rm -rf ./.pytest_cache ./**/.pytest_cache ./**/*/.pytest_cache -rm -rf ./.mypy_cache clean-env: diff --git a/packages/api/README.md b/packages/api/README.md index aa2b34690..2d68d67f8 100644 --- a/packages/api/README.md +++ b/packages/api/README.md @@ -27,6 +27,13 @@ make build-api LOCAL_VERSION=dev FLAVOR=upstream uds zarf package deploy packages/api/zarf-package-leapfrogai-api-*-dev.tar.zst --confirm ``` +For other package flavors, use the following example: + +```bash +make build-api FLAVOR=registry1 +uds zarf package deploy packages/api/zarf-package-leapfrogai-api-*-dev.tar.zst --confirm +``` + ### Local Development See the [source code documentation](../../src/leapfrogai_api/README.md) for running the API from the source code for local Python environment development. diff --git a/packages/api/chart/templates/istio-admin.yaml b/packages/api/chart/templates/istio-admin.yaml new file mode 100644 index 000000000..c369e8786 --- /dev/null +++ b/packages/api/chart/templates/istio-admin.yaml @@ -0,0 +1,24 @@ +{{- if .Capabilities.APIVersions.Has "security.istio.io/v1beta1" }} +apiVersion: security.istio.io/v1beta1 +kind: AuthorizationPolicy +metadata: + name: api-block-metrics-access-from-public-gateway + namespace: {{ .Release.Namespace }} +spec: + selector: + matchLabels: + {{- include "chart.selectorLabels" . | nindent 6 }} + action: DENY + rules: + - to: + - operation: + ports: + - "8080" + paths: + - /metrics* + from: + - source: + notNamespaces: + - istio-admin-gateway + - monitoring +{{- end }} diff --git a/packages/api/chart/templates/uds-package.yaml b/packages/api/chart/templates/uds-package.yaml index a6a83dea8..17220788d 100644 --- a/packages/api/chart/templates/uds-package.yaml +++ b/packages/api/chart/templates/uds-package.yaml @@ -7,6 +7,11 @@ metadata: labels: {{- include "chart.labels" . | nindent 4 }} spec: + monitor: + - portName: http + targetPort: {{ .Values.api.service.port }} + selector: + {{- include "chart.selectorLabels" . | nindent 8 }} network: expose: - service: {{ include "chart.fullname" . 
}} diff --git a/packages/api/chart/values.yaml b/packages/api/chart/values.yaml index 65b397e46..4c217ba8a 100644 --- a/packages/api/chart/values.yaml +++ b/packages/api/chart/values.yaml @@ -25,6 +25,8 @@ api: value: "*.toml" - name: DEFAULT_EMBEDDINGS_MODEL value: "text-embeddings" + - name: DEV + value: "false" - name: PORT value: "8080" - name: SUPABASE_URL diff --git a/packages/api/common/zarf.yaml b/packages/api/common/zarf.yaml index 08f52f60a..3462103d2 100644 --- a/packages/api/common/zarf.yaml +++ b/packages/api/common/zarf.yaml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.14.0/zarf.schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/zarf.schema.json kind: ZarfPackageConfig metadata: diff --git a/packages/api/values/registry1-values.yaml b/packages/api/values/registry1-values.yaml index d269c6415..91f92b168 100644 --- a/packages/api/values/registry1-values.yaml +++ b/packages/api/values/registry1-values.yaml @@ -1,9 +1,7 @@ api: image: repository: "registry1.dso.mil/ironbank/opensource/defenseunicorns/leapfrogai/api" - # x-release-please-start-version - tag: v0.12.2 - # x-release-please-end + tag: v###ZARF_CONST_IMAGE_VERSION### expose: "###ZARF_VAR_EXPOSE_API###" @@ -16,6 +14,8 @@ api: value: "*.toml" - name: DEFAULT_EMBEDDINGS_MODEL value: "###ZARF_VAR_DEFAULT_EMBEDDINGS_MODEL###" + - name: DEV + value: "###ZARF_VAR_DEV###" - name: PORT value: "8080" - name: SUPABASE_URL diff --git a/packages/api/values/upstream-values.yaml b/packages/api/values/upstream-values.yaml index 6d867260e..ef2dcdad9 100644 --- a/packages/api/values/upstream-values.yaml +++ b/packages/api/values/upstream-values.yaml @@ -14,6 +14,8 @@ api: value: "*.toml" - name: DEFAULT_EMBEDDINGS_MODEL value: "###ZARF_VAR_DEFAULT_EMBEDDINGS_MODEL###" + - name: DEV + value: "###ZARF_VAR_DEV###" - name: PORT value: "8080" - name: SUPABASE_URL diff --git a/packages/api/zarf-config.yaml b/packages/api/zarf-config.yaml new file mode 100644 index 000000000..475ac2d48 --- /dev/null +++ b/packages/api/zarf-config.yaml @@ -0,0 +1,6 @@ +package: + create: + set: + # x-release-please-start-version + image_version: "0.13.1" + # x-release-please-end diff --git a/packages/api/zarf.yaml b/packages/api/zarf.yaml index 4fa6c59f2..10a183e9c 100644 --- a/packages/api/zarf.yaml +++ b/packages/api/zarf.yaml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.14.0/zarf.schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/zarf.schema.json kind: ZarfPackageConfig metadata: @@ -16,6 +16,9 @@ variables: description: "Flag to expose the OpenAPI schema for debugging." - name: DEFAULT_EMBEDDINGS_MODEL default: "text-embeddings" + - name: DEV + default: "false" + description: "Flag to enable development endpoints." 
components: - name: leapfrogai-api @@ -47,7 +50,7 @@ components: valuesFiles: - "values/registry1-values.yaml" images: - - "registry1.dso.mil/ironbank/opensource/defenseunicorns/leapfrogai/api:v0.12.2" + - "registry1.dso.mil/ironbank/opensource/defenseunicorns/leapfrogai/api:v###ZARF_PKG_TMPL_IMAGE_VERSION###" # TODO: replace with Ironbank image once hardened: registry1.dso.mil/ironbank/opensource/defenseunicorns/leapfrogai/api/migrations - "ghcr.io/defenseunicorns/leapfrogai/api-migrations:###ZARF_PKG_TMPL_IMAGE_VERSION###" - "registry1.dso.mil/ironbank/kiwigrid/k8s-sidecar:1.23.3" diff --git a/packages/llama-cpp-python/zarf.yaml b/packages/llama-cpp-python/zarf.yaml index 2320e5a26..49ac98f34 100644 --- a/packages/llama-cpp-python/zarf.yaml +++ b/packages/llama-cpp-python/zarf.yaml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.14.0/zarf.schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/zarf.schema.json kind: ZarfPackageConfig metadata: diff --git a/packages/repeater/zarf.yaml b/packages/repeater/zarf.yaml index 0e1f76378..38d6090f2 100644 --- a/packages/repeater/zarf.yaml +++ b/packages/repeater/zarf.yaml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.14.0/zarf.schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/zarf.schema.json kind: ZarfPackageConfig metadata: diff --git a/packages/supabase/zarf.yaml b/packages/supabase/zarf.yaml index 3c6d5c4f7..44ea46c1c 100644 --- a/packages/supabase/zarf.yaml +++ b/packages/supabase/zarf.yaml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.14.0/zarf.schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/zarf.schema.json kind: ZarfPackageConfig metadata: name: supabase diff --git a/packages/text-embeddings/zarf.yaml b/packages/text-embeddings/zarf.yaml index d11d50ff5..fc270d48a 100644 --- a/packages/text-embeddings/zarf.yaml +++ b/packages/text-embeddings/zarf.yaml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.14.0/zarf.schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/zarf.schema.json kind: ZarfPackageConfig metadata: diff --git a/packages/ui/chart/templates/ui/service.yaml b/packages/ui/chart/templates/ui/service.yaml index 15243e806..2cb919567 100644 --- a/packages/ui/chart/templates/ui/service.yaml +++ b/packages/ui/chart/templates/ui/service.yaml @@ -18,11 +18,3 @@ spec: protocol: TCP port: {{ .Values.service.port }} targetPort: {{ .Values.service.port }} ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: {{ include "chart.serviceAccountName" . }} - namespace: {{ .Release.Namespace | default "leapfrogai" }} - labels: - {{- include "chart.labels" . 
| nindent 4 }} diff --git a/packages/ui/zarf.yaml b/packages/ui/zarf.yaml index 14de3c89d..7a0741a5e 100644 --- a/packages/ui/zarf.yaml +++ b/packages/ui/zarf.yaml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.14.0/zarf.schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/zarf.schema.json kind: ZarfPackageConfig metadata: diff --git a/packages/vllm/.env.example b/packages/vllm/.env.example index 1e3a00170..0a995e234 100644 --- a/packages/vllm/.env.example +++ b/packages/vllm/.env.example @@ -1,13 +1,12 @@ -export LAI_HF_HUB_ENABLE_HF_TRANSFER="1" -export LAI_REPO_ID="TheBloke/Synthia-7B-v2.0-GPTQ" -export LAI_REVISION="gptq-4bit-32g-actorder_True" -export LAI_QUANTIZATION="gptq" -export LAI_TENSOR_PARALLEL_SIZE=1 -export LAI_MODEL_SOURCE=".model/" -export LAI_MAX_CONTEXT_LENGTH=32768 -export LAI_STOP_TOKENS='["","<|endoftext|>","<|im_end|>"]' -export LAI_PROMPT_FORMAT_CHAT_SYSTEM="SYSTEM: {}\n" -export LAI_PROMPT_FORMAT_CHAT_ASSISTANT="ASSISTANT: {}\n" -export LAI_PROMPT_FORMAT_CHAT_USER="USER: {}\n" -export LAI_PROMPT_FORMAT_DEFAULTS_TOP_P=1.0 -export LAI_PROMPT_FORMAT_DEFAULTS_TOP_K=0 \ No newline at end of file +LFAI_REPO_ID="TheBloke/SynthIA-7B-v2.0-GPTQ" +LFAI_REVISION="gptq-4bit-32g-actorder_True" + +VLLM_TENSOR_PARALLEL_SIZE=1 +VLLM_TRUST_REMOTE_CODE=True +VLLM_MAX_CONTEXT_LENGTH=32768 +VLLM_ENFORCE_EAGER=False +VLLM_GPU_MEMORY_UTILIZATION=0.90 +VLLM_WORKER_USE_RAY=True +VLLM_ENGINE_USE_RAY=True +VLLM_QUANTIZATION=None +VLLM_LOAD_FORMAT=auto diff --git a/packages/vllm/Dockerfile b/packages/vllm/Dockerfile index 8676f5eda..f53088ead 100755 --- a/packages/vllm/Dockerfile +++ b/packages/vllm/Dockerfile @@ -6,8 +6,9 @@ FROM nvidia/cuda:12.2.2-devel-ubuntu22.04 AS builder # set SDK location # set the pyenv and Python versions ARG SDK_DEST=src/leapfrogai_sdk/build \ - PYTHON_VERSION=3.11.6 \ - PYENV_GIT_TAG=v2.4.8 + PYTHON_VERSION=3.11.9 \ + PYENV_GIT_TAG=v2.4.8\ + COMPONENT_DIRECTORY="packages/vllm" # use root user for deps installation and nonroot user creation USER root @@ -41,7 +42,7 @@ USER nonroot # copy-in SDK from sdk stage and vllm source code from host WORKDIR /home/leapfrogai COPY --from=sdk --chown=nonroot:nonroot /leapfrogai/${SDK_DEST} ./${SDK_DEST} -COPY --chown=nonroot:nonroot packages/vllm packages/vllm +COPY --chown=nonroot:nonroot ${COMPONENT_DIRECTORY} packages/vllm # create virtual environment for light-weight portability and minimal libraries RUN curl https://pyenv.run | bash && \ @@ -54,10 +55,10 @@ RUN curl https://pyenv.run | bash && \ ENV PYENV_ROOT="/home/nonroot/.pyenv" \ PATH="/home/nonroot/.pyenv/bin:$PATH" -# Install Python 3.11.6, set it as global, and create a venv +# Install Python, set it as global, and create a venv RUN . 
~/.bashrc && \ - PYTHON_CONFIGURE_OPTS="--enable-shared" pyenv install 3.11.6 && \ - pyenv global 3.11.6 && \ + PYTHON_CONFIGURE_OPTS="--enable-shared" pyenv install 3.11.9 && \ + pyenv global ${PYTHON_VERSION} && \ pyenv exec python -m venv .venv # set path to venv python @@ -67,26 +68,15 @@ RUN rm -f packages/vllm/build/*.whl && \ python -m pip wheel packages/vllm -w packages/vllm/build --find-links=${SDK_DEST} && \ pip install packages/vllm/build/lfai_vllm*.whl --no-index --find-links=packages/vllm/build/ +################# +# FINAL CONTAINER +################# + FROM nvidia/cuda:12.2.2-runtime-ubuntu22.04 # set SDK location ARG SDK_DEST=src/leapfrogai_sdk/build -# model-specific arguments -ARG ARG HF_HUB_ENABLE_HF_TRANSFER="1" \ - REPO_ID="TheBloke/Synthia-7B-v2.0-GPTQ" \ - REVISION="gptq-4bit-32g-actorder_True" \ - MODEL_SOURCE="/data/.model/" \ - MAX_CONTEXT_LENGTH=32768 \ - STOP_TOKENS='[""]' \ - PROMPT_FORMAT_CHAT_SYSTEM="SYSTEM: {}\n" \ - PROMPT_FORMAT_CHAT_USER="USER: {}\n" \ - PROMPT_FORMAT_CHAT_ASSISTANT="ASSISTANT: {}\n" \ - PROMPT_FORMAT_DEFAULTS_TOP_P=1.0 \ - PROMPT_FORMAT_DEFAULTS_TOP_K=0 \ - TENSOR_PARALLEL_SIZE=1 \ - QUANTIZATION="gptq" - # setup nonroot user and permissions USER root RUN groupadd -g 65532 vglusers && \ @@ -101,24 +91,10 @@ COPY --from=sdk --chown=nonroot:nonroot /leapfrogai/${SDK_DEST} ./${SDK_DEST} COPY --from=builder --chown=nonroot:nonroot /home/leapfrogai/.venv /home/leapfrogai/.venv COPY --from=builder --chown=nonroot:nonroot /home/leapfrogai/packages/vllm/src /home/leapfrogai/packages/vllm/src # copy-in python binaries -COPY --from=builder --chown=nonroot:nonroot /home/nonroot/.pyenv/versions/3.11.6/ /home/nonroot/.pyenv/versions/3.11.6/ - -# load ARG values into env variables for pickup by confz -ENV LAI_HF_HUB_ENABLE_HF_TRANSFER=${HF_HUB_ENABLE_HF_TRANSFER} \ - LAI_REPO_ID=${REPO_ID} \ - LAI_REVISION=${REVISION} \ - LAI_MODEL_SOURCE=${MODEL_SOURCE} \ - LAI_MAX_CONTEXT_LENGTH=${MAX_CONTEXT_LENGTH} \ - LAI_STOP_TOKENS=${STOP_TOKENS} \ - LAI_PROMPT_FORMAT_CHAT_SYSTEM=${PROMPT_FORMAT_CHAT_SYSTEM} \ - LAI_PROMPT_FORMAT_CHAT_USER=${PROMPT_FORMAT_CHAT_USER} \ - LAI_PROMPT_FORMAT_CHAT_ASSISTANT=${PROMPT_FORMAT_CHAT_ASSISTANT} \ - LAI_PROMPT_FORMAT_DEFAULTS_TOP_P=${PROMPT_FORMAT_DEFAULTS_TOP_P} \ - LAI_PROMPT_FORMAT_DEFAULTS_TOP_K=${PROMPT_FORMAT_DEFAULTS_TOP_K} \ - LAI_TENSOR_PARALLEL_SIZE=${TENSOR_PARALLEL_SIZE} \ - LAI_QUANTIZATION=${QUANTIZATION} \ - # remove vLLM callback to stats server - VLLM_NO_USAGE_STATS=1 +COPY --from=builder --chown=nonroot:nonroot /home/nonroot/.pyenv/versions/${PYTHON_VERSION}/ /home/nonroot/.pyenv/versions/${PYTHON_VERSION}/ + +# remove vLLM callback to stats server +ENV VLLM_NO_USAGE_STATS=1 ENV PATH="/home/leapfrogai/.venv/bin:$PATH" diff --git a/packages/vllm/Makefile b/packages/vllm/Makefile index 98e8b29db..c764a78f2 100644 --- a/packages/vllm/Makefile +++ b/packages/vllm/Makefile @@ -1,6 +1,27 @@ +ARCH ?= amd64 +LOCAL_VERSION ?= $(shell git rev-parse --short HEAD) +DOCKER_FLAGS := + install: python -m pip install ../../src/leapfrogai_sdk python -m pip install -e ".[dev]" -dev: - python -m leapfrogai_sdk.cli --app-dir=src/ main:Model +download: + @env $$(cat .env | xargs) python src/model_download.py + +dev: download + @env $$(cat .env | xargs) python -m leapfrogai_sdk.cli --app-dir=src/ main:Model + +docker: download + docker build ${DOCKER_FLAGS} \ + --platform=linux/${ARCH} \ + --build-arg LOCAL_VERSION=${LOCAL_VERSION} \ + --build-arg COMPONENT_DIRECTORY="./" \ + -t 
ghcr.io/defenseunicorns/leapfrogai/vllm:${LOCAL_VERSION} \ + -f ./Dockerfile . + + docker run -it --rm \ + --env-file ./.env \ + -v $(PWD)/config.yaml:/home/leapfrogai/config.yaml \ + -v $(PWD)/.model:/home/leapfrogai/.model \ + ghcr.io/defenseunicorns/leapfrogai/vllm:${LOCAL_VERSION} diff --git a/packages/vllm/README.md b/packages/vllm/README.md index a55238cfd..5bc7a052f 100644 --- a/packages/vllm/README.md +++ b/packages/vllm/README.md @@ -16,13 +16,21 @@ See the LeapfrogAI documentation website for [system requirements](https://docs. The default model that comes with this backend in this repository's officially released images is a [4-bit quantization of the Synthia-7b model](https://huggingface.co/TheBloke/SynthIA-7B-v2.0-GPTQ). -You can optionally specify different models or quantization types using the following Docker build arguments: +All of the commands in this sub-section are executed within this `packages/vllm` sub-directory. -- `--build-arg HF_HUB_ENABLE_HF_TRANSFER="1"`: Enable or disable HuggingFace Hub transfer (default: 1) -- `--build-arg REPO_ID="TheBloke/Synthia-7B-v2.0-GPTQ"`: HuggingFace repository ID for the model -- `--build-arg REVISION="gptq-4bit-32g-actorder_True"`: Revision or commit hash for the model -- `--build-arg QUANTIZATION="gptq"`: Quantization type (e.g., gptq, awq, or empty for un-quantized) -- `--build-arg TENSOR_PARALLEL_SIZE="1"`: The number of gpus to spread the tensor processing across +Optionally, you can specify a different model during Zarf creation: + +```bash +uds zarf package create --confirm --set MODEL_REPO_ID=defenseunicorns/Hermes-2-Pro-Mistral-7B-4bit-32g --set MODEL_REVISION=main +``` + +If you decide to use a different model, there will likely be a need to change generation and engine runtime configurations, please see the [Zarf Package Config](./zarf-config.yaml) and the [values override file](./values/upstream-values.yaml) for details on what runtime parameters can be modified. These parameters are model-specific, and can be found in the HuggingFace model cards and/or configuration files (e.g., prompt templates). + +For example, during Zarf deployment, you can override the Zarf Package Config defaults by doing the following: + +```bash +uds zarf package deploy zarf-package-vllm-amd64-dev.tar.zst --confirm --set ENFORCE_EAGER=True +``` ### Deployment @@ -39,11 +47,26 @@ uds zarf package deploy packages/vllm/zarf-package-vllm-*-dev.tar.zst --confirm ### Local Development -To run the vllm backend locally: +In local development the [config.yaml](./config.yaml) and [.env.example](./.env.example) must be modified if the model has changed away from the default. The LeapfrogAI SDK picks up the `config.yaml` automatically, and the `.env` must be sourced into the Python environment. > [!IMPORTANT] > Execute the following commands from this sub-directory +Create a `.env` file based on the [`.env.example`](./.env.example): + +```bash +cp .env.example .env +source .env +``` + +As necessary, modify the existing [`config.yaml`](./config.yaml): + +```bash +vim config.yaml +``` + +To run the vllm backend locally: + ```bash # Install dev and runtime dependencies make install @@ -54,3 +77,19 @@ python src/model_download.py # Start the model backend make dev ``` + +#### Local Docker Container + +To run the Docker container, use the following Makefile commands. `LOCAL_VERSION` must be consistent across the two Make commands. 
+ +In the root of the LeapfrogAI repository: + +```bash +LOCAL_VERSION=dev make sdk-wheel +``` + +In the root of this vLLM sub-directory: + +```bash +LOCAL_VERSION=dev make docker +``` diff --git a/packages/vllm/chart/templates/deployment.yaml b/packages/vllm/chart/templates/deployment.yaml index 7b88cc137..3f8aa0540 100644 --- a/packages/vllm/chart/templates/deployment.yaml +++ b/packages/vllm/chart/templates/deployment.yaml @@ -36,7 +36,7 @@ spec: [ "sh", "-c", - 'while [ ! -f /data/.model/###ZARF_DATA_INJECTION_MARKER### ]; do echo "waiting for zarf data sync" && sleep 1; done; echo "we are done waiting!"', + 'while [ ! -f ###ZARF_CONST_MODEL_PATH###/###ZARF_DATA_INJECTION_MARKER### ]; do echo "waiting for zarf data sync" && sleep 1; done; echo "we are done waiting!"', ] resources: {{- toYaml .Values.modelInjectionContainer.resources | nindent 12 }} @@ -46,6 +46,9 @@ spec: - name: leapfrogai-pv-storage persistentVolumeClaim: claimName: lfai-{{ .Values.nameOverride }}-pv-claim + - name: leapfrogai-sdk-configmap + configMap: + name: "{{ .Values.nameOverride }}-sdk-configmap" securityContext: {{- toYaml .Values.podSecurityContext | nindent 8 }} containers: @@ -58,6 +61,9 @@ spec: env: {{- toYaml . | nindent 12 }} {{- end }} + envFrom: + - configMapRef: + name: "{{ .Values.nameOverride }}-engine-configmap" ports: - name: http containerPort: {{ .Values.service.port }} @@ -67,6 +73,10 @@ spec: volumeMounts: - name: leapfrogai-pv-storage mountPath: "/data" + - name: leapfrogai-sdk-configmap + mountPath: "/home/leapfrogai/config.yaml" + subPath: "config.yaml" + readOnly: true {{- with .Values.nodeSelector }} nodeSelector: {{- toYaml . | nindent 8 }} diff --git a/packages/vllm/chart/templates/leapfrogai-sdk-configmap.yaml b/packages/vllm/chart/templates/leapfrogai-sdk-configmap.yaml new file mode 100644 index 000000000..cdc08be5e --- /dev/null +++ b/packages/vllm/chart/templates/leapfrogai-sdk-configmap.yaml @@ -0,0 +1,37 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: "{{ .Values.nameOverride }}-sdk-configmap" + namespace: {{ .Release.Namespace | default "leapfrogai" }} +data: + config.yaml: | + model: + source: {{ .Values.leapfrogaiConfig.model.source | quote }} + max_context_length: {{ .Values.leapfrogaiConfig.maxContextLength | quote }} + stop_tokens: + {{- $stopTokens := .Values.leapfrogaiConfig.stopTokens }} + {{- range $stopToken := splitList ", " .Values.leapfrogaiConfig.stopTokens }} + - {{ printf "%s" $stopToken }} + {{- end }} + prompt_format: + {{- with .Values.leapfrogaiConfig.promptFormat.chat }} + chat: + {{- if .system }} + system: {{ .system | quote }} + {{- end }} + {{- if .assistant }} + assistant: {{ .assistant | quote }} + {{- end }} + {{- if .user }} + user: {{ .user | quote }} + {{- end }} + {{- if .function }} + function: {{ .function | quote }} + {{- end }} + {{- end }} + defaults: + temperature: {{ .Values.leapfrogaiConfig.defaults.temperature | quote }} + top_p: {{ .Values.leapfrogaiConfig.defaults.topP | quote }} + top_k: {{ .Values.leapfrogaiConfig.defaults.topK | quote }} + repetition_penalty: {{ .Values.leapfrogaiConfig.defaults.repetitionPenalty | quote }} + max_new_tokens: {{ .Values.leapfrogaiConfig.defaults.maxNewTokens | quote }} diff --git a/packages/vllm/chart/templates/vllm-engine-configmap.yaml b/packages/vllm/chart/templates/vllm-engine-configmap.yaml new file mode 100644 index 000000000..5ac82b42c --- /dev/null +++ b/packages/vllm/chart/templates/vllm-engine-configmap.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: "{{ 
.Values.nameOverride }}-engine-configmap" + namespace: {{ .Release.Namespace | default "leapfrogai" }} +data: + VLLM_TRUST_REMOTE_CODE: "{{ .Values.vllmConfig.trustRemoteCode }}" + VLLM_TENSOR_PARALLEL_SIZE: "{{ .Values.vllmConfig.tensorParallelSize }}" + VLLM_ENFORCE_EAGER: "{{ .Values.vllmConfig.enforceEager }}" + VLLM_GPU_MEMORY_UTILIZATION: "{{ .Values.vllmConfig.gpuMemoryUtilization }}" + VLLM_WORKER_USE_RAY: "{{ .Values.vllmConfig.workerUseRay }}" + VLLM_ENGINE_USE_RAY: "{{ .Values.vllmConfig.engineUseRay }}" + VLLM_QUANTIZATION: "{{ .Values.vllmConfig.quantization }}" + VLLM_LOAD_FORMAT: "{{ .Values.vllmConfig.loadFormat }}" diff --git a/packages/vllm/chart/values.yaml b/packages/vllm/chart/values.yaml index 0f7fe9911..0209a8b34 100644 --- a/packages/vllm/chart/values.yaml +++ b/packages/vllm/chart/values.yaml @@ -13,6 +13,33 @@ image: nameOverride: "vllm" fullnameOverride: "" +leapfrogaiConfig: + model: + source: "/data/.model/" + maxContextLength: "32768" + stopTokens: ", <|im_end|>, <|endoftext|>" + promptFormat: + chat: + system: "SYSTEM: {}\n" + assistant: "ASSISTANT: {}\n" + user: "USER: {}\n" + defaults: + temperature: "0.1" + topP: "1.0" + topK: "0" + repetitionPenalty: "1.0" + maxNewTokens: "8192" + +vllmConfig: + trustRemoteCode: "True" + tensorParallelSize: "1" + enforceEager: "False" + gpuMemoryUtilization: "0.90" + workerUseRay: "True" + engineUseRay: "True" + quantization: "None" + loadFormat: "auto" + env: - name: LFAI_LOG_LEVEL value: "INFO" @@ -41,7 +68,7 @@ resources: limits: cpu: 0 memory: 0 - nvidia.com/gpu: 0 + nvidia.com/gpu: 1 requests: cpu: 0 memory: 0 diff --git a/packages/vllm/config.yaml b/packages/vllm/config.yaml new file mode 100644 index 000000000..22210a74b --- /dev/null +++ b/packages/vllm/config.yaml @@ -0,0 +1,17 @@ +model: + source: ".model/" +max_context_length: 32768 +stop_tokens: + - "<|im_end|>" + - "<|endoftext|>" + - "" +prompt_format: + chat: + system: "SYSTEM: {}\n" + assistant: "ASSISTANT: {}\n" + user: "USER: {}\n" +defaults: + top_p: 1.0 + top_k: 0 + repetition_penalty: 1.0 + max_new_tokens: 8192 diff --git a/packages/vllm/pyproject.toml b/packages/vllm/pyproject.toml index 4d7955708..24b1363e6 100644 --- a/packages/vllm/pyproject.toml +++ b/packages/vllm/pyproject.toml @@ -8,7 +8,7 @@ version = "0.13.1" dependencies = [ "pydantic == 2.8.2", - "vllm == 0.4.2", + "vllm == 0.4.3", "python-dotenv == 1.0.1", "aiostream ==0.6.2", "leapfrogai-sdk", diff --git a/packages/vllm/src/config.py b/packages/vllm/src/config.py index debca4ba3..c13af5521 100644 --- a/packages/vllm/src/config.py +++ b/packages/vllm/src/config.py @@ -5,10 +5,6 @@ class ConfigOptions(BaseConfig): - quantization: Literal[None, "awq", "gptq", "squeezellm"] = Field( - default=None, - description="Type of quantization, for un-quantized models omit this field", - ) tensor_parallel_size: int = Field( default=1, title="GPU Utilization Count", @@ -16,39 +12,105 @@ class ConfigOptions(BaseConfig): "This must be divisible to the number of attention heads in the model", examples=[1, 2, 3], ) + quantization: Literal[ + "aqlm", + "bitsandbytes", + "awq", + "deepspeedfp", + "fp8", + "marlin", + "gptq_marlin_24", + "gptq_marlin", + "gptq", + "squeezellm", + "sparseml", + "None", + "", + ] = Field( + title="quantization", + description="Quantization type of the model" + "Force GPTQ instead of GPTQ_Marlin by explicitly providing `gptq` as value.", + examples=["awq", "fp8", "gptq_marlin", "gptq", "squeezellm", "None"], + ) + load_format: Literal["auto", "safetensors", "npz", "pt", 
"bitsandbytes"] = Field( + title="quantization", + description="Load format for the type model and files", + examples=["auto", "safetensors", "npz", "pt", "bitsandbytes"], + ) + enforce_eager: bool = Field( + title="Enable Eager Mode", + description="Enable eager mode to start token generation immediately after prompt processing." + "Potentially reduces initial latency at the cost of slightly higher memory usage." + "Should be set to False in production environments with higher GPU memory.", + examples=[True, False], + ) + gpu_memory_utilization: float = Field( + title="GPU Memory Limit", + description="Maximum amount of GPU vRAM allocated to the vLLM engine and worker(s)", + examples=[0.50, 0.80, 0.90], + ) + engine_use_ray: bool = Field( + title="Use Ray for Engine", + description="If True, uses Ray for managing the execution engine. Allows for distributed inferencing in multi-node situations.", + examples=[True, False], + ) + worker_use_ray: bool = Field( + title="Use Ray for Worker", + description="If True, uses Ray for distributed worker management. Allows for distributed inferencing in multi-node situations.", + examples=[True, False], + ) + trust_remote_code: bool = Field( + title="Trust Downloaded Model Code", + description="Whether to trust inferencing code downloaded as part of the model download." + "Please review the Python code in the .model/ directory before trusting custom model code.", + examples=[True, False], + ) class DownloadOptions(BaseConfig): - hf_hub_enable_hf_transfer: Literal["0", "1"] = Field( - description="Option (0 - Disable, 1 - Enable) for faster transfers, tradeoff stability for faster speeds" - ) repo_id: str = Field( - description="HuggingFace repo id", + description="The HuggingFace git repository ID", examples=[ - "TheBloke/Synthia-7B-v2.0-GPTQ", - "migtissera/Synthia-MoE-v3-Mixtral-8x7B", - "microsoft/phi-2", + "defenseunicorns/Hermes-2-Pro-Mistral-7B-4bit-32g", + "justinthelaw/Phi-3-mini-128k-instruct-4bit-128g", ], ) revision: str = Field( - description="The model branch to use", + description="The HuggingFace repository git branch to use", examples=["main", "gptq-4bit-64g-actorder_True"], ) +# vLLM specific runtime configuration options class AppConfig(BaseConfig): backend_options: ConfigOptions + CONFIG_SOURCES = [ + EnvSource( + allow_all=True, + prefix="VLLM_", + remap={ + "tensor_parallel_size": "backend_options.tensor_parallel_size", + "trust_remote_code": "backend_options.trust_remote_code", + "enforce_eager": "backend_options.enforce_eager", + "quantization": "backend_options.quantization", + "gpu_memory_utilization": "backend_options.gpu_memory_utilization", + "worker_use_ray": "backend_options.worker_use_ray", + "engine_use_ray": "backend_options.engine_use_ray", + "load_format": "backend_options.load_format", + }, + ) + ] + + +class DownloadConfig(BaseConfig): download_options: Optional[DownloadOptions] CONFIG_SOURCES = [ EnvSource( allow_all=True, - prefix="LAI_", + prefix="LFAI_", remap={ - "hf_hub_enable_hf_transfer": "download_options.hf_hub_enable_hf_transfer", "repo_id": "download_options.repo_id", "revision": "download_options.revision", - "quantization": "backend_options.quantization", - "tensor_parallel_size": "backend_options.tensor_parallel_size", }, ) ] diff --git a/packages/vllm/src/main.py b/packages/vllm/src/main.py index 6a530e4f0..67d36d178 100644 --- a/packages/vllm/src/main.py +++ b/packages/vllm/src/main.py @@ -1,15 +1,12 @@ import asyncio -import json import logging import os import queue import random -import sys 
import threading import time from typing import Any, Dict, AsyncGenerator -from confz import EnvSource from dotenv import load_dotenv from vllm import SamplingParams from vllm.engine.arg_utils import AsyncEngineArgs @@ -18,15 +15,8 @@ from vllm.utils import random_uuid from config import AppConfig -from leapfrogai_sdk import ( - BackendConfig, - ChatCompletionRequest, - CompletionRequest, -) -from leapfrogai_sdk.llm import ( - GenerationConfig, - LLM, -) +from leapfrogai_sdk import BackendConfig +from leapfrogai_sdk.llm import GenerationConfig, LLM load_dotenv() @@ -84,60 +74,6 @@ def remove_iterator(self, async_iterable): pass # If the iterable is not found, ignore the error -def get_backend_configs(): - # Manually load env var as ConfZ does not handle complex types (list) - stop_tokens: str | None = os.getenv("LAI_STOP_TOKENS") - if stop_tokens: - processed_stop_tokens = json.loads(stop_tokens) - else: - processed_stop_tokens = [] - del os.environ["LAI_STOP_TOKENS"] - - env_source = EnvSource( - allow_all=True, - prefix="LAI_", - remap={ - "model_source": "model.source", - "max_context_length": "max_context_length", - "stop_tokens": "stop_tokens", - "prompt_format_chat_system": "prompt_format.chat.system", - "prompt_format_chat_assistant": "prompt_format.chat.assistant", - "prompt_format_chat_user": "prompt_format.chat.user", - "prompt_format_defaults_top_p": "prompt_format.defaults.top_p", - "prompt_format_defaults_top_k": "prompt_format.defaults.top_k", - }, - ) - - BackendConfig.CONFIG_SOURCES = env_source - # Initialize an immutable config from env variables without stop_tokens list - backend_configs: BackendConfig = BackendConfig() - # Updates "processed_stop_tokens" without triggering Pydantic validation errors - backend_configs.model_copy(update={"stop_tokens": processed_stop_tokens}) - - return backend_configs - - -def get_config_from_request(request: ChatCompletionRequest | CompletionRequest): - return GenerationConfig( - max_new_tokens=request.max_new_tokens, - temperature=request.temperature, - top_k=request.top_k, - top_p=request.top_p, - do_sample=request.do_sample, - n=request.n, - stop=list(request.stop), - repetition_penalty=request.repetition_penalty, - presence_penalty=request.presence_penalty, - best_of=str(request.best_of), - logit_bias=request.logit_bias, - return_full_text=request.return_full_text, - truncate=request.truncate, - typical_p=request.typical_p, - watermark=request.watermark, - seed=request.seed, - ) - - @LLM class Model: """Implements an LLM model with concurrent output generation and management.""" @@ -152,19 +88,26 @@ def __init__(self): _thread = threading.Thread(target=asyncio.run, args=(self.iterate_outputs(),)) _thread.start() - self.backend_config = get_backend_configs() - self.model = self.backend_config.model.source + quantization = ( + None + if AppConfig().backend_options.quantization in ["", "None"] + else AppConfig().backend_options.quantization + ) + self.engine_args = AsyncEngineArgs( - engine_use_ray=True, - model=self.model, - trust_remote_code=False, - quantization=AppConfig().backend_options.quantization, - max_seq_len_to_capture=self.backend_config.max_context_length, - max_model_len=self.backend_config.max_context_length, - dtype="auto", - worker_use_ray=True, - gpu_memory_utilization=0.90, + # Taken from the LFAI SDK general LLM configuration + model=BackendConfig().model.source, + max_seq_len_to_capture=BackendConfig().max_context_length, + max_model_len=BackendConfig().max_context_length, + # Taken from the vLLM-specific 
configuration + enforce_eager=AppConfig().backend_options.enforce_eager, + quantization=quantization, + load_format=AppConfig().backend_options.load_format, tensor_parallel_size=AppConfig().backend_options.tensor_parallel_size, + engine_use_ray=AppConfig().backend_options.engine_use_ray, + worker_use_ray=AppConfig().backend_options.worker_use_ray, + gpu_memory_utilization=AppConfig().backend_options.gpu_memory_utilization, + trust_remote_code=AppConfig().backend_options.trust_remote_code, ) self.engine = AsyncLLMEngine.from_engine_args(self.engine_args) print(self.engine_args) @@ -228,18 +171,39 @@ async def create_response( """Initiate a response generation for the given prompt and configuration, adding the result to the iterator pool.""" - sampling_params = SamplingParams( - temperature=config.temperature, - # Clamp top_p value to prevent float errors - top_p=clamp(config.top_p, 0.0 + sys.float_info.epsilon, 1.0), - # Restrict top_k to valid values, -1 disables top_k - top_k=config.top_k if config.top_k >= 1 else -1, - stop=self.backend_config.stop_tokens, - max_tokens=config.max_new_tokens, - skip_special_tokens=False, - ) + # Collect LeapfrogAI SDK-defined parameters not aligned with vLLM SamplingParams + params = { + "max_tokens": getattr(config, "max_new_tokens"), + } + + # Collect LeapfrogAI SDK-defined parameters directly aligned with vLLM SamplingParams + aligned_params = [ + "temperature", + "top_p", + "top_k", + "stop", + "n", + "repetition_penalty", + "presence_penalty", + "best_of", + "logit_bias", + "return_full_text", + "truncate", + "typical_p", + "seed", + ] + + # Add only the parameters that exist in the request + # vLLM will provide defaults for the rest, if not specified + for param in aligned_params: + if param in config: + params[param] = config[param] + + # Pass the collected params to vLLM SamplingParams + sampling_params = SamplingParams(**params) + logger.info(f"Begin generation for request {request_id}") - logger.debug(f"{request_id} sampling_paramms: {sampling_params}") + logger.debug(f"{request_id} sampling_params: {sampling_params}") # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. 
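For reference, the parameter-forwarding pattern introduced in `create_response` above can be reduced to a small standalone sketch. This is not the PR's code: `FakeRequest` and `build_sampling_kwargs` are hypothetical stand-ins for the LeapfrogAI SDK's `GenerationConfig` and the backend's collection loop, only a handful of fields are shown, and a `None` check stands in for the membership test used in the actual change. The idea is the same: forward only the parameters the request actually sets so the inference engine supplies its own defaults for the rest.

```python
# Illustrative sketch of request-to-engine parameter forwarding (assumed names).
from dataclasses import dataclass
from typing import Optional


@dataclass
class FakeRequest:
    max_new_tokens: int = 256
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    top_k: Optional[int] = None
    stop: Optional[list] = None


def build_sampling_kwargs(request: FakeRequest) -> dict:
    # Parameters whose names differ between the SDK request and the engine
    kwargs = {"max_tokens": request.max_new_tokens}

    # Parameters whose names align one-to-one; copy them only when provided
    for name in ("temperature", "top_p", "top_k", "stop"):
        value = getattr(request, name)
        if value is not None:
            kwargs[name] = value
    return kwargs


if __name__ == "__main__":
    # The resulting dict is what would be splatted into the engine's
    # sampling-parameters constructor (vllm.SamplingParams in this backend).
    print(build_sampling_kwargs(FakeRequest(temperature=0.1, top_k=0)))
```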
@@ -284,8 +248,12 @@ async def generate( request_id ): result = "" - if not self.is_queue_empty(request_id): - result = self.delta_queue_by_id.get(request_id).get() + + # Ensure that the queue is not None and contains items before calling .get() + cur_queue = self.delta_queue_by_id.get(request_id) + if cur_queue is not None and not cur_queue.empty(): + result = cur_queue.get() + yield result logger.info(f"Finished request {request_id}") diff --git a/packages/vllm/src/model_download.py b/packages/vllm/src/model_download.py index 29f88942c..b87b6a61e 100644 --- a/packages/vllm/src/model_download.py +++ b/packages/vllm/src/model_download.py @@ -1,18 +1,17 @@ import os from huggingface_hub import snapshot_download -from config import AppConfig +from config import DownloadConfig -REPO_ID = AppConfig().download_options.repo_id -REVISION = AppConfig().download_options.revision -os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = ( - AppConfig().download_options.hf_hub_enable_hf_transfer -) +REPO_ID = DownloadConfig().download_options.repo_id +REVISION = DownloadConfig().download_options.revision + +# enable hf_transfer to max-out model download bandwidth +os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1" print(f"Downloading model from {REPO_ID} at revision {REVISION}...") snapshot_download( repo_id=REPO_ID, local_dir=".model", - local_dir_use_symlinks=False, revision=REVISION, ) diff --git a/packages/vllm/values/upstream-values.yaml b/packages/vllm/values/upstream-values.yaml index 0fe581bdd..e74ebec4a 100644 --- a/packages/vllm/values/upstream-values.yaml +++ b/packages/vllm/values/upstream-values.yaml @@ -2,12 +2,55 @@ image: repository: "ghcr.io/defenseunicorns/leapfrogai/vllm" tag: "###ZARF_CONST_IMAGE_VERSION###" +nameOverride: "###ZARF_CONST_NAME_OVERRIDE###" + +leapfrogaiConfig: + model: + source: "###ZARF_CONST_MODEL_PATH###" + maxContextLength: "###ZARF_VAR_MAX_CONTEXT_LENGTH###" + stopTokens: "###ZARF_VAR_STOP_TOKENS###" + promptFormat: + chat: + system: "###ZARF_VAR_PROMPT_FORMAT_CHAT_SYSTEM###" + assistant: "###ZARF_VAR_PROMPT_FORMAT_CHAT_ASSISTANT###" + user: "###ZARF_VAR_PROMPT_FORMAT_CHAT_USER###" + defaults: + temperature: "###ZARF_VAR_TEMPERATURE###" + topP: "###ZARF_VAR_TOP_P###" + topK: "###ZARF_VAR_TOP_K###" + repetitionPenalty: "###ZARF_VAR_REPETITION_PENALTY###" + maxNewTokens: "###ZARF_VAR_MAX_NEW_TOKENS###" + + +vllmConfig: + trustRemoteCode: "###ZARF_VAR_TRUST_REMOTE_CODE###" + tensorParallelSize: "###ZARF_VAR_TENSOR_PARALLEL_SIZE###" + enforceEager: "###ZARF_VAR_ENFORCE_EAGER###" + gpuMemoryUtilization: "###ZARF_VAR_GPU_MEMORY_UTILIZATION###" + workerUseRay: "###ZARF_VAR_WORKER_USE_RAY###" + engineUseRay: "###ZARF_VAR_ENGINE_USE_RAY###" + quantization: "###ZARF_VAR_QUANTIZATION###" + loadFormat: "###ZARF_VAR_LOAD_FORMAT###" + +env: + - name: LFAI_LOG_LEVEL + value: "INFO" + gpu: runtimeClassName: "###ZARF_VAR_GPU_RUNTIME###" resources: + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
limits: + cpu: 0 + memory: 0 nvidia.com/gpu: "###ZARF_VAR_GPU_LIMIT###" + requests: + cpu: 0 + memory: 0 persistence: size: "###ZARF_VAR_PVC_SIZE###" diff --git a/packages/vllm/zarf-config.yaml b/packages/vllm/zarf-config.yaml new file mode 100644 index 000000000..5f032eecb --- /dev/null +++ b/packages/vllm/zarf-config.yaml @@ -0,0 +1,39 @@ +package: + create: + set: + # x-release-please-start-version + image_version: "0.13.0" + # x-release-please-end + + model_repo_id: "TheBloke/Synthia-7B-v2.0-GPTQ" + model_revision: "gptq-4bit-32g-actorder_True" + model_path: "/data/.model/" + name_override: "vllm" + deploy: + set: + # vLLM runtime configuration (usually influenced by .env in local development) + trust_remote_code: "True" + tensor_parallel_size: "1" + enforce_eager: "False" + gpu_memory_utilization: "0.90" + worker_use_ray: "True" + engine_use_ray: "True" + quantization: "None" + load_format: "auto" + # LeapfrogAI SDK runtime configuration (usually influenced by config.yaml in development) + max_context_length: "32768" + stop_tokens: ", <|im_end|>, <|endoftext|>" + prompt_format_chat_system: "SYSTEM: {}\n" + prompt_format_chat_user: "USER: {}\n" + prompt_format_chat_assistant: "ASSISTANT: {}\n" + temperature: "0.1" + top_p: "1.0" + top_k: "0" + repetition_penalty: "1.0" + max_new_tokens: "8192" + # Pod deployment configuration + gpu_limit: "1" + gpu_runtime: "nvidia" + pvc_size: "15Gi" + pvc_access_mode: "ReadWriteOnce" + pvc_storage_class: "local-path" diff --git a/packages/vllm/zarf.yaml b/packages/vllm/zarf.yaml index ed88c2f18..f87564e36 100644 --- a/packages/vllm/zarf.yaml +++ b/packages/vllm/zarf.yaml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.14.0/zarf.schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/zarf.schema.json kind: ZarfPackageConfig metadata: name: vllm @@ -9,27 +9,86 @@ metadata: constants: - name: IMAGE_VERSION value: "###ZARF_PKG_TMPL_IMAGE_VERSION###" + - name: MODEL_REPO_ID + description: "The HuggingFace repository ID" + value: "###ZARF_PKG_TMPL_MODEL_REPO_ID###" + - name: MODEL_REVISION + description: "The HuggingFace git branch or commit hash" + value: "###ZARF_PKG_TMPL_MODEL_REVISION###" + - name: MODEL_PATH + description: "Defines the location of the Zarf Injected model files in the vLLM container" + value: "###ZARF_PKG_TMPL_MODEL_PATH###" + - name: NAME_OVERRIDE + description: "Provide an override for the name of the deployment (e.g., the model name)" + value: "###ZARF_PKG_TMPL_NAME_OVERRIDE###" variables: + # vLLM runtime configuration (usually influenced by .env in local development) + - name: TRUST_REMOTE_CODE + description: "If True, allows the execution of code within the model files directory" + pattern: "^(True|False)$" + - name: TENSOR_PARALLEL_SIZE + description: "The number of tensor parallelism splits, typically used for model parallelism across GPUs" + pattern: "^[1-9][0-9]*$" + - name: ENFORCE_EAGER + description: "If set to True, enforces eager execution mode instead of lazy execution, impacting performance" + pattern: "^(True|False)$" + - name: GPU_MEMORY_UTILIZATION + description: "The fraction of GPU memory to be utilized, expressed as a decimal value between 0.01 and 0.99" + pattern: ^0\.(0[1-9]|[1-9][0-9])$ + - name: WORKER_USE_RAY + description: "If True, uses Ray for distributed worker management" + pattern: "^(True|False)$" + - name: ENGINE_USE_RAY + description: "If True, uses Ray for managing the execution 
engine" + pattern: "^(True|False)$" + - name: QUANTIZATION + description: "If None, allows vLLM to automatically detect via model files and configuration" + - name: LOAD_FORMAT + description: "If auto, allows vLLM to automatically detect via model files and configuration" + # LeapfrogAI SDK runtime configuration (usually influenced by config.yaml in development) + - name: MAX_CONTEXT_LENGTH + description: "The maximum number of tokens the model can process in a single input before the inferencing engine's overflow strategy is used" + pattern: "^[1-9][0-9]*$" + - name: STOP_TOKENS + description: "A set of special tokens that signal the model to stop producing further output, delimited using a comma and space" + pattern: ^(<[^,]+>\s*,\s*)*<[^,]+>\s*$ + - name: PROMPT_FORMAT_CHAT_SYSTEM + description: "Prompt template format for the LeapfrogAI SDK to consume and wrap" + - name: PROMPT_FORMAT_CHAT_USER + description: "Prompt template format for the LeapfrogAI SDK to consume and wrap" + - name: PROMPT_FORMAT_CHAT_ASSISTANT + description: "Prompt template format for the LeapfrogAI SDK to consume and wrap" + - name: TEMPERATURE + description: "Controls the randomness of the model's output" + pattern: ^(0(\.\d+)?|1(\.0+)?)$ + - name: TOP_P + description: "The cumulative probability threshold for token sampling, where 1.0 represents no restriction" + pattern: ^(0(\.\d+)?|1(\.0+)?)$ + - name: TOP_K + description: "The number of top-K tokens to consider during sampling, where 0 disables top-K sampling" + pattern: ^\d+$ + - name: REPETITION_PENALTY + description: "The penalty value for repetition in generation" + pattern: ^(0(\.\d+)?|1(\.0+)?)$ + - name: MAX_NEW_TOKENS + description: "Maximum new tokens to generate" + pattern: ^\d+$ + # Pod deployment configuration - name: GPU_LIMIT - description: The GPU limit for the model inferencing. Must be 1 or more. - default: "1" + description: "The GPU limit for the model inferencing. Must be 1 or more." pattern: "^[1-9][0-9]*$" - name: GPU_RUNTIME - description: The GPU runtime name for the model inferencing. - default: "nvidia" + description: "The GPU runtime name for the model inferencing." pattern: "^(nvidia)?$" - name: PVC_SIZE - description: Size of the PVC used for model storage. - default: "15Gi" + description: "Size of the PVC used for model storage." pattern: "^[0-9]+[a-zA-Z]+$" - name: PVC_ACCESS_MODE - description: Access mode of the PVC used for model storage. - default: "ReadWriteOnce" + description: "Access mode of the PVC used for model storage." pattern: "^(ReadWriteOnce|ReadOnlyMany|ReadWriteMany)$" - name: PVC_STORAGE_CLASS - description: Storage class of the PVC used for model storage. - default: "local-path" + description: "Storage class of the PVC used for model storage." 
components: - name: vllm-model @@ -37,33 +96,33 @@ components: only: flavor: upstream charts: - - name: vllm-model + - name: "###ZARF_PKG_TMPL_NAME_OVERRIDE###-model" namespace: leapfrogai localPath: chart - releaseName: vllm-model + releaseName: "###ZARF_PKG_TMPL_NAME_OVERRIDE###-model" # x-release-please-start-version version: 0.13.1 # x-release-please-end valuesFiles: - "values/upstream-values.yaml" images: - - ghcr.io/defenseunicorns/leapfrogai/vllm:###ZARF_PKG_TMPL_IMAGE_VERSION### - - cgr.dev/chainguard/bash:latest + - "ghcr.io/defenseunicorns/leapfrogai/vllm:###ZARF_PKG_TMPL_IMAGE_VERSION###" + - "cgr.dev/chainguard/bash:latest" dataInjections: - - source: .model/ + # location where locally downloaded model files are located + - source: ".model/" target: - namespace: leapfrogai - selector: app=lfai-vllm - container: data-loader - path: /data/.model + namespace: "leapfrogai" + selector: "app=lfai-###ZARF_PKG_TMPL_NAME_OVERRIDE###" + container: "data-loader" + # location in the container for injection of the model files + path: "###ZARF_PKG_TMPL_MODEL_PATH###" compress: true actions: onCreate: before: # NOTE: This assumes python is installed and in $PATH and 'huggingface_hub[cli,hf_transfer]' has been installed - - cmd: python src/model_download.py + - cmd: "python src/model_download.py" env: - - LAI_REPO_ID=TheBloke/Synthia-7B-v2.0-GPTQ - - LAI_REVISION=gptq-4bit-32g-actorder_True - - LAI_QUANTIZATION=gptq - - LAI_HF_HUB_ENABLE_HF_TRANSFER=1 + - LFAI_REPO_ID=###ZARF_PKG_TMPL_MODEL_REPO_ID### + - LFAI_REVISION=###ZARF_PKG_TMPL_MODEL_REVISION### diff --git a/packages/whisper/Dockerfile b/packages/whisper/Dockerfile index b3bed054a..a5513e9fa 100644 --- a/packages/whisper/Dockerfile +++ b/packages/whisper/Dockerfile @@ -37,8 +37,8 @@ COPY --from=builder /leapfrogai/.venv/ /leapfrogai/.venv/ # set the path to the cuda 11.8 dependencies ENV LD_LIBRARY_PATH \ - /leapfrogai/.venv/lib64/python3.11/site-packages/nvidia/cublas/lib:\ - /leapfrogai/.venv/lib64/python3.11/site-packages/nvidia/cudnn/lib +/leapfrogai/.venv/lib64/python3.11/site-packages/nvidia/cublas/lib:\ +/leapfrogai/.venv/lib64/python3.11/site-packages/nvidia/cudnn/lib COPY packages/whisper/main.py . diff --git a/packages/whisper/zarf.yaml b/packages/whisper/zarf.yaml index cc53f36b6..06ef87cce 100644 --- a/packages/whisper/zarf.yaml +++ b/packages/whisper/zarf.yaml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.14.0/zarf.schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/zarf.schema.json kind: ZarfPackageConfig metadata: diff --git a/src/leapfrogai_api/README.md b/src/leapfrogai_api/README.md index eec4dd0c6..214c986a9 100644 --- a/src/leapfrogai_api/README.md +++ b/src/leapfrogai_api/README.md @@ -56,3 +56,72 @@ See the ["Access" section of the DEVELOPMENT.md](../../docs/DEVELOPMENT.md#acces ### Tests See the [tests directory documentation](../../tests/README.md) for more details. + +### Reranking Configuration + +The LeapfrogAI API includes a Retrieval Augmented Generation (RAG) pipeline for enhanced question answering. This section details how to configure its reranking options. All RAG configurations are managed through the `/leapfrogai/v1/rag/configure` API endpoint. + +#### 1. Enabling/Disabling Reranking + +Reranking improves the accuracy and relevance of RAG responses. 
You can enable or disable it using the `enable_reranking` parameter: + +* **Enable Reranking:** Send a PATCH request to `/leapfrogai/v1/rag/configure` with the following JSON payload: + +```json +{ + "enable_reranking": true +} +``` + +* **Disable Reranking:** Send a PATCH request with: + +```json +{ + "enable_reranking": false +} +``` + +#### 2. Selecting a Reranking Model + +Multiple reranking models are supported, each offering different performance characteristics. Choose your preferred model using the `ranking_model` parameter. Ensure you've installed any necessary Python dependencies for your chosen model (see the [rerankers library documentation](https://github.com/AnswerDotAI/rerankers) on dependencies). + +* **Supported Models:** The system supports several models, including (but not limited to) `flashrank`, `rankllm`, `cross-encoder`, and `colbert`. Refer to the [rerankers library documentation](https://github.com/AnswerDotAI/rerankers) for a complete list and details on their capabilities. + +* **Model Selection:** Use a PATCH request to `/leapfrogai/v1/rag/configure` with the desired model: + +```json +{ + "enable_reranking": true, // Reranking must be enabled + "ranking_model": "rankllm" // Or another supported model +} +``` + +#### 3. Adjusting the Number of Results Before Reranking (`rag_top_k_when_reranking`) + +This parameter sets the number of top results retrieved from the vector database *before* the reranking process begins. A higher value increases the diversity of candidates considered for reranking but also increases processing time. A lower value can lead to missing relevant results if not carefully chosen. This setting is only relevant when reranking is enabled. + +* **Configuration:** Use a PATCH request to `/leapfrogai/v1/rag/configure` to set this value: + +```json +{ + "enable_reranking": true, + "ranking_model": "flashrank", + "rag_top_k_when_reranking": 150 // Adjust this value as needed +} +``` + +#### 4. Retrieving the Current RAG Configuration + +To check the current RAG configuration (including reranking status, model, and `rag_top_k_when_reranking`), send a GET request to `/leapfrogai/v1/rag/configure`. The response will be a JSON object containing all the current settings. + +#### 5. Example Configuration Flow + +1. **Initial Setup:** Start with reranking enabled using the default `flashrank` model and a `rag_top_k_when_reranking` value of 100. + +2. **Experiment with Models:** Test different reranking models (`rankllm`, `colbert`, etc.) by changing the `ranking_model` parameter and observing the impact on response quality. Adjust `rag_top_k_when_reranking` as needed to find the optimal balance between diversity and performance. + +3. **Fine-tuning:** Once you identify a suitable model, fine-tune the `rag_top_k_when_reranking` parameter for optimal performance. Monitor response times and quality to determine the best setting. + +4. **Disabling Reranking:** If needed, disable reranking by setting `"enable_reranking": false`. + +Remember to always consult the [rerankers library documentation](https://github.com/AnswerDotAI/rerankers) for information on supported models and their specific requirements. The API documentation provides further details on request formats and potential error responses. 
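Putting the pieces above together, a minimal client-side sketch of the configuration flow might look like the following. The base URL, token handling, and the use of `requests` are illustrative assumptions rather than anything prescribed by this repository; the endpoint path and JSON fields come from the section above.

```python
# Minimal sketch: toggle reranking and read back the active RAG configuration.
import os
import requests

API = os.environ.get("LEAPFROGAI_API_URL", "http://localhost:8080")  # assumed
HEADERS = {"Authorization": f"Bearer {os.environ['LEAPFROGAI_API_KEY']}"}  # assumed

# Enable reranking with a specific model and a wider pre-rerank candidate pool
requests.patch(
    f"{API}/leapfrogai/v1/rag/configure",
    json={
        "enable_reranking": True,
        "ranking_model": "flashrank",
        "rag_top_k_when_reranking": 150,
    },
    headers=HEADERS,
    timeout=30,
).raise_for_status()

# Retrieve the currently active RAG configuration
current = requests.get(
    f"{API}/leapfrogai/v1/rag/configure", headers=HEADERS, timeout=30
)
current.raise_for_status()
print(current.json())
```

Note that, later in this changeset, the RAG configuration router is only mounted when the `DEV` environment variable is set, so these endpoints are intended for development-time tuning.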
diff --git a/src/leapfrogai_api/backend/composer.py b/src/leapfrogai_api/backend/composer.py index b95e957a3..424e6c6d0 100644 --- a/src/leapfrogai_api/backend/composer.py +++ b/src/leapfrogai_api/backend/composer.py @@ -78,12 +78,25 @@ async def create_chat_messages( thread: Thread, additional_instructions: str | None, tool_resources: BetaThreadToolResources | None = None, - ) -> tuple[list[ChatMessage], list[str]]: + ) -> tuple[list[ChatMessage], SearchResponse]: + """Create chat message list for consumption by the LLM backend. + + Args: + request (RunCreateParamsRequest): The request object. + session (Session): The database session. + thread (Thread): The thread object. + additional_instructions (str | None): Additional instructions. + tool_resources (BetaThreadToolResources | None): The tool resources. + + Returns: + tuple[list[ChatMessage], SearchResponse]: The chat messages and any RAG responses. + """ # Get existing messages thread_messages: list[Message] = await self.list_messages(thread.id, session) + rag_responses: SearchResponse = SearchResponse(data=[]) if len(thread_messages) == 0: - return [], [] + return [], rag_responses def sort_by_created_at(msg: Message): return msg.created_at @@ -125,7 +138,6 @@ def sort_by_created_at(msg: Message): chat_messages.extend(chat_thread_messages) # 4 - The RAG results are appended behind the user's query - file_ids: set[str] = set() if request.can_use_rag(tool_resources) and chat_thread_messages: rag_message: str = "Here are relevant docs needed to reply:\n" @@ -138,22 +150,22 @@ def sort_by_created_at(msg: Message): vector_store_ids: list[str] = cast(list[str], file_search.vector_store_ids) for vector_store_id in vector_store_ids: - rag_responses: SearchResponse = await query_service.query_rag( + rag_responses = await query_service.query_rag( query=query_message.content_as_str(), vector_store_id=vector_store_id, ) + # Insert the RAG response messages just before the user's query for rag_response in rag_responses.data: - file_ids.add(rag_response.file_id) response_with_instructions: str = f"{rag_response.content}" rag_message += f"{response_with_instructions}\n" chat_messages.insert( len(chat_messages) - 1, # Insert right before the user message ChatMessage(role="user", content=rag_message), - ) # TODO: Should this go in user or something else like function? 
+ ) - return chat_messages, list(file_ids) + return chat_messages, rag_responses async def generate_message_for_thread( self, @@ -182,7 +194,7 @@ async def generate_message_for_thread( else: tool_resources = None - chat_messages, file_ids = await self.create_chat_messages( + chat_messages, rag_responses = await self.create_chat_messages( request, session, thread, additional_instructions, tool_resources ) @@ -204,13 +216,15 @@ async def generate_message_for_thread( choice: ChatChoice = cast(ChatChoice, chat_response.choices[0]) - message = from_text_to_message(choice.message.content_as_str(), file_ids) + message: Message = from_text_to_message( + text=choice.message.content_as_str(), search_responses=rag_responses + ) create_message_request = CreateMessageRequest( role=message.role, content=message.content, attachments=message.attachments, - metadata=message.metadata.__dict__ if message.metadata else None, + metadata=vars(message.metadata), ) await create_message_request.create_message( @@ -249,7 +263,7 @@ async def stream_generate_message_for_thread( else: tool_resources = None - chat_messages, file_ids = await self.create_chat_messages( + chat_messages, rag_responses = await self.create_chat_messages( request, session, thread, additional_instructions, tool_resources ) @@ -274,13 +288,15 @@ async def stream_generate_message_for_thread( yield "\n\n" # Create an empty message - new_message: Message = from_text_to_message("", []) + new_message: Message = from_text_to_message( + text="", search_responses=SearchResponse(data=[]) + ) create_message_request = CreateMessageRequest( role=new_message.role, content=new_message.content, attachments=new_message.attachments, - metadata=new_message.metadata.__dict__ if new_message.metadata else None, + metadata=vars(new_message.metadata), ) new_message = await create_message_request.create_message( @@ -319,7 +335,9 @@ async def stream_generate_message_for_thread( yield "\n\n" index += 1 - new_message.content = from_text_to_message(response, file_ids).content + new_message.content = from_text_to_message( + text=response, search_responses=rag_responses + ).content new_message.created_at = int(time.time()) crud_message = CRUDMessage(db=session) diff --git a/src/leapfrogai_api/backend/converters.py b/src/leapfrogai_api/backend/converters.py index 8d31b23ba..1fbb844a2 100644 --- a/src/leapfrogai_api/backend/converters.py +++ b/src/leapfrogai_api/backend/converters.py @@ -4,6 +4,7 @@ from openai.types.beta import AssistantStreamEvent from openai.types.beta.assistant_stream_event import ThreadMessageDelta from openai.types.beta.threads.file_citation_annotation import FileCitation +from openai.types.beta.threads.file_path_annotation import FilePathAnnotation from openai.types.beta.threads import ( MessageContentPartParam, MessageContent, @@ -17,6 +18,9 @@ FileCitationAnnotation, ) +from leapfrogai_api.typedef.vectorstores.search_types import SearchResponse +from leapfrogai_api.typedef.common import MetadataObject + def from_assistant_stream_event_to_str(stream_event: AssistantStreamEvent): return f"event: {stream_event.event}\ndata: {stream_event.data.model_dump_json()}" @@ -44,24 +48,41 @@ def from_content_param_to_content( ) -def from_text_to_message(text: str, file_ids: list[str]) -> Message: - all_file_ids: str = "" +def from_text_to_message(text: str, search_responses: SearchResponse | None) -> Message: + """Loads text and RAG search responses into a Message object - for file_id in file_ids: - all_file_ids += f" [{file_id}]" + Args: + text: The text to load 
into the message + search_responses: The RAG search responses to load into the message - message_content: TextContentBlock = TextContentBlock( - text=Text( - annotations=[ + Returns: + The OpenAI compliant Message object + """ + + all_file_ids: str = "" + all_vector_ids: list[str] = [] + annotations: list[FileCitationAnnotation | FilePathAnnotation] = [] + + if search_responses: + for search_response in search_responses.data: + all_file_ids += f"[{search_response.file_id}]" + all_vector_ids.append(search_response.id) + file_name = search_response.metadata.get("source", "source") + annotations.append( FileCitationAnnotation( - text=f"[{file_id}]", - file_citation=FileCitation(file_id=file_id, quote=""), + text=f"【4:0†{file_name}】", # TODO: What should these numbers be? https://github.com/defenseunicorns/leapfrogai/issues/1110 + file_citation=FileCitation( + file_id=search_response.file_id, quote=search_response.content + ), start_index=0, end_index=0, type="file_citation", ) - for file_id in file_ids - ], + ) + + message_content: TextContentBlock = TextContentBlock( + text=Text( + annotations=annotations, value=text + all_file_ids, ), type="text", @@ -75,7 +96,9 @@ def from_text_to_message(text: str, file_ids: list[str]) -> Message: thread_id="", content=[message_content], role="assistant", - metadata=None, + metadata=MetadataObject( + vector_ids=all_vector_ids.__str__(), + ), ) return new_message diff --git a/src/leapfrogai_api/backend/grpc_client.py b/src/leapfrogai_api/backend/grpc_client.py index f9082fdc2..9d18d2951 100644 --- a/src/leapfrogai_api/backend/grpc_client.py +++ b/src/leapfrogai_api/backend/grpc_client.py @@ -63,7 +63,7 @@ async def completion(model: Model, request: lfai.CompletionRequest): CompletionChoice( index=0, text=response.choices[0].text, - finish_reason=finish_reason_enum.to_string(), + finish_reason=finish_reason_enum.to_finish_reason(), logprobs=None, ) ], @@ -122,7 +122,7 @@ async def chat_completion(model: Model, request: lfai.ChatCompletionRequest): ).lower(), content=response.choices[0].chat_item.content, ), - finish_reason=finish_reason_enum.to_string(), + finish_reason=finish_reason_enum.to_finish_reason(), ) ], usage=Usage( diff --git a/src/leapfrogai_api/backend/helpers.py b/src/leapfrogai_api/backend/helpers.py index 65a2fd0b5..005111601 100644 --- a/src/leapfrogai_api/backend/helpers.py +++ b/src/leapfrogai_api/backend/helpers.py @@ -39,7 +39,7 @@ async def recv_completion( index=0, text=c.choices[0].text, logprobs=None, - finish_reason=finish_reason_enum.to_string(), + finish_reason=finish_reason_enum.to_finish_reason(), ) ], usage=Usage( @@ -77,7 +77,7 @@ async def recv_chat( delta=ChatDelta( role="assistant", content=c.choices[0].chat_item.content ), - finish_reason=finish_reason_enum.to_string(), + finish_reason=finish_reason_enum.to_finish_reason(), ) ], usage=Usage( diff --git a/src/leapfrogai_api/backend/rag/index.py b/src/leapfrogai_api/backend/rag/index.py index 764a65975..4c5d22470 100644 --- a/src/leapfrogai_api/backend/rag/index.py +++ b/src/leapfrogai_api/backend/rag/index.py @@ -81,6 +81,8 @@ async def index_file(self, vector_store_id: str, file_id: str) -> VectorStoreFil temp_file.write(file_bytes) temp_file.seek(0) documents = await load_file(temp_file.name) + for document in documents: + document.metadata["source"] = file_object.filename chunks = await split(documents) if len(chunks) == 0: diff --git a/src/leapfrogai_api/backend/rag/query.py b/src/leapfrogai_api/backend/rag/query.py index e5e0decce..bd0ae9bf6 100644 --- 
a/src/leapfrogai_api/backend/rag/query.py +++ b/src/leapfrogai_api/backend/rag/query.py @@ -1,11 +1,15 @@ """Service for querying the RAG model.""" +from rerankers.results import RankedResults from supabase import AClient as AsyncClient from langchain_core.embeddings import Embeddings from leapfrogai_api.backend.rag.leapfrogai_embeddings import LeapfrogAIEmbeddings from leapfrogai_api.data.crud_vector_content import CRUDVectorContent -from leapfrogai_api.typedef.vectorstores.search_types import SearchResponse +from leapfrogai_api.typedef.rag.rag_types import ConfigurationSingleton +from leapfrogai_api.typedef.vectorstores.search_types import SearchResponse, SearchItem from leapfrogai_api.backend.constants import TOP_K +from leapfrogai_api.utils.logging_tools import logger +from rerankers import Reranker # Allows for overwriting type of embeddings that will be instantiated embeddings_type: type[Embeddings] | type[LeapfrogAIEmbeddings] | None = ( @@ -22,7 +26,10 @@ def __init__(self, db: AsyncClient) -> None: self.embeddings = embeddings_type() async def query_rag( - self, query: str, vector_store_id: str, k: int = TOP_K + self, + query: str, + vector_store_id: str, + k: int = TOP_K, ) -> SearchResponse: """ Query the Vector Store. @@ -36,11 +43,70 @@ async def query_rag( SearchResponse: The search response from the vector store. """ + logger.debug("Beginning RAG query...") + # 1. Embed query vector = await self.embeddings.aembed_query(query) # 2. Perform similarity search + _k: int = k + if ConfigurationSingleton.get_instance().enable_reranking: + """Use the user specified top-k value unless reranking. + When reranking, use the reranking top-k value to get the initial results. + Then filter the list down later to just the k that the user has requested after reranking.""" + _k = ConfigurationSingleton.get_instance().rag_top_k_when_reranking + crud_vector_content = CRUDVectorContent(db=self.db) - return await crud_vector_content.similarity_search( - query=vector, vector_store_id=vector_store_id, k=k + results = await crud_vector_content.similarity_search( + query=vector, vector_store_id=vector_store_id, k=_k ) + + # 3. Rerank results + if ( + ConfigurationSingleton.get_instance().enable_reranking + and len(results.data) > 0 + ): + ranker = Reranker(ConfigurationSingleton.get_instance().ranking_model) + ranked_results: RankedResults = ranker.rank( + query=query, + docs=[result.content for result in results.data], + doc_ids=[result.id for result in results.data], + ) + results = rerank_search_response(results, ranked_results) + # Narrow down the results to the top-k value specified by the user + results.data = results.data[0:k] + + logger.debug("Ending RAG query...") + + return results + + +def rerank_search_response( + original_response: SearchResponse, ranked_results: RankedResults +) -> SearchResponse: + """ + Reorder the SearchResponse based on reranked results. + + Args: + original_response (SearchResponse): The original search response. + ranked_results (List[str]): List of ranked content strings. + + Returns: + SearchResponse: A new SearchResponse with reordered items. 
+ """ + # Create a mapping of id to original SearchItem + content_to_item = {item.id: item for item in original_response.data} + + # Create new SearchItems based on reranked results + ranked_items = [] + for content in ranked_results.results: + if content.document.doc_id in content_to_item: + item: SearchItem = content_to_item[content.document.doc_id] + item.rank = content.rank + item.score = content.score + ranked_items.append(item) + + ranked_response = SearchResponse(data=ranked_items) + + # Create a new SearchResponse with reranked items + return ranked_response diff --git a/src/leapfrogai_api/data/crud_vector_content.py b/src/leapfrogai_api/data/crud_vector_content.py index 18c87a18a..d53118986 100644 --- a/src/leapfrogai_api/data/crud_vector_content.py +++ b/src/leapfrogai_api/data/crud_vector_content.py @@ -1,20 +1,11 @@ """CRUD Operations for VectorStore.""" -from pydantic import BaseModel from supabase import AClient as AsyncClient from leapfrogai_api.data.crud_base import get_user_id import ast from leapfrogai_api.typedef.vectorstores import SearchItem, SearchResponse from leapfrogai_api.backend.constants import TOP_K - - -class Vector(BaseModel): - id: str = "" - vector_store_id: str - file_id: str - content: str - metadata: dict - embedding: list[float] +from leapfrogai_api.typedef.vectorstores import Vector class CRUDVectorContent: @@ -65,6 +56,30 @@ async def add_vectors(self, object_: list[Vector]) -> list[Vector]: except Exception as e: raise e + async def get_vector(self, vector_id: str) -> Vector: + """Get a vector by its ID.""" + data, _count = ( + await self.db.table(self.table_name) + .select("*") + .eq("id", vector_id) + .single() + .execute() + ) + + _, response = data + + if isinstance(response["embedding"], str): + response["embedding"] = self.string_to_float_list(response["embedding"]) + + return Vector( + id=response["id"], + vector_store_id=response["vector_store_id"], + file_id=response["file_id"], + content=response["content"], + metadata=response["metadata"], + embedding=response["embedding"], + ) + async def delete_vectors(self, vector_store_id: str, file_id: str) -> bool: """Delete a vector store file by its ID.""" data, _count = ( diff --git a/src/leapfrogai_api/main.py b/src/leapfrogai_api/main.py index 85822f7f3..108ccd51e 100644 --- a/src/leapfrogai_api/main.py +++ b/src/leapfrogai_api/main.py @@ -8,12 +8,13 @@ from fastapi import FastAPI from fastapi.exception_handlers import request_validation_exception_handler from fastapi.exceptions import RequestValidationError - +from fastapi.responses import RedirectResponse from leapfrogai_api.routers.base import router as base_router from leapfrogai_api.routers.leapfrogai import auth from leapfrogai_api.routers.leapfrogai import models as lfai_models from leapfrogai_api.routers.leapfrogai import vector_stores as lfai_vector_stores from leapfrogai_api.routers.leapfrogai import count as lfai_token_count +from leapfrogai_api.routers.leapfrogai import rag as lfai_rag from leapfrogai_api.routers.openai import ( assistants, audio, @@ -29,6 +30,7 @@ vector_stores, ) from leapfrogai_api.utils import get_model_config +from prometheus_fastapi_instrumentator import Instrumentator logging.basicConfig( level=os.getenv("LFAI_LOG_LEVEL", logging.INFO), @@ -61,6 +63,21 @@ async def lifespan(app: FastAPI): app = FastAPI(lifespan=lifespan) +@app.get("/", include_in_schema=False) +async def root(): + """Intercepts the root path and redirects to the API documentation.""" + return RedirectResponse(url="/docs") + + +Instrumentator( 
+ excluded_handlers=["/healthz", "/metrics"], + should_group_status_codes=False, +).instrument(app).expose( + app, + include_in_schema=False, +) + + @app.exception_handler(RequestValidationError) async def validation_exception_handler(request, exc): logger.error(f"The client sent invalid data!: {exc}") @@ -81,6 +98,8 @@ async def validation_exception_handler(request, exc): app.include_router(messages.router) app.include_router(runs_steps.router) app.include_router(lfai_vector_stores.router) +if os.environ.get("DEV"): + app.include_router(lfai_rag.router) app.include_router(lfai_token_count.router) app.include_router(lfai_models.router) # This should be at the bottom to prevent it preempting more specific runs endpoints diff --git a/src/leapfrogai_api/pyproject.toml b/src/leapfrogai_api/pyproject.toml index 01ae651b0..6779c3dbd 100644 --- a/src/leapfrogai_api/pyproject.toml +++ b/src/leapfrogai_api/pyproject.toml @@ -26,6 +26,8 @@ dependencies = [ "postgrest==0.16.11", # required by supabase, bug when using previous versions "openpyxl == 3.1.5", "psutil == 6.0.0", + "prometheus-fastapi-instrumentator == 7.0.0", + "rerankers[flashrank] == 0.5.3" ] requires-python = "~=3.11" diff --git a/src/leapfrogai_api/routers/leapfrogai/rag.py b/src/leapfrogai_api/routers/leapfrogai/rag.py new file mode 100644 index 000000000..3b61b616e --- /dev/null +++ b/src/leapfrogai_api/routers/leapfrogai/rag.py @@ -0,0 +1,56 @@ +"""LeapfrogAI endpoints for RAG.""" + +from fastapi import APIRouter +from leapfrogai_api.typedef.rag.rag_types import ( + ConfigurationSingleton, + ConfigurationPayload, +) +from leapfrogai_api.routers.supabase_session import Session +from leapfrogai_api.utils.logging_tools import logger + +router = APIRouter(prefix="/leapfrogai/v1/rag", tags=["leapfrogai/rag"]) + + +@router.patch("/configure") +async def configure(session: Session, configuration: ConfigurationPayload) -> None: + """ + Configures the RAG settings at runtime. + + Args: + session (Session): The database session. + configuration (Configuration): The configuration to update. + """ + + # We set the class variable to update the configuration globally + ConfigurationSingleton._instance = ConfigurationSingleton.get_instance().copy( + update=configuration.dict(exclude_none=True) + ) + + +@router.get("/configure") +async def get_configuration(session: Session) -> ConfigurationPayload: + """ + Retrieves the current RAG configuration. + + Args: + session (Session): The database session. + + Returns: + Configuration: The current RAG configuration. 
+ """ + + instance = ConfigurationSingleton.get_instance() + + # Create a new dictionary with only the relevant attributes + config_dict = { + key: value + for key, value in instance.__dict__.items() + if not key.startswith("_") # Exclude private attributes + } + + # Create a new ConfigurationPayload instance with the filtered dictionary + new_configuration = ConfigurationPayload(**config_dict) + + logger.info(f"The current configuration has been set to {new_configuration}") + + return new_configuration diff --git a/src/leapfrogai_api/routers/leapfrogai/vector_stores.py b/src/leapfrogai_api/routers/leapfrogai/vector_stores.py index cd2899925..5251440c1 100644 --- a/src/leapfrogai_api/routers/leapfrogai/vector_stores.py +++ b/src/leapfrogai_api/routers/leapfrogai/vector_stores.py @@ -4,6 +4,7 @@ from leapfrogai_api.backend.rag.query import QueryService from leapfrogai_api.typedef.vectorstores import SearchResponse from leapfrogai_api.routers.supabase_session import Session +from leapfrogai_api.data.crud_vector_content import CRUDVectorContent, Vector from leapfrogai_api.backend.constants import TOP_K router = APIRouter( @@ -32,7 +33,26 @@ async def search( """ query_service = QueryService(db=session) return await query_service.query_rag( - query=query, - vector_store_id=vector_store_id, - k=k, + query=query, vector_store_id=vector_store_id, k=k ) + + +@router.get("/vector/{vector_id}") +async def get_vector( + session: Session, + vector_id: str, +) -> Vector: + """ + Get a specfic vector by its ID. + + Args: + session (Session): The database session. + vector_id (str): The ID of the vector. + + Returns: + Vector: The vector object. + """ + crud_vector_content = CRUDVectorContent(db=session) + vector = await crud_vector_content.get_vector(vector_id=vector_id) + + return vector diff --git a/src/leapfrogai_api/typedef/__init__.py b/src/leapfrogai_api/typedef/__init__.py index d65f47391..6e8c30d7b 100644 --- a/src/leapfrogai_api/typedef/__init__.py +++ b/src/leapfrogai_api/typedef/__init__.py @@ -1 +1,4 @@ -from .common import Usage as Usage +from .common import ( + Usage as Usage, + MetadataObject as MetadataObject, +) diff --git a/src/leapfrogai_api/typedef/assistants/assistant_types.py b/src/leapfrogai_api/typedef/assistants/assistant_types.py index 168a0e357..a59fb8f8d 100644 --- a/src/leapfrogai_api/typedef/assistants/assistant_types.py +++ b/src/leapfrogai_api/typedef/assistants/assistant_types.py @@ -27,14 +27,13 @@ logger = logging.getLogger(__name__) -class CreateAssistantRequest(BaseModel): - """Request object for creating an assistant.""" +class BaseAssistantRequest(BaseModel): + """ + Base Request object for creating or modifying an assistant. + This class should not be used directly. Use CreateAssistantRequest or ModifyAssistantRequest instead. + Model field is required for CreateAssistantRequest, but optional for ModifyAssistantRequest. + """ - model: str = Field( - default="llama-cpp-python", - examples=["llama-cpp-python"], - description="The model to be used by the assistant. 
Default is 'llama-cpp-python'.", - ) name: str | None = Field( default=None, examples=["Froggy Assistant"], @@ -202,11 +201,24 @@ async def attach_existing_vector_store_from_id(): self.tool_resources.file_search.vector_stores = None -class ModifyAssistantRequest(CreateAssistantRequest): +class CreateAssistantRequest(BaseAssistantRequest): + """Request object for creating an assistant.""" + + model: str = Field( + default="llama-cpp-python", + examples=["llama-cpp-python"], + description="The model to be used by the assistant. Default is 'llama-cpp-python'.", + ) + + +class ModifyAssistantRequest(BaseAssistantRequest): """Request object for modifying an assistant.""" - # Inherits all fields from CreateAssistantRequest - # All fields are optional for modification + model: str | None = Field( + default=None, + examples=["llama-cpp-python", None], + description="The model to be used by the assistant. Default is 'llama-cpp-python'.", + ) class ListAssistantsResponse(BaseModel): diff --git a/src/leapfrogai_api/typedef/common.py b/src/leapfrogai_api/typedef/common.py index 879dc0855..f00b2c4ed 100644 --- a/src/leapfrogai_api/typedef/common.py +++ b/src/leapfrogai_api/typedef/common.py @@ -2,6 +2,17 @@ from leapfrogai_api.backend.constants import DEFAULT_MAX_COMPLETION_TOKENS +class MetadataObject: + """A metadata object that can be serialized back to a dict.""" + + def __init__(self, **kwargs): + for key, value in kwargs.items(): + setattr(self, key, value) + + def __getattr__(self, key): + return self.__dict__.get(key) + + class Usage(BaseModel): """Usage object.""" diff --git a/src/leapfrogai_api/typedef/completion/completion_types.py b/src/leapfrogai_api/typedef/completion/completion_types.py index 9a5cdad95..f92d91f28 100644 --- a/src/leapfrogai_api/typedef/completion/completion_types.py +++ b/src/leapfrogai_api/typedef/completion/completion_types.py @@ -7,15 +7,48 @@ class FinishReason(Enum): - NONE = 0 # Maps to "None" - STOP = 1 # Maps to "stop" - LENGTH = 2 # Maps to "length" + NONE = 0 + STOP = 1 + LENGTH = 2 - def to_string(self) -> str | None: + def to_finish_reason(self) -> str | None: + """ + Convert the enum member to its corresponding finish reason string. + + Returns: + str | None: The finish reason as a lowercase string if it is not NONE; otherwise, None. + """ if self == FinishReason.NONE: return None return self.name.lower() + @classmethod + def _missing_(cls, value): + """ + Handle missing values when creating an enum instance. + + This method is called when a value passed to the enum constructor does not match any existing enum members. + It provides custom logic to map input values to enum members or raises an error if the value is invalid. + + Args: + value: The value that was not found among the enum members. + + Returns: + FinishReason: The corresponding enum member after applying custom mapping. + + Raises: + ValueError: If the value cannot be mapped to any enum member. 
+ """ + # Handle custom value mappings + if value is None or value == "None": + return cls.NONE + elif value == "stop": + return cls.STOP + elif value == "length": + return cls.LENGTH + else: + raise ValueError(f"Invalid FinishReason value: {value}") + class CompletionChoice(BaseModel): """Choice object for completion.""" diff --git a/src/leapfrogai_api/typedef/rag/__init__.py b/src/leapfrogai_api/typedef/rag/__init__.py new file mode 100644 index 000000000..65c2e26cd --- /dev/null +++ b/src/leapfrogai_api/typedef/rag/__init__.py @@ -0,0 +1,3 @@ +from .rag_types import ( + ConfigurationSingleton as ConfigurationSingleton, +) diff --git a/src/leapfrogai_api/typedef/rag/rag_types.py b/src/leapfrogai_api/typedef/rag/rag_types.py new file mode 100644 index 000000000..17fe6601c --- /dev/null +++ b/src/leapfrogai_api/typedef/rag/rag_types.py @@ -0,0 +1,40 @@ +from typing import Optional + +from pydantic import BaseModel, Field + + +class ConfigurationSingleton: + """Singleton manager for ConfigurationPayload.""" + + _instance = None + + @classmethod + def get_instance(cls): + if cls._instance is None: + cls._instance = ConfigurationPayload() + cls._instance.enable_reranking = True + cls._instance.rag_top_k_when_reranking = 100 + cls._instance.ranking_model = "flashrank" + return cls._instance + + +class ConfigurationPayload(BaseModel): + """Response for RAG configuration.""" + + enable_reranking: Optional[bool] = Field( + default=None, + examples=[True, False], + description="Enables reranking for RAG queries", + ) + # More model info can be found here: + # https://github.com/AnswerDotAI/rerankers?tab=readme-ov-file + # https://pypi.org/project/rerankers/ + ranking_model: Optional[str] = Field( + default=None, + description="What model to use for reranking. Some options may require additional python dependencies.", + examples=["flashrank", "rankllm", "cross-encoder", "colbert"], + ) + rag_top_k_when_reranking: Optional[int] = Field( + default=None, + description="The top-k results returned from the RAG call before reranking", + ) diff --git a/src/leapfrogai_api/typedef/vectorstores/__init__.py b/src/leapfrogai_api/typedef/vectorstores/__init__.py index 1491a9767..dde3c2860 100644 --- a/src/leapfrogai_api/typedef/vectorstores/__init__.py +++ b/src/leapfrogai_api/typedef/vectorstores/__init__.py @@ -7,6 +7,7 @@ ListVectorStoresResponse as ListVectorStoresResponse, ) from .search_types import ( + Vector as Vector, SearchItem as SearchItem, SearchResponse as SearchResponse, ) diff --git a/src/leapfrogai_api/typedef/vectorstores/search_types.py b/src/leapfrogai_api/typedef/vectorstores/search_types.py index 76abb0822..ea69df1fe 100644 --- a/src/leapfrogai_api/typedef/vectorstores/search_types.py +++ b/src/leapfrogai_api/typedef/vectorstores/search_types.py @@ -1,6 +1,17 @@ +from typing import Optional + from pydantic import BaseModel, Field +class Vector(BaseModel): + id: str = "" + vector_store_id: str + file_id: str + content: str + metadata: dict + embedding: list[float] + + class SearchItem(BaseModel): """Object representing a single item in a search result.""" @@ -16,6 +27,14 @@ class SearchItem(BaseModel): similarity: float = Field( ..., description="Similarity score of this item to the query." 
) + rank: Optional[int] = Field( + default=None, + description="The rank of this search item after ranking has occurred.", + ) + score: Optional[float] = Field( + default=None, + description="The score of this search item after ranking has occurred.", + ) class SearchResponse(BaseModel): diff --git a/src/leapfrogai_api/utils/logging_tools.py b/src/leapfrogai_api/utils/logging_tools.py new file mode 100644 index 000000000..aa2448288 --- /dev/null +++ b/src/leapfrogai_api/utils/logging_tools.py @@ -0,0 +1,12 @@ +import os +import logging +from dotenv import load_dotenv + +load_dotenv() + +logging.basicConfig( + level=os.getenv("LFAI_LOG_LEVEL", logging.INFO), + format="%(name)s: %(asctime)s | %(levelname)s | %(filename)s:%(lineno)s >>> %(message)s", +) + +logger = logging.getLogger(__name__) diff --git a/src/leapfrogai_evals/pyproject.toml b/src/leapfrogai_evals/pyproject.toml index 1974da81a..9726c51c0 100644 --- a/src/leapfrogai_evals/pyproject.toml +++ b/src/leapfrogai_evals/pyproject.toml @@ -8,7 +8,7 @@ version = "0.13.1" dependencies = [ "deepeval == 1.3.0", - "openai == 1.42.0", + "openai == 1.45.0", "tqdm == 4.66.5", "python-dotenv == 1.0.1", "seaborn == 0.13.2", @@ -16,7 +16,8 @@ dependencies = [ "huggingface-hub == 0.24.6", "anthropic ==0.34.2", "instructor ==1.4.3", - "pyPDF2 == 3.0.1" + "pyPDF2 == 3.0.1", + "python-dotenv == 1.0.1" ] requires-python = "~=3.11" readme = "README.md" diff --git a/src/leapfrogai_ui/src/app.css b/src/leapfrogai_ui/src/app.css index b1f6ef61b..1afa8ffdd 100644 --- a/src/leapfrogai_ui/src/app.css +++ b/src/leapfrogai_ui/src/app.css @@ -9,6 +9,17 @@ scrollbar-color: #4b5563 #1f2937; } +/* Override TailwindCSS default Preflight styles for lists in messages */ +#message-content-container { + ul { + margin: revert; + padding: revert; + li { + list-style: square; + } + } +} + /*TODO - can we get rid of some of these?*/ @layer utilities { .content { diff --git a/src/leapfrogai_ui/src/app.d.ts b/src/leapfrogai_ui/src/app.d.ts index d493910cc..f19b0b155 100644 --- a/src/leapfrogai_ui/src/app.d.ts +++ b/src/leapfrogai_ui/src/app.d.ts @@ -23,7 +23,6 @@ declare global { profile?: Profile; threads?: LFThread[]; assistants?: LFAssistant[]; - assistant?: LFAssistant; files?: FileObject[]; keys?: APIKeyRow[]; } diff --git a/src/leapfrogai_ui/src/lib/components/AssistantAvatar.svelte b/src/leapfrogai_ui/src/lib/components/AssistantAvatar.svelte index ceca70148..a5e6d8105 100644 --- a/src/leapfrogai_ui/src/lib/components/AssistantAvatar.svelte +++ b/src/leapfrogai_ui/src/lib/components/AssistantAvatar.svelte @@ -33,8 +33,7 @@ ignoreLocation: true }; - $: fileNotUploaded = !$form.avatarFile; // if on upload tab, you must upload a file to enable save - + $: fileNotUploaded = !$form.avatar && !$form.avatarFile; // if on upload tab, you must upload a file to enable save $: avatarToShow = $form.avatarFile ? 
URL.createObjectURL($form.avatarFile) : $form.avatar; $: fileTooBig = $form.avatarFile?.size > MAX_AVATAR_SIZE; @@ -66,9 +65,7 @@ modalOpen = false; $form.avatar = originalAvatar; tempPictogram = selectedPictogramName; // reset to original pictogram - if ($form.avatar) { - $form.avatarFile = $form.avatar; // reset to original file - } else { + if (!$form.avatar) { clearFileInput(); } fileUploaderRef.value = ''; // Reset the file input value to ensure input event detection @@ -102,7 +99,7 @@ } } else { // pictogram tab - selectedPictogramName = tempPictogram; // TODO - can we remove this line + selectedPictogramName = tempPictogram; $form.pictogram = tempPictogram; $form.avatar = ''; // remove saved avatar clearFileInput(); @@ -197,8 +194,6 @@ > Upload from computer - - {#if hideUploader} @@ -222,7 +217,9 @@ - + { @@ -236,5 +233,6 @@ name="avatarFile" class="sr-only" /> - + + diff --git a/src/leapfrogai_ui/src/lib/components/AssistantCard.svelte b/src/leapfrogai_ui/src/lib/components/AssistantCard.svelte index ceabb4098..dfa88a3e4 100644 --- a/src/leapfrogai_ui/src/lib/components/AssistantCard.svelte +++ b/src/leapfrogai_ui/src/lib/components/AssistantCard.svelte @@ -1,10 +1,10 @@ diff --git a/src/leapfrogai_ui/src/lib/components/AssistantFileSelect.svelte b/src/leapfrogai_ui/src/lib/components/AssistantFileSelect.svelte index 74d0f8ba9..6cf1ab3e5 100644 --- a/src/leapfrogai_ui/src/lib/components/AssistantFileSelect.svelte +++ b/src/leapfrogai_ui/src/lib/components/AssistantFileSelect.svelte @@ -2,7 +2,7 @@ import { fade } from 'svelte/transition'; import { filesStore } from '$stores'; import type { FilesForm } from '$lib/types/files'; - import { ACCEPTED_FILE_TYPES, STANDARD_FADE_DURATION } from '$constants'; + import { ACCEPTED_DOC_TYPES, STANDARD_FADE_DURATION } from '$constants'; import AssistantFileDropdown from '$components/AssistantFileDropdown.svelte'; import FileUploaderItem from '$components/FileUploaderItem.svelte'; @@ -17,7 +17,7 @@ .filter((id) => $filesStore.selectedAssistantFileIds.includes(id)); - +
{#each filteredStoreFiles as file} diff --git a/src/leapfrogai_ui/src/lib/components/AssistantFileSelect.test.ts b/src/leapfrogai_ui/src/lib/components/AssistantFileSelect.test.ts index 6bb15f2ae..61c3efed9 100644 --- a/src/leapfrogai_ui/src/lib/components/AssistantFileSelect.test.ts +++ b/src/leapfrogai_ui/src/lib/components/AssistantFileSelect.test.ts @@ -4,14 +4,14 @@ import AssistantFileSelect from '$components/AssistantFileSelect.svelte'; import { superValidate } from 'sveltekit-superforms'; import { yup } from 'sveltekit-superforms/adapters'; import { filesSchema } from '$schemas/files'; -import type { FileRow } from '$lib/types/files'; +import type { LFFileObject } from '$lib/types/files'; import { getUnixSeconds } from '$helpers/dates'; import userEvent from '@testing-library/user-event'; const filesForm = await superValidate({}, yup(filesSchema), { errors: false }); describe('AssistantFileSelect', () => { - const mockFiles: FileRow[] = [ + const mockFiles: LFFileObject[] = [ { id: '1', filename: 'file1.pdf', status: 'complete', created_at: getUnixSeconds(new Date()) }, { id: '2', filename: 'file2.pdf', status: 'error', created_at: getUnixSeconds(new Date()) }, { id: '3', filename: 'file3.txt', status: 'uploading', created_at: getUnixSeconds(new Date()) } diff --git a/src/leapfrogai_ui/src/lib/components/AssistantForm.svelte b/src/leapfrogai_ui/src/lib/components/AssistantForm.svelte index 815e009b2..8e7c97a5a 100644 --- a/src/leapfrogai_ui/src/lib/components/AssistantForm.svelte +++ b/src/leapfrogai_ui/src/lib/components/AssistantForm.svelte @@ -6,11 +6,11 @@ } from '$lib/constants'; import { superForm } from 'sveltekit-superforms'; import { page } from '$app/stores'; - import { beforeNavigate, goto, invalidate } from '$app/navigation'; + import { beforeNavigate, goto } from '$app/navigation'; import { Button, Modal, P } from 'flowbite-svelte'; import Slider from '$components/Slider.svelte'; import { yup } from 'sveltekit-superforms/adapters'; - import { filesStore, toastStore, uiStore } from '$stores'; + import { assistantsStore, filesStore, toastStore, uiStore } from '$stores'; import { assistantInputSchema, editAssistantInputSchema } from '$lib/schemas/assistants'; import type { NavigationTarget } from '@sveltejs/kit'; import { onMount } from 'svelte'; @@ -25,6 +25,10 @@ let bypassCancelWarning = false; + $: assistant = $assistantsStore.assistants.find( + (assistant) => assistant.id === $page.params.assistantId + ); + const { form, errors, enhance, submitting, isTainted, delayed } = superForm(data.form, { invalidateAll: false, validators: yup(isEditMode ? editAssistantInputSchema : assistantInputSchema), @@ -55,8 +59,12 @@ } bypassCancelWarning = true; - await invalidate('lf:assistants'); - goto(result.data.redirectUrl); + if (isEditMode) { + assistantsStore.updateAssistant(result.data.assistant); + } else { + assistantsStore.addAssistant(result.data.assistant); + } + await goto(result.data.redirectUrl); } else if (result.type === 'failure') { // 400 errors will show errors for the respective fields, do not show toast if (result.status !== 400) { @@ -174,7 +182,7 @@
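
Editor's note (not part of the patch): the AssistantForm.svelte hunk above replaces `invalidate('lf:assistants')` with direct writes to the new `assistantsStore`, so a successful create or edit updates the client-side cache without re-running the layout load. A condensed sketch of that pattern, assuming a hypothetical `onAssistantSaved` handler name (only the store methods and import paths are taken from this patch):

```ts
import { assistantsStore } from '$stores';
import { goto } from '$app/navigation';
import type { LFAssistant } from '$lib/types/assistants';

// Hypothetical success handler, illustrative only: instead of re-running the
// load function via invalidate('lf:assistants'), the assistant returned by the
// form action is written straight into the client-side store before navigating.
async function onAssistantSaved(
  assistant: LFAssistant,
  isEditMode: boolean,
  redirectUrl: string
) {
  if (isEditMode) {
    assistantsStore.updateAssistant(assistant); // replace the matching entry by id
  } else {
    assistantsStore.addAssistant(assistant); // append the newly created assistant
  }
  await goto(redirectUrl);
}
```

The trade-off is that the store becomes the source of truth between navigations, which is why the form above derives `assistant` reactively from `$assistantsStore.assistants` keyed on `$page.params.assistantId` rather than from page data.
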
diff --git a/src/leapfrogai_ui/src/lib/components/AssistantProgressToast.test.ts b/src/leapfrogai_ui/src/lib/components/AssistantProgressToast.test.ts index fb21bd849..fc1d5c5e4 100644 --- a/src/leapfrogai_ui/src/lib/components/AssistantProgressToast.test.ts +++ b/src/leapfrogai_ui/src/lib/components/AssistantProgressToast.test.ts @@ -10,7 +10,7 @@ import AssistantProgressToast from '$components/AssistantProgressToast.svelte'; import { render, screen } from '@testing-library/svelte'; import filesStore from '$stores/filesStore'; import { getFakeFiles } from '$testUtils/fakeData'; -import { convertFileObjectToFileRows } from '$helpers/fileHelpers'; +import { convertFileObjectToLFFileObject } from '$helpers/fileHelpers'; import { delay } from 'msw'; import { vi } from 'vitest'; import { toastStore } from '$stores'; @@ -27,7 +27,7 @@ describe('AssistantProgressToast', () => { fileIds: files.map((file) => file.id), vectorStoreId: '123' }; - filesStore.setFiles(convertFileObjectToFileRows(files)); + filesStore.setFiles(convertFileObjectToLFFileObject(files)); const timeout = 10; //10ms render(AssistantProgressToast, { timeout, toast }); //10ms timeout diff --git a/src/leapfrogai_ui/src/lib/components/ChatFileUpload.svelte b/src/leapfrogai_ui/src/lib/components/ChatFileUpload.svelte index e01575ce2..73356ee1a 100644 --- a/src/leapfrogai_ui/src/lib/components/ChatFileUpload.svelte +++ b/src/leapfrogai_ui/src/lib/components/ChatFileUpload.svelte @@ -1,7 +1,7 @@
diff --git a/src/leapfrogai_ui/src/lib/components/modals/ConfirmFilesDeleteModal.svelte b/src/leapfrogai_ui/src/lib/components/modals/ConfirmFilesDeleteModal.svelte index d80d93147..d581f83cd 100644 --- a/src/leapfrogai_ui/src/lib/components/modals/ConfirmFilesDeleteModal.svelte +++ b/src/leapfrogai_ui/src/lib/components/modals/ConfirmFilesDeleteModal.svelte @@ -3,7 +3,6 @@ import type { Assistant } from 'openai/resources/beta/assistants'; import { filesStore, toastStore } from '$stores'; import { ExclamationCircleOutline } from 'flowbite-svelte-icons'; - import { invalidate } from '$app/navigation'; import { createEventDispatcher } from 'svelte'; import vectorStatusStore from '$stores/vectorStatusStore'; @@ -12,6 +11,8 @@ export let deleting: boolean; export let affectedAssistants: Assistant[]; + $: isMultipleFiles = $filesStore.selectedFileManagementFileIds.length > 1; + const dispatch = createEventDispatcher(); const handleCancel = () => { @@ -20,34 +21,43 @@ affectedAssistantsLoading = false; }; + const handleDeleteError = () => { + toastStore.addToast({ + kind: 'error', + title: `Error Deleting ${isMultipleFiles ? 'Files' : 'File'}` + }); + }; + const handleConfirmedDelete = async () => { - const isMultipleFiles = $filesStore.selectedFileManagementFileIds.length > 1; deleting = true; - const res = await fetch('/api/files/delete', { - method: 'DELETE', - body: JSON.stringify({ ids: $filesStore.selectedFileManagementFileIds }), - headers: { - 'Content-Type': 'application/json' - } - }); - open = false; - await invalidate('lf:files'); - if (res.ok) { - toastStore.addToast({ - kind: 'success', - title: `${isMultipleFiles ? 'Files' : 'File'} Deleted` - }); - } else { - toastStore.addToast({ - kind: 'error', - title: `Error Deleting ${isMultipleFiles ? 'Files' : 'File'}` + try { + const res = await fetch('/api/files/delete', { + method: 'DELETE', + body: JSON.stringify({ ids: $filesStore.selectedFileManagementFileIds }), + headers: { + 'Content-Type': 'application/json' + } }); - } - vectorStatusStore.removeFiles($filesStore.selectedFileManagementFileIds); - filesStore.setSelectedFileManagementFileIds([]); + if (res.ok) { + open = false; + for (const id of $filesStore.selectedFileManagementFileIds) { + filesStore.removeFile(id); + } + vectorStatusStore.removeFiles($filesStore.selectedFileManagementFileIds); + filesStore.setSelectedFileManagementFileIds([]); + toastStore.addToast({ + kind: 'success', + title: `${isMultipleFiles ? 'Files' : 'File'} Deleted` + }); + dispatch('delete'); + } else { + handleDeleteError(); + } + } catch { + handleDeleteError(); + } deleting = false; - dispatch('delete'); }; $: fileNames = $filesStore.files diff --git a/src/leapfrogai_ui/src/lib/components/modals/DeleteApiKeyModal.svelte b/src/leapfrogai_ui/src/lib/components/modals/DeleteApiKeyModal.svelte index 58b0d9d58..c0c7083a8 100644 --- a/src/leapfrogai_ui/src/lib/components/modals/DeleteApiKeyModal.svelte +++ b/src/leapfrogai_ui/src/lib/components/modals/DeleteApiKeyModal.svelte @@ -10,10 +10,12 @@ export let selectedRowIds: string[]; export let deleting: boolean; + $: isMultiple = selectedRowIds.length > 1; + const dispatch = createEventDispatcher(); - $: keyNames = $page.data.keys - ? $page.data.keys + $: keyNames = $page.data.apiKeys + ? $page.data.apiKeys .map((key) => { if (selectedRowIds.includes(key.id)) return key.name; }) @@ -25,27 +27,35 @@ confirmDeleteModalOpen = false; }; + const handleDeleteError = () => { + toastStore.addToast({ + kind: 'error', + title: `Error Deleting ${isMultiple ? 
'Keys' : 'Key'}` + }); + }; + const handleDelete = async () => { deleting = true; - const isMultiple = selectedRowIds.length > 1; - const res = await fetch('/api/api-keys/delete', { - body: JSON.stringify({ ids: selectedRowIds }), - method: 'DELETE' - }); - dispatch('delete', selectedRowIds); - deleting = false; - if (res.ok) { - toastStore.addToast({ - kind: 'success', - title: `${isMultiple ? 'Keys' : 'Key'} Deleted` - }); - } else { - toastStore.addToast({ - kind: 'error', - title: `Error Deleting ${isMultiple ? 'Keys' : 'Key'}` + try { + const res = await fetch('/api/api-keys/delete', { + body: JSON.stringify({ ids: selectedRowIds }), + method: 'DELETE' }); + if (res.ok) { + dispatch('delete', selectedRowIds); + toastStore.addToast({ + kind: 'success', + title: `${isMultiple ? 'Keys' : 'Key'} Deleted` + }); + await invalidate('lf:api-keys'); + } else { + handleDeleteError(); + } + } catch { + handleDeleteError(); } - await invalidate('lf:api-keys'); + + deleting = false; }; diff --git a/src/leapfrogai_ui/src/lib/constants/errors.ts b/src/leapfrogai_ui/src/lib/constants/errors.ts index e26224e6f..a34bd5906 100644 --- a/src/leapfrogai_ui/src/lib/constants/errors.ts +++ b/src/leapfrogai_ui/src/lib/constants/errors.ts @@ -1,2 +1,4 @@ export const FILE_CONTEXT_TOO_LARGE_ERROR_MSG = 'Error: Upload fewer or smaller files'; export const ERROR_UPLOADING_FILE_MSG = 'Error uploading file'; +export const ASSISTANT_ERROR_MSG = + "I'm sorry but I've experienced an error. Please try again, or contact support."; diff --git a/src/leapfrogai_ui/src/lib/constants/index.ts b/src/leapfrogai_ui/src/lib/constants/index.ts index 5ad6cac6d..08e813bf0 100644 --- a/src/leapfrogai_ui/src/lib/constants/index.ts +++ b/src/leapfrogai_ui/src/lib/constants/index.ts @@ -52,7 +52,7 @@ export const ACCEPTED_AUDIO_FILE_TYPES = [ '.webm' ]; -export const ACCEPTED_FILE_TYPES = [ +export const ACCEPTED_DOC_TYPES = [ '.pdf', '.txt', '.text', @@ -62,7 +62,10 @@ export const ACCEPTED_FILE_TYPES = [ '.pptx', '.doc', '.docx', - '.csv', + '.csv' +]; +export const ACCEPTED_DOC_AND_AUDIO_FILE_TYPES = [ + ...ACCEPTED_DOC_TYPES, ...ACCEPTED_AUDIO_FILE_TYPES ]; @@ -108,7 +111,7 @@ export const NO_FILE_ERROR_TEXT = 'Please upload an image or select a pictogram' export const AVATAR_FILE_SIZE_ERROR_TEXT = `File must be less than ${MAX_AVATAR_SIZE / 1000000} MB`; export const FILE_SIZE_ERROR_TEXT = `File must be less than ${MAX_FILE_SIZE / 1000000} MB`; export const AUDIO_FILE_SIZE_ERROR_TEXT = `Audio file must be less than ${MAX_AUDIO_FILE_SIZE / 1000000} MB`; -export const INVALID_FILE_TYPE_ERROR_TEXT = `Invalid file type, accepted types are: ${ACCEPTED_FILE_TYPES.join(', ')}`; +export const INVALID_FILE_TYPE_ERROR_TEXT = `Invalid file type, accepted types are: ${ACCEPTED_DOC_AND_AUDIO_FILE_TYPES.join(', ')}`; export const INVALID_AUDIO_FILE_TYPE_ERROR_TEXT = `Invalid file type, accepted types are: ${ACCEPTED_AUDIO_FILE_TYPES.join(', ')}`; export const NO_SELECTED_ASSISTANT_ID = 'noSelectedAssistantId'; diff --git a/src/leapfrogai_ui/src/lib/constants/toastMessages.ts b/src/leapfrogai_ui/src/lib/constants/toastMessages.ts index e431348a5..5bcadadc8 100644 --- a/src/leapfrogai_ui/src/lib/constants/toastMessages.ts +++ b/src/leapfrogai_ui/src/lib/constants/toastMessages.ts @@ -19,7 +19,7 @@ export const ERROR_GETTING_ASSISTANT_MSG_TOAST = ( ): ToastData => ({ kind: 'error', title: 'Error', - subtitle: 'Error getting Assistant Response', + subtitle: 'Error getting assistant response', ...override }); diff --git 
a/src/leapfrogai_ui/src/lib/helpers/chatHelpers.ts b/src/leapfrogai_ui/src/lib/helpers/chatHelpers.ts index 72db4dd58..ef5961ea5 100644 --- a/src/leapfrogai_ui/src/lib/helpers/chatHelpers.ts +++ b/src/leapfrogai_ui/src/lib/helpers/chatHelpers.ts @@ -263,3 +263,11 @@ export const getCitations = (message: OpenAIMessage, files: FileObject[]) => { } return []; }; + +export const refetchThread = async (threadId: string) => { + const res = await fetch(`/api/threads/${threadId}`); + if (res.ok) { + const thread = await res.json(); + threadsStore.updateThread(thread); + } +}; diff --git a/src/leapfrogai_ui/src/lib/helpers/fileHelpers.ts b/src/leapfrogai_ui/src/lib/helpers/fileHelpers.ts index a0cd0fc5b..b6d229336 100644 --- a/src/leapfrogai_ui/src/lib/helpers/fileHelpers.ts +++ b/src/leapfrogai_ui/src/lib/helpers/fileHelpers.ts @@ -1,11 +1,10 @@ -import type { FileMetadata, FileRow } from '$lib/types/files'; +import type { FileMetadata, LFFileObject } from '$lib/types/files'; import type { FileObject } from 'openai/resources/files'; import { FILE_CONTEXT_TOO_LARGE_ERROR_MSG } from '$constants/errors'; -export const convertFileObjectToFileRows = (files: FileObject[]): FileRow[] => +export const convertFileObjectToLFFileObject = (files: FileObject[]): LFFileObject[] => files.map((file) => ({ - id: file.id, - filename: file.filename, + ...file, created_at: file.created_at * 1000, status: 'hide' })); diff --git a/src/leapfrogai_ui/src/lib/mocks/file-mocks.ts b/src/leapfrogai_ui/src/lib/mocks/file-mocks.ts index f4ff4460f..88fa6d566 100644 --- a/src/leapfrogai_ui/src/lib/mocks/file-mocks.ts +++ b/src/leapfrogai_ui/src/lib/mocks/file-mocks.ts @@ -78,7 +78,7 @@ export const mockConvertFileErrorNoId = () => { export const mockDeleteCheck = (assistantsToReturn: LFAssistant[]) => { server.use( - http.post('/api/files/delete-check', async () => { + http.post('/api/files/delete/check', async () => { await delay(100); return HttpResponse.json(assistantsToReturn); }) diff --git a/src/leapfrogai_ui/src/lib/stores/assistantsStore.ts b/src/leapfrogai_ui/src/lib/stores/assistantsStore.ts new file mode 100644 index 000000000..b0356c576 --- /dev/null +++ b/src/leapfrogai_ui/src/lib/stores/assistantsStore.ts @@ -0,0 +1,57 @@ +import { writable } from 'svelte/store'; +import type { LFAssistant } from '$lib/types/assistants'; +import { NO_SELECTED_ASSISTANT_ID } from '$constants'; + +type AssistantsStore = { + assistants: LFAssistant[]; + selectedAssistantId?: string; +}; + +const defaultValues: AssistantsStore = { + assistants: [], + selectedAssistantId: NO_SELECTED_ASSISTANT_ID +}; +const createAssistantsStore = () => { + const { subscribe, set, update } = writable({ ...defaultValues }); + + return { + subscribe, + set, + update, + setAssistants: (newAssistants: LFAssistant[]) => { + update((old) => ({ ...old, assistants: newAssistants })); + }, + setSelectedAssistantId: (selectedAssistantId: string) => { + update((old) => { + return { ...old, selectedAssistantId }; + }); + }, + addAssistant: (newAssistant: LFAssistant) => { + update((old) => ({ ...old, assistants: [...old.assistants, newAssistant] })); + }, + removeAssistant: (id: string) => { + update((old) => { + const updatedAssistants = [...old.assistants]; + const assistantIndex = updatedAssistants.findIndex((assistant) => assistant.id === id); + if (assistantIndex > -1) { + updatedAssistants.splice(assistantIndex, 1); + } + return { ...old, assistants: updatedAssistants }; + }); + }, + updateAssistant: (newAssistant: LFAssistant) => { + update((old) => { + 
const updatedAssistants = [...old.assistants]; + const assistantIndex = updatedAssistants.findIndex( + (assistant) => assistant.id === newAssistant.id + ); + if (assistantIndex > -1) { + updatedAssistants[assistantIndex] = newAssistant; + } + return { ...old, assistants: updatedAssistants }; + }); + } + }; +}; +const assistantsStore = createAssistantsStore(); +export default assistantsStore; diff --git a/src/leapfrogai_ui/src/lib/stores/filesStore.ts b/src/leapfrogai_ui/src/lib/stores/filesStore.ts index c6ba33db8..5e0eeea19 100644 --- a/src/leapfrogai_ui/src/lib/stores/filesStore.ts +++ b/src/leapfrogai_ui/src/lib/stores/filesStore.ts @@ -1,14 +1,16 @@ import { derived, writable } from 'svelte/store'; import type { FileObject } from 'openai/resources/files'; -import type { FileRow } from '$lib/types/files'; +import type { LFFileObject, PendingOrErrorFile } from '$lib/types/files'; import { toastStore } from '$stores/index'; +import { getUnixSeconds } from '$helpers/dates'; type FilesStore = { - files: FileRow[]; + files: LFFileObject[]; selectedFileManagementFileIds: string[]; selectedAssistantFileIds: string[]; uploading: boolean; - pendingUploads: FileRow[]; + pendingUploads: PendingOrErrorFile[]; + needsUpdate?: boolean; }; const defaultValues: FilesStore = { @@ -16,7 +18,8 @@ const defaultValues: FilesStore = { selectedFileManagementFileIds: [], selectedAssistantFileIds: [], uploading: false, - pendingUploads: [] + pendingUploads: [], + needsUpdate: false }; const createFilesStore = () => { @@ -27,16 +30,32 @@ const createFilesStore = () => { set, update, setUploading: (status: boolean) => update((old) => ({ ...old, uploading: status })), - - setFiles: (newFiles: FileRow[]) => { + removeFile: (id: string) => { + update((old) => { + const updatedFiles = [...old.files]; + const fileIndex = updatedFiles.findIndex((file) => file.id === id); + if (fileIndex > -1) { + updatedFiles.splice(fileIndex, 1); + } + return { ...old, files: updatedFiles }; + }); + }, + setFiles: (newFiles: LFFileObject[]) => { update((old) => ({ ...old, files: [...newFiles] })); }, - setPendingUploads: (newFiles: FileRow[]) => { + setPendingUploads: (newFiles: LFFileObject[]) => { update((old) => ({ ...old, pendingUploads: [...newFiles] })); }, setSelectedFileManagementFileIds: (newIds: string[]) => { update((old) => ({ ...old, selectedFileManagementFileIds: newIds })); }, + setNeedsUpdate: (status: boolean) => { + update((old) => ({ ...old, needsUpdate: status })); + }, + fetchFiles: async () => { + const files = await fetch('/api/files').then((res) => res.json()); + update((old) => ({ ...old, files, needsUpdate: false })); + }, addSelectedFileManagementFileId: (id: string) => { update((old) => ({ ...old, @@ -66,7 +85,7 @@ const createFilesStore = () => { }, addUploadingFiles: (files: File[], { autoSelectUploadedFiles = false } = {}) => { update((old) => { - const newFiles: FileRow[] = []; + const newFiles: Pick[] = []; const newFileIds: string[] = []; for (const file of files) { const id = `${file.name}-${new Date()}`; // temp id @@ -74,7 +93,7 @@ const createFilesStore = () => { id, filename: file.name, status: 'uploading', - created_at: null + created_at: getUnixSeconds(new Date()) }); newFileIds.push(id); } @@ -87,16 +106,14 @@ const createFilesStore = () => { }; }); }, - updateWithUploadErrors: (newFiles: Array) => { + updateWithUploadErrors: (newFiles: Array) => { update((old) => { - const failedRows: FileRow[] = []; + const failedRows: LFFileObject[] = []; for (const file of newFiles) { if (file.status === 
'error') { - const row: FileRow = { - id: file.id, - filename: file.filename, - created_at: file.created_at, + const row: LFFileObject = { + ...file, status: 'error' }; @@ -126,15 +143,13 @@ const createFilesStore = () => { }; }); }, - updateWithUploadSuccess: (newFiles: Array) => { + updateWithUploadSuccess: (newFiles: Array) => { update((old) => { const successRows = [...old.files]; for (const file of newFiles) { - const row: FileRow = { - id: file.id, - filename: file.filename, - created_at: file.created_at, + const row: LFFileObject = { + ...file, status: 'complete' }; diff --git a/src/leapfrogai_ui/src/lib/stores/index.ts b/src/leapfrogai_ui/src/lib/stores/index.ts index 90cac2ebd..66da975b0 100644 --- a/src/leapfrogai_ui/src/lib/stores/index.ts +++ b/src/leapfrogai_ui/src/lib/stores/index.ts @@ -2,3 +2,4 @@ export { default as threadsStore } from './threads'; export { default as toastStore } from './toast'; export { default as uiStore } from './ui'; export { default as filesStore } from './filesStore'; +export { default as assistantsStore } from './assistantsStore'; diff --git a/src/leapfrogai_ui/src/lib/stores/threads.ts b/src/leapfrogai_ui/src/lib/stores/threads.ts index 0b9738fbb..a79c66f1a 100644 --- a/src/leapfrogai_ui/src/lib/stores/threads.ts +++ b/src/leapfrogai_ui/src/lib/stores/threads.ts @@ -1,6 +1,6 @@ import { writable } from 'svelte/store'; -import { MAX_LABEL_SIZE, NO_SELECTED_ASSISTANT_ID } from '$lib/constants'; -import { goto, invalidate } from '$app/navigation'; +import { MAX_LABEL_SIZE } from '$lib/constants'; +import { goto } from '$app/navigation'; import { error } from '@sveltejs/kit'; import { type Message as VercelAIMessage } from '@ai-sdk/svelte'; import { toastStore } from '$stores'; @@ -12,7 +12,6 @@ import type { Message } from 'ai'; type ThreadsStore = { threads: LFThread[]; - selectedAssistantId: string; sendingBlocked: boolean; lastVisitedThreadId: string; streamingMessage: VercelAIMessage | null; @@ -20,7 +19,6 @@ type ThreadsStore = { const defaultValues: ThreadsStore = { threads: [], - selectedAssistantId: NO_SELECTED_ASSISTANT_ID, sendingBlocked: false, lastVisitedThreadId: '', streamingMessage: null @@ -97,11 +95,6 @@ const createThreadsStore = () => { setLastVisitedThreadId: (id: string) => { update((old) => ({ ...old, lastVisitedThreadId: id })); }, - setSelectedAssistantId: (selectedAssistantId: string) => { - update((old) => { - return { ...old, selectedAssistantId }; - }); - }, // Important - this method has a built in delay to ensure next user message has a different timestamp when setting to false (unblocking) setSendingBlocked: async (status: boolean) => { if (!status && process.env.NODE_ENV !== 'test') { @@ -303,7 +296,6 @@ const createThreadsStore = () => { title: 'Error', subtitle: `Error deleting message.` }); - await invalidate('lf:threads'); } }, updateThreadLabel: async (id: string, newLabel: string) => { diff --git a/src/leapfrogai_ui/src/lib/types/files.d.ts b/src/leapfrogai_ui/src/lib/types/files.d.ts index 599260041..17355cd32 100644 --- a/src/leapfrogai_ui/src/lib/types/files.d.ts +++ b/src/leapfrogai_ui/src/lib/types/files.d.ts @@ -1,16 +1,16 @@ import type { SuperValidated } from 'sveltekit-superforms'; +import type { FileObject } from 'openai/resources/files'; export type FileUploadStatus = 'uploading' | 'complete' | 'error' | 'hide'; export type VectorStatus = 'in_progress' | 'completed' | 'cancelled' | 'failed'; -export type FileRow = { - id: string; - filename: string; - created_at: number | null; +export type 
LFFileObject = Omit & { status: FileUploadStatus; }; +export type PendingOrErrorFile = Pick; + // This type is taken from SuperValidated, leaving the any export type FilesForm = SuperValidated< { files?: (File | null | undefined)[] | undefined }, diff --git a/src/leapfrogai_ui/src/routes/api/api-keys/delete/+server.ts b/src/leapfrogai_ui/src/routes/api/api-keys/delete/+server.ts index 785c289ac..eacdd3b2d 100644 --- a/src/leapfrogai_ui/src/routes/api/api-keys/delete/+server.ts +++ b/src/leapfrogai_ui/src/routes/api/api-keys/delete/+server.ts @@ -10,7 +10,6 @@ export const DELETE: RequestHandler = async ({ request, locals: { session } }) = if (!session) { error(401, 'Unauthorized'); } - let requestData: { ids: string }; // Validate request body diff --git a/src/leapfrogai_ui/src/routes/api/chat/assistants/+server.ts b/src/leapfrogai_ui/src/routes/api/chat/assistants/+server.ts index b5152cc50..20558f455 100644 --- a/src/leapfrogai_ui/src/routes/api/chat/assistants/+server.ts +++ b/src/leapfrogai_ui/src/routes/api/chat/assistants/+server.ts @@ -46,7 +46,6 @@ export const POST: RequestHandler = async ({ request, locals: { session } }) => throw new Error('assistant_id is not set'); })() }); - // forward run status would stream message deltas let runResult = await forwardStream(runStream); diff --git a/src/leapfrogai_ui/src/routes/api/files/delete/+server.ts b/src/leapfrogai_ui/src/routes/api/files/delete/+server.ts index 935195842..e8942d8da 100644 --- a/src/leapfrogai_ui/src/routes/api/files/delete/+server.ts +++ b/src/leapfrogai_ui/src/routes/api/files/delete/+server.ts @@ -8,7 +8,6 @@ export const DELETE: RequestHandler = async ({ request, locals: { session } }) = error(401, 'Unauthorized'); } let requestData: { ids: string[] }; - // Validate request body try { requestData = await request.json(); diff --git a/src/leapfrogai_ui/src/routes/api/files/delete-check/+server.ts b/src/leapfrogai_ui/src/routes/api/files/delete/check/+server.ts similarity index 100% rename from src/leapfrogai_ui/src/routes/api/files/delete-check/+server.ts rename to src/leapfrogai_ui/src/routes/api/files/delete/check/+server.ts diff --git a/src/leapfrogai_ui/src/routes/api/files/delete-check/server.test.ts b/src/leapfrogai_ui/src/routes/api/files/delete/check/server.test.ts similarity index 86% rename from src/leapfrogai_ui/src/routes/api/files/delete-check/server.test.ts rename to src/leapfrogai_ui/src/routes/api/files/delete/check/server.test.ts index 1f6bb19bc..f78b142e9 100644 --- a/src/leapfrogai_ui/src/routes/api/files/delete-check/server.test.ts +++ b/src/leapfrogai_ui/src/routes/api/files/delete/check/server.test.ts @@ -1,5 +1,5 @@ import { POST } from './+server'; -import { mockOpenAI } from '../../../../../vitest-setup'; +import { mockOpenAI } from '../../../../../../vitest-setup'; import { getFakeAssistant, getFakeFiles, @@ -7,11 +7,11 @@ import { getFakeVectorStoreFile } from '$testUtils/fakeData'; import type { RequestEvent } from '@sveltejs/kit'; -import type { RouteParams } from '../../../../../.svelte-kit/types/src/routes/api/messages/new/$types'; +import type { RouteParams } from './$types'; import { getLocalsMock } from '$lib/mocks/misc'; const validMessageBody = { fileIds: ['file1', 'file2'] }; -describe('/api/files/delete-check', () => { +describe('/api/files/delete/check', () => { it('returns a 401 when there is no session', async () => { const request = new Request('http://thisurlhasnoeffect', { method: 'POST', @@ -22,7 +22,7 @@ describe('/api/files/delete-check', () => { POST({ request, locals: 
getLocalsMock({ nullSession: true }) - } as RequestEvent) + } as RequestEvent) ).rejects.toMatchObject({ status: 401 }); @@ -39,7 +39,7 @@ describe('/api/files/delete-check', () => { POST({ request, locals: getLocalsMock() - } as RequestEvent) + } as RequestEvent) ).rejects.toMatchObject({ status: 400 }); @@ -54,7 +54,7 @@ describe('/api/files/delete-check', () => { POST({ request, locals: getLocalsMock() - } as RequestEvent) + } as RequestEvent) ).rejects.toMatchObject({ status: 400 }); @@ -69,7 +69,7 @@ describe('/api/files/delete-check', () => { POST({ request, locals: getLocalsMock() - } as RequestEvent) + } as RequestEvent) ).rejects.toMatchObject({ status: 400 }); @@ -84,7 +84,7 @@ describe('/api/files/delete-check', () => { POST({ request, locals: getLocalsMock() - } as RequestEvent) + } as RequestEvent) ).rejects.toMatchObject({ status: 400 }); @@ -137,7 +137,7 @@ describe('/api/files/delete-check', () => { const res = await POST({ request, locals: getLocalsMock() - } as RequestEvent); + } as RequestEvent); const resData = await res.json(); expect(res.status).toEqual(200); @@ -153,7 +153,7 @@ describe('/api/files/delete-check', () => { const res2 = await POST({ request: request2, locals: getLocalsMock() - } as RequestEvent); + } as RequestEvent); const resData2 = await res2.json(); expect(res2.status).toEqual(200); @@ -173,7 +173,7 @@ describe('/api/files/delete-check', () => { POST({ request, locals: getLocalsMock() - } as RequestEvent) + } as RequestEvent) ).rejects.toMatchObject({ status: 500 }); diff --git a/src/leapfrogai_ui/src/routes/api/helpers.ts b/src/leapfrogai_ui/src/routes/api/helpers.ts new file mode 100644 index 000000000..c64bfe611 --- /dev/null +++ b/src/leapfrogai_ui/src/routes/api/helpers.ts @@ -0,0 +1,18 @@ +import type { LFThread } from '$lib/types/threads'; +import { getOpenAiClient } from '$lib/server/constants'; +import type { LFMessage } from '$lib/types/messages'; + +export const getThreadWithMessages = async ( + thread_id: string, + access_token: string +): Promise => { + const openai = getOpenAiClient(access_token); + const thread = (await openai.beta.threads.retrieve(thread_id)) as LFThread; + if (!thread) { + return null; + } + const messagesPage = await openai.beta.threads.messages.list(thread.id); + const messages = messagesPage.data as LFMessage[]; + messages.sort((a, b) => a.created_at - b.created_at); + return { ...thread, messages: messages }; +}; diff --git a/src/leapfrogai_ui/src/routes/api/threads/+server.ts b/src/leapfrogai_ui/src/routes/api/threads/+server.ts new file mode 100644 index 000000000..8158bab7a --- /dev/null +++ b/src/leapfrogai_ui/src/routes/api/threads/+server.ts @@ -0,0 +1,45 @@ +import type { RequestHandler } from './$types'; +import { error, json } from '@sveltejs/kit'; +import type { Profile } from '$lib/types/profile'; +import type { LFThread } from '$lib/types/threads'; +import { getThreadWithMessages } from '../helpers'; + +export const GET: RequestHandler = async ({ locals: { session, supabase, user } }) => { + if (!session) { + error(401, 'Unauthorized'); + } + + const { data: profile, error: profileError } = await supabase + .from('profiles') + .select(`*`) + .eq('id', user?.id) + .returns() + .single(); + + if (profileError) { + console.error( + `error getting user profile for user_id: ${user?.id}. 
${JSON.stringify(profileError)}` + ); + error(500, 'Internal Error'); + } + + const threads: LFThread[] = []; + if (profile?.thread_ids && profile?.thread_ids.length > 0) { + try { + const threadPromises = profile.thread_ids.map((thread_id) => + getThreadWithMessages(thread_id, session.access_token) + ); + const results = await Promise.allSettled(threadPromises); + results.forEach((result) => { + if (result.status === 'fulfilled' && result.value) { + threads.push(result.value); + } + }); + } catch (e) { + console.error(`Error fetching threads: ${e}`); + return json([]); + } + } + + return json(threads); +}; diff --git a/src/leapfrogai_ui/src/routes/api/threads/[thread_id]/+server.ts b/src/leapfrogai_ui/src/routes/api/threads/[thread_id]/+server.ts index 0a4a29f76..5c0c9f769 100644 --- a/src/leapfrogai_ui/src/routes/api/threads/[thread_id]/+server.ts +++ b/src/leapfrogai_ui/src/routes/api/threads/[thread_id]/+server.ts @@ -1,23 +1,6 @@ import type { RequestHandler } from './$types'; import { error, json } from '@sveltejs/kit'; -import { getOpenAiClient } from '$lib/server/constants'; -import type { LFThread } from '$lib/types/threads'; -import type { LFMessage } from '$lib/types/messages'; - -const getThreadWithMessages = async ( - thread_id: string, - access_token: string -): Promise => { - const openai = getOpenAiClient(access_token); - const thread = (await openai.beta.threads.retrieve(thread_id)) as LFThread; - if (!thread) { - return null; - } - const messagesPage = await openai.beta.threads.messages.list(thread.id); - const messages = messagesPage.data as LFMessage[]; - messages.sort((a, b) => a.created_at - b.created_at); - return { ...thread, messages: messages }; -}; +import { getThreadWithMessages } from '../../helpers'; export const GET: RequestHandler = async ({ params, locals: { session } }) => { if (!session) { diff --git a/src/leapfrogai_ui/src/routes/api/threads/server.test.ts b/src/leapfrogai_ui/src/routes/api/threads/server.test.ts new file mode 100644 index 000000000..34c7dade9 --- /dev/null +++ b/src/leapfrogai_ui/src/routes/api/threads/server.test.ts @@ -0,0 +1,125 @@ +import { GET } from './+server'; +import { getLocalsMock } from '$lib/mocks/misc'; +import type { RequestEvent } from '@sveltejs/kit'; +import type { RouteParams } from './$types'; +import { + selectSingleReturnsMockError, + supabaseFromMockWrapper, + supabaseSelectSingleByIdMock +} from '$lib/mocks/supabase-mocks'; +import { getFakeThread } from '$testUtils/fakeData'; +import { mockOpenAI } from '../../../../vitest-setup'; +import * as apiHelpers from '../helpers'; + +const request = new Request('http://thisurlhasnoeffect', { + method: 'GET' +}); + +const thread1 = getFakeThread({ numMessages: 1 }); +const thread2 = getFakeThread({ numMessages: 2 }); +const fakeProfile = { thread_ids: [thread1.id, thread2.id] }; + +describe('/api/threads', () => { + it('returns a 401 when there is no session', async () => { + await expect( + GET({ + request, + locals: getLocalsMock({ nullSession: true }) + } as RequestEvent) + ).rejects.toMatchObject({ + status: 401 + }); + }); + it("returns a user's threads", async () => { + const thread1WithoutMessages = { ...thread1, messages: undefined }; + const thread2WithoutMessages = { ...thread2, messages: undefined }; + + mockOpenAI.setThreads([thread1WithoutMessages, thread2WithoutMessages]); + mockOpenAI.setMessages([...(thread1.messages || []), ...(thread2.messages || [])]); + + const res = await GET({ + request, + locals: getLocalsMock({ + supabase: 
supabaseFromMockWrapper({ + ...supabaseSelectSingleByIdMock(fakeProfile) + }) + }) + } as RequestEvent); + + expect(res.status).toEqual(200); + const resJson = await res.json(); + // Note - our fake threads already have messages attached, we are checking here that the + // API fetched the messages and added them to the threads since real threads don't have messages + expect(resJson[0].id).toEqual(thread1.id); + expect(resJson[0].messages).toEqual(thread1.messages); + expect(resJson[1].id).toEqual(thread2.id); + expect(resJson[1].messages).toEqual(thread2.messages); + }); + it('still returns threads that were successfully retrieved when there is an error getting a thread', async () => { + mockOpenAI.setThreads([thread2]); + mockOpenAI.setError('retrieveThread'); // fail the first thread fetching + const res = await GET({ + request, + locals: getLocalsMock({ + supabase: supabaseFromMockWrapper({ + ...supabaseSelectSingleByIdMock(fakeProfile) + }) + }) + } as RequestEvent); + + expect(res.status).toEqual(200); + const resJson = await res.json(); + expect(resJson[0].id).toEqual(thread2.id); + }); + it('still returns threads that were successfully retrieved when there is an error getting messages for a thread', async () => { + mockOpenAI.setThreads([thread1, thread2]); + mockOpenAI.setError('listMessages'); // fail the first thread's message fetching + const res = await GET({ + request, + locals: getLocalsMock({ + supabase: supabaseFromMockWrapper({ + ...supabaseSelectSingleByIdMock(fakeProfile) + }) + }) + } as RequestEvent); + + expect(res.status).toEqual(200); + const resJson = await res.json(); + expect(resJson[0].id).toEqual(thread2.id); + }); + it('returns an empty array if there is an unhandled error fetching threads', async () => { + vi.spyOn(apiHelpers, 'getThreadWithMessages').mockImplementationOnce(() => { + throw new Error('fake error'); + }); + const consoleSpy = vi.spyOn(console, 'error'); + + const res = await GET({ + request, + locals: getLocalsMock({ + supabase: supabaseFromMockWrapper({ + ...supabaseSelectSingleByIdMock(fakeProfile) + }) + }) + } as RequestEvent); + + expect(res.status).toEqual(200); + const resJson = await res.json(); + expect(resJson).toEqual([]); + // ensure we hit the correct catch block/error case with this test + expect(consoleSpy).toHaveBeenCalledWith('Error fetching threads: Error: fake error'); + }); + it("returns a 500 is an error getting the user's profile", async () => { + await expect( + GET({ + request, + locals: getLocalsMock({ + supabase: supabaseFromMockWrapper({ + ...selectSingleReturnsMockError() + }) + }) + } as RequestEvent) + ).rejects.toMatchObject({ + status: 500 + }); + }); +}); diff --git a/src/leapfrogai_ui/src/routes/chat/(dashboard)/[[thread_id]]/+page.svelte b/src/leapfrogai_ui/src/routes/chat/(dashboard)/[[thread_id]]/+page.svelte index f082615c5..113883c5a 100644 --- a/src/leapfrogai_ui/src/routes/chat/(dashboard)/[[thread_id]]/+page.svelte +++ b/src/leapfrogai_ui/src/routes/chat/(dashboard)/[[thread_id]]/+page.svelte @@ -3,7 +3,7 @@ import { LFTextArea, PoweredByDU } from '$components'; import { Hr, ToolbarButton } from 'flowbite-svelte'; import { onMount, tick } from 'svelte'; - import { threadsStore, toastStore } from '$stores'; + import { assistantsStore, threadsStore, toastStore } from '$stores'; import { type Message as VercelAIMessage, useAssistant, useChat } from '@ai-sdk/svelte'; import { page } from '$app/stores'; import Message from '$components/Message.svelte'; @@ -13,6 +13,7 @@ import { twMerge } from 
'tailwind-merge'; import { isRunAssistantMessage, + refetchThread, resetMessages, saveMessage, stopThenSave @@ -29,35 +30,32 @@ import ChatFileUploadForm from '$components/ChatFileUpload.svelte'; import FileChatActions from '$components/FileChatActions.svelte'; import LFCarousel from '$components/LFCarousel.svelte'; + import { ASSISTANT_ERROR_MSG } from '$constants/errors'; + import { delay } from 'msw'; + import type { LFThread } from '$lib/types/threads'; export let data; /** LOCAL VARS **/ let lengthInvalid: boolean; // bound to child LFTextArea - let assistantsList: Array<{ id: string; text: string }>; let uploadingFiles = false; let attachedFiles: LFFile[] = []; // the actual files uploaded let attachedFileMetadata: FileMetadata[] = []; // metadata about the files uploaded, e.g. upload status, extracted text, etc... + let activeThread: LFThread | undefined = undefined; /** END LOCAL VARS **/ /** REACTIVE STATE **/ $: componentHasMounted = false; - $: $page.params.thread_id, threadsStore.setLastVisitedThreadId($page.params.thread_id); - $: $page.params.thread_id, - resetMessages({ - activeThread: data.thread, - setChatMessages, - setAssistantMessages - }); - - $: activeThreadMessages = - $threadsStore.threads.find((thread) => thread.id === $page.params.thread_id)?.messages || []; + $: activeThread = $threadsStore.threads.find( + (thread: LFThread) => thread.id === $page.params.thread_id + ); + $: $page.params.thread_id, handleThreadChange(); $: messageStreaming = $isLoading || $status === 'in_progress'; $: latestChatMessage = $chatMessages[$chatMessages.length - 1]; $: latestAssistantMessage = $assistantMessages[$assistantMessages.length - 1]; $: assistantMode = - $threadsStore.selectedAssistantId !== NO_SELECTED_ASSISTANT_ID && - $threadsStore.selectedAssistantId !== 'manage-assistants'; + $assistantsStore.selectedAssistantId !== NO_SELECTED_ASSISTANT_ID && + $assistantsStore.selectedAssistantId !== 'manage-assistants'; $: if (messageStreaming) threadsStore.setSendingBlocked(true); @@ -76,8 +74,30 @@ resetFiles(); // attachment of files w/assistants disabled } + $: if ($assistantError) handleAssistantResponseError(); + /** END REACTIVE STATE **/ + const handleThreadChange = () => { + if ($page.params.thread_id) { + if (activeThread) { + threadsStore.setLastVisitedThreadId(activeThread.id); + resetMessages({ + activeThread, + setChatMessages, + setAssistantMessages + }); + } + } else { + threadsStore.setLastVisitedThreadId(''); + resetMessages({ + activeThread, + setChatMessages, + setAssistantMessages + }); + } + }; + const resetFiles = () => { uploadingFiles = false; attachedFileMetadata = []; @@ -100,13 +120,13 @@ ); const message = await messageRes.json(); // store the assistant id on the user msg to know it's associated with an assistant - message.metadata.assistant_id = $threadsStore.selectedAssistantId; + message.metadata.assistant_id = $assistantsStore.selectedAssistantId; await threadsStore.addMessageToStore(message); } else if (latestAssistantMessage?.role !== 'user') { // Streamed assistant responses don't contain an assistant_id, so we add it here // and also add a createdAt date if not present if (!latestAssistantMessage.assistant_id) { - latestAssistantMessage.assistant_id = $threadsStore.selectedAssistantId; + latestAssistantMessage.assistant_id = $assistantsStore.selectedAssistantId; } if (!latestAssistantMessage.createdAt) @@ -121,16 +141,62 @@ const handleCompletedAssistantResponse = async () => { if (componentHasMounted && $status === 'awaiting_message') { - const 
assistantResponseId = $assistantMessages[$assistantMessages.length - 1].id;
+      if ($assistantError) return;
+      if (latestAssistantMessage.role === 'user') {
+        await handleAssistantResponseError();
+        return;
+      }
+
+      const assistantResponseId = latestAssistantMessage.id;
       const messageRes = await fetch(
         `/api/messages?thread_id=${$page.params.thread_id}&message_id=${assistantResponseId}`
       );
+      if (!messageRes.ok) {
+        //useAssistants onError hook will handle this
+        return;
+      }
+
       const message = await messageRes.json();
-      await threadsStore.addMessageToStore(message);
-      threadsStore.setStreamingMessage(null);
+      if (message && !getMessageText(message)) {
+        // error with response(empty response)/timeout
+        await handleAssistantResponseError();
+      } else {
+        await threadsStore.addMessageToStore(message);
+        threadsStore.setStreamingMessage(null);
+      }
     }
   };
+  const createAssistantErrorResponse = async () => {
+    await delay(1000); // ensure error response timestamp is after user's msg
+    const newMessage = await saveMessage({
+      thread_id: data.thread.id,
+      content: ASSISTANT_ERROR_MSG,
+      role: 'assistant',
+      metadata: {
+        assistant_id: latestAssistantMessage.assistant_id || $threadsStore.selectedAssistantId
+      }
+    });
+
+    await threadsStore.addMessageToStore(newMessage);
+  };
+
+  const handleAssistantResponseError = async () => {
+    await refetchThread($page.params.thread_id); // if there was an error in the stream, we need to re-fetch to get the user's msg from the db
+    toastStore.addToast({
+      ...ERROR_GETTING_ASSISTANT_MSG_TOAST()
+    });
+    if (latestAssistantMessage.role === 'assistant') {
+      await threadsStore.deleteMessage($page.params.thread_id, latestAssistantMessage.id);
+      threadsStore.removeMessageFromStore($page.params.thread_id, latestAssistantMessage.id);
+      $assistantMessages = [...$assistantMessages.splice(-1)];
+    }
+    await createAssistantErrorResponse();
+
+    threadsStore.setStreamingMessage(null);
+    await threadsStore.setSendingBlocked(false);
+  };
+
   /** useChat - streams messages with the /api/chat route**/
   const {
     input: chatInput,
@@ -144,10 +210,10 @@
     // Handle completed AI Responses
     onFinish: async (message: VercelAIMessage) => {
       try {
-        if (data.thread?.id) {
+        if (activeThread?.id) {
           // Save with API to db
           const newMessage = await saveMessage({
-            thread_id: data.thread.id,
+            thread_id: activeThread.id,
             content: getMessageText(message),
             role: 'assistant'
           });
@@ -180,24 +246,16 @@
     submitMessage: submitAssistantMessage,
     stop: assistantStop,
     setMessages: setAssistantMessages,
-    append: assistantAppend
+    append: assistantAppend,
+    error: assistantError
   } = useAssistant({
     api: '/api/chat/assistants',
-    threadId: data.thread?.id,
-    onError: async (e) => {
-      // ignore this error b/c it is expected on cancel
-      if (e.message !== 'BodyStreamBuffer was aborted') {
-        toastStore.addToast({
-          ...ERROR_GETTING_ASSISTANT_MSG_TOAST()
-        });
-      }
-      await threadsStore.setSendingBlocked(false);
-    }
+    threadId: activeThread?.id
   });

   const sendAssistantMessage = async (e: SubmitEvent | KeyboardEvent) => {
     await threadsStore.setSendingBlocked(true);
-    if (data.thread?.id) {
+    if (activeThread?.id) {
       // assistant mode
       $assistantInput = $chatInput;
       $chatInput = ''; // clear chat input
@@ -206,8 +264,8 @@
         // submit to AI (/api/chat/assistants)
         data: {
           message: $chatInput,
-          assistantId: $threadsStore.selectedAssistantId,
-          threadId: data.thread.id
+          assistantId: $assistantsStore.selectedAssistantId,
+          threadId: activeThread.id
         }
       });
       $assistantInput = '';
@@ -218,13 +276,13 @@
   const sendChatMessage = async (e: SubmitEvent | KeyboardEvent) => {
     try {
       await threadsStore.setSendingBlocked(true);
-      if (data.thread?.id) {
+      if (activeThread?.id) {
         let extractedFilesTextString = JSON.stringify(attachedFileMetadata);

         if (attachedFileMetadata.length > 0) {
           // Save the text of the document as its own message before sending actual question
           const contextMsg = await saveMessage({
-            thread_id: data.thread.id,
+            thread_id: activeThread.id,
             content: `${FILE_UPLOAD_PROMPT}: ${extractedFilesTextString}`,
             role: 'user',
             metadata: {
@@ -237,7 +295,7 @@

         // Save with API
         const newMessage = await saveMessage({
-          thread_id: data.thread.id,
+          thread_id: activeThread.id,
           content: $chatInput,
           role: 'user',
           ...(attachedFileMetadata.length > 0
@@ -270,11 +328,11 @@

   // setSendingBlocked (when called with the value 'false') automatically handles this delay
   const onSubmit = async (e: SubmitEvent | KeyboardEvent) => {
     e.preventDefault();
-    if (($isLoading || $status === 'in_progress') && data.thread?.id) {
+    if (($isLoading || $status === 'in_progress') && activeThread?.id) {
       const isAssistantChat = $status === 'in_progress'; // message still sending
       await stopThenSave({
-        activeThreadId: data.thread.id,
+        activeThreadId: activeThread.id,
         messages: isAssistantChat ? $assistantMessages : $chatMessages,
         status: $status,
         isLoading: $isLoading || false,
@@ -285,7 +343,7 @@
       return;
     } else {
       if (sendDisabled) return;
-      if (!data.thread?.id) {
+      if (!activeThread?.id) {
         // create new thread
         await threadsStore.newThread($chatInput);
         await tick(); // allow store to update
@@ -305,19 +363,13 @@

   onMount(async () => {
     componentHasMounted = true;
-    assistantsList = [...(data.assistants || [])].map((assistant) => ({
-      id: assistant.id,
-      text: assistant.name || 'unknown'
-    }));
-    assistantsList.unshift({ id: NO_SELECTED_ASSISTANT_ID, text: 'Select assistant...' }); // add dropdown item for no assistant selected
-    assistantsList.unshift({ id: `manage-assistants`, text: 'Manage assistants' }); // add dropdown item for manage assistants button
   });

   beforeNavigate(async () => {
-    if (($isLoading || $status === 'in_progress') && data.thread?.id) {
+    if (($isLoading || $status === 'in_progress') && activeThread?.id) {
       const isAssistantChat = $status === 'in_progress';
       await stopThenSave({
-        activeThreadId: data.thread.id,
+        activeThreadId: activeThread.id,
         messages: isAssistantChat ? $assistantMessages : $chatMessages,
         status: $status,
         isLoading: $isLoading || false,
@@ -331,19 +383,21 @@
-    {#each activeThreadMessages as message, index (message.id)}
-      {#if message.metadata?.hideMessage !== 'true'}
-
-      {/if}
-    {/each}
+    {#if activeThread}
+      {#each activeThread.messages as message, index (message.id)}
+        {#if message.metadata?.hideMessage !== 'true'}
+
+        {/if}
+      {/each}
+    {/if}
     {#if $threadsStore.streamingMessage}
@@ -352,7 +406,7 @@

- +
{
-  const promises = [fetch('/api/assistants'), fetch('/api/files')];
-
-  if (params.thread_id) promises.push(fetch(`/api/threads/${params.thread_id}`));
-
-  const promiseResponses = await Promise.all(promises);
-
-  const assistants = await promiseResponses[0].json();
-  const files = await promiseResponses[1].json();
-
-  let thread: LFThread | undefined = undefined;
-  if (params.thread_id) {
-    thread = await promiseResponses[2].json();
-  }
-
-  if (browser) {
-    if (thread) {
-      // update store with latest thread fetched by page data
-      threadsStore.updateThread(thread);
-    }
-  }
-
-  return { thread, assistants, files };
-};
diff --git a/src/leapfrogai_ui/src/routes/chat/(dashboard)/[[thread_id]]/chatpage.test.ts b/src/leapfrogai_ui/src/routes/chat/(dashboard)/[[thread_id]]/chatpage.test.ts
index 0a3cefa37..21857b0e8 100644
--- a/src/leapfrogai_ui/src/routes/chat/(dashboard)/[[thread_id]]/chatpage.test.ts
+++ b/src/leapfrogai_ui/src/routes/chat/(dashboard)/[[thread_id]]/chatpage.test.ts
@@ -17,7 +17,6 @@ import {
   mockNewMessageError
 } from '$lib/mocks/chat-mocks';
 import { getMessageText } from '$helpers/threads';
-import { load } from './+page';
 import { mockOpenAI } from '../../../../../vitest-setup';

 import { ERROR_GETTING_AI_RESPONSE_TOAST, ERROR_SAVING_MSG_TOAST } from '$constants/toastMessages';
@@ -27,7 +26,6 @@
 import type { LFAssistant } from '$lib/types/assistants';
 import { delay } from '$helpers/chatHelpers';
 import { mockGetFiles } from '$lib/mocks/file-mocks';
 import { threadsStore } from '$stores';
-import { NO_SELECTED_ASSISTANT_ID } from '$constants';
 type LayoutServerLoad = {
   threads: LFThread[];
@@ -60,17 +58,9 @@ describe('when there is an active thread selected', () => {
     mockOpenAI.setMessages(allMessages);
     mockOpenAI.setFiles(files);

-    // @ts-expect-error: full mocking of load function params not necessary and is overcomplicated
-    data = await load({
-      fetch: global.fetch,
-      depends: vi.fn(),
-      params: { thread_id: fakeThreads[0].id }
-    });
-
     threadsStore.set({
       threads: fakeThreads,
       lastVisitedThreadId: fakeThreads[0].id,
-      selectedAssistantId: NO_SELECTED_ASSISTANT_ID,
       sendingBlocked: false,
       streamingMessage: null
     });
diff --git a/src/leapfrogai_ui/src/routes/chat/(dashboard)/[[thread_id]]/chatpage_no_thread.test.ts b/src/leapfrogai_ui/src/routes/chat/(dashboard)/[[thread_id]]/chatpage_no_thread.test.ts
index 71242a2b2..6ec9995cb 100644
--- a/src/leapfrogai_ui/src/routes/chat/(dashboard)/[[thread_id]]/chatpage_no_thread.test.ts
+++ b/src/leapfrogai_ui/src/routes/chat/(dashboard)/[[thread_id]]/chatpage_no_thread.test.ts
@@ -8,7 +8,7 @@ import {
   mockNewMessage,
   mockNewThreadError
 } from '$lib/mocks/chat-mocks';
-import { load } from './+page';
+
 import { mockOpenAI } from '../../../../../vitest-setup';
 import ChatPageWithToast from './ChatPageWithToast.test.svelte';
 import type { LFThread } from '$lib/types/threads';
@@ -34,13 +34,6 @@ describe('when there is NO active thread selected', () => {
     mockOpenAI.setThreads(fakeThreads);
     mockOpenAI.setMessages(allMessages);
     mockOpenAI.setFiles(files);
-
-    // @ts-expect-error: full mocking of load function params not necessary and is overcomplicated
-    data = await load({
-      params: {},
-      fetch: global.fetch,
-      depends: vi.fn()
-    });
   });

   afterAll(() => {
diff --git a/src/leapfrogai_ui/src/routes/chat/(settings)/api-keys/+page.server.ts b/src/leapfrogai_ui/src/routes/chat/(settings)/api-keys/+page.server.ts
index 1cc33e4e8..ae0ec066c 100644
--- a/src/leapfrogai_ui/src/routes/chat/(settings)/api-keys/+page.server.ts
+++ b/src/leapfrogai_ui/src/routes/chat/(settings)/api-keys/+page.server.ts
@@ -30,7 +30,6 @@ export const load: PageServerLoad = async ({ depends, locals: { session } }) =>
   if (!res.ok) {
     return error(500, { message: 'Error fetching API keys' });
   }
-  keys = (await res.json()) as APIKeyRow[];

   // convert from seconds to milliseconds
   keys.forEach((key) => {
diff --git a/src/leapfrogai_ui/src/routes/chat/(settings)/api-keys/+page.svelte b/src/leapfrogai_ui/src/routes/chat/(settings)/api-keys/+page.svelte
index e854a8e6f..413cf8e23 100644
--- a/src/leapfrogai_ui/src/routes/chat/(settings)/api-keys/+page.svelte
+++ b/src/leapfrogai_ui/src/routes/chat/(settings)/api-keys/+page.svelte
@@ -137,7 +137,11 @@
{#if editMode} -
+
{#if deleting} {#if deleting}