Add GitHub Actions health check workflow

Introduces a new GitHub Actions workflow that periodically checks service health and version information for the specified endpoints. The workflow:
- Runs on a schedule, every 5 minutes
- Checks version information
- Monitors service status
- Raises an error if any services are down
mozilla · Jan 29, 2025 · 84cd126 · 84cd126
1 parent 176f50a
commit 84cd126
Show file tree

Hide file tree

Showing 2 changed files with 137 additions and 0 deletions.
diff --git a/.github/workflows/health_check.yml b/.github/workflows/health_check.yml
@@ -0,0 +1,64 @@
+# Runs scripts/health_check.py against each environment and fails the job
+# when an environment is unreachable or reports a failing monitor.
+name: Health Check
+
+on:
+  pull_request:
+  workflow_dispatch:
+  schedule:
+    # Every 5 minutes
+    - cron: '*/5 * * * *'
+
+jobs:
+  health-check:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        environment: ['dev', 'stage', 'prod']
+      # Keep checking the other environments when one of them fails.
+      fail-fast: false
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+          cache: 'pip'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install requests
+
+      - name: Run Health Checks
+        shell: bash
+        run: |
+          set -xe
+
+          environment="${{ matrix.environment }}"
+          output_file="out.json"
+          ./scripts/health_check.py --env "$environment" --verbose --output "$output_file"
+
+          version=$(jq -r '.version' "$output_file")
+          monitors=$(jq -r '.monitors' "$output_file")
+
+          echo "Version: $version"
+          echo "Monitors: $monitors"
+
+          # The script writes null for any endpoint it could not fetch.
+          if [ "$version" = "null" ] || [ "$monitors" = "null" ]; then
+            echo "Environment $environment is not reachable"
+            exit 1
+          fi
+
+          # One "- <monitor>: <status>" line per monitor whose state is false.
+          message=$(jq -r '.monitors | to_entries[] | select(.value.state == false) | "- \(.key): \(.value.status)"' "$output_file")
+
+          echo "Environment: $environment"
+
+          # Fail the job so that a failing monitor is visible in the run status.
+          if [ -n "$message" ]; then
+            echo "$message"
+            exit 1
+          fi
diff --git a/scripts/health_check.py b/scripts/health_check.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python3
+
+import argparse
+from enum import Enum
+import json
+
+import requests
+
+ENV_ENUM = Enum("ENV", [
+    ("dev", "https://addons-dev.allizom.org"),
+    ("stage", "https://addons.allizom.org"),
+    ("prod", "https://addons.mozilla.org")
+])
+
+class Fetcher:
+    def __init__(self, env: ENV_ENUM, verbose: bool = False):
+        self.environment = ENV_ENUM[env]
+        self.verbose = verbose
+
+    def _fetch(self, path: str) -> dict[str, str] | None:
+        url = f"{self.environment.value}/{path}"
+        if self.verbose:
+            print(f"Requesting {url} for {self.environment.name}")
+
+        try:
+          response = requests.get(url)
+          response.raise_for_status()
+          try:
+              data = response.json()
+          except json.JSONDecodeError as e:
+              if self.verbose:
+                  print(f"Error decoding JSON for {url}: {e}")
+
+        except requests.exceptions.HTTPError as e:
+            if self.verbose:
+                print(f"Error fetching {url}: {e}")
+
+        if self.verbose and data is not None:
+            print(json.dumps(data, indent=2))
+
+        return data
+
+    def version(self):
+        return self._fetch('__version__')
+
+    def monitors(self):
+        return self._fetch('services/monitor.json')
+
+def main(env: ENV_ENUM, verbose: bool = False, output: str | None = None):
+    fetcher = Fetcher(env, verbose)
+
+    version_data = fetcher.version()
+    monitors_data = fetcher.monitors()
+
+    if output:
+        with open(output, 'w') as f:
+            json.dump({
+                'version': version_data,
+                'monitors': monitors_data
+            }, f, indent=2)
+    elif monitors_data is not None:
+        if any(monitor['state'] is False for monitor in monitors_data.values()):
+            raise ValueError(f'Some monitors are failing {monitors_data}')
+
+
+if __name__ == "__main__":
+    args = argparse.ArgumentParser()
+    args.add_argument("--env", type=str, choices=list(ENV_ENUM.__members__.keys()), required=True)
+    args.add_argument("--verbose", action="store_true")
+    args.add_argument("--output", type=str, required=False)
+    args = args.parse_args()
+
+    main(args.env, args.verbose, args.output)