From 3666e151f2168161313a6d4bc9d6ccf021ae8eba Mon Sep 17 00:00:00 2001 From: ivan Date: Wed, 8 Jan 2025 15:53:51 +0100 Subject: [PATCH] workflow --- .github/workflows/watch-repos.yml | 370 +++++++++++++----------------- 1 file changed, 165 insertions(+), 205 deletions(-) diff --git a/.github/workflows/watch-repos.yml b/.github/workflows/watch-repos.yml index d7b98df..d17a70e 100644 --- a/.github/workflows/watch-repos.yml +++ b/.github/workflows/watch-repos.yml @@ -1,5 +1,6 @@ name: Process Repository Changes -'on': + +on: push: branches: - main @@ -14,86 +15,66 @@ name: Process Repository Changes required: true type: boolean default: false + jobs: process-changes: if: >- - github.event_name == 'push' || (github.event_name == 'pull_request' && - github.event.pull_request.merged == true) || github.event_name == - 'workflow_dispatch' + github.event_name == 'push' || + (github.event_name == 'pull_request' && github.event.pull_request.merged == true) || + github.event_name == 'workflow_dispatch' runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 with: fetch-depth: 0 - - name: Install yq - run: > - sudo wget - https://github.com/mikefarah/yq/releases/download/v4.40.5/yq_linux_amd64 - -O /usr/local/bin/yq + - name: Install yq + run: | + sudo wget https://github.com/mikefarah/yq/releases/download/v4.40.5/yq_linux_amd64 -O /usr/local/bin/yq sudo chmod +x /usr/local/bin/yq - yq --version + - name: Load Configuration id: config env: OSIRIS_URL: 'https://osiris-server.vercel.app' - run: > - # Install yq if not present - - # sudo wget - https://github.com/mikefarah/yq/releases/download/v4.40.5/yq_linux_amd64 - -O /usr/local/bin/yq - - # sudo chmod +x /usr/local/bin/yq - - - # Get config from API endpoint - + run: | + # 1) Fetch the full config from Osiris CONFIG=$(curl -s "$OSIRIS_URL/api/config") - - - if [ -z "$CONFIG" ]; then - echo "::error::Failed to fetch configuration from osiris-server" + + # 2) Validate that it's valid YAML + echo "==== RAW CONFIG FROM OSIRIS (YAML) ====" + echo "$CONFIG" + echo "=======================================" + + echo "$CONFIG" | yq '.' > /dev/null || { + echo "::error::OSIRIS config is not valid YAML" exit 1 - fi - - - # Parse the YAML into JSON for the repository - - REPO_CONFIG=$(echo "$CONFIG" | yq -o=json ".repositories[\"${{ - github.repository }}\"]") - - + } + + # 3) Extract the repository-specific config as JSON + REPO_CONFIG=$(echo "$CONFIG" | yq -o=json ".repositories[\"${GITHUB_REPOSITORY}\"]") + + # If it's missing or null, skip the rest if [ "$REPO_CONFIG" == "null" ]; then - echo "Repository ${{ github.repository }} not configured for watching" + echo "Repository $GITHUB_REPOSITORY is not configured for watching" + echo "config_exists=false" >> $GITHUB_OUTPUT exit 0 fi - - - # Log for debugging - - echo "Repository config:" - + + # 4) Validate that REPO_CONFIG is valid JSON + echo "==== REPO CONFIG (JSON) ====" echo "$REPO_CONFIG" | jq '.' + echo "============================" + + # 5) If valid, store it as a single line to avoid multiline env issues + SINGLE_LINE_CONFIG=$(echo "$REPO_CONFIG" | jq -c '.') + + # 6) Expose outputs for future steps + echo "config_exists=true" >> $GITHUB_OUTPUT + echo "config=$SINGLE_LINE_CONFIG" >> $GITHUB_OUTPUT + echo "osiris_url=$OSIRIS_URL" >> $GITHUB_OUTPUT - - # Export config - - echo 'CONFIG<> $GITHUB_ENV - - echo "$REPO_CONFIG" >> $GITHUB_ENV - - echo 'EOF' >> $GITHUB_ENV - - - { - echo "config<> $GITHUB_OUTPUT - name: Setup API Helper if: steps.config.outputs.config_exists == 'true' run: | @@ -106,10 +87,10 @@ jobs: local retries=5 local wait=5 local timeout=60 - + for i in $(seq 1 $retries); do echo "DEBUG: API call attempt $i of $retries" - + # Make the API call local response=$(curl -X POST "$url" \ -H "Content-Type: application/json" \ @@ -119,7 +100,7 @@ jobs: --retry 3 \ --retry-delay 2 \ --data-raw "$data") - + echo "DEBUG: Testing if response is valid JSON" if echo "$response" | jq '.' >/dev/null 2>&1; then echo "DEBUG: Response is valid JSON" @@ -129,143 +110,139 @@ jobs: echo "DEBUG: Response is not valid JSON or empty" echo "Raw response: $response" fi - + echo "Waiting ${wait}s before retry..." sleep $wait wait=$((wait * 2)) done - + return 1 } EOF chmod +x api_helper.sh + - name: Full Repository Ingestion - if: >- - steps.config.outputs.config_exists == 'true' && github.event_name == - 'workflow_dispatch' && github.event.inputs.full_ingest == 'true' + if: > + steps.config.outputs.config_exists == 'true' && + github.event_name == 'workflow_dispatch' && + github.event.inputs.full_ingest == 'true' run: | - source ./api_helper.sh - - echo "Starting full repository ingestion..." - echo "DEBUG: Making initial API call..." - - # First construct the REQUEST_BODY using jq - REQUEST_BODY=$(jq -n \ + source ./api_helper.sh + + # Grab the single-line JSON config from step outputs + CONFIG="${{ steps.config.outputs.config }}" + OSIRIS_URL="${{ steps.config.outputs.osiris_url }}" + + echo "Starting full repository ingestion..." + + # Build the request body with jq, passing CONFIG as --argjson + REQUEST_BODY=$(jq -n \ + --argjson config "$CONFIG" \ + --arg repo "${{ github.repository }}" \ + --arg branch "${{ github.ref_name }}" \ + --arg event_type "${{ github.event_name }}" \ + --arg commit_sha "${{ github.sha }}" \ + '{ + "repo": $repo, + "branch": $branch, + "metadata": { + "repository": $repo, + "branch": $branch, + "event_type": $event_type, + "commit_sha": $commit_sha, + "process_timestamp": (now | strftime("%Y-%m-%dT%H:%M:%SZ")), + "config": $config + }, + "maxFileSize": ($config.max_file_size // 100000), + "maxTokens": ($config.max_tokens // 50000), + "forceReplace": true + }' + ) + + echo "==== DEBUG: REQUEST_BODY ====" + echo "$REQUEST_BODY" | jq '.' + echo "=============================" + + # Make the API call to ingest-repo + response=$(call_api "$OSIRIS_URL/api/ingest-repo" "$REQUEST_BODY") + api_status=$? + + if [ $api_status -ne 0 ]; then + echo "::error::API call failed" + exit 1 + fi + + total_batches=$(echo "$response" | jq -r '.totalBatches') + + if [ -z "$total_batches" ] || [ "$total_batches" = "null" ]; then + echo "::error::Could not find totalBatches in response" + echo "Response was: $response" + exit 1 + fi + + echo "Processing $total_batches batches..." + successful_batches=0 + failed_batches=0 + + # Process each batch + for ((batch=0; batch- - steps.config.outputs.config_exists == 'true' && !(github.event_name - == 'workflow_dispatch' && github.event.inputs.full_ingest == 'true') - run: > - source ./api_helper.sh + [ $((batch + 1)) -lt "$total_batches" ] && sleep 2 + done + + echo "Repository ingestion completed:" + echo "- Total batches: $total_batches" + echo "- Successful: $successful_batches" + echo "- Failed: $failed_batches" - # Debug: Print full config at start + if [ "$successful_batches" -eq "$total_batches" ]; then + echo "::notice::Successfully processed all batches" + else + echo "::warning::Completed with $failed_batches failed batches" + [ "$successful_batches" -eq 0 ] && exit 1 + fi - echo "Full Configuration from env:" + - name: Process Incremental Changes + if: > + steps.config.outputs.config_exists == 'true' && + !(github.event_name == 'workflow_dispatch' && github.event.inputs.full_ingest == 'true') + run: | + source ./api_helper.sh + + CONFIG="${{ steps.config.outputs.config }}" + OSIRIS_URL="${{ steps.config.outputs.osiris_url }}" + echo "Full Configuration:" echo "$CONFIG" | jq '.' - # Create extensions file - - echo "$CONFIG" | jq -r '.included_extensions[]' | tr -d '\r' > - included_extensions.txt - - + echo "$CONFIG" | jq -r '.included_extensions[]' | tr -d '\r' > included_extensions.txt echo "Available extensions:" - cat included_extensions.txt - - # Get commit range - + # Determine base/head commit if [ "${{ github.event_name }}" == "push" ]; then BASE_SHA="${{ github.event.before }}" HEAD_SHA="${{ github.event.after }}" @@ -277,35 +254,21 @@ jobs: HEAD_SHA=$(git rev-parse HEAD) fi - echo "Base SHA: $BASE_SHA" - echo "Head SHA: $HEAD_SHA" - - # Process changes with improved debug output - - echo "Starting to process changed files..." - - - # Create temporary directory for processing - + # Collect changed files into a temp directory TEMP_DIR=$(mktemp -d) - trap 'rm -rf "$TEMP_DIR"' EXIT - - # Process each changed file - - git diff --name-status --no-renames $BASE_SHA $HEAD_SHA | while read - -r status filepath; do + git diff --name-status --no-renames $BASE_SHA $HEAD_SHA | while read -r status filepath; do echo "Processing: $filepath (Status: $status)" - [ -z "$filepath" ] && continue - - ext=$(echo "${filepath##*.}" | tr -d '[:space:]') + + ext="${filepath##*.}" echo "File extension: '$ext'" - + + # Check if extension is included if grep -ixFq "$ext" included_extensions.txt; then echo "Extension '$ext' IS included" if [ "$status" = "M" ] || [ "$status" = "A" ]; then @@ -319,13 +282,10 @@ jobs: fi done - - # Process collected changes - if [ -f "$TEMP_DIR/changes.txt" ]; then echo "Found changes to process" - # Build changes object + # Build changes JSON changes_json="{\"added\":[" first=true while IFS=' ' read -r status filepath content; do @@ -334,7 +294,7 @@ jobs: changes_json+="{\"path\":\"$filepath\",\"content\":$content}" fi done < "$TEMP_DIR/changes.txt" - + changes_json+="],\"modified\":[" first=true while IFS=' ' read -r status filepath content; do @@ -343,7 +303,7 @@ jobs: changes_json+="{\"path\":\"$filepath\",\"content\":$content}" fi done < "$TEMP_DIR/changes.txt" - + changes_json+="],\"removed\":[" first=true while IFS=' ' read -r status filepath content; do @@ -352,11 +312,11 @@ jobs: changes_json+="{\"path\":\"$filepath\"}" fi done < "$TEMP_DIR/changes.txt" - + changes_json+="]}" - - # Call ingest-changes endpoint - if ! call_api "${{ steps.config.outputs.osiris_url }}/api/ingest-changes" "{ + + # Ingest changes + if ! call_api "$OSIRIS_URL/api/ingest-changes" "{ \"repository\": { \"fullName\": \"${{ github.repository }}\", \"defaultBranch\": \"${{ github.ref_name }}\" @@ -369,8 +329,8 @@ jobs: \"commit_sha\": \"${{ github.sha }}\", \"base_sha\": \"$BASE_SHA\", \"head_sha\": \"$HEAD_SHA\", - \"max_file_size\": $(echo "$CONFIG" | jq .max_file_size), - \"max_tokens\": $(echo "$CONFIG" | jq .max_tokens), + \"max_file_size\": $(echo \"$CONFIG\" | jq .max_file_size), + \"max_tokens\": $(echo \"$CONFIG\" | jq .max_tokens), \"process_timestamp\": \"$(date -u +"%Y-%m-%dT%H:%M:%SZ")\" } }"; then @@ -380,6 +340,7 @@ jobs: else echo "No relevant file changes detected" fi + - name: Report Status if: always() run: | @@ -392,5 +353,4 @@ jobs: echo "::notice::Successfully processed changes" fi else - echo "::error::Failed to process changes" - fi + echo "::error::Failed to process changes" \ No newline at end of file