From 8f70f955f118b1abdd5e6783dc3996f940998403 Mon Sep 17 00:00:00 2001 From: ivan Date: Wed, 8 Jan 2025 15:54:39 +0100 Subject: [PATCH] workflow --- .github/workflows/watch-repos.yml | 370 +++++++++++++++++------------- 1 file changed, 205 insertions(+), 165 deletions(-) diff --git a/.github/workflows/watch-repos.yml b/.github/workflows/watch-repos.yml index d17a70e..d7b98df 100644 --- a/.github/workflows/watch-repos.yml +++ b/.github/workflows/watch-repos.yml @@ -1,6 +1,5 @@ name: Process Repository Changes - -on: +'on': push: branches: - main @@ -15,66 +14,86 @@ on: required: true type: boolean default: false - jobs: process-changes: if: >- - github.event_name == 'push' || - (github.event_name == 'pull_request' && github.event.pull_request.merged == true) || - github.event_name == 'workflow_dispatch' + github.event_name == 'push' || (github.event_name == 'pull_request' && + github.event.pull_request.merged == true) || github.event_name == + 'workflow_dispatch' runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 with: fetch-depth: 0 - - name: Install yq - run: | - sudo wget https://github.com/mikefarah/yq/releases/download/v4.40.5/yq_linux_amd64 -O /usr/local/bin/yq + run: > + sudo wget + https://github.com/mikefarah/yq/releases/download/v4.40.5/yq_linux_amd64 + -O /usr/local/bin/yq + sudo chmod +x /usr/local/bin/yq - yq --version + yq --version - name: Load Configuration id: config env: OSIRIS_URL: 'https://osiris-server.vercel.app' - run: | - # 1) Fetch the full config from Osiris + run: > + # Install yq if not present + + # sudo wget + https://github.com/mikefarah/yq/releases/download/v4.40.5/yq_linux_amd64 + -O /usr/local/bin/yq + + # sudo chmod +x /usr/local/bin/yq + + + # Get config from API endpoint + CONFIG=$(curl -s "$OSIRIS_URL/api/config") - - # 2) Validate that it's valid YAML - echo "==== RAW CONFIG FROM OSIRIS (YAML) ====" - echo "$CONFIG" - echo "=======================================" - - echo "$CONFIG" | yq '.' > /dev/null || { - echo "::error::OSIRIS config is not valid YAML" + + + if [ -z "$CONFIG" ]; then + echo "::error::Failed to fetch configuration from osiris-server" exit 1 - } - - # 3) Extract the repository-specific config as JSON - REPO_CONFIG=$(echo "$CONFIG" | yq -o=json ".repositories[\"${GITHUB_REPOSITORY}\"]") - - # If it's missing or null, skip the rest + fi + + + # Parse the YAML into JSON for the repository + + REPO_CONFIG=$(echo "$CONFIG" | yq -o=json ".repositories[\"${{ + github.repository }}\"]") + + if [ "$REPO_CONFIG" == "null" ]; then - echo "Repository $GITHUB_REPOSITORY is not configured for watching" - echo "config_exists=false" >> $GITHUB_OUTPUT + echo "Repository ${{ github.repository }} not configured for watching" exit 0 fi - - # 4) Validate that REPO_CONFIG is valid JSON - echo "==== REPO CONFIG (JSON) ====" + + + # Log for debugging + + echo "Repository config:" + echo "$REPO_CONFIG" | jq '.' - echo "============================" - - # 5) If valid, store it as a single line to avoid multiline env issues - SINGLE_LINE_CONFIG=$(echo "$REPO_CONFIG" | jq -c '.') - - # 6) Expose outputs for future steps - echo "config_exists=true" >> $GITHUB_OUTPUT - echo "config=$SINGLE_LINE_CONFIG" >> $GITHUB_OUTPUT - echo "osiris_url=$OSIRIS_URL" >> $GITHUB_OUTPUT + + # Export config + + echo 'CONFIG<> $GITHUB_ENV + + echo "$REPO_CONFIG" >> $GITHUB_ENV + + echo 'EOF' >> $GITHUB_ENV + + + { + echo "config<> $GITHUB_OUTPUT - name: Setup API Helper if: steps.config.outputs.config_exists == 'true' run: | @@ -87,10 +106,10 @@ jobs: local retries=5 local wait=5 local timeout=60 - + for i in $(seq 1 $retries); do echo "DEBUG: API call attempt $i of $retries" - + # Make the API call local response=$(curl -X POST "$url" \ -H "Content-Type: application/json" \ @@ -100,7 +119,7 @@ jobs: --retry 3 \ --retry-delay 2 \ --data-raw "$data") - + echo "DEBUG: Testing if response is valid JSON" if echo "$response" | jq '.' >/dev/null 2>&1; then echo "DEBUG: Response is valid JSON" @@ -110,139 +129,143 @@ jobs: echo "DEBUG: Response is not valid JSON or empty" echo "Raw response: $response" fi - + echo "Waiting ${wait}s before retry..." sleep $wait wait=$((wait * 2)) done - + return 1 } EOF chmod +x api_helper.sh - - name: Full Repository Ingestion - if: > - steps.config.outputs.config_exists == 'true' && - github.event_name == 'workflow_dispatch' && - github.event.inputs.full_ingest == 'true' + if: >- + steps.config.outputs.config_exists == 'true' && github.event_name == + 'workflow_dispatch' && github.event.inputs.full_ingest == 'true' run: | - source ./api_helper.sh - - # Grab the single-line JSON config from step outputs - CONFIG="${{ steps.config.outputs.config }}" - OSIRIS_URL="${{ steps.config.outputs.osiris_url }}" - - echo "Starting full repository ingestion..." - - # Build the request body with jq, passing CONFIG as --argjson - REQUEST_BODY=$(jq -n \ - --argjson config "$CONFIG" \ - --arg repo "${{ github.repository }}" \ - --arg branch "${{ github.ref_name }}" \ - --arg event_type "${{ github.event_name }}" \ - --arg commit_sha "${{ github.sha }}" \ - '{ - "repo": $repo, - "branch": $branch, - "metadata": { - "repository": $repo, - "branch": $branch, - "event_type": $event_type, - "commit_sha": $commit_sha, - "process_timestamp": (now | strftime("%Y-%m-%dT%H:%M:%SZ")), - "config": $config - }, - "maxFileSize": ($config.max_file_size // 100000), - "maxTokens": ($config.max_tokens // 50000), - "forceReplace": true - }' - ) - - echo "==== DEBUG: REQUEST_BODY ====" - echo "$REQUEST_BODY" | jq '.' - echo "=============================" - - # Make the API call to ingest-repo - response=$(call_api "$OSIRIS_URL/api/ingest-repo" "$REQUEST_BODY") - api_status=$? - - if [ $api_status -ne 0 ]; then - echo "::error::API call failed" - exit 1 - fi - - total_batches=$(echo "$response" | jq -r '.totalBatches') - - if [ -z "$total_batches" ] || [ "$total_batches" = "null" ]; then - echo "::error::Could not find totalBatches in response" - echo "Response was: $response" - exit 1 - fi - - echo "Processing $total_batches batches..." - successful_batches=0 - failed_batches=0 - - # Process each batch - for ((batch=0; batch- + steps.config.outputs.config_exists == 'true' && !(github.event_name + == 'workflow_dispatch' && github.event.inputs.full_ingest == 'true') + run: > + source ./api_helper.sh - [ $((batch + 1)) -lt "$total_batches" ] && sleep 2 - done - - echo "Repository ingestion completed:" - echo "- Total batches: $total_batches" - echo "- Successful: $successful_batches" - echo "- Failed: $failed_batches" - if [ "$successful_batches" -eq "$total_batches" ]; then - echo "::notice::Successfully processed all batches" - else - echo "::warning::Completed with $failed_batches failed batches" - [ "$successful_batches" -eq 0 ] && exit 1 - fi + # Debug: Print full config at start - - name: Process Incremental Changes - if: > - steps.config.outputs.config_exists == 'true' && - !(github.event_name == 'workflow_dispatch' && github.event.inputs.full_ingest == 'true') - run: | - source ./api_helper.sh - - CONFIG="${{ steps.config.outputs.config }}" - OSIRIS_URL="${{ steps.config.outputs.osiris_url }}" + echo "Full Configuration from env:" - echo "Full Configuration:" echo "$CONFIG" | jq '.' + # Create extensions file - echo "$CONFIG" | jq -r '.included_extensions[]' | tr -d '\r' > included_extensions.txt + + echo "$CONFIG" | jq -r '.included_extensions[]' | tr -d '\r' > + included_extensions.txt + + echo "Available extensions:" + cat included_extensions.txt - # Determine base/head commit + + # Get commit range + if [ "${{ github.event_name }}" == "push" ]; then BASE_SHA="${{ github.event.before }}" HEAD_SHA="${{ github.event.after }}" @@ -254,21 +277,35 @@ jobs: HEAD_SHA=$(git rev-parse HEAD) fi + echo "Base SHA: $BASE_SHA" + echo "Head SHA: $HEAD_SHA" - # Collect changed files into a temp directory + + # Process changes with improved debug output + + echo "Starting to process changed files..." + + + # Create temporary directory for processing + TEMP_DIR=$(mktemp -d) + trap 'rm -rf "$TEMP_DIR"' EXIT - git diff --name-status --no-renames $BASE_SHA $HEAD_SHA | while read -r status filepath; do + + # Process each changed file + + git diff --name-status --no-renames $BASE_SHA $HEAD_SHA | while read + -r status filepath; do echo "Processing: $filepath (Status: $status)" + [ -z "$filepath" ] && continue - - ext="${filepath##*.}" + + ext=$(echo "${filepath##*.}" | tr -d '[:space:]') echo "File extension: '$ext'" - - # Check if extension is included + if grep -ixFq "$ext" included_extensions.txt; then echo "Extension '$ext' IS included" if [ "$status" = "M" ] || [ "$status" = "A" ]; then @@ -282,10 +319,13 @@ jobs: fi done + + # Process collected changes + if [ -f "$TEMP_DIR/changes.txt" ]; then echo "Found changes to process" - # Build changes JSON + # Build changes object changes_json="{\"added\":[" first=true while IFS=' ' read -r status filepath content; do @@ -294,7 +334,7 @@ jobs: changes_json+="{\"path\":\"$filepath\",\"content\":$content}" fi done < "$TEMP_DIR/changes.txt" - + changes_json+="],\"modified\":[" first=true while IFS=' ' read -r status filepath content; do @@ -303,7 +343,7 @@ jobs: changes_json+="{\"path\":\"$filepath\",\"content\":$content}" fi done < "$TEMP_DIR/changes.txt" - + changes_json+="],\"removed\":[" first=true while IFS=' ' read -r status filepath content; do @@ -312,11 +352,11 @@ jobs: changes_json+="{\"path\":\"$filepath\"}" fi done < "$TEMP_DIR/changes.txt" - + changes_json+="]}" - - # Ingest changes - if ! call_api "$OSIRIS_URL/api/ingest-changes" "{ + + # Call ingest-changes endpoint + if ! call_api "${{ steps.config.outputs.osiris_url }}/api/ingest-changes" "{ \"repository\": { \"fullName\": \"${{ github.repository }}\", \"defaultBranch\": \"${{ github.ref_name }}\" @@ -329,8 +369,8 @@ jobs: \"commit_sha\": \"${{ github.sha }}\", \"base_sha\": \"$BASE_SHA\", \"head_sha\": \"$HEAD_SHA\", - \"max_file_size\": $(echo \"$CONFIG\" | jq .max_file_size), - \"max_tokens\": $(echo \"$CONFIG\" | jq .max_tokens), + \"max_file_size\": $(echo "$CONFIG" | jq .max_file_size), + \"max_tokens\": $(echo "$CONFIG" | jq .max_tokens), \"process_timestamp\": \"$(date -u +"%Y-%m-%dT%H:%M:%SZ")\" } }"; then @@ -340,7 +380,6 @@ jobs: else echo "No relevant file changes detected" fi - - name: Report Status if: always() run: | @@ -353,4 +392,5 @@ jobs: echo "::notice::Successfully processed changes" fi else - echo "::error::Failed to process changes" \ No newline at end of file + echo "::error::Failed to process changes" + fi