# Process Repository Changes
# Workflow file for run #10 of "Process Repository Changes".

name: Process Repository Changes

on:
  # Incremental processing for each push that lands on main/master.
  push:
    branches: [main, master]
  # Fires whenever a PR is closed; the job-level `if` keeps only merged ones.
  pull_request:
    types: [closed]
  # Manual run. Setting `full_ingest` selects the full-repository step
  # instead of the incremental one.
  workflow_dispatch:
    inputs:
      full_ingest:
        description: 'Perform full repository ingestion'
        required: true
        type: boolean
        default: false
jobs:
  process-changes:
    # Pushes and manual dispatches always run; a closed PR runs only if it
    # was actually merged.
    if: >-
      github.event_name == 'push' ||
      (github.event_name == 'pull_request' && github.event.pull_request.merged == true) ||
      github.event_name == 'workflow_dispatch'
    runs-on: ubuntu-latest
    # Least privilege: the steps in this job only read repository contents
    # (checkout + git diff/show); nothing writes back to GitHub.
    permissions:
      contents: read
    steps:
      - uses: actions/checkout@v4
        with:
          # Full history, so later steps can diff arbitrary base/head commits.
          fetch-depth: 0
- name: Install yq
  run: |
    # Install the pinned mikefarah/yq release binary system-wide, then print
    # its version as a smoke test.
    yq_bin=/usr/local/bin/yq
    yq_url=https://github.com/mikefarah/yq/releases/download/v4.40.5/yq_linux_amd64
    sudo wget "$yq_url" -O "$yq_bin"
    sudo chmod +x "$yq_bin"
    yq --version
- name: Load Configuration
  id: config
  env:
    OSIRIS_URL: "https://osiris-server.vercel.app"
    # Handed to the script through the environment instead of being spliced
    # into the shell text / yq expression.
    REPOSITORY: ${{ github.repository }}
  run: |
    # Fetches the watcher configuration from $OSIRIS_URL/api/config, selects
    # this repository's entry and publishes it as:
    #   - env var CONFIG (JSON) for later steps
    #   - step outputs: config (JSON), osiris_url, config_exists=true
    # If the repository has no entry the step succeeds WITHOUT setting
    # config_exists, which makes the later steps skip themselves.

    # --fail turns HTTP 4xx/5xx into a non-zero exit so an error page is never
    # mistaken for configuration.
    if ! CONFIG=$(curl --fail --silent --show-error "$OSIRIS_URL/api/config") || [ -z "$CONFIG" ]; then
      echo "::error::Failed to fetch configuration from osiris-server"
      exit 1
    fi

    # Look up the repository entry by key and emit it as JSON.
    if ! REPO_CONFIG=$(printf '%s\n' "$CONFIG" | yq -o=json '.repositories[strenv(REPOSITORY)]'); then
      echo "::error::Failed to parse configuration from osiris-server"
      exit 1
    fi
    if [ -z "$REPO_CONFIG" ] || [ "$REPO_CONFIG" == "null" ]; then
      echo "Repository $REPOSITORY not configured for watching"
      exit 0
    fi

    # Log for debugging
    echo "Repository config:"
    printf '%s\n' "$REPO_CONFIG" | jq '.'

    # Random heredoc delimiter: a fixed "EOF" would truncate the value if the
    # configuration ever contained a line consisting of "EOF".
    delim="EOF_$(dd if=/dev/urandom bs=15 count=1 status=none | base64 | tr -d '/+=')"
    {
      echo "CONFIG<<$delim"
      printf '%s\n' "$REPO_CONFIG"
      echo "$delim"
    } >> "$GITHUB_ENV"
    {
      echo "config<<$delim"
      printf '%s\n' "$REPO_CONFIG"
      echo "$delim"
      echo "osiris_url=$OSIRIS_URL"
      echo "config_exists=true"
    } >> "$GITHUB_OUTPUT"
- name: Setup API Helper
  if: steps.config.outputs.config_exists == 'true'
  run: |
    # Writes api_helper.sh into the workspace; later steps `source` it to get
    # call_api(). The quoted heredoc delimiter (\EOF) keeps the helper text
    # literal, so nothing below is expanded while the file is written.
    cat << \EOF > api_helper.sh
    #!/bin/bash
    # call_api URL JSON_BODY
    #   POSTs JSON_BODY to URL as application/json.
    #   stdout : the response body only (safe to capture with $(...))
    #   stderr : progress and error messages
    #   return : 0 on success, 1 after all attempts have failed
    #   Up to 5 attempts, with a delay that starts at 5s and doubles each
    #   time; each attempt also uses curl's own --retry 3.
    call_api() {
      local url="$1"
      local data="$2"
      local retries=5
      local delay=5
      local timeout=60
      local response attempt
      for attempt in $(seq 1 "$retries"); do
        # Diagnostics go to stderr so callers capturing stdout get pure JSON.
        echo "API call attempt $attempt of $retries" >&2
        # The body is streamed on stdin (--data-binary @-) rather than passed
        # as an argument, so large payloads do not hit the per-argument size
        # limit of exec.
        if response=$(printf '%s' "$data" | curl -X POST "$url" \
          -H "Content-Type: application/json" \
          -H "Accept: application/json" \
          --fail \
          --silent \
          --show-error \
          --max-time "$timeout" \
          --retry 3 \
          --retry-delay 2 \
          --data-binary @-); then
          echo "$response"
          return 0
        fi
        # No point sleeping after the final attempt.
        if [ "$attempt" -lt "$retries" ]; then
          echo "API call failed, waiting ${delay}s before retry..." >&2
          sleep "$delay"
          delay=$((delay * 2))
        fi
      done
      echo "::error::API call failed after $retries attempts" >&2
      return 1
    }
    EOF
    chmod +x api_helper.sh
- name: Full Repository Ingestion
  if: >-
    steps.config.outputs.config_exists == 'true' &&
    github.event_name == 'workflow_dispatch' &&
    github.event.inputs.full_ingest == 'true'
  env:
    # Context values are passed as environment variables so they are never
    # interpolated into the script text or hand-escaped into JSON.
    OSIRIS_URL: ${{ steps.config.outputs.osiris_url }}
    REPOSITORY: ${{ github.repository }}
    BRANCH: ${{ github.ref_name }}
    EVENT_NAME: ${{ github.event_name }}
    COMMIT_SHA: ${{ github.sha }}
  run: |
    # Asks the server to start a full ingestion (POST /api/ingest-repo), then
    # GETs every batch URL it returns. The step fails only when the initial
    # call fails, the response is unusable, or every batch fails.
    source ./api_helper.sh
    echo "Starting full repository ingestion..."

    # Validate required variables
    if [ -z "$CONFIG" ]; then
      echo "::error::CONFIG variable is empty"
      exit 1
    fi
    if [ -z "$OSIRIS_URL" ]; then
      echo "::error::osiris_url is not set"
      exit 1
    fi

    # Debug output to help with troubleshooting
    echo "Repository: $REPOSITORY"
    echo "Branch: $BRANCH"
    echo "Event: $EVENT_NAME"

    # Normalise CONFIG to compact JSON. The check is part of the assignment so
    # it still runs under `bash -e`; the emptiness test catches an upstream yq
    # failure that jq would otherwise pass through as "no output".
    if ! CONFIG_JSON=$(printf '%s\n' "$CONFIG" | yq -o=json '.' | jq -c '.') || [ -z "$CONFIG_JSON" ]; then
      echo "::error::Failed to convert config to JSON"
      exit 1
    fi
    echo "Using configuration:"
    printf '%s\n' "$CONFIG_JSON" | jq '.'

    # Prepare timestamp
    TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

    # Let jq build the request body so every string is escaped correctly.
    payload=$(jq -n \
      --arg repo "$REPOSITORY" \
      --arg branch "$BRANCH" \
      --arg event "$EVENT_NAME" \
      --arg sha "$COMMIT_SHA" \
      --arg ts "$TIMESTAMP" \
      --argjson config "$CONFIG_JSON" \
      '{
        repo: $repo,
        branch: $branch,
        metadata: {
          repository: $repo,
          branch: $branch,
          event_type: $event,
          commit_sha: $sha,
          process_timestamp: $ts,
          config: $config
        }
      }')

    echo "Initializing repository ingestion..."
    if ! response=$(call_api "$OSIRIS_URL/api/ingest-repo" "$payload"); then
      # Whatever call_api printed on stdout was captured; show it before failing.
      printf '%s\n' "$response"
      echo "::error::Failed to initialize repository ingestion"
      exit 1
    fi

    # call_api may emit "API call ..." progress lines on the same stream as
    # the body; drop them so response.json holds only the server's JSON.
    # (`|| true`: grep exits 1 when it prints nothing.)
    printf '%s\n' "$response" | grep -v '^API call ' > response.json || true

    # Log raw response for debugging
    echo "Raw response:"
    cat response.json

    # Validate JSON structure
    if ! jq -e '.' response.json >/dev/null 2>&1; then
      echo "::error::Response is not valid JSON"
      exit 1
    fi

    # Extract required fields with error checking
    total_files=$(jq -r '.totalFiles? // empty' response.json)
    total_batches=$(jq -r '.totalBatches? // empty' response.json)
    if [ -z "$total_files" ] || [ -z "$total_batches" ]; then
      echo "::error::Response missing required fields"
      echo "Response was:"
      cat response.json
      exit 1
    fi
    echo "Found $total_files files to process in $total_batches batches"

    # Extract and validate batch URLs (`[]?` yields nothing if the key is absent)
    mapfile -t batch_urls < <(jq -r '.batchUrls[]?' response.json)
    if [ "${#batch_urls[@]}" -eq 0 ]; then
      echo "::error::No batch URLs found in response"
      echo "Response was:"
      cat response.json
      exit 1
    fi
    if [ "${#batch_urls[@]}" -ne "$total_batches" ]; then
      echo "::warning::Batch URL count (${#batch_urls[@]}) doesn't match total batches ($total_batches)"
    fi

    # Process batches
    echo "Starting batch processing..."
    successful_batches=0
    failed_batches=0
    max_retries=3
    for i in "${!batch_urls[@]}"; do
      batch_url="${batch_urls[$i]}"
      current_batch=$((i + 1))
      echo "Processing batch $current_batch of ${#batch_urls[@]}"
      echo "URL: $batch_url"

      # A batch counts as successful when the GET succeeds, the body is JSON,
      # and it carries no truthy `.error`.
      retry_count=0
      while [ "$retry_count" -lt "$max_retries" ]; do
        if batch_response=$(curl -s -f "$batch_url"); then
          if printf '%s\n' "$batch_response" | jq -e '.' >/dev/null 2>&1; then
            if printf '%s\n' "$batch_response" | jq -e '.error' >/dev/null 2>&1; then
              echo "::warning::Batch $current_batch returned error:"
              printf '%s\n' "$batch_response" | jq '.error'
            else
              echo "Batch $current_batch processed successfully"
              successful_batches=$((successful_batches + 1))
              break
            fi
          else
            echo "::warning::Invalid JSON response from batch $current_batch"
          fi
        fi
        retry_count=$((retry_count + 1))
        if [ "$retry_count" -lt "$max_retries" ]; then
          echo "Retrying batch $current_batch (attempt $((retry_count + 1))/$max_retries)..."
          sleep 2
        else
          echo "::warning::Failed to process batch $current_batch after $max_retries attempts"
          failed_batches=$((failed_batches + 1))
        fi
      done

      # Add delay between batches
      if [ "$current_batch" -lt "${#batch_urls[@]}" ]; then
        echo "Waiting 2 seconds before next batch..."
        sleep 2
      fi
    done

    # Final status report
    echo "Repository ingestion completed"
    echo "Summary:"
    echo "- Total batches: ${#batch_urls[@]}"
    echo "- Successful: $successful_batches"
    echo "- Failed: $failed_batches"

    # Set exit status based on results
    if [ "$successful_batches" -eq "${#batch_urls[@]}" ]; then
      echo "::notice::Successfully processed all batches"
    else
      echo "::warning::Completed with $failed_batches failed batches"
      # Only exit with error if all batches failed
      if [ "$successful_batches" -eq 0 ]; then
        exit 1
      fi
    fi
- name: Process Incremental Changes
  if: >-
    steps.config.outputs.config_exists == 'true' &&
    !(github.event_name == 'workflow_dispatch' && github.event.inputs.full_ingest == 'true')
  env:
    # Context values are passed as environment variables so they are never
    # interpolated into the script text or hand-escaped into JSON.
    OSIRIS_URL: ${{ steps.config.outputs.osiris_url }}
    REPOSITORY: ${{ github.repository }}
    BRANCH: ${{ github.ref_name }}
    EVENT_NAME: ${{ github.event_name }}
    COMMIT_SHA: ${{ github.sha }}
    PUSH_BEFORE: ${{ github.event.before }}
    PUSH_AFTER: ${{ github.event.after }}
    PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
    PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
  run: |
    # Diffs BASE..HEAD, keeps files whose extension is listed in
    # CONFIG.included_extensions, and POSTs the added/modified/removed sets
    # (with file contents for A/M) to /api/ingest-changes.
    source ./api_helper.sh

    # Debug: Print full config at start
    echo "Full Configuration from env:"
    printf '%s\n' "$CONFIG" | jq '.'

    # One extension per line, carriage returns stripped
    printf '%s\n' "$CONFIG" | jq -r '.included_extensions[]' | tr -d '\r' > included_extensions.txt
    echo "Available extensions:"
    cat included_extensions.txt

    # Get commit range
    case "$EVENT_NAME" in
      push)
        BASE_SHA="$PUSH_BEFORE"
        HEAD_SHA="$PUSH_AFTER"
        ;;
      pull_request)
        BASE_SHA="$PR_BASE_SHA"
        HEAD_SHA="$PR_HEAD_SHA"
        ;;
      *)
        HEAD_SHA=$(git rev-parse HEAD)
        # A root commit has no parent; leave BASE empty and fall through to
        # the empty-tree fallback below.
        BASE_SHA=$(git rev-parse --verify --quiet 'HEAD^') || BASE_SHA=""
        ;;
    esac
    # A push that creates the branch reports an all-zero "before" SHA. In that
    # case (or with no parent) diff against the empty tree so every file in
    # HEAD shows up as added.
    if [ -z "$BASE_SHA" ] || [ -z "${BASE_SHA//0/}" ]; then
      BASE_SHA=$(git hash-object -t tree /dev/null)
    fi
    echo "Base SHA: $BASE_SHA"
    echo "Head SHA: $HEAD_SHA"

    echo "Starting to process changed files..."
    # Create temporary directory for processing
    TEMP_DIR=$(mktemp -d)
    trap 'rm -rf "$TEMP_DIR"' EXIT

    # Run the diff on its own so a failure (e.g. a commit that was not
    # fetched) stops the step instead of looking like "no changes".
    # -z gives NUL-separated, unquoted "status NUL path NUL" records, which
    # keeps paths containing spaces or special characters intact.
    if ! git diff -z --name-status --no-renames "$BASE_SHA" "$HEAD_SHA" > "$TEMP_DIR/diff.z"; then
      echo "::error::Unable to diff $BASE_SHA..$HEAD_SHA"
      exit 1
    fi

    # Collect one JSON object per relevant file; jq does all string escaping.
    while IFS= read -r -d '' status && IFS= read -r -d '' filepath; do
      echo "Processing: $filepath (Status: $status)"
      [ -z "$filepath" ] && continue
      ext=$(printf '%s' "${filepath##*.}" | tr -d '[:space:]')
      echo "File extension: '$ext'"
      if grep -ixFq -- "$ext" included_extensions.txt; then
        echo "Extension '$ext' IS included"
        case "$status" in
          A|M)
            # -R -s: read the whole file as a single raw string.
            git show "$HEAD_SHA:$filepath" 2>/dev/null \
              | jq -Rsc --arg status "$status" --arg path "$filepath" \
                  '{status: $status, path: $path, content: .}' \
              >> "$TEMP_DIR/changes.jsonl" || continue
            ;;
          D)
            jq -nc --arg status "$status" --arg path "$filepath" \
              '{status: $status, path: $path}' >> "$TEMP_DIR/changes.jsonl"
            ;;
        esac
      else
        echo "Extension '$ext' is NOT included"
      fi
    done < "$TEMP_DIR/diff.z"

    # Process collected changes
    if [ -s "$TEMP_DIR/changes.jsonl" ]; then
      echo "Found changes to process"
      # Slurp the per-file records and assemble the whole request in one pass.
      if ! payload=$(jq -s \
        --arg repo "$REPOSITORY" \
        --arg branch "$BRANCH" \
        --arg event "$EVENT_NAME" \
        --arg sha "$COMMIT_SHA" \
        --arg base "$BASE_SHA" \
        --arg head "$HEAD_SHA" \
        --arg ts "$(date -u +"%Y-%m-%dT%H:%M:%SZ")" \
        --argjson config "$CONFIG" \
        '{
          repository: {fullName: $repo, defaultBranch: $branch},
          changes: {
            added:    map(select(.status == "A") | {path, content}),
            modified: map(select(.status == "M") | {path, content}),
            removed:  map(select(.status == "D") | {path})
          },
          metadata: {
            repository: $repo,
            branch: $branch,
            event_type: $event,
            commit_sha: $sha,
            base_sha: $base,
            head_sha: $head,
            max_file_size: $config.max_file_size,
            max_tokens: $config.max_tokens,
            process_timestamp: $ts
          }
        }' "$TEMP_DIR/changes.jsonl"); then
        echo "::error::Failed to build changes payload"
        exit 1
      fi

      # Call ingest-changes endpoint
      if ! call_api "$OSIRIS_URL/api/ingest-changes" "$payload"; then
        echo "::error::Failed to process changes"
        exit 1
      fi
    else
      echo "No relevant file changes detected"
    fi
- name: Report Status
  if: always()
  env:
    CONFIG_EXISTS: ${{ steps.config.outputs.config_exists }}
    JOB_STATUS: ${{ job.status }}
    EVENT_NAME: ${{ github.event_name }}
    FULL_INGEST: ${{ github.event.inputs.full_ingest }}
  run: |
    # Emits a single summary annotation for the run.
    # Job status is checked first: when an earlier step fails before
    # config_exists is set (e.g. the configuration fetch itself), the run must
    # be reported as failed rather than as "not configured".
    if [ "$JOB_STATUS" != "success" ]; then
      echo "::error::Failed to process changes"
    elif [ "$CONFIG_EXISTS" != "true" ]; then
      echo "::notice::Repository not configured for watching"
    elif [ "$EVENT_NAME" == "workflow_dispatch" ] && [ "$FULL_INGEST" == "true" ]; then
      echo "::notice::Successfully completed full repository ingestion"
    else
      echo "::notice::Successfully processed changes"
    fi