workflow #33
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Process Repository Changes | |
'on': | |
push: | |
branches: | |
- main | |
- master | |
pull_request: | |
types: | |
- closed | |
workflow_dispatch: | |
inputs: | |
full_ingest: | |
description: Perform full repository ingestion | |
required: true | |
type: boolean | |
default: false | |
jobs: | |
process-changes: | |
if: >- | |
github.event_name == 'push' || (github.event_name == 'pull_request' && | |
github.event.pull_request.merged == true) || github.event_name == | |
'workflow_dispatch' | |
runs-on: ubuntu-latest | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
fetch-depth: 0 | |
- name: Install yq | |
run: |
  # Install the pinned yq release binary system-wide and print its version
  # so the log shows which build the later steps use.
  yq_url="https://github.com/mikefarah/yq/releases/download/v4.40.5/yq_linux_amd64"
  yq_bin="/usr/local/bin/yq"

  sudo wget "$yq_url" -O "$yq_bin"
  sudo chmod +x "$yq_bin"
  yq --version
- name: Load Configuration | |
id: config | |
env: | |
OSIRIS_URL: 'https://osiris-server.vercel.app' | |
run: |
  # Fetch the shared Osiris configuration and publish this repository's entry.
  #   GITHUB_ENV:    CONFIG (the repository entry as JSON)
  #   GITHUB_OUTPUT: config, osiris_url, config_exists=true
  # If the repository has no entry the step exits 0 without setting
  # config_exists; the later steps test that output in their `if:`.

  # -f makes curl fail on HTTP errors instead of handing back an error page.
  # The `if !` guard matters because run steps execute under `bash -e`: a
  # bare failing assignment would abort before the annotation is printed.
  if ! CONFIG=$(curl -fsS "$OSIRIS_URL/api/config") || [ -z "$CONFIG" ]; then
    echo "::error::Failed to fetch configuration from osiris-server"
    exit 1
  fi

  # Parse the YAML into JSON for the repository. GITHUB_REPOSITORY is the
  # runner-provided equivalent of the github.repository expression.
  REPO_CONFIG=$(printf '%s\n' "$CONFIG" |
    yq -o=json ".repositories[\"${GITHUB_REPOSITORY}\"]")
  if [ "$REPO_CONFIG" = "null" ]; then
    echo "Repository ${GITHUB_REPOSITORY} not configured for watching"
    exit 0
  fi

  # Log for debugging
  echo "Repository config:"
  echo "$REPO_CONFIG" | jq '.'

  # Export config to later steps as an environment variable ...
  {
    echo 'CONFIG<<EOF'
    echo "$REPO_CONFIG"
    echo 'EOF'
  } >> "$GITHUB_ENV"

  # ... and as step outputs.
  {
    echo "config<<EOF"
    echo "$REPO_CONFIG"
    echo "EOF"
    echo "osiris_url=$OSIRIS_URL"
    echo "config_exists=true"
  } >> "$GITHUB_OUTPUT"
- name: Setup API Helper | |
if: steps.config.outputs.config_exists == 'true' | |
run: | | |
cat << \EOF > api_helper.sh | |
#!/bin/bash | |
#######################################
# POST a JSON payload to an endpoint and print the JSON response body.
#
# An attempt succeeds when the HTTP status is exactly 200 and the body is
# accepted by `jq empty`. Failed attempts are retried up to 5 times in
# total, sleeping 5s, 10s, 20s, ... between attempts (on top of curl's own
# --retry handling).
#
# Arguments:
#   $1 - URL to POST to
#   $2 - request body (sent verbatim with --data-raw)
# Outputs:
#   stdout - the response body of the successful attempt, and nothing else,
#            so callers can safely capture it with $(call_api ...)
#   stderr - progress messages and the body/status of failed attempts
# Returns:
#   0 on success, 1 when every attempt failed
#######################################
call_api() {
  local url="$1"
  local data="$2"
  local retries=5
  local delay=5
  local timeout=60
  local attempt tmp_response status_code

  for ((attempt = 1; attempt <= retries; attempt++)); do
    # Diagnostics go to stderr: stdout is reserved for the response body.
    echo "API call attempt $attempt of $retries" >&2

    # Use a temp file for the response
    tmp_response=$(mktemp) || return 1

    # Make the API call; curl prints only the status code on stdout.
    # `|| true`: a transport failure is handled through the status code
    # check below rather than by aborting the caller.
    status_code=$(curl -X POST "$url" \
      -H "Content-Type: application/json" \
      -H "Accept: application/json" \
      --write-out "%{http_code}" \
      --silent \
      --output "$tmp_response" \
      --max-time "$timeout" \
      --retry 3 \
      --retry-delay 2 \
      --data-raw "$data") || true

    # Success requires both a 200 and a body that parses as JSON.
    if [ "$status_code" = "200" ] && jq empty < "$tmp_response" 2>/dev/null; then
      cat "$tmp_response"
      rm -f -- "$tmp_response"
      return 0
    fi

    echo "Failed attempt $attempt. Status code: $status_code" >&2
    if [ -s "$tmp_response" ]; then
      echo "Response content:" >&2
      cat "$tmp_response" >&2
    fi
    rm -f -- "$tmp_response"

    # Back off before the next attempt; no pointless sleep after the last.
    if ((attempt < retries)); then
      sleep "$delay"
      delay=$((delay * 2))
    fi
  done

  return 1
}
EOF | |
chmod +x api_helper.sh | |
- name: Full Repository Ingestion | |
if: >- | |
steps.config.outputs.config_exists == 'true' && github.event_name == | |
'workflow_dispatch' && github.event.inputs.full_ingest == 'true' | |
run: |
  # Full ingestion: ask the server to index the whole repository, then
  # drive it through the resulting batches one request at a time.
  # Reads CONFIG (repository entry as JSON) from the environment.
  # The step fails when the initial call fails or when no batch succeeds;
  # a partial success only raises a warning.
  source ./api_helper.sh

  echo "Starting full repository ingestion..."
  echo "DEBUG: Current CONFIG content:"
  echo "$CONFIG" | jq '.'

  # Build request body using jq for safe JSON construction. Repository,
  # branch, event and SHA come from the runner's default environment
  # variables so that a crafted branch name is never spliced into the
  # script text.
  REQUEST_BODY=$(jq -n \
    --arg repo "$GITHUB_REPOSITORY" \
    --arg branch "$GITHUB_REF_NAME" \
    --arg event_type "$GITHUB_EVENT_NAME" \
    --arg commit_sha "$GITHUB_SHA" \
    --arg timestamp "$(date -u +"%Y-%m-%dT%H:%M:%SZ")" \
    --argjson config "$CONFIG" \
    '{
      "repo": $repo,
      "branch": $branch,
      "metadata": {
        "repository": $repo,
        "branch": $branch,
        "event_type": $event_type,
        "commit_sha": $commit_sha,
        "process_timestamp": $timestamp,
        "config": $config
      },
      "maxFileSize": ($config.max_file_size // 100000),
      "maxTokens": ($config.max_tokens // 50000),
      "forceReplace": true
    }')

  echo "DEBUG: Request body before sending:"
  echo "$REQUEST_BODY" | jq '.'

  # Make the API call and capture response. The call sits inside `if !`
  # because under `bash -e` a bare failing assignment would end the step
  # before the error annotation could be printed.
  echo "Making API call to ingest-repo..."
  if ! response=$(call_api "${{ steps.config.outputs.osiris_url }}/api/ingest-repo" "$REQUEST_BODY"); then
    echo "::error::API call failed"
    exit 1
  fi

  echo "DEBUG: Raw API Response:"
  echo "$response"

  # Validate response is valid JSON
  if ! echo "$response" | jq '.' > /dev/null 2>&1; then
    echo "::error::Invalid JSON response from API"
    echo "$response"
    exit 1
  fi

  # Parse response for totalBatches. Only plain digits are accepted: the
  # value is used in arithmetic below, where arbitrary text is unsafe.
  total_batches=$(echo "$response" | jq -r '.totalBatches')
  case "$total_batches" in
    '' | *[!0-9]*)
      echo "::error::Could not extract totalBatches from response"
      echo "Response was: $response"
      exit 1
      ;;
  esac

  echo "Processing $total_batches batches..."
  batch_size=10
  successful_batches=0
  failed_batches=0

  # Process batches
  for ((batch = 0; batch < total_batches; batch++)); do
    echo "Processing batch $((batch + 1)) of $total_batches"

    batch_request=$(jq -n \
      --arg repo "$GITHUB_REPOSITORY" \
      --arg branch "$GITHUB_REF_NAME" \
      --argjson start "$((batch * batch_size))" \
      --argjson size "$batch_size" \
      '{
        "repo": $repo,
        "branch": $branch,
        "batchStart": $start,
        "batchSize": $size
      }')

    echo "DEBUG: Batch request body:"
    echo "$batch_request" | jq '.'

    if batch_response=$(call_api "${{ steps.config.outputs.osiris_url }}/api/process-batch" "$batch_request"); then
      echo "DEBUG: Batch response:"
      echo "$batch_response" | jq '.'
      echo "Batch $((batch + 1)) processed successfully"
      successful_batches=$((successful_batches + 1))
    else
      echo "::warning::Failed to process batch $((batch + 1))"
      echo "DEBUG: Failed batch response:"
      echo "$batch_response"
      failed_batches=$((failed_batches + 1))
    fi

    # Add small delay between batches (not after the last one)
    if ((batch + 1 < total_batches)); then
      sleep 2
    fi
  done

  echo "Repository ingestion completed:"
  echo "- Total batches: $total_batches"
  echo "- Successful: $successful_batches"
  echo "- Failed: $failed_batches"

  if [ "$successful_batches" -eq "$total_batches" ]; then
    echo "::notice::Successfully processed all batches"
  else
    echo "::warning::Completed with $failed_batches failed batches"
    # Written as a full `if`: a trailing `[ ... ] && exit 1` would leave
    # status 1 as the script's last result and fail the step even when
    # some batches succeeded.
    if [ "$successful_batches" -eq 0 ]; then
      exit 1
    fi
  fi
- name: Process Incremental Changes | |
if: >- | |
steps.config.outputs.config_exists == 'true' && !(github.event_name | |
== 'workflow_dispatch' && github.event.inputs.full_ingest == 'true') | |
run: |
  # Incremental ingestion: collect the files added, modified or removed
  # between two commits, keep those whose extension is listed in
  # CONFIG.included_extensions, and send them to the ingest-changes
  # endpoint. Reads CONFIG (repository entry as JSON) from the environment.
  source ./api_helper.sh

  # Debug: Print full config at start
  echo "Full Configuration from env:"
  echo "$CONFIG" | jq '.'

  # Scratch space for the extension list, the diff and the JSON fragments
  TEMP_DIR=$(mktemp -d)
  trap 'rm -rf "$TEMP_DIR"' EXIT

  # Create extensions file (one extension per line)
  echo "$CONFIG" | jq -r '.included_extensions[]?' | tr -d '\r' \
    > "$TEMP_DIR/included_extensions.txt"
  echo "Available extensions:"
  cat "$TEMP_DIR/included_extensions.txt"

  # Get commit range
  case "$GITHUB_EVENT_NAME" in
    push)
      BASE_SHA="${{ github.event.before }}"
      HEAD_SHA="${{ github.event.after }}"
      ;;
    pull_request)
      BASE_SHA="${{ github.event.pull_request.base.sha }}"
      HEAD_SHA="${{ github.event.pull_request.head.sha }}"
      ;;
    *)
      BASE_SHA=$(git rev-parse --verify --quiet 'HEAD^' || true)
      HEAD_SHA=$(git rev-parse HEAD)
      ;;
  esac
  echo "Base SHA: $BASE_SHA"
  echo "Head SHA: $HEAD_SHA"

  # A push that creates the branch reports an all-zero "before" SHA, and a
  # root commit has no parent. Neither can be diffed, so compare against
  # the empty tree instead: every file then shows up as added.
  DIFF_BASE="$BASE_SHA"
  if ! git cat-file -e "${BASE_SHA}^{commit}" 2>/dev/null; then
    DIFF_BASE=$(git hash-object -t tree /dev/null)
    echo "Base commit not available; diffing against the empty tree"
  fi

  # Process changes with improved debug output
  echo "Starting to process changed files..."

  # -z gives NUL-separated "status, path" pairs with the path unquoted, so
  # names containing spaces, quotes or non-ASCII characters stay intact.
  if ! git diff --name-status --no-renames -z "$DIFF_BASE" "$HEAD_SHA" \
    > "$TEMP_DIR/diff.txt"; then
    echo "::error::Failed to compute changed files between $DIFF_BASE and $HEAD_SHA"
    exit 1
  fi

  # One JSON object per line, one file per change category
  : > "$TEMP_DIR/added.jsonl"
  : > "$TEMP_DIR/modified.jsonl"
  : > "$TEMP_DIR/removed.jsonl"

  # Process each changed file
  while IFS= read -r -d '' status && IFS= read -r -d '' filepath; do
    echo "Processing: $filepath (Status: $status)"
    [ -z "$filepath" ] && continue

    ext="${filepath##*.}"
    echo "File extension: '$ext'"
    if ! grep -ixFq -e "$ext" "$TEMP_DIR/included_extensions.txt"; then
      echo "Extension '$ext' is NOT included"
      continue
    fi
    echo "Extension '$ext' IS included"

    case "$status" in
      A) bucket="added" ;;
      M) bucket="modified" ;;
      D)
        jq -n -c --arg path "$filepath" '{path: $path}' \
          >> "$TEMP_DIR/removed.jsonl"
        continue
        ;;
      *) continue ;; # other statuses are not ingested
    esac

    # Added/modified: ship the file content as of HEAD_SHA. jq does the
    # JSON encoding of both path and content.
    if ! git show "$HEAD_SHA:$filepath" > "$TEMP_DIR/blob" 2>/dev/null; then
      echo "::warning::Could not read $filepath at $HEAD_SHA; skipping"
      continue
    fi
    if ! jq -Rs -c --arg path "$filepath" '{path: $path, content: .}' \
      "$TEMP_DIR/blob" >> "$TEMP_DIR/$bucket.jsonl"; then
      echo "::warning::Could not encode $filepath; skipping"
      continue
    fi
  done < "$TEMP_DIR/diff.txt"

  # Process collected changes
  if [ ! -s "$TEMP_DIR/added.jsonl" ] &&
    [ ! -s "$TEMP_DIR/modified.jsonl" ] &&
    [ ! -s "$TEMP_DIR/removed.jsonl" ]; then
    echo "No relevant file changes detected"
    exit 0
  fi
  echo "Found changes to process"

  # Build the request with jq. The change lists are read from files
  # (--slurpfile) so large contents are never passed as jq arguments.
  request_body=$(jq -n \
    --arg repo "$GITHUB_REPOSITORY" \
    --arg branch "$GITHUB_REF_NAME" \
    --arg event_type "$GITHUB_EVENT_NAME" \
    --arg commit_sha "$GITHUB_SHA" \
    --arg base_sha "$BASE_SHA" \
    --arg head_sha "$HEAD_SHA" \
    --arg timestamp "$(date -u +"%Y-%m-%dT%H:%M:%SZ")" \
    --argjson config "$CONFIG" \
    --slurpfile added "$TEMP_DIR/added.jsonl" \
    --slurpfile modified "$TEMP_DIR/modified.jsonl" \
    --slurpfile removed "$TEMP_DIR/removed.jsonl" \
    '{
      "repository": {
        "fullName": $repo,
        "defaultBranch": $branch
      },
      "changes": {
        "added": $added,
        "modified": $modified,
        "removed": $removed
      },
      "metadata": {
        "repository": $repo,
        "branch": $branch,
        "event_type": $event_type,
        "commit_sha": $commit_sha,
        "base_sha": $base_sha,
        "head_sha": $head_sha,
        "max_file_size": $config.max_file_size,
        "max_tokens": $config.max_tokens,
        "process_timestamp": $timestamp
      }
    }')

  # Call ingest-changes endpoint
  if ! call_api "${{ steps.config.outputs.osiris_url }}/api/ingest-changes" "$request_body"; then
    echo "::error::Failed to process changes"
    exit 1
  fi
- name: Report Status | |
if: always() | |
run: | | |
# Summarise the run as a single workflow annotation.
# The job status is checked first: when the configuration step itself
# fails, config_exists is never set, and testing it first would report
# that failure as "not configured".
job_status="${{ job.status }}"
config_exists="${{ steps.config.outputs.config_exists }}"
event_name="${{ github.event_name }}"
full_ingest="${{ github.event.inputs.full_ingest }}"

if [ "$job_status" != "success" ]; then
  echo "::error::Failed to process changes"
elif [ "$config_exists" != "true" ]; then
  echo "::notice::Repository not configured for watching"
elif [ "$event_name" == "workflow_dispatch" ] && [ "$full_ingest" == "true" ]; then
  echo "::notice::Successfully completed full repository ingestion"
else
  echo "::notice::Successfully processed changes"
fi