workflow #41
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow: forwards repository contents/changes to the Osiris service.
name: Process Repository Changes

on:
  # Direct pushes to either default-branch name.
  push:
    branches: [main, master]
  # Closed pull requests; the job-level `if` below keeps only merged ones.
  pull_request:
    types: [closed]
  # Manual run; `full_ingest` selects the full-repository path instead of the
  # incremental one.
  workflow_dispatch:
    inputs:
      full_ingest:
        description: Perform full repository ingestion
        required: true
        type: boolean
        default: false

jobs:
  process-changes:
    # Run for pushes, merged pull requests, and manual dispatches only.
    if: >-
      github.event_name == 'push' ||
      (github.event_name == 'pull_request' && github.event.pull_request.merged == true) ||
      github.event_name == 'workflow_dispatch'
    runs-on: ubuntu-latest
    steps:
      # Full history: later steps diff arbitrary base/head SHAs and resolve HEAD^.
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
# Installs a pinned yq release binary for the YAML handling in the next step.
- name: Install yq
  run: |
    yq_url="https://github.com/mikefarah/yq/releases/download/v4.40.5/yq_linux_amd64"
    yq_bin="/usr/local/bin/yq"
    # Download straight to the install location, then mark it executable.
    sudo wget "$yq_url" -O "$yq_bin"
    sudo chmod +x "$yq_bin"
    # Print the version so the log records which yq is on PATH.
    yq --version
# Fetches the Osiris config, extracts this repository's entry and exposes it
# to later steps as step outputs:
#   config_exists  'true' | 'false'
#   config         the repository entry as single-line JSON (only when true)
#   osiris_url     base URL of the Osiris server (only when true)
- name: Load Configuration
  id: config
  env:
    OSIRIS_URL: 'https://osiris-server.vercel.app'
  run: |
    # 1) Fetch the full config from Osiris.
    #    --fail makes HTTP errors a non-zero exit; without it an error page or
    #    empty body would flow on and be reported as "not configured".
    if ! CONFIG=$(curl --fail --silent --show-error "$OSIRIS_URL/api/config"); then
      echo "::error::Could not fetch config from $OSIRIS_URL/api/config"
      exit 1
    fi
    if [ -z "$CONFIG" ]; then
      echo "::error::OSIRIS returned an empty config"
      exit 1
    fi

    # 2) Validate that it's valid YAML
    echo "==== RAW CONFIG FROM OSIRIS (YAML) ===="
    echo "$CONFIG"
    echo "======================================="
    if ! echo "$CONFIG" | yq '.' > /dev/null; then
      echo "::error::OSIRIS config is not valid YAML"
      exit 1
    fi

    # 3) Extract the repository-specific config as JSON
    if ! REPO_CONFIG=$(echo "$CONFIG" | yq -o=json ".repositories[\"${GITHUB_REPOSITORY}\"]"); then
      echo "::error::Could not read repository entry from OSIRIS config"
      exit 1
    fi

    # If it's missing or null, skip the rest
    if [ -z "$REPO_CONFIG" ] || [ "$REPO_CONFIG" = "null" ]; then
      echo "Repository $GITHUB_REPOSITORY is not configured for watching"
      echo "config_exists=false" >> "$GITHUB_OUTPUT"
      exit 0
    fi

    # 4) Validate that REPO_CONFIG is valid JSON, and
    # 5) collapse it to a single line so it fits the key=value output format.
    if ! SINGLE_LINE_CONFIG=$(echo "$REPO_CONFIG" | jq -c '.'); then
      echo "::error::Repository config is not valid JSON"
      exit 1
    fi
    echo "==== REPO CONFIG (JSON) ===="
    echo "$SINGLE_LINE_CONFIG" | jq '.'
    echo "============================"

    # 6) Expose outputs for future steps
    {
      echo "config_exists=true"
      echo "config=$SINGLE_LINE_CONFIG"
      echo "osiris_url=$OSIRIS_URL"
    } >> "$GITHUB_OUTPUT"
# Writes api_helper.sh into the workspace; the ingestion steps load it with `source ./api_helper.sh`.
- name: Setup API Helper | |
if: steps.config.outputs.config_exists == 'true' | |
run: | | |
# The heredoc delimiter is quoted (\EOF), so the helper text is written verbatim: no variable or command expansion happens here.
cat << \EOF > api_helper.sh | |
#!/bin/bash | |
# call_api URL JSON_BODY
#
# POSTs JSON_BODY to URL as application/json and prints the response body.
#
# Arguments: $1 - endpoint URL
#            $2 - request body (JSON text)
# Outputs:   stdout - the response body, and nothing else, so callers can
#                     capture it with $(call_api ...) and feed it to jq
#            stderr - all DEBUG / progress messages
# Returns:   0 once a non-empty, valid-JSON response is received;
#            1 after 5 failed attempts (delay starts at 5s and doubles).
call_api() {
  local url="$1"
  local data="$2"
  local retries=5
  local delay=5
  local timeout=60
  local attempt response curl_status

  for ((attempt = 1; attempt <= retries; attempt++)); do
    echo "DEBUG: API call attempt $attempt of $retries" >&2

    # The body is streamed on stdin (--data-binary @-) instead of argv, so a
    # large payload cannot hit the kernel's per-argument size limit.
    # Declaration and assignment are separate so curl's status is not masked.
    curl_status=0
    response=$(printf '%s' "$data" | curl -X POST "$url" \
      -H "Content-Type: application/json" \
      -H "Accept: application/json" \
      --silent \
      --max-time "$timeout" \
      --retry 3 \
      --retry-delay 2 \
      --data-binary @-) || curl_status=$?

    echo "DEBUG: Testing if response is valid JSON" >&2
    # jq accepts empty input, so emptiness has to be rejected explicitly.
    if [[ -n "$response" ]] && printf '%s' "$response" | jq '.' >/dev/null 2>&1; then
      echo "DEBUG: Response is valid JSON" >&2
      printf '%s\n' "$response"
      return 0
    fi

    echo "DEBUG: Response is not valid JSON or empty (curl exit status $curl_status)" >&2
    echo "Raw response: $response" >&2

    # No point sleeping after the final attempt.
    if ((attempt < retries)); then
      echo "Waiting ${delay}s before retry..." >&2
      sleep "$delay"
      delay=$((delay * 2))
    fi
  done

  return 1
}
EOF | |
# The steps visible in this workflow only `source` the helper, so the execute bit is a convenience rather than a requirement.
chmod +x api_helper.sh | |
# Manual full ingestion: asks Osiris to index the whole repository, then
# drives the resulting batches one by one.
# Fails when the initial call fails, when totalBatches is unusable, or when
# no batch at all succeeds; partial success only raises a warning.
- name: Full Repository Ingestion
  if: >
    steps.config.outputs.config_exists == 'true' &&
    github.event_name == 'workflow_dispatch' &&
    github.event.inputs.full_ingest == 'true'
  env:
    # Passed through the environment rather than pasted into the script:
    # inlining the JSON inside a double-quoted shell string strips its quotes
    # (making it invalid JSON) and lets its content be parsed as shell code.
    CONFIG: ${{ steps.config.outputs.config }}
    OSIRIS_URL: ${{ steps.config.outputs.osiris_url }}
  run: |
    source ./api_helper.sh

    BATCH_SIZE=10

    echo "Starting full repository ingestion..."

    # Build the request body with jq, passing CONFIG as --argjson.
    # Repository/ref/event/sha come from the runner's default GITHUB_* variables.
    REQUEST_BODY=$(jq -n \
      --argjson config "$CONFIG" \
      --arg repo "$GITHUB_REPOSITORY" \
      --arg branch "$GITHUB_REF_NAME" \
      --arg event_type "$GITHUB_EVENT_NAME" \
      --arg commit_sha "$GITHUB_SHA" \
      '{
        "repo": $repo,
        "branch": $branch,
        "metadata": {
          "repository": $repo,
          "branch": $branch,
          "event_type": $event_type,
          "commit_sha": $commit_sha,
          "process_timestamp": (now | strftime("%Y-%m-%dT%H:%M:%SZ")),
          "config": $config
        },
        "maxFileSize": ($config.max_file_size // 100000),
        "maxTokens": ($config.max_tokens // 50000),
        "forceReplace": true
      }'
    )
    echo "==== DEBUG: REQUEST_BODY ===="
    echo "$REQUEST_BODY" | jq '.'
    echo "============================="

    # Make the API call to ingest-repo. Tested inside `if` so the error
    # annotation is still printed when the shell runs with errexit.
    if ! response=$(call_api "$OSIRIS_URL/api/ingest-repo" "$REQUEST_BODY"); then
      echo "::error::API call failed"
      exit 1
    fi

    # totalBatches must be a non-negative integer before it drives the loop.
    total_batches=$(echo "$response" | jq -r '.totalBatches // empty' 2>/dev/null) || total_batches=""
    if ! [[ "$total_batches" =~ ^[0-9]+$ ]]; then
      echo "::error::Could not find totalBatches in response"
      echo "Response was: $response"
      exit 1
    fi

    echo "Processing $total_batches batches..."
    successful_batches=0
    failed_batches=0

    # Process each batch
    for ((batch = 0; batch < total_batches; batch++)); do
      echo "Processing batch $((batch + 1)) of $total_batches"
      batch_request=$(jq -n \
        --arg repo "$GITHUB_REPOSITORY" \
        --arg branch "$GITHUB_REF_NAME" \
        --argjson batchStart "$((batch * BATCH_SIZE))" \
        --argjson batchSize "$BATCH_SIZE" \
        '{
          repo: $repo,
          branch: $branch,
          batchStart: $batchStart,
          batchSize: $batchSize
        }'
      )
      # The batch response body is not used, only the helper's exit status.
      if call_api "$OSIRIS_URL/api/process-batch" "$batch_request" > /dev/null; then
        echo "Batch $((batch + 1)) processed successfully"
        successful_batches=$((successful_batches + 1))
      else
        echo "::warning::Failed to process batch $((batch + 1))"
        failed_batches=$((failed_batches + 1))
      fi
      # Brief pause between batches, skipped after the last one.
      if ((batch + 1 < total_batches)); then
        sleep 2
      fi
    done

    echo "Repository ingestion completed:"
    echo "- Total batches: $total_batches"
    echo "- Successful: $successful_batches"
    echo "- Failed: $failed_batches"

    if ((successful_batches == total_batches)); then
      echo "::notice::Successfully processed all batches"
    else
      echo "::warning::Completed with $failed_batches failed batches"
      # Explicit `if`: a trailing `[ ... ] && exit 1` would leave exit status 1
      # as the script's result even when some batches succeeded.
      if ((successful_batches == 0)); then
        exit 1
      fi
    fi
# Incremental path (every run that is not a manual full ingest): diffs the
# base and head commits, keeps files whose extension is listed in the
# repository config, and posts added/modified/removed files to Osiris.
- name: Process Incremental Changes
  if: >
    steps.config.outputs.config_exists == 'true' &&
    !(github.event_name == 'workflow_dispatch' && github.event.inputs.full_ingest == 'true')
  env:
    # Expression values are handed over via the environment; pasting them into
    # the script would strip the JSON's quotes and allow shell injection.
    CONFIG: ${{ steps.config.outputs.config }}
    OSIRIS_URL: ${{ steps.config.outputs.osiris_url }}
    PUSH_BASE_SHA: ${{ github.event.before }}
    PUSH_HEAD_SHA: ${{ github.event.after }}
    PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
    PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
  run: |
    source ./api_helper.sh

    echo "Full Configuration:"
    echo "$CONFIG" | jq '.'

    # Create extensions file (one extension per line, CRs stripped)
    echo "$CONFIG" | jq -r '.included_extensions[]' | tr -d '\r' > included_extensions.txt
    echo "Available extensions:"
    cat included_extensions.txt

    # Determine base/head commit
    case "$GITHUB_EVENT_NAME" in
      push)
        BASE_SHA="$PUSH_BASE_SHA"
        HEAD_SHA="$PUSH_HEAD_SHA"
        ;;
      pull_request)
        BASE_SHA="$PR_BASE_SHA"
        HEAD_SHA="$PR_HEAD_SHA"
        ;;
      *)
        BASE_SHA=$(git rev-parse HEAD^)
        HEAD_SHA=$(git rev-parse HEAD)
        ;;
    esac
    echo "Base SHA: $BASE_SHA"
    echo "Head SHA: $HEAD_SHA"

    # Scratch space: one newline-delimited JSON file per change kind.
    TEMP_DIR=$(mktemp -d)
    trap 'rm -rf "$TEMP_DIR"' EXIT
    : > "$TEMP_DIR/added.ndjson"
    : > "$TEMP_DIR/modified.ndjson"
    : > "$TEMP_DIR/removed.ndjson"

    # -z gives NUL-separated "status NUL path NUL" records with unquoted paths,
    # so names containing spaces or special characters survive intact.
    # A failing diff is surfaced as a warning and treated as "no changes".
    if ! git diff --name-status --no-renames -z "$BASE_SHA" "$HEAD_SHA" > "$TEMP_DIR/diff.z"; then
      echo "::warning::git diff $BASE_SHA $HEAD_SHA failed; treating as no changes"
      : > "$TEMP_DIR/diff.z"
    fi

    while IFS= read -r -d '' status && IFS= read -r -d '' filepath; do
      echo "Processing: $filepath (Status: $status)"
      ext="${filepath##*.}"
      echo "File extension: '$ext'"

      # Check if extension is included (case-insensitive, whole-line, literal)
      if ! grep -ixFq -- "$ext" included_extensions.txt; then
        echo "Extension '$ext' is NOT included"
        continue
      fi
      echo "Extension '$ext' IS included"

      case "$status" in
        A) bucket=added ;;
        M) bucket=modified ;;
        D) bucket=removed ;;
        *) continue ;; # other statuses are not forwarded
      esac

      if [ "$bucket" = "removed" ]; then
        jq -nc --arg path "$filepath" '{path: $path}' >> "$TEMP_DIR/removed.ndjson"
        continue
      fi

      # Read the blob at HEAD_SHA; skip the file if it cannot be read.
      if ! git show "$HEAD_SHA:$filepath" > "$TEMP_DIR/blob" 2>/dev/null; then
        echo "::warning::Could not read $filepath at $HEAD_SHA; skipping"
        continue
      fi
      # jq does the JSON escaping of both the path and the file content.
      jq -Rsc --arg path "$filepath" '{path: $path, content: .}' \
        < "$TEMP_DIR/blob" >> "$TEMP_DIR/$bucket.ndjson"
    done < "$TEMP_DIR/diff.z"

    if [ -s "$TEMP_DIR/added.ndjson" ] || [ -s "$TEMP_DIR/modified.ndjson" ] || [ -s "$TEMP_DIR/removed.ndjson" ]; then
      echo "Found changes to process"

      # Build the whole request with jq. The change lists are read from files
      # (--slurpfile) so large contents never pass through argv.
      request_body=$(jq -n \
        --argjson config "$CONFIG" \
        --slurpfile added "$TEMP_DIR/added.ndjson" \
        --slurpfile modified "$TEMP_DIR/modified.ndjson" \
        --slurpfile removed "$TEMP_DIR/removed.ndjson" \
        --arg repo "$GITHUB_REPOSITORY" \
        --arg branch "$GITHUB_REF_NAME" \
        --arg event_type "$GITHUB_EVENT_NAME" \
        --arg commit_sha "$GITHUB_SHA" \
        --arg base_sha "$BASE_SHA" \
        --arg head_sha "$HEAD_SHA" \
        --arg timestamp "$(date -u +"%Y-%m-%dT%H:%M:%SZ")" \
        '{
          repository: {
            fullName: $repo,
            defaultBranch: $branch
          },
          changes: {
            added: $added,
            modified: $modified,
            removed: $removed
          },
          metadata: {
            repository: $repo,
            branch: $branch,
            event_type: $event_type,
            commit_sha: $commit_sha,
            base_sha: $base_sha,
            head_sha: $head_sha,
            max_file_size: $config.max_file_size,
            max_tokens: $config.max_tokens,
            process_timestamp: $timestamp
          }
        }'
      )

      # Ingest changes
      if ! call_api "$OSIRIS_URL/api/ingest-changes" "$request_body"; then
        echo "::error::Failed to process changes"
        exit 1
      fi
    else
      echo "No relevant file changes detected"
    fi
- name: Report Status | |
if: always() | |
run: | | |
if [ "${{ steps.config.outputs.config_exists }}" != "true" ]; then | |
echo "::notice::Repository not configured for watching" | |
elif [ "${{ job.status }}" == "success" ]; then | |
if [ "${{ github.event_name }}" == "workflow_dispatch" ] && [ "${{ github.event.inputs.full_ingest }}" == "true" ]; then | |
echo "::notice::Successfully completed full repository ingestion" | |
else | |
echo "::notice::Successfully processed changes" | |
fi | |
else | |
echo "::error::Failed to process changes" |