---
# Scheduled workflow: runs the news scraper for the previous day, consolidates
# the extracted JSON into a CSV/ZIP, and commits any resulting changes back to
# the repository.
name: Run Scraper and Commit Results

on:
  schedule:
    # Runs every day at 04:00 UTC (01:00 in Brasília, UTC-3)
    - cron: '0 4 * * *'
  # Allows manual triggering of the workflow from the GitHub Actions UI
  workflow_dispatch:

jobs:
  run-scraper:
    runs-on: ubuntu-latest
    permissions:
      # Write access is required so the final step can push the commit
      contents: write
    steps:
      # Step 1: Checkout the repository (fetch-depth 0 = full history)
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # Step 2: Set up Python
      - name: Set up Python 3.x
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'

      # Step 3: Install dependencies
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      # Step 4: Get the date for the previous day
      # NOTE(review): `date` uses the runner's time zone (presumably UTC on
      # GitHub-hosted runners), not Brasília time — confirm this is the
      # intended day boundary for manual runs.
      - name: Set date variable for yesterday
        run: echo "YESTERDAY=$(date -d 'yesterday' +'%Y-%m-%d')" >> "$GITHUB_ENV"

      # Step 5: Get the current date for commit message
      - name: Set date variable for today
        run: echo "TODAY=$(date +'%Y-%m-%d')" >> "$GITHUB_ENV"

      # Step 6: Run the scraper for the previous day
      - name: Run the scraper for yesterday
        run: |
          python news_scraper.py "$YESTERDAY"

      # Step 7: Consolidate JSON files into a CSV and compress
      - name: Consolidate JSON into CSV and ZIP
        run: |
          python consolidate_json_to_csv.py

      # Step 8: Commit and push changes if there are any new files in
      # raw_extractions
      - name: Commit and push changes
        uses: stefanzweifel/git-auto-commit-action@v5
        with:
          commit_message: "Auto-update: Scraped data on ${{ env.TODAY }} for ${{ env.YESTERDAY }}"
          # NOTE(review): github.head_ref is only populated for pull-request
          # events; for the schedule / workflow_dispatch triggers above it is
          # empty, so the action is expected to push to the checked-out
          # branch — confirm against the action's documentation.
          branch: ${{ github.head_ref }}