# Run Scraper #151
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
name: Run Scraper

# Triggers. The workflow currently runs only when started manually; the daily
# schedule below is kept for reference but is disabled.
on:
  # schedule:
  #   # Runs every day at 04:00 UTC (01:00 in Brasília, UTC-3)
  #   - cron: '0 4 * * *'
  workflow_dispatch:
    inputs:
      # Optional filter forwarded to the scraper's --agency flag. An empty
      # value means no --agency flag is passed at all.
      agency:
        description: 'Agency to scrape (leave empty to scrape all)'
        type: string
        required: false
        default: ''
jobs:
  # Single job: installs the project with Poetry and runs the scraper CLI.
  run-scraper:
    runs-on: ubuntu-latest

    steps:
      # Step 1: Checkout the repository
      - name: Checkout repository
        uses: actions/checkout@v4

      # Step 2: Set up Python 3.13
      - name: Set up Python 3.13
        uses: actions/setup-python@v4
        with:
          python-version: '3.13.0'

      # Step 3: Install Poetry
      # snok/install-poetry reads the pin from `version`; an unknown input
      # name is ignored and the action falls back to the latest release.
      - name: Install Poetry
        uses: snok/install-poetry@v1
        with:
          version: '1.8.4'

      # Step 4: Install dependencies using Poetry
      - name: Install dependencies
        run: |
          poetry install --no-root

      # Step 5: Get the date for the previous day
      # Exports YESTERDAY (YYYY-MM-DD) to the environment of later steps.
      - name: Set date variable for yesterday
        run: |
          echo "YESTERDAY=$(date -d 'yesterday' +'%Y-%m-%d')" >> "$GITHUB_ENV"

      # Step 6: Run the scraper
      # NOTE(review): despite the step name, --min-date is fixed to 2024-01-01
      # and YESTERDAY is not used by the command — confirm whether the fixed
      # date is an intentional backfill or should be "$YESTERDAY".
      - name: Run the scraper for yesterday
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          # Pass the user-supplied input through the environment instead of
          # expanding it inside the script, so it cannot inject shell code.
          AGENCY: ${{ inputs.agency }}
        run: |
          args=(scrape --min-date 2024-01-01 --sequential --allow-update)
          # Only add --agency when a value was provided; quoting keeps an
          # agency name containing spaces as a single argument.
          if [ -n "$AGENCY" ]; then
            args+=(--agency "$AGENCY")
          fi
          poetry run python src/main.py "${args[@]}"