# NOTE(review): the lines below are GitHub web-UI page chrome captured when
# this workflow was copied from the "Workflow file for this run" view.
# Kept as comments so the file remains valid YAML.
# Skip to content
# Run Scraper
# Run Scraper #158
# Workflow file for this run

name: Run Scraper

on:
  # schedule:
  #   # Runs every day at 4AM UTC (midnight in Brasília)
  #   - cron: '0 4 * * *'
  workflow_dispatch:
    inputs:
      agencies:
        description: 'Comma-separated list of agencies to scrape (leave empty to scrape all)'
        required: false
        default: ''

jobs:
  run-scraper:
    runs-on: ubuntu-latest
    # Run the job inside the published scraper image so dependencies are
    # already installed — no pip install step is needed below.
    container:
      image: ghcr.io/nitaibezerra/govbrnews-scraper:latest
      options: --workdir /app  # start the container in the app directory
    steps:
      # Step 1: compute yesterday's date once and expose it to later steps
      # via GITHUB_ENV (evaluated in the container's shell, UTC clock).
      - name: Set date variable for yesterday
        run: |
          echo "YESTERDAY=$(date -d 'yesterday' +'%Y-%m-%d')" >> "$GITHUB_ENV"

      # Step 2: run the scraper inside the Docker container.
      - name: Run the scraper
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          # SECURITY: pass the user-supplied input through an env var instead
          # of interpolating ${{ inputs.agencies }} directly into the script —
          # direct interpolation allows shell command injection.
          AGENCIES_INPUT: ${{ inputs.agencies }}
        run: |
          # Ensure working directory is correct
          cd /app
          # Construct the optional --agencies argument if input was provided.
          AGENCIES_ARG=""
          if [ -n "$AGENCIES_INPUT" ]; then
            AGENCIES_ARG="--agencies $AGENCIES_INPUT"
          fi
          # Run scraper without re-installing dependencies.
          # $AGENCIES_ARG is intentionally unquoted so it expands to
          # two words (flag + value) or to nothing when empty.
          python src/main.py scrape \
            --min-date "$YESTERDAY" \
            --sequential \
            --allow-update \
            $AGENCIES_ARG