-
Notifications
You must be signed in to change notification settings - Fork 0
50 lines (42 loc) · 1.37 KB
/
scraper.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# Workflow that installs the project's dependencies with Poetry and runs
# `src/main.py scrape`, optionally restricted to a single agency.
name: Run Scraper

on:
  # The scheduled trigger is currently disabled; only manual dispatch is active.
  # schedule:
  #   # Runs every day at 04:00 UTC (01:00 in Brasília, UTC-3)
  #   - cron: '0 4 * * *'
  workflow_dispatch:
    inputs:
      agency:
        description: 'Agency to scrape (leave empty to scrape all)'
        required: false
        default: ''

jobs:
  run-scraper:
    runs-on: ubuntu-latest
    steps:
      # Step 1: Checkout the repository
      - name: Checkout repository
        uses: actions/checkout@v4

      # Step 2: Set up Python 3.13
      # The version is quoted so YAML keeps it as a string.
      - name: Set up Python 3.13
        uses: actions/setup-python@v5
        with:
          python-version: '3.13.0'

      # Step 3: Install Poetry
      # snok/install-poetry reads the pin from `version`; an unrecognised
      # input name would be ignored and the pin would not take effect.
      - name: Install Poetry
        uses: snok/install-poetry@v1
        with:
          version: '1.8.4'

      # Step 4: Install dependencies using Poetry
      # --no-root installs the dependencies without installing the project
      # itself as a package.
      - name: Install dependencies
        run: |
          poetry install --no-root

      # Step 5: Export yesterday's date (YYYY-MM-DD) to later steps
      # NOTE(review): YESTERDAY is not referenced on the scraper command line
      # below; unless src/main.py reads it from the environment it is unused.
      # Confirm whether it should feed --min-date.
      - name: Set date variable for yesterday
        run: |
          echo "YESTERDAY=$(date -d 'yesterday' +'%Y-%m-%d')" >> "$GITHUB_ENV"

      # Step 6: Run the scraper
      # NOTE(review): despite the step name, --min-date is fixed at 2024-01-01.
      # The agency input is passed through an environment variable rather than
      # being interpolated into the script, so its value cannot be interpreted
      # as shell syntax. An empty value adds no --agency argument at all.
      - name: Run the scraper for yesterday
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          AGENCY: ${{ inputs.agency }}
        run: |
          agency_args=()
          if [ -n "$AGENCY" ]; then
            agency_args=(--agency "$AGENCY")
          fi
          poetry run python src/main.py scrape \
            --min-date 2024-01-01 \
            --sequential \
            --allow-update \
            "${agency_args[@]}"