# scraper.yaml
---
# Daily workflow: scrapes the previous day's data, consolidates the scraped
# JSON into a CSV/ZIP, and commits whatever changed back to the repository.
name: Run Scraper and Commit Results

on:
  schedule:
    # Runs every day at 04:00 UTC (01:00 in Brasília, UTC-3).
    - cron: '0 4 * * *'
  # Allows manual triggering of the workflow from the GitHub Actions UI.
  workflow_dispatch:

jobs:
  run-scraper:
    runs-on: ubuntu-latest
    permissions:
      # Write access is required so the final step can push its commit.
      contents: write
    steps:
      # Step 1: Checkout the repository (full history, not a shallow clone)
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # Step 2: Set up Python
      - name: Set up Python 3.x
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'

      # Step 3: Install dependencies
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      # Step 4: Get the date for the previous day.
      # The value comes from the runner's clock and is exported through
      # GITHUB_ENV so later steps can read it as $YESTERDAY / env.YESTERDAY.
      - name: Set date variable for yesterday
        run: echo "YESTERDAY=$(date -d 'yesterday' +'%Y-%m-%d')" >> "$GITHUB_ENV"

      # Step 5: Get the current date for the commit message
      - name: Set date variable for today
        run: echo "TODAY=$(date +'%Y-%m-%d')" >> "$GITHUB_ENV"

      # Step 6: Run the scraper for the previous day
      - name: Run the scraper for yesterday
        run: |
          python news_scraper.py "$YESTERDAY"

      # Step 7: Consolidate JSON files into a CSV and compress
      - name: Consolidate JSON into CSV and ZIP
        run: |
          python consolidate_json_to_csv.py

      # Step 8: Commit and push any changes left by the previous steps.
      # NOTE(review): presumably these are new files under raw_extractions;
      # no file_pattern is set, so every modified file is committed - confirm.
      # No `branch` input is given: github.head_ref is only populated for
      # pull_request events, which never trigger this workflow, so the action
      # pushes to the branch that was checked out.
      - name: Commit and push changes
        uses: stefanzweifel/git-auto-commit-action@v5
        with:
          commit_message: "Auto-update: Scraped data on ${{ env.TODAY }} for ${{ env.YESTERDAY }}"