Skip to content

Commit

Permalink
run scraper.yaml with sample
Browse files Browse the repository at this point in the history
  • Loading branch information
sborms committed Oct 25, 2023
1 parent 8a7c7b8 commit c8ecdeb
Show file tree
Hide file tree
Showing 6 changed files with 665 additions and 13 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
.ruff_cache/
.vscode/
.streamlit/
-data/
+data/*
+!data/_coordinates.csv
spielerei/
logs/

Expand Down
628 changes: 628 additions & 0 deletions data/_coordinates.csv

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions scraper/config/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"url_base": "https://www.lzvcup.be",
"steps": {
"historical_players": true,
-"csv": true
+"csv": false
},
"areas": {
"ANTWERPEN": "results/1",
Expand All @@ -24,5 +24,5 @@
"palmares": [["team", "seizoen", "reeks", "positie"], ["area", "region", "competition"]]
},
"database": "database/futsalfriend.db",
-"dir_last_updated_date": "webapp/last_updated_date.txt"
+"dir_last_updated": "webapp/last_updated.txt"
}
23 changes: 23 additions & 0 deletions scraper/config/config_for_testing.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"url_base": "https://www.lzvcup.be",
"steps": {
"historical_players": true,
"csv": false
},
"areas": {
"BRUSSELS GEWEST": "results/2"
},
"dir_output": "data",
"postprocessing": {
"competitions": [["area", "region", "competition"], []],
"teams": [["area", "region", "competition", "team"], []],
"sportshalls": [["area", "region", "sportshall", "url_sportshall"], []],
"stats_players": [["name", "team", "number", "url"], ["area", "region", "competition"]],
"stats_players_historical": [["name", "team", "seizoen", "reeks", "stand"], []],
"schedules": [["area", "region", "competition", "sportshall"], []],
"standings": [["area", "region", "competition", "team"], []],
"palmares": [["team", "seizoen", "reeks", "positie"], ["area", "region", "competition"]]
},
"database": "database/futsalfriend.db",
"dir_last_updated": "webapp/last_updated.txt"
}
18 changes: 9 additions & 9 deletions scraper/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,13 @@ def scrape(config, log_main):
log_main.info(f"Area {area} successfully processed")

# gather lists into single DataFrames
-df_competitions_urls_all = pd.concat(list_competitions)
-df_teams_all = pd.concat(list_teams)
-df_schedules_all = pd.concat(list_schedules)
-df_standings_all = pd.concat(list_standings)
-df_stats_players_all = pd.concat(list_stats)
-df_palmares_all = pd.concat(list_palmares)
-df_sportshalls_all = pd.concat(list_sportshalls)
+df_competitions_urls_all = pd.concat(list_competitions).reset_index(drop=True)
+df_teams_all = pd.concat(list_teams).reset_index(drop=True)
+df_schedules_all = pd.concat(list_schedules).reset_index(drop=True)
+df_standings_all = pd.concat(list_standings).reset_index(drop=True)
+df_stats_players_all = pd.concat(list_stats).reset_index(drop=True)
+df_palmares_all = pd.concat(list_palmares).reset_index(drop=True)
+df_sportshalls_all = pd.concat(list_sportshalls).reset_index(drop=True)

# get historical player statistics if enabled in config
if config["steps"]["historical_players"] is True:
Expand Down Expand Up @@ -158,7 +158,7 @@ def store(config, dict_tables, log_main):

log.info(f"Running script from {DIR_SCRIPT}")

-config = DataStorage.load_json(f"{DIR_SCRIPT}/config/config.json")
+config = DataStorage.load_json(f"{DIR_SCRIPT}/config/config_for_testing.json")
log.info("Config loaded")

dict_tables = scrape(config, log_main=log)
Expand All @@ -170,5 +170,5 @@ def store(config, dict_tables, log_main):
store(config, dict_tables, log_main=log)
log.info("Data stored")

-write_current_date_to_file(config["dir_last_updated_date"])
+write_current_date_to_file(config["dir_last_updated"])
log.info("Refresh date updated.")
2 changes: 1 addition & 1 deletion webapp/01_Home.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
add_socials_to_sidebar()

with st.sidebar:
-with open("webapp/last_updated_date.txt", "r") as f:
+with open("webapp/last_updated.txt", "r") as f:
last_updated = f.read()
st.markdown(f"**Last updated:** {last_updated}")

Expand Down

0 comments on commit c8ecdeb

Please sign in to comment.