-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
16c2e51
commit 0c6d80a
Showing
9 changed files
with
1,181 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
|
||
# Create one GitHub release per dataset on sportsdataverse/sportsdataverse-data.
# Each entry maps a release tag (also used as the release name) to the text
# used as the release body. Releases are created in the order listed.
release_bodies <- c(
  #--- ESPN WBB Data -----
  "espn_womens_college_basketball_schedules" =
    "NCAA Women's College Basketball Schedules Data (from ESPN)",
  "espn_womens_college_basketball_team_boxscores" =
    "NCAA Women's College Basketball Team Boxscores Data (from ESPN)",
  "espn_womens_college_basketball_player_boxscores" =
    "NCAA Women's College Basketball Player Boxscores Data (from ESPN)",
  "espn_womens_college_basketball_pbp" =
    "NCAA Women's College Basketball Play-by-Play Data (from ESPN)",
  #--- ESPN WNBA Data -----
  "espn_wnba_schedules" = "WNBA Schedules Data (from ESPN)",
  "espn_wnba_team_boxscores" = "WNBA Team Boxscores Data (from ESPN)",
  "espn_wnba_player_boxscores" = "WNBA Player Boxscores Data (from ESPN)",
  "espn_wnba_pbp" = "WNBA Play-by-Play Data (from ESPN)",
  #--- WNBA Stats Data -----
  "wnba_stats_schedules" = "WNBA Schedules Data (from stats.wnba.com)",
  "wnba_stats_team_boxscores" = "WNBA Team Boxscores Data (from stats.wnba.com)",
  "wnba_stats_player_boxscores" = "WNBA Player Boxscores Data (from stats.wnba.com)",
  "wnba_stats_pbp" = "WNBA Play-by-Play Data (from stats.wnba.com)"
)

for (release_tag in names(release_bodies)) {
  piggyback::pb_release_create(
    repo = "sportsdataverse/sportsdataverse-data",
    tag = release_tag,
    name = release_tag,
    body = release_bodies[[release_tag]],
    # The token is read from the environment on every call.
    .token = Sys.getenv("GITHUB_PAT")
  )
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,170 @@ | ||
lib_path <- Sys.getenv("R_LIBS") | ||
if (!requireNamespace("pacman", quietly = TRUE)){ | ||
install.packages("pacman",lib=Sys.getenv("R_LIBS"), repos="http://cran.us.r-project.org") | ||
} | ||
suppressPackageStartupMessages(suppressMessages(library(dplyr, lib.loc=lib_path))) | ||
suppressPackageStartupMessages(suppressMessages(library(magrittr, lib.loc=lib_path))) | ||
suppressPackageStartupMessages(suppressMessages(library(jsonlite, lib.loc=lib_path))) | ||
suppressPackageStartupMessages(suppressMessages(library(purrr, lib.loc=lib_path))) | ||
suppressPackageStartupMessages(suppressMessages(library(progressr, lib.loc=lib_path))) | ||
suppressPackageStartupMessages(suppressMessages(library(data.table, lib.loc=lib_path))) | ||
suppressPackageStartupMessages(suppressMessages(library(qs, lib.loc=lib_path))) | ||
suppressPackageStartupMessages(suppressMessages(library(arrow, lib.loc=lib_path))) | ||
suppressPackageStartupMessages(suppressMessages(library(glue, lib.loc=lib_path))) | ||
suppressPackageStartupMessages(suppressMessages(library(optparse, lib.loc=lib_path))) | ||
|
||
|
||
|
||
# Publish every per-season RDS file in `rds_dir` through
# sportsdataversedata::sportsdataverse_save().
#
# rds_dir      Directory (with trailing slash) holding one RDS file per season.
# description  Description string passed to wehoop:::make_wehoop_data().
# file_prefix  Prefix of the published file name; the first run of digits in
#              the source file name is appended as "{file_prefix}_{digits}".
# data_type    Value passed as `sportsdataverse_type`.
# release_tag  Release tag the files are saved under.
# prepare      Function applied to the data right after it is read
#              (defaults to no change).
#
# Returns the processed file names invisibly; called for its side effects.
publish_season_files <- function(rds_dir, description, file_prefix,
                                 data_type, release_tag, prepare = identity) {
  # Only pick up RDS files so a stray file in the folder cannot break readRDS().
  season_files <- list.files(
    path = rds_dir,
    pattern = "\\.rds$",
    ignore.case = TRUE
  )

  # walk() rather than map(): the return values of the save call are not used.
  purrr::walk(season_files, function(season_file) {
    season_data <- prepare(readRDS(paste0(rds_dir, season_file)))
    season_data <- wehoop:::make_wehoop_data(season_data, description, Sys.time())
    season <- stringr::str_extract(season_file, "\\d+")

    sportsdataversedata::sportsdataverse_save(
      data_frame = season_data,
      file_name = glue::glue("{file_prefix}_{season}"),
      sportsdataverse_type = data_type,
      release_tag = release_tag,
      file_types = c("rds", "csv", "parquet"),
      .token = Sys.getenv("GITHUB_PAT")
    )
  })

  invisible(season_files)
}

# Schedule files get their id columns coerced to integer and the display clock
# coerced to character before publishing.
coerce_schedule_columns <- function(sched) {
  dplyr::mutate(
    sched,
    id = as.integer(.data$id),
    game_id = as.integer(.data$game_id),
    status_display_clock = as.character(.data$status_display_clock)
  )
}

#--- ESPN WBB -----
publish_season_files(
  rds_dir = "wbb/schedules/rds/",
  description = "ESPN WBB Schedule from wehoop data repository",
  file_prefix = "wbb_schedule",
  data_type = "schedule data",
  release_tag = "espn_womens_college_basketball_schedules",
  prepare = coerce_schedule_columns
)

publish_season_files(
  rds_dir = "wbb/pbp/rds/",
  description = "ESPN WBB Play-by-Play from wehoop data repository",
  file_prefix = "play_by_play",
  data_type = "Play-by-Play data",
  release_tag = "espn_womens_college_basketball_pbp"
)

publish_season_files(
  rds_dir = "wbb/team_box/rds/",
  description = "ESPN WBB Team Boxscores from wehoop data repository",
  file_prefix = "team_box",
  data_type = "Team Boxscores data",
  release_tag = "espn_womens_college_basketball_team_boxscores"
)

publish_season_files(
  rds_dir = "wbb/player_box/rds/",
  description = "ESPN WBB Player Boxscores from wehoop data repository",
  file_prefix = "player_box",
  data_type = "Player Boxscores data",
  release_tag = "espn_womens_college_basketball_player_boxscores"
)

#--- ESPN WNBA -----
publish_season_files(
  rds_dir = "wnba/schedules/rds/",
  description = "ESPN WNBA Schedule from wehoop data repository",
  file_prefix = "wnba_schedule",
  data_type = "schedule data",
  release_tag = "espn_wnba_schedules",
  prepare = coerce_schedule_columns
)

publish_season_files(
  rds_dir = "wnba/pbp/rds/",
  description = "ESPN WNBA Play-by-Play from wehoop data repository",
  file_prefix = "play_by_play",
  data_type = "Play-by-Play data",
  release_tag = "espn_wnba_pbp"
)

publish_season_files(
  rds_dir = "wnba/team_box/rds/",
  description = "ESPN WNBA Team Boxscores from wehoop data repository",
  file_prefix = "team_box",
  data_type = "Team Boxscores data",
  release_tag = "espn_wnba_team_boxscores"
)

publish_season_files(
  rds_dir = "wnba/player_box/rds/",
  description = "ESPN WNBA Player Boxscores from wehoop data repository",
  file_prefix = "player_box",
  data_type = "Player Boxscores data",
  release_tag = "espn_wnba_player_boxscores"
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
|
||
|
||
# Re-serialise every JSON file in `json_dir` in place, without pretty-printing.
#
# Files are discovered by their ".json" extension and addressed by full path,
# so the file name never has to round-trip through an integer game id.
# Work is distributed with furrr under whatever future plan is active.
#
# NOTE(review): the files are parsed with jsonlite::fromJSON() defaults and
# written back with jsonlite::write_json() defaults, exactly as before.
# That round trip may not reproduce the original JSON structure (for example
# scalars vs. length-one arrays) -- confirm this is intended.
#
# Returns the rewritten file paths invisibly; called for its side effects.
minify_json_dir <- function(json_dir) {
  json_files <- list.files(
    path = json_dir,
    pattern = "\\.json$",
    full.names = TRUE
  )

  # future_walk(): nothing is collected, the files are only rewritten.
  furrr::future_walk(
    json_files,
    function(json_file) {
      parsed <- jsonlite::fromJSON(json_file)
      # `pretty` is the documented toJSON()/write_json() switch.
      jsonlite::write_json(parsed, json_file, pretty = FALSE)
    },
    .options = furrr::furrr_options(seed = TRUE)
  )

  invisible(json_files)
}

future::plan("multisession")

minify_json_dir("wbb/json/final")
minify_json_dir("wbb/json/raw")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
lib_path <- Sys.getenv("R_LIBS") | ||
|
||
suppressPackageStartupMessages(suppressMessages(library(dplyr, lib.loc = lib_path))) | ||
suppressPackageStartupMessages(suppressMessages(library(httr, lib.loc = lib_path))) | ||
suppressPackageStartupMessages(suppressMessages(library(jsonlite, lib.loc = lib_path))) | ||
suppressPackageStartupMessages(suppressMessages(library(glue, lib.loc = lib_path))) | ||
suppressPackageStartupMessages(suppressMessages(library(purrr, lib.loc = lib_path))) | ||
#' Fetch the proxy list of a Proxy Bonanza user package
#'
#' Sends a GET request (with retries via `httr::RETRY`) to the Proxy Bonanza
#' user-package endpoint, parses the JSON response and takes its `data`
#' element. The package-level `login` and `password` are copied onto every
#' row of `ippacks`.
#'
#' @param api_key Value sent in the `Authorization` header. Defaults to the
#'   `PROXY_BONANZA_KEY` environment variable.
#' @param user_package User package id interpolated into the endpoint URL.
#'   Defaults to the `PROXY_BONANZA_USERPKG` environment variable.
#' @return The `ippacks` table reduced to the columns `ip`, `port_http`,
#'   `login` and `password`.
get_proxy_bonanza_ips <- function(
    api_key = Sys.getenv("PROXY_BONANZA_KEY"),
    user_package = Sys.getenv("PROXY_BONANZA_USERPKG")) {
  endpoint <- glue::glue("https://proxybonanza.com/api/v1/userpackages/{user_package}.json")
  response <- httr::RETRY(
    "GET",
    endpoint,
    httr::add_headers(Authorization = paste(api_key))
  )
  body_text <- httr::content(response, as = "text", encoding = "UTF-8")

  package_info <- purrr::pluck(jsonlite::fromJSON(body_text), "data")

  account_login <- package_info$login
  account_password <- package_info$password
  ip_table <- package_info$ippacks

  # Credentials are defined once per package; attach them to each proxy row.
  ip_table$login <- account_login
  ip_table$password <- account_password

  dplyr::select(ip_table, "ip", "port_http", "login", "password")
}
|
||
#' Pick one proxy at random and build an httr proxy configuration
#'
#' @param proxies Table with the columns `ip`, `port_http`, `login` and
#'   `password`, one row per proxy. Defaults to the result of
#'   `get_proxy_bonanza_ips()`.
#' @return The object returned by `httr::use_proxy()` for the chosen row.
select_proxy <- function(proxies = get_proxy_bonanza_ips()) {
  n_proxies <- nrow(proxies)
  if (is.null(n_proxies) || n_proxies < 1L) {
    stop("`proxies` must contain at least one row.", call. = FALSE)
  }

  # Sample a row rather than an IP value so exactly one proxy is selected
  # even when the same IP appears on several rows.
  chosen <- proxies[sample.int(n_proxies, 1L), , drop = FALSE]

  # The port column is `port_http`; `[[` avoids relying on partial matching
  # of `$port`, which returns NULL on tibbles.
  httr::use_proxy(
    url = chosen[["ip"]],
    port = chosen[["port_http"]],
    username = chosen[["login"]],
    password = chosen[["password"]]
  )
}
|
||
|
||
#' Combine per-team game rows into one row per game
#'
#' Rows whose `MATCHUP` contains "@" are treated as the away side, all other
#' rows as the home side. `WL` is dropped. Every column other than
#' `SEASON_ID`, `GAME_ID`, `GAME_DATE` (and `MATCHUP` on the away side) is
#' prefixed with `AWAY_` or `HOME_`, and the home rows are left-joined onto
#' the away rows by `GAME_ID`, `SEASON_ID` and `GAME_DATE`.
#'
#' @param df Data frame with at least the columns `SEASON_ID`, `GAME_ID`,
#'   `GAME_DATE`, `MATCHUP` and `WL`.
#' @return One row per away row: the key columns, the away `MATCHUP`, the
#'   `AWAY_*` columns and the matching `HOME_*` columns.
rejoin_schedules <- function(df) {
  key_cols <- c("SEASON_ID", "GAME_ID", "GAME_DATE")

  # Prefix every column not listed in `keep`. Selecting by name instead of
  # `5:ncol(x)` stays correct when there are no value columns to rename.
  prefix_value_cols <- function(data, prefix, keep) {
    to_rename <- !(names(data) %in% keep)
    names(data)[to_rename] <- paste0(prefix, names(data)[to_rename])
    data
  }

  df <- dplyr::mutate(
    df,
    HOME_AWAY = ifelse(stringr::str_detect(.data$MATCHUP, "@"), "AWAY", "HOME")
  )
  df <- dplyr::select(df, -dplyr::all_of("WL"))

  away_df <- dplyr::filter(df, .data$HOME_AWAY == "AWAY")
  away_df <- dplyr::select(away_df, -dplyr::all_of("HOME_AWAY"))
  away_df <- dplyr::select(
    away_df,
    dplyr::all_of(c(key_cols, "MATCHUP")),
    dplyr::everything()
  )
  away_df <- prefix_value_cols(away_df, "AWAY_", keep = c(key_cols, "MATCHUP"))

  home_df <- dplyr::filter(df, .data$HOME_AWAY == "HOME")
  home_df <- dplyr::select(home_df, -dplyr::all_of(c("HOME_AWAY", "MATCHUP")))
  home_df <- dplyr::select(home_df, dplyr::all_of(key_cols), dplyr::everything())
  home_df <- prefix_value_cols(home_df, "HOME_", keep = key_cols)

  dplyr::left_join(
    away_df,
    home_df,
    by = c("GAME_ID", "SEASON_ID", "GAME_DATE")
  )
}
Oops, something went wrong.