diff --git a/DESCRIPTION b/DESCRIPTION index 6f6f47fa..e4e35d81 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: GitStats Title: Standardized Git Repository Data -Version: 2.1.2.9008 +Version: 2.2.0 Authors@R: c( person(given = "Maciej", family = "Banas", email = "banasmaciek@gmail.com", role = c("aut", "cre")), person(given = "Kamil", family = "Koziej", email = "koziej.k@gmail.com", role = "aut"), diff --git a/NEWS.md b/NEWS.md index 203d6819..84e7dc85 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,6 @@ -# GitStats (development version) +# GitStats 2.2.0 + +This release brings substantial improvements: it makes it possible to scan whole organizations and particular repositories for one host at the same time, enhances the function that prepares commit statistics, and simplifies the workflow for getting files. ## Features: @@ -9,7 +11,7 @@ - adding `yearly` aggregation to `time_aggregation` parameter, - changing basic input from `GitStats` to `commits_data` object which allows to build workflow in one pipeline (`create_gitstats() |> set_*_host() |> get_commits() |> get_commits_stats()`). - Merged two functions `get_files_content()` and `get_files_structure()` into one `get_files()` ([#564](https://github.com/r-world-devs/GitStats/issues/564)). -- Add `.show_error` parameter to the `set_*_host()` functions to control if error should pop up when wrong input is passed ([#547](https://github.com/r-world-devs/GitStats/issues/547)). +- Add `.error` parameter to the `set_*_host()` functions to control if error should pop up when wrong input is passed ([#547](https://github.com/r-world-devs/GitStats/issues/547)). 
## Fixes: diff --git a/R/EngineGraphQLGitHub.R b/R/EngineGraphQLGitHub.R index 5a1f536a..60cbdcef 100644 --- a/R/EngineGraphQLGitHub.R +++ b/R/EngineGraphQLGitHub.R @@ -9,9 +9,11 @@ EngineGraphQLGitHub <- R6::R6Class( initialize = function(gql_api_url, token, scan_all = FALSE) { - super$initialize(gql_api_url = gql_api_url, - token = token, - scan_all = scan_all) + super$initialize( + gql_api_url = gql_api_url, + token = token, + scan_all = scan_all + ) self$gql_query <- GQLQueryGitHub$new() }, @@ -53,7 +55,7 @@ EngineGraphQLGitHub <- R6::R6Class( response$errors[[1]]$message ) } - orgs_list <- purrr::map(response$data$search$edges, ~stringr::str_match(.$node$url, "[^\\/]*$")) + orgs_list <- purrr::map(response$data$search$edges, ~ stringr::str_match(.$node$url, "[^\\/]*$")) full_orgs_list <- append(full_orgs_list, orgs_list) has_next_page <- response$data$search$pageInfo$hasNextPage end_cursor <- response$data$search$pageInfo$endCursor @@ -98,7 +100,7 @@ EngineGraphQLGitHub <- R6::R6Class( prepare_repos_table = function(repos_list, org) { if (length(repos_list) > 0) { repos_table <- purrr::map(repos_list, function(repo) { - repo$default_branch <- if (!is.null(repo$default_branch)) { + repo[["default_branch"]] <- if (!is.null(repo$default_branch)) { repo$default_branch$name } else { "" @@ -107,13 +109,13 @@ EngineGraphQLGitHub <- R6::R6Class( if (length(last_activity_at) == 0) { last_activity_at <- gts_to_posixt(repo$created_at) } - repo$languages <- purrr::map_chr(repo$languages$nodes, ~ .$name) %>% + repo[["languages"]] <- purrr::map_chr(repo$languages$nodes, ~ .$name) |> paste0(collapse = ", ") - repo$created_at <- gts_to_posixt(repo$created_at) - repo$issues_open <- repo$issues_open$totalCount - repo$issues_closed <- repo$issues_closed$totalCount - repo$last_activity_at <- last_activity_at - repo$organization <- repo$organization$login + repo[["created_at"]] <- gts_to_posixt(repo$created_at) + repo[["issues_open"]] <- repo$issues_open$totalCount + 
repo[["issues_closed"]] <- repo$issues_closed$totalCount + repo[["last_activity_at"]] <- last_activity_at + repo[["organization"]] <- repo$organization$login repo <- data.frame(repo) %>% dplyr::relocate( default_branch, @@ -200,7 +202,7 @@ EngineGraphQLGitHub <- R6::R6Class( repos, file_paths = NULL, host_files_structure = NULL, - verbose = TRUE, + verbose = TRUE, progress = TRUE) { repo_data <- private$get_repos_data( org = org, @@ -265,11 +267,11 @@ EngineGraphQLGitHub <- R6::R6Class( def_branches <- repo_data[["def_branches"]] files_structure <- purrr::map2(repositories, def_branches, function(repo, def_branch) { private$get_files_structure_from_repo( - org = org, - repo = repo, + org = org, + repo = repo, def_branch = def_branch, - pattern = pattern, - depth = depth + pattern = pattern, + depth = depth ) }, .progress = progress) names(files_structure) <- repositories @@ -293,7 +295,8 @@ EngineGraphQLGitHub <- R6::R6Class( user_data[["web_url"]] <- user_data$web_url %||% "" user_table <- tibble::as_tibble(user_data) %>% dplyr::relocate(c(commits, issues, pull_requests, reviews), - .after = starred_repos) + .after = starred_repos + ) } else { user_table <- NULL } @@ -431,27 +434,30 @@ EngineGraphQLGitHub <- R6::R6Class( commits_by_org_query <- self$gql_query$commits_from_repo( commits_cursor = commits_cursor ) - response <- tryCatch({ - self$gql_response( - gql_query = commits_by_org_query, - vars = list( - "org" = org, - "repo" = repo, - "since" = date_to_gts(since), - "until" = date_to_gts(until) + response <- tryCatch( + { + self$gql_response( + gql_query = commits_by_org_query, + vars = list( + "org" = org, + "repo" = repo, + "since" = date_to_gts(since), + "until" = date_to_gts(until) + ) ) - ) - }, error = function(e) { - self$gql_response( - gql_query = commits_by_org_query, - vars = list( - "org" = org, - "repo" = repo, - "since" = date_to_gts(since), - "until" = date_to_gts(until) + }, + error = function(e) { + self$gql_response( + gql_query = 
commits_by_org_query, + vars = list( + "org" = org, + "repo" = repo, + "since" = date_to_gts(since), + "until" = date_to_gts(until) + ) ) - ) - }) + } + ) return(response) }, diff --git a/R/EngineGraphQLGitLab.R b/R/EngineGraphQLGitLab.R index e806439e..a6ba5a76 100644 --- a/R/EngineGraphQLGitLab.R +++ b/R/EngineGraphQLGitLab.R @@ -24,7 +24,7 @@ EngineGraphQLGitLab <- R6::R6Class( response <- self$gql_response( gql_query = user_or_org_query, vars = list( - "username" = owner, + "username" = owner, "grouppath" = owner ) ) @@ -352,10 +352,10 @@ EngineGraphQLGitLab <- R6::R6Class( repositories <- repo_data[["repositories"]] files_structure <- purrr::map(repositories, function(repo) { private$get_files_structure_from_repo( - org = org, - repo = repo, + org = org, + repo = repo, pattern = pattern, - depth = depth + depth = depth ) }, .progress = progress) names(files_structure) <- repositories @@ -367,18 +367,20 @@ EngineGraphQLGitLab <- R6::R6Class( prepare_user_table = function(user_response) { if (!is.null(user_response$data$user)) { user_data <- user_response$data$user - user_data$name <- user_data$name %||% "" - user_data$starred_repos <- user_data$starred_repos$count - user_data$pull_requests <- user_data$pull_requests$count - user_data$reviews <- user_data$reviews$count - user_data$email <- user_data$email %||% "" - user_data$location <- user_data$location %||% "" - user_data$web_url <- user_data$web_url %||% "" - user_table <- tibble::as_tibble(user_data) %>% + user_data[["name"]] <- user_data$name %||% "" + user_data[["starred_repos"]] <- user_data$starred_repos$count + user_data[["pull_requests"]] <- user_data$pull_requests$count + user_data[["reviews"]] <- user_data$reviews$count + user_data[["email"]] <- user_data$email %||% "" + user_data[["location"]] <- user_data$location %||% "" + user_data[["web_url"]] <- user_data$web_url %||% "" + user_table <- tibble::as_tibble(user_data) |> dplyr::mutate(commits = NA, - issues = NA) %>% - dplyr::relocate(c(commits, 
issues), - .after = starred_repos) + issues = NA) |> + dplyr::relocate( + c(commits, issues), + .after = starred_repos + ) } else { user_table <- NULL } diff --git a/R/EngineRestGitHub.R b/R/EngineRestGitHub.R index db6d95dd..d9540ef5 100644 --- a/R/EngineRestGitHub.R +++ b/R/EngineRestGitHub.R @@ -195,11 +195,7 @@ EngineRestGitHub <- R6::R6Class( error = function(e) { NA }) - }, .progress = if (progress) { - "[GitHost:GitHub] Pulling contributors..." - } else { - FALSE - }) + }, .progress = progress) } return(repos_table) } diff --git a/R/EngineRestGitLab.R b/R/EngineRestGitLab.R index fcccc21c..46571dc5 100644 --- a/R/EngineRestGitLab.R +++ b/R/EngineRestGitLab.R @@ -246,11 +246,7 @@ EngineRestGitLab <- R6::R6Class( NA }) return(contributors_vec) - }, .progress = if (progress) { - "[GitHost:GitLab] Pulling contributors..." - } else { - FALSE - }) + }, .progress = progress) } return(repos_table) }, diff --git a/R/GitHost.R b/R/GitHost.R index 58e54e74..54f1e3cb 100644 --- a/R/GitHost.R +++ b/R/GitHost.R @@ -496,7 +496,7 @@ GitHost <- R6::R6Class( } } return(org) - }) |> + }, .progress = verbose) |> purrr::keep(~ length(.) 
> 0) if (length(orgs) == 0) { return(NULL) @@ -693,7 +693,7 @@ GitHost <- R6::R6Class( show_message( host = private$host_name, engine = "graphql", - scope = org, + scope = set_repo_scope(org, private), information = "Pulling repositories" ) } @@ -813,7 +813,7 @@ GitHost <- R6::R6Class( show_message( host = private$host_name, engine = "rest", - scope = org, + scope = paste0(org, "/", private$orgs_repos[[org]], collapse = ", "), information = "Pulling repositories (URLs)" ) } @@ -1150,7 +1150,7 @@ GitHost <- R6::R6Class( show_message( host = private$host_name, engine = "graphql", - scope = org, + scope = paste0(org, "/", private$orgs_repos[[org]], collapse = ", "), information = glue::glue("Pulling files content: [{paste0(file_path, collapse = ', ')}]") ) } @@ -1280,7 +1280,7 @@ GitHost <- R6::R6Class( show_message( host = private$host_name, engine = "graphql", - scope = org, + scope = paste0(org, "/", private$orgs_repos[[org]], collapse = ", "), information = user_info ) } @@ -1383,7 +1383,7 @@ GitHost <- R6::R6Class( show_message( host = private$host_name, engine = "graphql", - scope = paste0(org, "/", private$orgs_repos[[org]]), + scope = paste0(org, "/", private$orgs_repos[[org]], collapse = ", "), information = "Pulling release logs" ) } diff --git a/R/GitStats.R b/R/GitStats.R index 9bd78f4b..e3ca54bb 100644 --- a/R/GitStats.R +++ b/R/GitStats.R @@ -9,7 +9,7 @@ GitStats <- R6::R6Class( orgs = NULL, repos = NULL, verbose = TRUE, - .show_error = TRUE) { + .error = TRUE) { new_host <- NULL new_host <- GitHostGitHub$new( orgs = orgs, @@ -17,7 +17,7 @@ GitStats <- R6::R6Class( token = token, host = host, verbose = verbose, - .error = .show_error + .error = .error ) private$add_new_host(new_host) }, @@ -27,7 +27,7 @@ GitStats <- R6::R6Class( orgs = NULL, repos = NULL, verbose = TRUE, - .show_error = TRUE) { + .error = TRUE) { new_host <- NULL new_host <- GitHostGitLab$new( orgs = orgs, @@ -35,7 +35,7 @@ GitStats <- R6::R6Class( token = token, host = host, verbose = 
verbose, - .error = .show_error + .error = .error ) private$add_new_host(new_host) }, @@ -72,7 +72,7 @@ GitStats <- R6::R6Class( progress = progress ) %>% private$set_object_class( - class = "repos_table", + class = "gitstats_repos", attr_list = args_list ) private$save_to_storage( @@ -122,7 +122,7 @@ GitStats <- R6::R6Class( if (!is.null(repos_urls)) { repos_urls <- private$set_object_class( object = repos_urls, - class = "repos_urls", + class = "gitstats_repos_urls", attr_list = args_list ) private$save_to_storage( @@ -163,7 +163,7 @@ GitStats <- R6::R6Class( progress = progress ) %>% private$set_object_class( - class = "commits_data", + class = "gitstats_commits", attr_list = args_list ) private$save_to_storage( @@ -190,7 +190,7 @@ GitStats <- R6::R6Class( if (trigger) { users <- private$get_users_from_hosts(logins) %>% private$set_object_class( - class = "users_data", + class = "gitstats_users", attr_list = args_list ) private$save_to_storage(users) @@ -231,7 +231,7 @@ GitStats <- R6::R6Class( if (nrow(files) > 0) { files <- private$set_object_class( object = files, - class = "files_data", + class = "gitstats_files", attr_list = args_list ) private$save_to_storage(files) @@ -270,7 +270,7 @@ GitStats <- R6::R6Class( progress = progress ) %>% private$set_object_class( - class = "release_logs", + class = "gitstats_releases", attr_list = args_list ) private$save_to_storage(release_logs) @@ -313,7 +313,7 @@ GitStats <- R6::R6Class( (split_output && any(purrr::map_lgl(R_package_usage, ~ nrow(.) 
> 0)))) { R_package_usage <- private$set_object_class( object = R_package_usage, - class = "R_package_usage", + class = "gitstats_package_usage", attr_list = args_list ) private$save_to_storage(R_package_usage) @@ -915,13 +915,7 @@ GitStats <- R6::R6Class( item_to_print <- purrr::map_vec(item_to_print, function(element) { URLdecode(element) }) - if (length(item_to_print) < 10) { - list_items <- paste0(item_to_print, collapse = ", ") - } else { - item_to_print_cut <- item_to_print[1:10] - list_items <- paste0(item_to_print_cut, collapse = ", ") %>% - paste0("... and ", length(item_to_print) - 10, " more") - } + list_items <- cut_item_to_print(item_to_print) item_to_print <- paste0("[", cli::col_green(length(item_to_print)), "] ", list_items) } cat(paste0( diff --git a/R/get_commits_stats.R b/R/get_commits_stats.R index c73a0d08..9e4b7eef 100644 --- a/R/get_commits_stats.R +++ b/R/get_commits_stats.R @@ -28,9 +28,9 @@ get_commits_stats <- function(commits, time_aggregation = c("year", "month", "week", "day"), group_var) { - if (!inherits(commits, "commits_data")) { + if (!inherits(commits, "gitstats_commits")) { cli::cli_abort(c( - "x" = "`commits` must be a `commits_data` object.", + "x" = "`commits` must be a `gitstats_commits` object.", "i" = "Pull first your commits with `get_commits()` function." 
)) } @@ -62,7 +62,7 @@ get_commits_stats <- function(commits, set_commits_stats_class <- function(object, time_aggregation) { stopifnot(inherits(object, "grouped_df")) object <- dplyr::ungroup(object) - class(object) <- append(class(object), "commits_stats") + class(object) <- append(class(object), "gitstats_commits_stats") attr(object, "time_aggregation") <- time_aggregation object } diff --git a/R/message_handler.R b/R/message_handler.R index 7d578126..87094eff 100644 --- a/R/message_handler.R +++ b/R/message_handler.R @@ -13,6 +13,7 @@ show_message <- function(host, } else if (engine == "both") { paste0(msg_rest, "&", msg_graphql) } + information <- cli::col_br_blue(information) message <- if (is.null(scope)) { glue::glue("[Host:{host}][Engine:{engine_msg}] {information}...") } else { @@ -20,3 +21,18 @@ show_message <- function(host, } cli::cli_alert_info(message) } + +set_repo_scope <- function(org, private) { + cut_item_to_print(paste0(org, "/", private$orgs_repos[[org]])) +} + +cut_item_to_print <- function(item_to_print) { + if (length(item_to_print) < 10) { + list_items <- paste0(item_to_print, collapse = ", ") + } else { + item_to_print_cut <- item_to_print[1:10] + list_items <- paste0(item_to_print_cut, collapse = ", ") %>% + paste0("... and ", length(item_to_print) - 10, " more") + } + return(list_items) +} diff --git a/R/set_host.R b/R/set_host.R index 9bab0d3f..63c07d2b 100644 --- a/R/set_host.R +++ b/R/set_host.R @@ -11,7 +11,7 @@ #' pass it, `orgs` parameter should stay `NULL`. #' @param verbose A logical, `TRUE` by default. If `FALSE` messages and printing #' output is switched off. -#' @param .show_error A logical to control if passing wrong input +#' @param .error A logical to control if passing wrong input #' (`repositories` and `organizations`) should end with an error or not. 
#' @details If you do not define `orgs` and `repos`, `GitStats` will be set to #' scan whole Git platform (such as enterprise version of GitHub or GitLab), @@ -33,14 +33,14 @@ set_github_host <- function(gitstats, orgs = NULL, repos = NULL, verbose = is_verbose(gitstats), - .show_error = TRUE) { + .error = TRUE) { gitstats$set_github_host( host = host, token = token, orgs = orgs, repos = repos, verbose = verbose, - .show_error = .show_error + .error = .error ) return(invisible(gitstats)) @@ -70,14 +70,14 @@ set_gitlab_host <- function(gitstats, orgs = NULL, repos = NULL, verbose = is_verbose(gitstats), - .show_error = TRUE) { + .error = TRUE) { gitstats$set_gitlab_host( host = host, token = token, orgs = orgs, repos = repos, verbose = verbose, - .show_error = .show_error + .error = .error ) return(invisible(gitstats)) diff --git a/README.Rmd b/README.Rmd index 776f072b..3da5747d 100644 --- a/README.Rmd +++ b/README.Rmd @@ -7,6 +7,7 @@ output: github_document ```{r, include = FALSE} knitr::opts_chunk$set( message = FALSE, + warning = FALSE, collapse = TRUE, comment = "#>", fig.path = "man/figures/README-" @@ -27,7 +28,6 @@ With GitStats you can pull git data in a uniform way (table format) from GitHub * commits, * users, * release logs, -* repository files structure, * text files content, * R package usage. @@ -39,15 +39,81 @@ From CRAN: install.packages("GitStats") ``` -Or development version: +From GitHub: ```r devtools::install_github("r-world-devs/GitStats") ``` -## GitStats workflow +## Examples: -On how to use GitStats, refer to the [documentation](https://r-world-devs.github.io/GitStats/index.html). 
+Setup your `GitStats`: + +```{r} +library(GitStats) + +git_stats <- create_gitstats() |> + set_gitlab_host( + repos = "mbtests/gitstatstesting" + ) |> + set_github_host( + orgs = "r-world-devs", + repos = "openpharma/DataFakeR" + ) +``` + +Get commits: + +```{r} +commits <- git_stats |> + get_commits( + since = "2022-01-01" + ) + +commits + +commits |> + get_commits_stats( + time_aggregation = "month", + group_var = author + ) +``` + +Get repositories with specific code: + +```{r} +git_stats |> + get_repos( + with_code = "shiny", + add_contributors = FALSE + ) +``` + +Get files: + +```{r} +git_stats |> + get_files( + pattern = "\\.md", + depth = 2L + ) +``` + +Get package usage: + +```{r} +git_stats |> + get_R_package_usage( + packages = c("shiny", "purrr"), + split_output = TRUE + ) +``` + +Print `GitStats` to see what it stores: + +```{r} +git_stats +``` ## Acknowledgement diff --git a/README.md b/README.md index 0d3003f3..13ea41e6 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,6 @@ GitHub and GitLab. For the time-being you can get data on: - commits, - users, - release logs, -- repository files structure, - text files content, - R package usage. @@ -30,16 +29,179 @@ From CRAN: install.packages("GitStats") ``` -Or development version: +From GitHub: ``` r devtools::install_github("r-world-devs/GitStats") ``` -## GitStats workflow +## Examples: -On how to use GitStats, refer to the -[documentation](https://r-world-devs.github.io/GitStats/index.html). 
+Setup your `GitStats`: + +``` r +library(GitStats) + +git_stats <- create_gitstats() |> + set_gitlab_host( + repos = "mbtests/gitstatstesting" + ) |> + set_github_host( + orgs = "r-world-devs", + repos = "openpharma/DataFakeR" + ) +``` + +Get commits: + +``` r +commits <- git_stats |> + get_commits( + since = "2022-01-01" + ) + +commits +#> # A tibble: 2,178 × 11 +#> id committed_date author author_login author_name additions deletions +#> +#> 1 7f48… 2024-09-10 11:12:59 Macie… maciekbanas Maciej Ban… 0 0 +#> 2 9c66… 2024-09-10 10:35:37 Macie… maciekbanas Maciej Ban… 0 0 +#> 3 fca2… 2024-09-10 10:31:24 Macie… maciekbanas Maciej Ban… 0 0 +#> 4 e8f2… 2023-03-30 14:15:33 Macie… maciekbanas Maciej Ban… 1 0 +#> 5 7e87… 2023-02-10 09:48:55 Macie… maciekbanas Maciej Ban… 1 1 +#> 6 62c4… 2023-02-10 09:17:24 Macie… maciekbanas Maciej Ban… 2 87 +#> 7 55cf… 2023-02-10 09:07:54 Macie… maciekbanas Maciej Ban… 92 0 +#> 8 C_kw… 2023-05-08 09:43:31 Kryst… krystian8207 Krystian I… 18 0 +#> 9 C_kw… 2023-04-28 12:30:40 Kamil… Kamil Kozi… 18 0 +#> 10 C_kw… 2023-03-01 15:05:10 Kryst… krystian8207 Krystian I… 296 153 +#> # ℹ 2,168 more rows +#> # ℹ 4 more variables: repository , organization , repo_url , +#> # api_url + +commits |> + get_commits_stats( + time_aggregation = "month", + group_var = author + ) +#> # A tibble: 228 × 4 +#> stats_date githost author stats +#> +#> 1 2022-01-01 00:00:00 github Admin_mschuemi 1 +#> 2 2022-01-01 00:00:00 github Gowtham Rao 5 +#> 3 2022-01-01 00:00:00 github Krystian Igras 1 +#> 4 2022-01-01 00:00:00 github Martijn Schuemie 1 +#> 5 2022-02-01 00:00:00 github Hadley Wickham 3 +#> 6 2022-02-01 00:00:00 github Martijn Schuemie 2 +#> 7 2022-02-01 00:00:00 github Maximilian Girlich 13 +#> 8 2022-02-01 00:00:00 github Reijo Sund 1 +#> 9 2022-02-01 00:00:00 github eitsupi 1 +#> 10 2022-03-01 00:00:00 github Maximilian Girlich 14 +#> # ℹ 218 more rows +``` + +Get repositories: + +``` r +git_stats |> + get_repos( + with_code = "shiny", + add_contributors = 
FALSE + ) +#> # A tibble: 6 × 16 +#> repo_id repo_name organization fullname platform repo_url api_url +#> +#> 1 627452680 hypothesis r-world-devs r-world-d… github https:/… https:… +#> 2 604718884 shinyTimelines r-world-devs r-world-d… github https:/… https:… +#> 3 495151911 shinyCohortBuilder r-world-devs r-world-d… github https:/… https:… +#> 4 495144469 cohortBuilder r-world-devs r-world-d… github https:/… https:… +#> 5 884789327 GitAI r-world-devs r-world-d… github https:/… https:… +#> 6 586903986 GitStats r-world-devs r-world-d… github https:/… https:… +#> # ℹ 9 more variables: created_at , last_activity_at , +#> # last_activity , default_branch , stars , forks , +#> # languages , issues_open , issues_closed +``` + +Get files: + +``` r +git_stats |> + get_files( + pattern = "\\.md", + depth = 2L + ) +#> # A tibble: 51 × 8 +#> repo_name repo_id organization file_path file_content file_size repo_url +#> +#> 1 GitStats Test… gid://… mbtests README.md "# GitStats… 122 https:/… +#> 2 shinyGizmo R_kgDO… r-world-devs NEWS.md "# shinyGiz… 2186 https:/… +#> 3 shinyGizmo R_kgDO… r-world-devs README.md "\n# shinyG… 2337 https:/… +#> 4 shinyGizmo R_kgDO… r-world-devs cran-com… "## Test en… 1700 https:/… +#> 5 cohortBuilder R_kgDO… r-world-devs NEWS.md "# cohortBu… 917 https:/… +#> 6 cohortBuilder R_kgDO… r-world-devs README.md "\n# cohort… 15828 https:/… +#> 7 shinyCohortBu… R_kgDO… r-world-devs NEWS.md "# shinyCoh… 2018 https:/… +#> 8 shinyCohortBu… R_kgDO… r-world-devs README.md "\n# shinyC… 3355 https:/… +#> 9 cohortBuilder… R_kgDO… r-world-devs README.md "\n# cohort… 3472 https:/… +#> 10 GitStats R_kgDO… r-world-devs LICENSE.… "# MIT Lice… 1075 https:/… +#> # ℹ 41 more rows +#> # ℹ 1 more variable: api_url +``` + +Get package usage: + +``` r +git_stats |> + get_R_package_usage( + packages = c("shiny", "purrr"), + split_output = TRUE + ) +#> $shiny +#> # A tibble: 5 × 11 +#> package package_usage repo_id repo_fullname repo_name default_branch +#> +#> 1 shiny import 
495144469 r-world-devs/cohor… cohortBu… dev +#> 2 shiny import, library 495151911 r-world-devs/shiny… shinyCoh… dev +#> 3 shiny import, library 604718884 r-world-devs/shiny… shinyTim… master +#> 4 shiny import, library 884789327 r-world-devs/GitAI GitAI main +#> 5 shiny import, library 627452680 r-world-devs/hypot… hypothes… master +#> # ℹ 5 more variables: created_at , organization , repo_url , +#> # api_url , platform +#> +#> $purrr +#> # A tibble: 6 × 11 +#> package package_usage repo_id repo_fullname repo_name default_branch +#> +#> 1 purrr import 495144469 r-world-devs/cohortB… cohortBu… dev +#> 2 purrr import 495151911 r-world-devs/shinyCo… shinyCoh… dev +#> 3 purrr import 586903986 r-world-devs/GitStats GitStats master +#> 4 purrr import 884789327 r-world-devs/GitAI GitAI main +#> 5 purrr import 627452680 r-world-devs/hypothe… hypothes… master +#> 6 purrr import 402384343 openpharma/DataFakeR DataFakeR master +#> # ℹ 5 more variables: created_at , organization , repo_url , +#> # api_url , platform +#> +#> attr(,"class") +#> [1] "R_package_usage" "list" +#> attr(,"packages") +#> [1] "shiny" "purrr" +#> attr(,"only_loading") +#> [1] FALSE +``` + +Print `GitStats` to see what it stores: + +``` r +git_stats +#> A GitStats object for 2 hosts: +#> Hosts: https://gitlab.com/api/v4, https://api.github.com +#> Scanning scope: +#> Organizations: [1] r-world-devs +#> Repositories: [2] mbtests/gitstatstesting, openpharma/DataFakeR +#> Storage: +#> Repositories: 6 +#> Commits: 2178 [date range: 2022-01-01 - 2025-01-10] +#> Files: 51 [file pattern: \.md] +#> R_package_usage: 2 [packages: shiny, purrr] +``` ## Acknowledgement diff --git a/_pkgdown.yml b/_pkgdown.yml index 987071a6..1848e5b5 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -9,9 +9,23 @@ reference: - starts_with("set_") - matches("verbose") - matches("show_orgs") -- title: Get data + - matches("get_storage") +- title: Get git data + desc: > + Functions pulling git data in a tibble format. 
+ contents: + - get_repos + - get_repos_urls + - get_commits + - get_release_logs + - get_files + - get_users +- title: Get statistics + desc: > + Functions summarizing git data. - contents: - - starts_with("get_") + - get_commits_stats + - get_R_package_usage articles: - title: Articles navbar: ~ @@ -19,5 +33,3 @@ articles: - set_hosts - get_and_store_data - get_repos_with_code - - get_files - diff --git a/inst/set_hosts.R b/inst/set_hosts.R index 80819bc0..f25078f6 100644 --- a/inst/set_hosts.R +++ b/inst/set_hosts.R @@ -5,13 +5,13 @@ git_stats <- create_gitstats() |> orgs = c("r-world-devs", "openparma"), repos = c("openpharma/DataFakR", "r-world-devs/GitAI"), token = Sys.getenv("GITHUB_PAT"), - .show_error = FALSE + .error = FALSE ) |> set_gitlab_host( orgs = c("mbtests", "makbest"), repos = c("makbest/something", "mbtests/gitstatstesting", "krystianigras/gitlab-test"), token = Sys.getenv("GITLAB_PAT_PUBLIC"), - .show_error = FALSE + .error = FALSE ) git_stats diff --git a/inst/set_many_repos.R b/inst/set_many_repos.R new file mode 100644 index 00000000..5330da93 --- /dev/null +++ b/inst/set_many_repos.R @@ -0,0 +1,51 @@ +pharma_stats <- create_gitstats() |> + set_github_host( + orgs = "pharmaverse" + ) + +pharma_repos <- pharma_stats |> + get_repos( + add_contributors = FALSE + ) + +pharma_stats <- create_gitstats() |> + set_github_host( + repos = pharma_repos$fullname + ) + +pharma_stats + +pharma_stats |> + get_release_logs( + since = "2020-01-01", + cache = FALSE + ) + +pharma_stats |> + get_commits( + since = "2020-01-01", + cache = FALSE + ) + +pharma_stats |> + get_repos( + cache = FALSE + ) + +# very slow, better to run it when whole orgs are set +pharma_stats |> + get_repos( + with_code = "shiny", + cache = FALSE + ) + +pharma_stats |> + get_repos_urls( + cache = FALSE + ) + +pharma_stats |> + get_files( + pattern = "\\.md", + depth = 1L + ) diff --git a/man/set_github_host.Rd b/man/set_github_host.Rd index 6d10db94..79e7d49b 100644 --- 
a/man/set_github_host.Rd +++ b/man/set_github_host.Rd @@ -11,7 +11,7 @@ set_github_host( orgs = NULL, repos = NULL, verbose = is_verbose(gitstats), - .show_error = TRUE + .error = TRUE ) } \arguments{ @@ -32,7 +32,7 @@ pass it, \code{orgs} parameter should stay \code{NULL}.} \item{verbose}{A logical, \code{TRUE} by default. If \code{FALSE} messages and printing output is switched off.} -\item{.show_error}{A logical to control if passing wrong input +\item{.error}{A logical to control if passing wrong input (\code{repositories} and \code{organizations}) should end with an error or not.} } \value{ diff --git a/man/set_gitlab_host.Rd b/man/set_gitlab_host.Rd index c9df9756..17f5d304 100644 --- a/man/set_gitlab_host.Rd +++ b/man/set_gitlab_host.Rd @@ -11,7 +11,7 @@ set_gitlab_host( orgs = NULL, repos = NULL, verbose = is_verbose(gitstats), - .show_error = TRUE + .error = TRUE ) } \arguments{ @@ -32,7 +32,7 @@ pass it, \code{orgs} parameter should stay \code{NULL}.} \item{verbose}{A logical, \code{TRUE} by default. If \code{FALSE} messages and printing output is switched off.} -\item{.show_error}{A logical to control if passing wrong input +\item{.error}{A logical to control if passing wrong input (\code{repositories} and \code{organizations}) should end with an error or not.} } \value{ diff --git a/tests/testthat/_snaps/01-get_repos-GitHub.md b/tests/testthat/_snaps/01-get_repos-GitHub.md index 043a672d..fc543d7c 100644 --- a/tests/testthat/_snaps/01-get_repos-GitHub.md +++ b/tests/testthat/_snaps/01-get_repos-GitHub.md @@ -58,7 +58,7 @@ gh_repos_individual <- github_testhost_priv$get_repos_from_repos(verbose = TRUE, progress = FALSE) Message - i [Host:GitHub][Engine:GraphQl][Scope:test_org] Pulling repositories... + i [Host:GitHub][Engine:GraphQl][Scope:test_org/TestRepo] Pulling repositories... 
# `get_all_repos()` is set to scan whole git host diff --git a/tests/testthat/_snaps/get_commits_stats.md b/tests/testthat/_snaps/get_commits_stats.md index a457c06b..08184c3a 100644 --- a/tests/testthat/_snaps/get_commits_stats.md +++ b/tests/testthat/_snaps/get_commits_stats.md @@ -1,5 +1,5 @@ # get_commits_stats returns error when commits is not commits_data object - x `commits` must be a `commits_data` object. + x `commits` must be a `gitstats_commits` object. i Pull first your commits with `get_commits()` function. diff --git a/tests/testthat/_snaps/get_urls_repos-GitHub.md b/tests/testthat/_snaps/get_urls_repos-GitHub.md index 34365484..7fd4dff0 100644 --- a/tests/testthat/_snaps/get_urls_repos-GitHub.md +++ b/tests/testthat/_snaps/get_urls_repos-GitHub.md @@ -12,7 +12,7 @@ gh_repos_urls <- github_testhost_priv$get_repos_urls_from_repos(type = "web", verbose = TRUE, progress = FALSE) Message - i [Host:GitHub][Engine:REST][Scope:test_org] Pulling repositories (URLs)... + i [Host:GitHub][Engine:REST][Scope:test_org/TestRepo] Pulling repositories (URLs)... # get_all_repos_urls prepares web repo_urls vector diff --git a/tests/testthat/_snaps/set_host.md b/tests/testthat/_snaps/set_host.md index 60ec7dc7..69ddcd36 100644 --- a/tests/testthat/_snaps/set_host.md +++ b/tests/testthat/_snaps/set_host.md @@ -129,7 +129,7 @@ Code test_gitstats <- create_gitstats() %>% set_github_host(orgs = c("openpharma", "r_world_devs"), repos = c("r-world-devs/GitStats", "r-world-devs/GitMetrics"), - verbose = TRUE, .show_error = FALSE) + verbose = TRUE, .error = FALSE) Message i Using PAT from GITHUB_PAT envar. i Checking owners... 
diff --git a/tests/testthat/test-02-get_commits-GitStats.R b/tests/testthat/test-02-get_commits-GitStats.R index 63474844..1a183e87 100644 --- a/tests/testthat/test-02-get_commits-GitStats.R +++ b/tests/testthat/test-02-get_commits-GitStats.R @@ -56,7 +56,7 @@ test_that("get_commits() works", { ) expect_s3_class( commits_data, - "commits_data" + "gitstats_commits" ) test_mocker$cache(commits_data) }) diff --git a/tests/testthat/test-get_commits_stats.R b/tests/testthat/test-get_commits_stats.R index 46cf7f45..b5ef8cfa 100644 --- a/tests/testthat/test-get_commits_stats.R +++ b/tests/testthat/test-get_commits_stats.R @@ -4,7 +4,7 @@ test_that("get_commits_stats method works", { time_aggregation = "month", group_var = organization ) - expect_s3_class(commits_stats, "commits_stats") + expect_s3_class(commits_stats, "gitstats_commits_stats") expect_equal( colnames(commits_stats), c("stats_date", "githost", "organization", "stats") @@ -18,7 +18,7 @@ test_that("get_commits_stats method works", { ) expect_equal(commits_stats_yearly$stats_date, as.POSIXct(c(rep("2023-01-01", 2), "2024-01-01"), tz = 'UTC')) - expect_s3_class(commits_stats_yearly, "commits_stats") + expect_s3_class(commits_stats_yearly, "gitstats_commits_stats") expect_equal( colnames(commits_stats_yearly), c("stats_date", "githost", "stats") diff --git a/tests/testthat/test-get_storage.R b/tests/testthat/test-get_storage.R index d434fb80..d419ef39 100644 --- a/tests/testthat/test-get_storage.R +++ b/tests/testthat/test-get_storage.R @@ -27,7 +27,7 @@ test_that("get_storage retrieves one table", { ) expect_s3_class( gitstats_storage, - "commits_data" + "gitstats_commits" ) expect_commits_table( gitstats_storage @@ -45,6 +45,6 @@ test_that("get_storage retrieves one table", { ) expect_s3_class( gitstats_storage, - "files_data" + "gitstats_files" ) }) diff --git a/tests/testthat/test-get_usage_R_package.R b/tests/testthat/test-get_usage_R_package.R index 050cd2de..3c341680 100644 --- 
a/tests/testthat/test-get_usage_R_package.R +++ b/tests/testthat/test-get_usage_R_package.R @@ -117,6 +117,6 @@ test_that("get_R_package_usage works", { expect_package_usage_table(R_package_usage_table) expect_s3_class( R_package_usage_table, - "R_package_usage" + "gitstats_package_usage" ) }) diff --git a/tests/testthat/test-set_host.R b/tests/testthat/test-set_host.R index 17b91c99..76122725 100644 --- a/tests/testthat/test-set_host.R +++ b/tests/testthat/test-set_host.R @@ -165,7 +165,7 @@ test_that("When wrong orgs and repos are passed they are excluded but host is cr orgs = c("openpharma", "r_world_devs"), repos = c("r-world-devs/GitStats", "r-world-devs/GitMetrics"), verbose = TRUE, - .show_error = FALSE + .error = FALSE ) ) }) diff --git a/vignettes/get_files.Rmd b/vignettes/get_files.Rmd deleted file mode 100644 index 1aed18bc..00000000 --- a/vignettes/get_files.Rmd +++ /dev/null @@ -1,47 +0,0 @@ ---- -title: "Get files content" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{Get files content} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>", - fig.width = 7, - fig.height = 4 -) -``` - -Set connections to hosts. - -> Example workflow makes use of public GitHub and GitLab, but it is plausible, that you will use your internal git platforms, where you need to define `host` parameter. See `vignette("set_hosts")` article on that. - -```{r} -library(GitStats) - -git_stats <- create_gitstats() %>% - set_github_host( - orgs = c("r-world-devs", "openpharma"), - token = Sys.getenv("GITHUB_PAT") - ) %>% - set_gitlab_host( - orgs = c("mbtests"), - token = Sys.getenv("GITLAB_PAT_PUBLIC") - ) -``` - -With `GitStats` you can get the content of all text files in repo that are of your interest. 
You can pull specific types of files, by setting `pattern` with regular expression and `depth` with integer, which defines level of directories to look for the files. - -```{r} -files_structure <- get_files( - gitstats = git_stats, - pattern = "\\.md", - depth = 1L, - progress = FALSE -) -dplyr::glimpse(files_structure) -```