Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes to release #581

2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: GitStats
Title: Standardized Git Repository Data
Version: 2.2.0
Version: 2.1.0.9010
Authors@R: c(
person(given = "Maciej", family = "Banas", email = "banasmaciek@gmail.com", role = c("aut", "cre")),
person(given = "Kamil", family = "Koziej", email = "koziej.k@gmail.com", role = "aut"),
Expand Down
2 changes: 1 addition & 1 deletion R/EngineGraphQL.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ EngineGraphQL <- R6::R6Class(
},

filter_files_by_pattern = function(files_structure, pattern) {
files_structure[grepl(pattern, files_structure)]
files_structure[grepl(paste0(pattern, collapse = "|"), files_structure)]
},

get_path_from_files_structure = function(host_files_structure,
Expand Down
8 changes: 6 additions & 2 deletions R/GitHost.R
Original file line number Diff line number Diff line change
Expand Up @@ -1226,7 +1226,9 @@
files_structure_list <- purrr::map(private$orgs, function(org) {
if (verbose) {
user_info <- if (!is.null(pattern)) {
glue::glue("Pulling files structure...[files matching pattern: '{pattern}']")
glue::glue(
"Pulling files structure...[files matching pattern: '{paste0(pattern, collapse = '|')}']"

Check warning on line 1230 in R/GitHost.R

View check run for this annotation

Codecov / codecov/patch

R/GitHost.R#L1229-L1230

Added lines #L1229 - L1230 were not covered by tests
)
} else {
glue::glue("Pulling files structure...")
}
Expand Down Expand Up @@ -1273,7 +1275,9 @@
files_structure_list <- purrr::map(orgs, function(org) {
if (verbose) {
user_info <- if (!is.null(pattern)) {
glue::glue("Pulling files structure...[files matching pattern: '{pattern}']")
glue::glue(
"Pulling files structure...[files matching pattern: '{paste0(pattern, collapse = '|')}']"

Check warning on line 1279 in R/GitHost.R

View check run for this annotation

Codecov / codecov/patch

R/GitHost.R#L1278-L1279

Added lines #L1278 - L1279 were not covered by tests
)
} else {
glue::glue("Pulling files structure...")
}
Expand Down
4 changes: 2 additions & 2 deletions R/GitStats.R
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ GitStats <- R6::R6Class(
progress = verbose) {
private$check_for_host()
args_list <- list(
"file_pattern" = paste0(file_path, pattern),
"file_pattern" = paste(file_path, pattern),
"depth" = depth
)
trigger <- private$trigger_pulling(
Expand Down Expand Up @@ -1007,7 +1007,7 @@ GitStats <- R6::R6Class(
}
attr_data <- attr_data %>% paste0(collapse = separator)
}
return(cli::col_grey(glue::glue("[{attr_name}: {attr_data}]")))
return(cli::col_grey(glue::glue("[{attr_name}: {trimws(attr_data)}]")))
} else {
return("")
}
Expand Down
46 changes: 46 additions & 0 deletions R/get_commits_stats.R → R/get_commits.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,49 @@
#' @title Get data on commits
#' @name get_commits
#' @description Lists all commits from all repositories, either for an
#'   organization or for a vector of repositories.
#' @param gitstats A `GitStats` object.
#' @param since A starting date. It has to be supplied: the function aborts
#'   with an error when `since` is `NULL`.
#' @param until An end date. By default the day after the current date.
#' @param cache A logical. If `TRUE`, `GitStats` will retrieve the last result
#'   from its storage.
#' @param verbose A logical, by default the value of `is_verbose(gitstats)`.
#'   If `FALSE`, messages and printing output are switched off.
#' @param progress A logical, by default set to the `verbose` value. If
#'   `FALSE`, no `cli` progress bar will be displayed.
#' @return A data.frame.
#' @examples
#' \dontrun{
#' my_gitstats <- create_gitstats() %>%
#'   set_github_host(
#'     token = Sys.getenv("GITHUB_PAT"),
#'     repos = c("openpharma/DataFakeR", "openpharma/visR")
#'   ) %>%
#'   set_gitlab_host(
#'     token = Sys.getenv("GITLAB_PAT_PUBLIC"),
#'     orgs = "mbtests"
#'   )
#' get_commits(my_gitstats, since = "2018-01-01")
#' }
#' @export
get_commits <- function(
    gitstats,
    since = NULL,
    until = Sys.Date() + lubridate::days(1),
    cache = TRUE,
    verbose = is_verbose(gitstats),
    progress = verbose
) {
  # Abort early when no starting date was provided.
  if (is.null(since)) {
    abort_message <- cli::col_red("You need to pass date to `since` parameter.")
    cli::cli_abort(abort_message, call = NULL)
  }
  # Delegate to the method of the `GitStats` object. The call stays the last
  # expression so that its value is returned exactly as the method produced it.
  gitstats$get_commits(
    since = since,
    until = until,
    cache = cache,
    verbose = verbose,
    progress = progress
  )
}

#' @title Get commits statistics
#' @name get_commits_stats
#' @description Prepare statistics from the pulled commits data.
Expand Down
64 changes: 50 additions & 14 deletions R/get_files.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,38 +2,74 @@
#' @name get_files
#' @description Pulls text files and their content.
#' @param gitstats A `GitStats` object.
#' @param pattern A regular expression. If defined, it pulls file structure for
#' a repository matching this pattern. Can be defined if `file_path` stays
#' @param pattern A regular expression. If defined, it pulls content of all
#' files in a repository matching this pattern reaching to the level of
#' directories defined by `depth` parameter. Works only if `file_path` stays
#' `NULL`.
#' @param depth An optional integer. Defines level of directories to retrieve
#' files from. E.g. if set to `0`, it will pull files only from root, if `1L`,
#' will take data from `root` directory and directories visible in `root`
#' directory. If left with no argument, will pull files from all directories.
#' @param file_path Optional. A standardized path to file(s) in repositories.
#' May be a character vector if multiple files are to be pulled. Can be
#' defined if `pattern` stays `NULL`.
#' @param cache A logical, if set to `TRUE` GitStats will retrieve the last
#' @param depth Defines level of directories to retrieve files from. E.g. if set
#' to `0`, it will pull files only from `root`, if `1L`, will take data from
#' `root` directory and directories visible in `root` directory. If left with
#' no argument, will pull files from all directories.
#' @param file_path A specific path to file(s) in repositories. May be a
#' character vector if multiple files are to be pulled. If defined, the
#' function pulls content of this specific `file_path`. Can be defined only if
#' `pattern` stays `NULL`.
#' @param cache A logical, if set to `TRUE` `GitStats` will retrieve the last
#' result from its storage.
#' @param verbose A logical, `TRUE` by default. If `FALSE` messages and printing
#' output is switched off.
#' @param progress A logical, by default set to `verbose` value. If `FALSE` no
#' `cli` progress bar will be displayed.
#' @details `get_files()` may be used in two ways: either with `pattern` (with
#' optional `depth`) or `file_path` argument defined.
#'
#' In the first scenario, `GitStats` will first pull the files structure
#' corresponding to the passed `pattern` and `depth` arguments and afterwards
#' the content of all of these files. In the second scenario, `GitStats`
#' will pull only the content of files for the specific `file_path` of the
#' repository.
#'
#' If user wants to pull a particular file or files, a `file_path` approach
#' seems more reasonable, as it is a faster way since it omits pulling the
#' whole file structure from the repo.
#'
#' For example, if a user wants to pull the content of `README.md` and/or
#' `NEWS.md` files placed in the `root` directories of the repositories, they
#' should take the `file_path` approach, as they already know the exact paths
#' of the files.
#'
#' On the other hand, if a user wants to pull a specific type of files (e.g.
#' all `.md` or `.Rmd` files in the repository) without knowing their paths, it
#' is recommended to use the `pattern` approach, which will trigger `GitStats`
#' to find all the matching files in the repository down to the given level of
#' directories (`depth` argument) and afterwards pull their content.
#'
#' The latter approach is slower than the former but may be more useful
#' depending on users' goals. Both approaches return data in the same format:
#' `tibble` with data on `files`, namely their `path` and their `content`.
#'
#' @examples
#' \dontrun{
#' rmd_files <- create_gitstats() |>
#' git_stats <- create_gitstats() |>
#' set_github_host(
#' token = Sys.getenv("GITHUB_PAT"),
#' orgs = c("r-world-devs")
#' ) %>%
#' ) |>
#' set_gitlab_host(
#' token = Sys.getenv("GITLAB_PAT_PUBLIC"),
#' orgs = "mbtests"
#' ) |>
#' get_files(
#' )
#'
#' rmd_files <- get_files(
#' gitstats = git_stats,
#' pattern = "\\.Rmd",
#' depth = 2L
#' )
#'
#' app_files <- get_files(
#' gitstats = git_stats,
#' file_path = c("R/app.R", "R/ui.R", "R/server.R")
#' )
#'
#' }
#' @return A data.frame.
#' @export
Expand Down
121 changes: 121 additions & 0 deletions R/get_repos.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
#' @title Get data on repositories
#' @name get_repos
#' @description Pulls data on repositories and parses it into table format.
#'   The repositories may be all those of an organization or an individual
#'   user, or only those with a given text in code blobs (`with_code`
#'   parameter) or with a given file (`with_files` parameter).
#' @param gitstats A `GitStats` object.
#' @param add_contributors A logical, `TRUE` by default. Decides whether
#'   information about repositories' contributors is added to the repositories
#'   output (table). \cr\cr When set to `TRUE`, `GitStats` additionally
#'   iterates over the pulled repositories and reaches to the
#'   `contributors APIs`, which makes it slower, but gives additional
#'   information. \cr\cr When set to `FALSE`, the function runs faster: in the
#'   case of `org` search mode it reaches only the `GraphQL` endpoint with a
#'   query on repositories, and in the case of `code` search mode it reaches
#'   only the `repositories REST API` endpoint. The pitfall is that the result
#'   then does not convey information on contributors.
#' @param with_code A character vector. If defined, `GitStats` will pull
#'   repositories with the specified code phrases in code blobs.
#' @param in_files A character vector of file names. Works when `with_code` is
#'   set: code blobs are then searched only in the files passed to `in_files`.
#' @param with_files A character vector. If defined, `GitStats` will pull
#'   repositories with the specified files.
#' @param cache A logical. If `TRUE`, `GitStats` will retrieve the last result
#'   from its storage.
#' @param verbose A logical, by default the value of `is_verbose(gitstats)`.
#'   If `FALSE`, messages and printing output are switched off.
#' @param progress A logical, by default set to the `verbose` value. If
#'   `FALSE`, no `cli` progress bar will be displayed.
#' @return A data.frame.
#' @examples
#' \dontrun{
#' my_gitstats <- create_gitstats() %>%
#'   set_github_host(
#'     token = Sys.getenv("GITHUB_PAT"),
#'     orgs = c("r-world-devs", "openpharma")
#'   ) %>%
#'   set_gitlab_host(
#'     token = Sys.getenv("GITLAB_PAT_PUBLIC"),
#'     orgs = "mbtests"
#'   )
#' get_repos(my_gitstats)
#' get_repos(my_gitstats, add_contributors = FALSE)
#' get_repos(my_gitstats, with_code = "Shiny", in_files = "renv.lock")
#' get_repos(my_gitstats, with_files = "DESCRIPTION")
#' }
#' @export
get_repos <- function(
    gitstats,
    add_contributors = TRUE,
    with_code = NULL,
    in_files = NULL,
    with_files = NULL,
    cache = TRUE,
    verbose = is_verbose(gitstats),
    progress = verbose
) {
  # Thin functional wrapper: every argument is forwarded unchanged to the
  # `get_repos()` method of the `GitStats` object.
  gitstats$get_repos(
    add_contributors = add_contributors,
    with_code = with_code,
    in_files = in_files,
    with_files = with_files,
    cache = cache,
    verbose = verbose,
    progress = progress
  )
}

#' @title Get repository URLs
#' @name get_repos_urls
#' @description Pulls a vector of repositories' URLs (web or API): either all
#'   URLs for an organization, or only those of repositories with a given text
#'   in code blobs (`with_code` parameter) or with a given file (`with_files`
#'   parameter).
#' @param gitstats A `GitStats` object.
#' @param type A character, `"web"` by default. Chooses whether `api` or `web`
#'   (`html`) URLs should be returned.
#' @param with_code A character vector. If defined, `GitStats` will pull
#'   repositories with the specified code phrases in code blobs.
#' @param in_files A character vector of file names. Works when `with_code` is
#'   set: code blobs are then searched only in the files passed to `in_files`.
#' @param with_files A character vector. If defined, `GitStats` will pull
#'   repositories with the specified files.
#' @param cache A logical. If `TRUE`, `GitStats` will retrieve the last result
#'   from its storage.
#' @param verbose A logical, by default the value of `is_verbose(gitstats)`.
#'   If `FALSE`, messages and printing output are switched off.
#' @param progress A logical, by default set to the `verbose` value. If
#'   `FALSE`, no `cli` progress bar will be displayed.
#' @return A character vector.
#' @examples
#' \dontrun{
#' my_gitstats <- create_gitstats() %>%
#'   set_github_host(
#'     token = Sys.getenv("GITHUB_PAT"),
#'     orgs = c("r-world-devs", "openpharma")
#'   ) %>%
#'   set_gitlab_host(
#'     token = Sys.getenv("GITLAB_PAT_PUBLIC"),
#'     orgs = "mbtests"
#'   )
#' get_repos_urls(my_gitstats, type = "api")
#' get_repos_urls(my_gitstats, with_files = c("DESCRIPTION", "LICENSE"))
#' }
#' @export
get_repos_urls <- function(
    gitstats,
    type = "web",
    with_code = NULL,
    in_files = NULL,
    with_files = NULL,
    cache = TRUE,
    verbose = is_verbose(gitstats),
    progress = verbose
) {
  # Thin functional wrapper: every argument is forwarded unchanged to the
  # `get_repos_urls()` method of the `GitStats` object.
  gitstats$get_repos_urls(
    type = type,
    with_code = with_code,
    in_files = in_files,
    with_files = with_files,
    cache = cache,
    verbose = verbose,
    progress = progress
  )
}
Loading
Loading