Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

341 fix and speed up pulling package usage #344

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: GitStats
Title: Get Statistics from GitHub and GitLab
Version: 1.0.0.9006
Version: 1.0.0.9007
Authors@R: c(
person(given = "Maciej", family = "Banas", email = "banasmaciek@gmail.com", role = c("aut", "cre")),
person(given = "Kamil", family = "Koziej", email = "koziej.k@gmail.com", role = "aut"),
Expand Down
4 changes: 2 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# GitStats 1.0.0.9006
# GitStats 1.0.0.9007

## New functions:

- `pull_R_package_usage()` with `get_R_package_usage()` functions to pull repositories where package name is found in DESCRIPTION or NAMESPACE files or code blobs with phrases related to using an R package (`library(package)`, `package::`) ([#326](https://github.com/r-world-devs/GitStats/issues/326)),
- `pull_R_package_usage()` with `get_R_package_usage()` functions to pull repositories where package name is found in DESCRIPTION or NAMESPACE files or code blobs with phrases related to using an R package (`library(package)`, `require(package)`) ([#326](https://github.com/r-world-devs/GitStats/issues/326), [#341](https://github.com/r-world-devs/GitStats/issues/341)),
- `pull_files()` with `get_files()` to pull content of text files ([#200](https://github.com/r-world-devs/GitStats/issues/200)).

## New features:
Expand Down
6 changes: 4 additions & 2 deletions R/EngineGraphQLGitHub.R
Original file line number Diff line number Diff line change
Expand Up @@ -489,8 +489,10 @@ EngineGraphQLGitHub <- R6::R6Class("EngineGraphQLGitHub",
repositories <- purrr::map(repos_list, ~ .$repo_name)
def_branches <- purrr::map(repos_list, ~ .$default_branch$name)
} else {
repositories <- pulled_repos$repo_name
def_branches <- pulled_repos$default_branch
repos_table <- pulled_repos %>%
dplyr::filter(organization == org)
repositories <- repos_table$repo_name
def_branches <- repos_table$default_branch
}
files_list <- purrr::map(file_path, function(file_path) {
files_list <- purrr::map2(repositories, def_branches, function(repository, def_branch) {
Expand Down
34 changes: 20 additions & 14 deletions R/EngineGraphQLGitLab.R
Original file line number Diff line number Diff line change
Expand Up @@ -231,11 +231,14 @@ EngineGraphQLGitLab <- R6::R6Class("EngineGraphQLGitLab",
# argument to iterate over it when pulling files.
# @return A response in a list form.
pull_file_from_org = function(org, file_path, pulled_repos = NULL) {
org <- gsub("%2f", "/", org)
if (!is.null(pulled_repos)) {
full_files_list <- private$pull_file_from_repos(
file_path = file_path,
repos_table = pulled_repos
)
repos_table <- pulled_repos %>%
dplyr::filter(organization == org)
full_files_list <- private$pull_file_from_repos(
file_path = file_path,
repos_table = repos_table
)
} else {
full_files_list <- list()
next_page <- TRUE
Expand Down Expand Up @@ -305,16 +308,19 @@ EngineGraphQLGitLab <- R6::R6Class("EngineGraphQLGitLab",
prepare_files_table = function(files_response, org, file_path) {
if (!is.null(files_response)) {
files_table <- purrr::map(files_response, function(project) {
data.frame(
"repo_name" = project$name,
"repo_id" = project$id,
"organization" = org,
"file_path" = project$repository$blobs$nodes[[1]]$name,
"file_content" = project$repository$blobs$nodes[[1]]$rawBlob,
"file_size" = as.integer(project$repository$blobs$nodes[[1]]$size),
"repo_url" = project$webUrl,
"api_url" = self$gql_api_url
)
purrr::map(project$repository$blobs$nodes, function(file) {
data.frame(
"repo_name" = project$name,
"repo_id" = project$id,
"organization" = org,
"file_path" = file$name,
"file_content" = file$rawBlob,
"file_size" = as.integer(file$size),
"repo_url" = project$webUrl,
"api_url" = self$gql_api_url
)
}) %>%
purrr::list_rbind()
}) %>%
purrr::list_rbind()
} else {
Expand Down
7 changes: 7 additions & 0 deletions R/EngineRestGitHub.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ EngineRestGitHub <- R6::R6Class("EngineRestGitHub",
repos_table <- private$search_repos_by_phrase(
org = org,
phrase = settings$phrase,
files = settings$files,
language = settings$language
) %>%
private$tailor_repos_info() %>%
Expand Down Expand Up @@ -194,6 +195,7 @@ EngineRestGitHub <- R6::R6Class("EngineRestGitHub",
# @return A list of repositories.
search_repos_by_phrase = function(phrase,
org,
files,
language,
byte_max = "384000") {
user_query <- if (!private$scan_all) {
Expand All @@ -213,6 +215,11 @@ EngineRestGitHub <- R6::R6Class("EngineRestGitHub",
total_n = total_n,
byte_max = byte_max
)
if (!is.null(files)) {
repos_list <- purrr::keep(repos_list, function(repository) {
any(repository$path %in% files)
})
}
repos_list <- private$find_repos_by_id(repos_list)
} else {
repos_list <- list()
Expand Down
26 changes: 16 additions & 10 deletions R/EngineRestGitLab.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ EngineRestGitLab <- R6::R6Class("EngineRestGitLab",
repos_table <- private$search_repos_by_phrase(
org = org,
phrase = settings$phrase,
files = settings$files,
language = settings$language
) %>%
private$tailor_repos_info() %>%
Expand Down Expand Up @@ -204,36 +205,41 @@ EngineRestGitLab <- R6::R6Class("EngineRestGitLab",
# @return A list of repositories.
search_repos_by_phrase = function(phrase,
org,
files,
language = "All",
page_max = 1e6) {
page <- 1
still_more_hits <- TRUE
resp_list <- list()
full_repos_list <- list()
groups_url <- if (!private$scan_all) {
paste0('/groups/', private$get_group_id(org))
paste0("/groups/", private$get_group_id(org))
} else {
''
""
}
while (still_more_hits | page < page_max) {
resp <- self$response(
repos_list <- self$response(
paste0(
self$rest_api_url, groups_url,
'/search?scope=blobs&search="', phrase, '"&per_page=100&page=', page
"/search?scope=blobs&search=%22", phrase, "%22&per_page=100&page=", page
)
)

if (length(resp) == 0) {
if (length(repos_list) == 0) {
still_more_hits <- FALSE
break()
} else {
resp_list <- append(resp_list, resp)
if (!is.null(files)) {
repos_list <- purrr::keep(repos_list, function(repository) {
any(repository$path %in% files)
})
}
full_repos_list <- append(full_repos_list, repos_list)
page <- page + 1
}
}
repos_list <- resp_list %>%
full_repos_list <- full_repos_list %>%
private$find_repos_by_id() %>%
private$pull_repos_languages()
return(repos_list)
return(full_repos_list)
},

# @description Perform get request to find projects by ids.
Expand Down
12 changes: 11 additions & 1 deletion R/GitHost.R
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,17 @@ GitHost <- R6::R6Class("GitHost",
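#' @param pulled_repos Optional table of already pulled repositories. When supplied, files are pulled only for the organizations listed in it for this host, instead of for all organizations set on the host.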
#' @return A table.
pull_files = function(file_path, pulled_repos = NULL) {
files_table <- purrr::map(private$orgs, function(org) {
if (!is.null(pulled_repos)) {
orgs <- pulled_repos %>%
dplyr::filter(grepl(private$api_url, api_url)) %>%
dplyr::select(organization) %>%
unique() %>%
unlist() %>%
unname()
} else {
orgs <- private$orgs
}
files_table <- purrr::map(orgs, function(org) {
repos_table <- purrr::map(private$engines, function(engine) {
if (inherits(engine, "EngineGraphQL")) {
files_table <- engine$pull_files(
Expand Down
45 changes: 31 additions & 14 deletions R/GitStats.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,15 @@ GitStats <- R6::R6Class("GitStats",
#' @param search_param One of four: `team`, `org`, `repo` or `phrase`.
#' @param team_name A name of a team.
#' @param phrase A phrase to look for.
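#' @param files A character vector of file paths. When set, only search hits found in these files are kept; `NULL` (the default) applies no file filter.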
#' @param language The programming language of the code.
#' @param print_out A boolean stating if you want to print output after
#' pulling.
#' @return Nothing.
set_params = function(search_param,
team_name = NULL,
phrase = NULL,
files = NULL,
language = "All",
print_out = TRUE) {
search_param <- match.arg(
Expand Down Expand Up @@ -61,6 +63,12 @@ GitStats <- R6::R6Class("GitStats",
)
}
}
if (!is.null(files)) {
private$settings$files <- files
cli::cli_alert_info("Set files {files} to scan.")
} else {
private$settings$files <- NULL
}
private$settings$search_param <- search_param
if (language != "All") {
private$settings$language <- private$language_handler(language)
Expand Down Expand Up @@ -140,12 +148,12 @@ GitStats <- R6::R6Class("GitStats",
#' it may be used mainly by loading in data scripts and not used as a
#' dependency of other packages.
pull_R_package_usage = function(package_name, only_loading = FALSE) {
repos_using_package <- private$check_R_package_loading(package_name)
repos_with_package_as_dependency <- if (!only_loading) {
private$check_R_package_as_dependency(package_name)
if (!only_loading) {
repos_with_package_as_dependency <- private$check_R_package_as_dependency(package_name)
} else {
NULL
repos_with_package_as_dependency <- NULL
}
repos_using_package <- private$check_R_package_loading(package_name)
package_usage_table <- purrr::list_rbind(
list(
repos_with_package_as_dependency,
Expand All @@ -158,6 +166,7 @@ GitStats <- R6::R6Class("GitStats",
dplyr::mutate(
package_usage = ifelse(api_url %in% duplicated_repos, "import, library", package_usage)
)
rownames(package_usage_table) <- c(1:nrow(package_usage_table))
private$R_package_usage <- package_usage_table
return(invisible(self))
},
Expand Down Expand Up @@ -341,6 +350,7 @@ GitStats <- R6::R6Class("GitStats",
private$print_item("Search parameter", private$settings$search_param)
private$print_item("Team", private$settings$team_name, paste0(private$settings$team_name, " (", length(private$settings$team), " members)"))
private$print_item("Phrase", private$settings$phrase)
private$print_item("Files", private$settings$files)
private$print_item("Language", private$settings$language)
private$print_item("Repositories output", private$repos, paste0("Rows number: ", nrow(private$repos)))
private$print_item("Commits output", private$commits, paste0("Since: ", min(private$commits$committed_date), "; Until: ", max(private$commits$committed_date), "; Rows number: ", nrow(private$commits)))
Expand All @@ -355,6 +365,7 @@ GitStats <- R6::R6Class("GitStats",
settings = list(
search_param = NULL,
phrase = NULL,
files = NULL,
team_name = NULL,
team = list(),
language = "All",
Expand Down Expand Up @@ -382,17 +393,18 @@ GitStats <- R6::R6Class("GitStats",
cli::cli_alert_info("Checking where [{package_name}] is loaded from library...")
package_usage_phrases <- c(
paste0("library(", package_name, ")"),
paste0(package_name, "::")
paste0("require(", package_name, ")")
)
repos_using_package <- purrr::map(package_usage_phrases, ~ {
suppressMessages(
suppressMessages({
self$set_params(
search_param = "phrase",
phrase = .,
files = NULL,
print_out = FALSE
)
)
self$pull_repos()
self$pull_repos()
})
repos_using_package <- self$get_repos()
if (!is.null(repos_using_package)) {
repos_using_package$package_usage <- "library"
Expand All @@ -410,11 +422,16 @@ GitStats <- R6::R6Class("GitStats",
# @param package_name Name of a package.
check_R_package_as_dependency = function(package_name) {
cli::cli_alert_info("Checking where [{package_name}] is used as a dependency...")
self$pull_files(
file_path = c("DESCRIPTION", "NAMESPACE")
)
desc_table <- self$get_files()
repos_with_package <- desc_table[grepl(package_name, desc_table$file_content), ]
suppressMessages({
self$set_params(
search_param = "phrase",
phrase = package_name,
files = c("DESCRIPTION", "NAMESPACE"),
print_out = FALSE
)
self$pull_repos()
})
repos_with_package <- self$get_repos()
if (nrow(repos_with_package) > 0) {
repos_with_package <- repos_with_package[!duplicated(repos_with_package$api_url),]
repos_with_package$package_usage <- "import"
Expand Down Expand Up @@ -476,7 +493,7 @@ GitStats <- R6::R6Class("GitStats",
print_item = function(item_name,
item_to_check,
item_to_print = item_to_check) {
if (item_name %in% c("Organisations", "Repositories")) {
if (item_name %in% c("Organisations", "Repositories", "Files")) {
item_to_print <- unlist(item_to_print)
item_to_print <- purrr::map_vec(item_to_print, function(element) {
gsub("%2f", "/", element)
Expand Down
3 changes: 3 additions & 0 deletions R/gitstats_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ set_host <- function(gitstats_obj,
#' @param search_param One of three: team, orgs or phrase.
#' @param team_name Name of a team.
#' @param phrase A phrase to look for.
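#' @param files A character vector of file paths. When set, only search hits found in these files are kept; `NULL` (the default) applies no file filter.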
#' @param language The programming language of the code.
#' @param print_out A boolean to decide whether to print output.
#' @return A `GitStats` object.
Expand All @@ -77,12 +78,14 @@ set_params <- function(gitstats_obj,
search_param = NULL,
team_name = NULL,
phrase = NULL,
files = NULL,
language = "All",
print_out = TRUE) {
gitstats_obj$set_params(
search_param = search_param,
team_name = team_name,
phrase = phrase,
files = files,
language = language,
print_out = print_out
)
Expand Down
3 changes: 3 additions & 0 deletions man/set_params.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions tests/testthat/_snaps/03-EngineGraphQLGitLab.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,11 @@
Message
i [Engine:GraphQL][org:mbtests] Pulling README.md files...

# `pull_files()` pulls two files in the table format

Code
gl_files_table <- test_gql_gl$pull_files(org = "mbtests", file_path = c(
"meta_data.yaml", "README.md"))
Message
i [Engine:GraphQL][org:mbtests] Pulling meta_data.yaml and README.md files...

Loading