From a603c95343b3e8af2bd58178da5c406e9a1c7c11 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Tue, 3 Dec 2024 08:37:16 +0000 Subject: [PATCH 01/99] Fix pulling commits for GitLab subgroups when repos are set as scanning scope. --- DESCRIPTION | 2 +- NEWS.md | 4 ++++ R/GitHostGitLab.R | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 86362475..433ef879 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: GitStats Title: Standardized Git Repository Data -Version: 2.1.2 +Version: 2.1.2.9000 Authors@R: c( person(given = "Maciej", family = "Banas", email = "banasmaciek@gmail.com", role = c("aut", "cre")), person(given = "Kamil", family = "Koziej", email = "koziej.k@gmail.com", role = "aut"), diff --git a/NEWS.md b/NEWS.md index 4c468524..91aed15c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# GitStats (development version) + +- Fixed pulling commits for GitLab subgroups when repositories are set as scope to scan ([#551](https://github.com/r-world-devs/GitStats/issues/551)). + # GitStats 2.1.2 This is a patch release which introduces some hot fixes and new data in `get_commits()` output. diff --git a/R/GitHostGitLab.R b/R/GitHostGitLab.R index 875dc083..61e40006 100644 --- a/R/GitHostGitLab.R +++ b/R/GitHostGitLab.R @@ -268,7 +268,7 @@ GitHostGitLab <- R6::R6Class("GitHostGitLab", set_repositories = function(org, settings) { if (private$searching_scope == "repo") { repos <- private$orgs_repos[[org]] - repos_names <- paste0(org, "%2f", repos) + repos_names <- paste0(utils::URLencode(org, reserved = TRUE), "%2f", repos) } else { repos_table <- private$get_all_repos( verbose = FALSE From 0e62cb818f200affc596c440f2cab05472b81b49 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Wed, 4 Dec 2024 08:24:53 +0000 Subject: [PATCH 02/99] Fill empty data for GitLab authors names and logins as much as possible. 
--- R/EngineRestGitLab.R | 64 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 62 insertions(+), 2 deletions(-) diff --git a/R/EngineRestGitLab.R b/R/EngineRestGitLab.R index 1324df50..42adb4fa 100644 --- a/R/EngineRestGitLab.R +++ b/R/EngineRestGitLab.R @@ -276,7 +276,7 @@ EngineRestGitLab <- R6::R6Class( .after = author ) - empty_dict <- all(is.na(authors_dict[, c("author_login", "author_name")] %>% + empty_dict <- all(is.na(authors_dict[, c("author_login", "author_name")] |> unlist())) if (!empty_dict) { commits_table <- dplyr::mutate( @@ -494,7 +494,7 @@ EngineRestGitLab <- R6::R6Class( }, get_authors_dict = function(commits_table, progress) { - purrr::map(unique(commits_table$author), function(author) { + authors_dict <- purrr::map(unique(commits_table$author), function(author) { author <- url_encode(author) search_endpoint <- paste0( self$rest_api_url, @@ -533,6 +533,66 @@ EngineRestGitLab <- R6::R6Class( return(user_tbl) }, .progress = progress) %>% purrr::list_rbind() + authors_dict <- private$clean_authors_dict(authors_dict) + return(authors_dict) + }, + + clean_authors_dict = function(authors_dict) { + authors_to_clean <- authors_dict$author[is.na(authors_dict$author_name)] + authors_dict <- private$clean_authors_with_comma( + authors_to_clean = authors_to_clean, + authors_dict = authors_dict + ) + authors_dict <- private$fill_empty_authors( + authors_to_clean = authors_to_clean, + authors_dict = authors_dict + ) + return(authors_dict) + }, + + clean_authors_with_comma = function(authors_dict, authors_to_clean) { + if (any(grepl(",", authors_to_clean))) { + authors_with_comma <- authors_to_clean[grepl(",", authors_to_clean)] + clean_authors <- purrr::map(authors_with_comma, function(author) { + split_author <- stringr::str_split_1(author, ",") + split_author <- purrr::map(split_author, function(x) { + stringr::str_replace(x, "\\{.*?\\}", "") |> + stringr::str_replace_all(" ", "") + }) + source_author <- unlist(split_author) + clean_author <- 
paste(source_author[2], source_author[1]) + dplyr::tibble( + author = author, + author_login = NA_character_, + author_name = clean_author + ) + }) |> + purrr::list_rbind() + authors_dict <- authors_dict |> + dplyr::filter(!author %in% authors_with_comma) + authors_dict <- rbind(authors_dict, clean_authors) + return(authors_dict) + } + }, + + fill_empty_authors = function(authors_dict, authors_to_clean) { + authors_to_clean <- authors_to_clean[!grepl(",", authors_to_clean)] + author_names <- purrr::keep(authors_to_clean, function(author) { + length(stringr::str_split_1(author, " ")) > 1 + }) + author_logins <- purrr::keep(authors_to_clean, function(author) { + length(stringr::str_split_1(author, " ")) == 1 + }) + authors_dict <- authors_dict |> + dplyr::mutate( + author_name = ifelse(author %in% author_names, + author, + author_name), + author_login = ifelse(author %in% author_logins, + author, + author_login) + ) + return(authors_dict) } ) ) From 2f7bef020ceba3c9b6254bdaff429a22427fcb78 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Wed, 4 Dec 2024 11:02:21 +0000 Subject: [PATCH 03/99] Fill GitHub's author names if empty. 
--- R/EngineGraphQLGitHub.R | 10 ++++++++++ R/utils.R | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/R/EngineGraphQLGitHub.R b/R/EngineGraphQLGitHub.R index f75f827a..f32af3d3 100644 --- a/R/EngineGraphQLGitHub.R +++ b/R/EngineGraphQLGitHub.R @@ -167,6 +167,7 @@ EngineGraphQLGitHub <- R6::R6Class( .before = api_url ) } + commits_table <- private$fill_empty_authors(commits_table) return(commits_table) }, @@ -539,6 +540,15 @@ EngineGraphQLGitHub <- R6::R6Class( "files" = files ) return(result) + }, + + fill_empty_authors = function(commits_table) { + commits_table <- commits_table |> + dplyr::rowwise() |> + dplyr::mutate( + author_name = ifelse(is.na(author_name) & is_name(author), author, author_name), + author_login = ifelse(is.na(author_login) & is_login(author), author, author_login) + ) } ) ) diff --git a/R/utils.R b/R/utils.R index 3d8311d5..91919377 100644 --- a/R/utils.R +++ b/R/utils.R @@ -63,3 +63,13 @@ standardize_dates <- function(dates) { url_encode <- function(url) { URLencode(url, reserved = TRUE) } + +#' @noRd +is_name <- function(author) { + length(stringr::str_split_1(author, " ")) > 1 +} + +#' @noRd +is_login <- function(author) { + length(stringr::str_split_1(author, " ")) == 1 && identical(author, tolower(author)) +} From b820f2638615706ed2cbdbf9326a9c4b5f9640a4 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Wed, 4 Dec 2024 11:45:41 +0000 Subject: [PATCH 04/99] Fix. 
--- R/EngineRestGitLab.R | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/R/EngineRestGitLab.R b/R/EngineRestGitLab.R index 42adb4fa..2ce2455d 100644 --- a/R/EngineRestGitLab.R +++ b/R/EngineRestGitLab.R @@ -538,19 +538,17 @@ EngineRestGitLab <- R6::R6Class( }, clean_authors_dict = function(authors_dict) { - authors_to_clean <- authors_dict$author[is.na(authors_dict$author_name)] authors_dict <- private$clean_authors_with_comma( - authors_to_clean = authors_to_clean, authors_dict = authors_dict ) authors_dict <- private$fill_empty_authors( - authors_to_clean = authors_to_clean, authors_dict = authors_dict ) return(authors_dict) }, - clean_authors_with_comma = function(authors_dict, authors_to_clean) { + clean_authors_with_comma = function(authors_dict) { + authors_to_clean <- authors_dict$author[is.na(authors_dict$author_name)] if (any(grepl(",", authors_to_clean))) { authors_with_comma <- authors_to_clean[grepl(",", authors_to_clean)] clean_authors <- purrr::map(authors_with_comma, function(author) { @@ -571,11 +569,12 @@ EngineRestGitLab <- R6::R6Class( authors_dict <- authors_dict |> dplyr::filter(!author %in% authors_with_comma) authors_dict <- rbind(authors_dict, clean_authors) - return(authors_dict) } + return(authors_dict) }, - fill_empty_authors = function(authors_dict, authors_to_clean) { + fill_empty_authors = function(authors_dict) { + authors_to_clean <- authors_dict$author[is.na(authors_dict$author_name)] authors_to_clean <- authors_to_clean[!grepl(",", authors_to_clean)] author_names <- purrr::keep(authors_to_clean, function(author) { length(stringr::str_split_1(author, " ")) > 1 From 003a6b91bcb723656d6352dbf58309c1a2ec8bc5 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Wed, 4 Dec 2024 11:45:53 +0000 Subject: [PATCH 05/99] Add NEWS. 
--- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 91aed15c..7d20fbd7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,7 @@ # GitStats (development version) - Fixed pulling commits for GitLab subgroups when repositories are set as scope to scan ([#551](https://github.com/r-world-devs/GitStats/issues/551)). +- Filled more information on `author_name` and `author_login` if it was missing in `commits_table` ([#550](https://github.com/r-world-devs/GitStats/issues/550)). # GitStats 2.1.2 From 4b18cb9cff2c41a61da1b6f5e3b72510e7a88ba6 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Wed, 4 Dec 2024 11:46:14 +0000 Subject: [PATCH 06/99] Add GitLab tests. --- tests/testthat/test-get_commits-GitLab.R | 48 ++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/tests/testthat/test-get_commits-GitLab.R b/tests/testthat/test-get_commits-GitLab.R index bfca6d1d..5f904133 100644 --- a/tests/testthat/test-get_commits-GitLab.R +++ b/tests/testthat/test-get_commits-GitLab.R @@ -39,6 +39,54 @@ test_that("`prepare_commits_table()` prepares table of commits properly", { test_mocker$cache(gl_commits_table) }) +test_authors_dict <- data.frame( + author = c("TestFamily, TestName {TestID}", "TestName TestFamily", "testlogin"), + author_login = rep(NA, 3), + author_name = rep(NA, 3) +) + +test_that("clean_authors_with_comma parses properly authors data", { + test_authors_dict <- test_rest_gitlab_priv$clean_authors_with_comma(test_authors_dict) + expect_equal( + test_authors_dict$author_name[test_authors_dict$author == "TestFamily, TestName {TestID}"], + "TestName TestFamily" + ) + test_mocker$cache(test_authors_dict) +}) + +test_that("fill_empty_authors fills properly authors data", { + test_authors_dict <- test_rest_gitlab_priv$fill_empty_authors( + authors_dict = test_mocker$use("test_authors_dict") + ) + expect_equal( + test_authors_dict$author_name[test_authors_dict$author == "TestName TestFamily"], + "TestName TestFamily" + ) + 
expect_equal( + test_authors_dict$author_login[test_authors_dict$author == "testlogin"], + "testlogin" + ) + test_mocker$cache(test_authors_dict) +}) + +test_that("clean_authors_dict", { + test_authors_dict <- test_rest_gitlab_priv$clean_authors_dict( + authors_dict = test_authors_dict + ) + expect_equal( + test_authors_dict$author_name[test_authors_dict$author == "TestFamily, TestName {TestID}"], + "TestName TestFamily" + ) + expect_equal( + test_authors_dict$author_name[test_authors_dict$author == "TestName TestFamily"], + "TestName TestFamily" + ) + expect_equal( + test_authors_dict$author_login[test_authors_dict$author == "testlogin"], + "testlogin" + ) +}) + test_that("get_authors_dict() prepares dictionary with handles and names", { mockery::stub( test_rest_gitlab_priv$get_authors_dict, From b5907c3a1dc4b33988d434ccabc3d21e0eac5c5a Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Wed, 4 Dec 2024 11:57:44 +0000 Subject: [PATCH 07/99] Add GitHub test. --- tests/testthat/test-get_commits-GitHub.R | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/testthat/test-get_commits-GitHub.R b/tests/testthat/test-get_commits-GitHub.R index 65433a53..a64e1bf1 100644 --- a/tests/testthat/test-get_commits-GitHub.R +++ b/tests/testthat/test-get_commits-GitHub.R @@ -72,6 +72,18 @@ test_that("`prepare_commits_table()` prepares commits table", { test_mocker$cache(gh_commits_table) }) +test_that("fill_empty_authors() works as expected", { + commits_table <- test_mocker$use("gh_commits_table") + commits_table$author_name <- NA + commits_table <- test_graphql_github_priv$fill_empty_authors( + commits_table = commits_table + ) + expect_equal( + commits_table$author_name, + "Maciej Banas" + ) +}) + test_that("get_commits_from_orgs for GitHub works", { mockery::stub( github_testhost_repos_priv$get_commits_from_orgs, From b557c79f756da93f3b6ab897e5d8e8bee4e29c69 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Fri, 6 Dec 2024 11:08:09 +0000 Subject: [PATCH 08/99] 
Handle empty GraphQL responses. --- DESCRIPTION | 2 +- R/EngineGraphQLGitHub.R | 25 +++++++++++++++++++------ 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 433ef879..4830424a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: GitStats Title: Standardized Git Repository Data -Version: 2.1.2.9000 +Version: 2.1.2.9001 Authors@R: c( person(given = "Maciej", family = "Banas", email = "banasmaciek@gmail.com", role = c("aut", "cre")), person(given = "Kamil", family = "Koziej", email = "koziej.k@gmail.com", role = "aut"), diff --git a/R/EngineGraphQLGitHub.R b/R/EngineGraphQLGitHub.R index f32af3d3..18ae076a 100644 --- a/R/EngineGraphQLGitHub.R +++ b/R/EngineGraphQLGitHub.R @@ -355,9 +355,19 @@ EngineGraphQLGitHub <- R6::R6Class( "repoCursor" = repo_cursor ) ) + private$handle_gql_response_error(response) return(response) }, + handle_gql_response_error = function(response) { + if (any(names(response) %in% "errors")) { + cli::cli_abort(c( + "i" = "GraphQL response error", + "x" = response$errors[[1]]$message + ), call = NULL) + } + }, + # An iterator over pulling commit pages from one repository. 
get_commits_from_one_repo = function(org, repo, @@ -543,12 +553,15 @@ EngineGraphQLGitHub <- R6::R6Class( }, fill_empty_authors = function(commits_table) { - commits_table <- commits_table |> - dplyr::rowwise() |> - dplyr::mutate( - author_name = ifelse(is.na(author_name) & is_name(author), author, author_name), - author_login = ifelse(is.na(author_login) & is_login(author), author, author_login) - ) + if (length(commits_table) > 0) { + commits_table <- commits_table |> + dplyr::rowwise() |> + dplyr::mutate( + author_name = ifelse(is.na(author_name) & is_name(author), author, author_name), + author_login = ifelse(is.na(author_login) & is_login(author), author, author_login) + ) + } + return(commits_table) } ) ) From 4a4cd72662d48b4828edbe7bf81341c14c5ff36c Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Fri, 6 Dec 2024 15:39:17 +0000 Subject: [PATCH 09/99] Fix error with repo cursor, try handle 502 error when fails with req_retry and fix preparing table output when login is empty. Passing empty `repo_cursor` ('') as variable to GraphQL request works for public GitHub but not necessarily for private platforms. The suggested solution will work for both. --- NEWS.md | 1 + R/EngineGraphQL.R | 7 +++++-- R/EngineGraphQLGitHub.R | 45 +++++++++++++++++++++++++++-------------- R/GQLQueryGitHub.R | 22 ++++++++++---------- 4 files changed, 47 insertions(+), 28 deletions(-) diff --git a/NEWS.md b/NEWS.md index 7d20fbd7..41046e2d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,7 @@ - Fixed pulling commits for GitLab subgroups when repositories are set as scope to scan ([#551](https://github.com/r-world-devs/GitStats/issues/551)). - Filled more information on `author_name` and `author_login` if it was missing in `commits_table` ([#550](https://github.com/r-world-devs/GitStats/issues/550)). +- Handled a `GraphQL` response error when pulling repositories with R error. 
Earlier, `GitStats` just returned empty table with no clue on what has happened, as errors from `GraphQL` are returned as list outputs (they do not break code). # GitStats 2.1.2 diff --git a/R/EngineGraphQL.R b/R/EngineGraphQL.R index 42f314e3..7036b23a 100644 --- a/R/EngineGraphQL.R +++ b/R/EngineGraphQL.R @@ -56,21 +56,24 @@ EngineGraphQL <- R6::R6Class( httr2::req_body_json(list(query = gql_query, variables = vars)) %>% httr2::req_retry( is_transient = ~ httr2::resp_status(.x) %in% c(400, 502), - max_seconds = 30 + max_seconds = 60 ) %>% httr2::req_perform() return(response) }, + is_query_error = function(response) { check <- FALSE if (length(response) > 0) { - check <- names(response) == "errors" + check <- any(names(response) == "errors") } return(check) }, + filter_files_by_pattern = function(files_structure, pattern) { files_structure[grepl(pattern, files_structure)] }, + get_path_from_files_structure = function(host_files_structure, only_text_files, org, diff --git a/R/EngineGraphQLGitHub.R b/R/EngineGraphQLGitHub.R index 18ae076a..5ce4ddae 100644 --- a/R/EngineGraphQLGitHub.R +++ b/R/EngineGraphQLGitHub.R @@ -135,12 +135,12 @@ EngineGraphQLGitHub <- R6::R6Class( commit$node$author_login <- if (!is.null(commit_author$user$login)) { commit_author$user$login } else { - NA + NA_character_ } commit$node$author_name <- if (!is.null(commit_author$user$name)) { commit_author$user$name } else { - NA + NA_character_ } commit$node$committed_date <- gts_to_posixt(commit$node$committed_date) commit$node$repo_url <- commit$node$repository$url @@ -344,15 +344,18 @@ EngineGraphQLGitHub <- R6::R6Class( type = c("organization", "user"), repo_cursor = "") { repos_query <- if (type == "organization") { - self$gql_query$repos_by_org() + self$gql_query$repos_by_org( + repo_cursor = repo_cursor + ) } else { - self$gql_query$repos_by_user() + self$gql_query$repos_by_user( + repo_cursor = repo_cursor + ) } response <- self$gql_response( gql_query = repos_query, vars = list( - 
"login" = login, - "repoCursor" = repo_cursor + "login" = login ) ) private$handle_gql_response_error(response) @@ -360,7 +363,7 @@ EngineGraphQLGitHub <- R6::R6Class( }, handle_gql_response_error = function(response) { - if (any(names(response) %in% "errors")) { + if (private$is_query_error(response)) { cli::cli_abort(c( "i" = "GraphQL response error", "x" = response$errors[[1]]$message @@ -407,15 +410,27 @@ EngineGraphQLGitHub <- R6::R6Class( commits_by_org_query <- self$gql_query$commits_from_repo( commits_cursor = commits_cursor ) - response <- self$gql_response( - gql_query = commits_by_org_query, - vars = list( - "org" = org, - "repo" = repo, - "since" = date_to_gts(since), - "until" = date_to_gts(until) + response <- tryCatch({ + self$gql_response( + gql_query = commits_by_org_query, + vars = list( + "org" = org, + "repo" = repo, + "since" = date_to_gts(since), + "until" = date_to_gts(until) + ) ) - ) + }, error = function(e) { + self$gql_response( + gql_query = commits_by_org_query, + vars = list( + "org" = org, + "repo" = repo, + "since" = date_to_gts(since), + "until" = date_to_gts(until) + ) + ) + }) return(response) }, diff --git a/R/GQLQueryGitHub.R b/R/GQLQueryGitHub.R index 2c465b67..fdf6c391 100644 --- a/R/GQLQueryGitHub.R +++ b/R/GQLQueryGitHub.R @@ -51,12 +51,12 @@ GQLQueryGitHub <- R6::R6Class("GQLQueryGitHub", #' @description Prepare query to get repositories from GitHub. #' @return A query. - repos_by_org = function() { + repos_by_org = function(repo_cursor) { paste0(' - query GetReposByOrg($login: String! $repoCursor: String!) { + query GetReposByOrg($login: String!) { repositoryOwner(login: $login) { ... on Organization { - ', private$repositories_field(), ' + ', private$repositories_field(repo_cursor), ' } } }') @@ -64,11 +64,11 @@ GQLQueryGitHub <- R6::R6Class("GQLQueryGitHub", #' @description Prepare query to get repositories from GitHub. #' @return A query. 
- repos_by_user = function() { + repos_by_user = function(repo_cursor) { paste0(' - query GetUsersRepos($login: String! $repoCursor: String!){ + query GetUsersRepos($login: String!){ user(login: $login) { - ', private$repositories_field(), ' + ', private$repositories_field(repo_cursor), ' } }' ) @@ -206,7 +206,7 @@ GQLQueryGitHub <- R6::R6Class("GQLQueryGitHub", } ), private = list( - # @description Helper over defining cursor agument for the query. + # @description Helper over defining cursor argument for the query. # @param cursor A cursor. # @return A string of cursor argument. add_cursor = function(cursor) { @@ -219,9 +219,9 @@ GQLQueryGitHub <- R6::R6Class("GQLQueryGitHub", }, # @description Helper to prepare repository query. - repositories_field = function() { - ' - repositories(first: 100 after: $repoCursor) { + repositories_field = function(repo_cursor) { + paste0(' + repositories(first: 100', private$add_cursor(repo_cursor), ') { totalCount pageInfo { endCursor @@ -250,7 +250,7 @@ GQLQueryGitHub <- R6::R6Class("GQLQueryGitHub", repo_url: url } } - ' + ') } ) ) From 9eed4fe6c77250367208aa193ac8a0ced6b003dc Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Fri, 6 Dec 2024 16:01:53 +0000 Subject: [PATCH 10/99] Fix tests. --- tests/testthat/_snaps/01-get_repos-GitHub.md | 2 +- tests/testthat/test-01-get_repos-GitHub.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/_snaps/01-get_repos-GitHub.md b/tests/testthat/_snaps/01-get_repos-GitHub.md index b4419cf2..e51ab31c 100644 --- a/tests/testthat/_snaps/01-get_repos-GitHub.md +++ b/tests/testthat/_snaps/01-get_repos-GitHub.md @@ -3,7 +3,7 @@ Code gh_repos_by_org_query Output - [1] "\n query GetReposByOrg($login: String! $repoCursor: String!) {\n repositoryOwner(login: $login) {\n ... 
on Organization {\n \n repositories(first: 100 after: $repoCursor) {\n totalCount\n pageInfo {\n endCursor\n hasNextPage\n }\n nodes {\n repo_id: id\n repo_name: name\n default_branch: defaultBranchRef {\n name\n }\n stars: stargazerCount\n forks: forkCount\n created_at: createdAt\n last_activity_at: pushedAt\n languages (first: 5) { nodes {name} }\n issues_open: issues (first: 100 states: [OPEN]) {\n totalCount\n }\n issues_closed: issues (first: 100 states: [CLOSED]) {\n totalCount\n }\n organization: owner {\n login\n }\n repo_url: url\n }\n }\n \n }\n }\n }" + [1] "\n query GetReposByOrg($login: String!) {\n repositoryOwner(login: $login) {\n ... on Organization {\n \n repositories(first: 100) {\n totalCount\n pageInfo {\n endCursor\n hasNextPage\n }\n nodes {\n repo_id: id\n repo_name: name\n default_branch: defaultBranchRef {\n name\n }\n stars: stargazerCount\n forks: forkCount\n created_at: createdAt\n last_activity_at: pushedAt\n languages (first: 5) { nodes {name} }\n issues_open: issues (first: 100 states: [OPEN]) {\n totalCount\n }\n issues_closed: issues (first: 100 states: [CLOSED]) {\n totalCount\n }\n organization: owner {\n login\n }\n repo_url: url\n }\n }\n \n }\n }\n }" # `get_all_repos()` prints proper message diff --git a/tests/testthat/test-01-get_repos-GitHub.R b/tests/testthat/test-01-get_repos-GitHub.R index 3e1ccb48..6fca8d8c 100644 --- a/tests/testthat/test-01-get_repos-GitHub.R +++ b/tests/testthat/test-01-get_repos-GitHub.R @@ -1,6 +1,6 @@ test_that("repos_by_org query is built properly", { gh_repos_by_org_query <- - test_gqlquery_gh$repos_by_org() + test_gqlquery_gh$repos_by_org(repo_cursor = "") expect_snapshot( gh_repos_by_org_query ) From 19a7a974c6e64ab69e75c611da650866281530cb Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Mon, 9 Dec 2024 09:19:57 +0000 Subject: [PATCH 11/99] Add test. 
--- tests/testthat/_snaps/get_commits-GitHub.md | 4 ++++ tests/testthat/test-get_commits-GitHub.R | 13 +++++++++++++ 2 files changed, 17 insertions(+) diff --git a/tests/testthat/_snaps/get_commits-GitHub.md b/tests/testthat/_snaps/get_commits-GitHub.md index 2f631896..5fe49647 100644 --- a/tests/testthat/_snaps/get_commits-GitHub.md +++ b/tests/testthat/_snaps/get_commits-GitHub.md @@ -5,3 +5,7 @@ Output [1] "\n query GetCommitsFromRepo($repo: String!\n $org: String!\n $since: GitTimestamp\n $until: GitTimestamp){\n repository(name: $repo, owner: $org) {\n defaultBranchRef {\n target {\n ... on Commit {\n history(since: $since\n until: $until\n ) {\n pageInfo {\n hasNextPage\n endCursor\n }\n edges {\n node {\n ... on Commit {\n id\n committed_date: committedDate\n author {\n name\n user {\n name\n login\n }\n }\n additions\n deletions\n repository {\n url\n }\n }\n }\n }\n }\n }\n }\n }\n }\n }" +# error in GraphQL response is handled properly + + i GraphQL response error + diff --git a/tests/testthat/test-get_commits-GitHub.R b/tests/testthat/test-get_commits-GitHub.R index a64e1bf1..1e0fd957 100644 --- a/tests/testthat/test-get_commits-GitHub.R +++ b/tests/testthat/test-get_commits-GitHub.R @@ -6,6 +6,19 @@ test_that("commits_by_repo GitHub query is built properly", { ) }) +test_that("error in GraphQL response is handled properly", { + mockery::stub( + test_graphql_github_priv$handle_gql_response_error, + "private$is_query_error", + TRUE + ) + expect_snapshot_error( + test_graphql_github_priv$handle_gql_response_error( + response = list() + ) + ) +}) + test_that("`get_commits_page_from_repo()` pulls commits page from repository", { mockery::stub( test_graphql_github_priv$get_commits_page_from_repo, From db4e8afe451e6c8745d4299a93b01579917fe178 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Tue, 10 Dec 2024 08:49:52 +0000 Subject: [PATCH 12/99] Bump version. 
--- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4830424a..07199bdb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: GitStats Title: Standardized Git Repository Data -Version: 2.1.2.9001 +Version: 2.1.2.9002 Authors@R: c( person(given = "Maciej", family = "Banas", email = "banasmaciek@gmail.com", role = c("aut", "cre")), person(given = "Kamil", family = "Koziej", email = "koziej.k@gmail.com", role = "aut"), From 21e8220186430e4750d38e335178cbacf7d2cdf2 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Tue, 10 Dec 2024 08:51:30 +0000 Subject: [PATCH 13/99] Add "year" as possible option to time interval, adjust better test fixtures for commits. --- R/GitStats.R | 2 +- R/gitstats_functions.R | 2 +- R/test_helpers.R | 17 +++++++++ tests/testthat/helper-fixtures.R | 40 ++++++++++++---------- tests/testthat/test-get_commits-GitHub.R | 7 ++-- tests/testthat/test-get_commits-GitStats.R | 16 ++++++++- 6 files changed, 60 insertions(+), 24 deletions(-) diff --git a/R/GitStats.R b/R/GitStats.R index 46a6325f..f4567d4f 100644 --- a/R/GitStats.R +++ b/R/GitStats.R @@ -243,7 +243,7 @@ GitStats <- R6::R6Class( #' @param time_interval A character, specifying time interval to show #' statistics. #' @return A table of `commits_stats` class. 
- get_commits_stats = function(time_interval = c("month", "day", "week")) { + get_commits_stats = function(time_interval = c("year", "month", "day", "week")) { commits <- private$storage[["commits"]] if (is.null(commits)) { cli::cli_abort(c( diff --git a/R/gitstats_functions.R b/R/gitstats_functions.R index 563300a0..05c67bd9 100644 --- a/R/gitstats_functions.R +++ b/R/gitstats_functions.R @@ -276,7 +276,7 @@ get_commits <- function(gitstats_object, #' } #' @export get_commits_stats <- function(gitstats_object, - time_interval = c("month", "day", "week")) { + time_interval = c("year", "month", "day", "week")) { gitstats_object$get_commits_stats( time_interval = time_interval ) diff --git a/R/test_helpers.R b/R/test_helpers.R index 656c250b..056e2a86 100644 --- a/R/test_helpers.R +++ b/R/test_helpers.R @@ -201,3 +201,20 @@ create_testrest <- function(rest_api_url = "https://api.github.com", } return(test_rest) } + +generate_random_timestamps <- function(n, start_year, end_year) { + start_date <- as.POSIXct(paste0(start_year, "-01-01 00:00:00"), tz = "UTC") + end_date <- as.POSIXct(paste0(end_year, "-12-31 23:59:59"), tz = "UTC") + + random_times <- runif(n, min = as.numeric(start_date), max = as.numeric(end_date)) + random_datetimes <- as.POSIXct(random_times, origin = "1970-01-01", tz = "UTC") + + formatted_dates <- format(random_datetimes, "%Y-%m-%dT%H:%M:%SZ") + + return(formatted_dates) +} + +generate_random_names <- function(n, names) { + random_names <- sample(names, size = n, replace = TRUE) + return(random_names) +} diff --git a/tests/testthat/helper-fixtures.R b/tests/testthat/helper-fixtures.R index 5fc10365..873d30e2 100644 --- a/tests/testthat/helper-fixtures.R +++ b/tests/testthat/helper-fixtures.R @@ -245,24 +245,30 @@ test_fixtures$gitlab_repos_by_user_response <- list( ) ) -github_commit_edge <- list( - "node" = list( - "id" = "xxx", - "committed_date" = "2023-01-25T10:26:41Z", - "author" = list( - "name" = "Maciej Banas", - "user" = list( - "name" = 
"Maciej Banas", - "login" = "maciekbanas" +github_commit_edge <- function(timestamp, author) { + list( + "node" = list( + "id" = "xxx", + "committed_date" = timestamp, + "author" = list( + "name" = author, + "user" = list( + "name" = "Maciej Banas", + "login" = "maciekbanas" + ) + ), + "additions" = 5L, + "deletions" = 8L, + "repository" = list( + "url" = "test_url" ) - ), - "additions" = 5L, - "deletions" = 8L, - "repository" = list( - "url" = "test_url" ) ) -) +} + +set.seed(123) +commit_timestamps <- generate_random_timestamps(25, 2023, 2024) +commit_authors <- generate_random_names(25, c("John Test", "Barbara Check", "Bob Test")) test_fixtures$github_commits_response <- list( "data" = list( @@ -270,9 +276,7 @@ test_fixtures$github_commits_response <- list( "defaultBranchRef" = list( "target" = list( "history" = list( - "edges" = list( - rep(github_commit_edge, 5) - ) + "edges" = purrr::map2(commit_timestamps, commit_authors, github_commit_edge) ) ) ) diff --git a/tests/testthat/test-get_commits-GitHub.R b/tests/testthat/test-get_commits-GitHub.R index 1e0fd957..91140a8f 100644 --- a/tests/testthat/test-get_commits-GitHub.R +++ b/tests/testthat/test-get_commits-GitHub.R @@ -91,9 +91,10 @@ test_that("fill_empty_authors() works as expected", { commits_table <- test_graphql_github_priv$fill_empty_authors( commits_table = commits_table ) - expect_equal( - commits_table$author_name, - "Maciej Banas" + expect_true( + all( + c("Bob Test", "Barbara Check", "John Test") %in% commits_table$author_name + ) ) }) diff --git a/tests/testthat/test-get_commits-GitStats.R b/tests/testthat/test-get_commits-GitStats.R index c82e2d8d..a28d15e0 100644 --- a/tests/testthat/test-get_commits-GitStats.R +++ b/tests/testthat/test-get_commits-GitStats.R @@ -87,7 +87,10 @@ test_that("get_commits_stats returns error when no commits", { }) test_that("get_commits_stats prepares table with statistics on commits", { - commits_stats <- get_commits_stats(test_gitstats) + commits_stats <- 
get_commits_stats( + gitstats_obj = test_gitstats, + time_interval = "month" + ) expect_s3_class(commits_stats, "commits_stats") expect_equal( colnames(commits_stats), @@ -106,4 +109,15 @@ test_that("get_commits_stats prepares table with statistics on commits", { colnames(commits_stats_daily), c("stats_date", "platform", "organization", "commits_n") ) + + commits_stats_yearly <- get_commits_stats( + gitstats_obj = test_gitstats, + time_interval = "year") + expect_equal(commits_stats_yearly$stats_date, + as.POSIXct(c(rep("2023-01-01", 2), "2024-01-01"), tz = 'UTC')) + expect_s3_class(commits_stats_yearly, "commits_stats") + expect_equal( + colnames(commits_stats_yearly), + c("stats_date", "platform", "organization", "commits_n") + ) }) From 8f4dfc377fd9eddab445a132b431d56669b7b4b4 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Tue, 10 Dec 2024 08:52:16 +0000 Subject: [PATCH 14/99] Update docs. --- man/get_commits_stats.Rd | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/man/get_commits_stats.Rd b/man/get_commits_stats.Rd index 63a2181e..8815d0de 100644 --- a/man/get_commits_stats.Rd +++ b/man/get_commits_stats.Rd @@ -4,7 +4,10 @@ \alias{get_commits_stats} \title{Get commits statistics} \usage{ -get_commits_stats(gitstats_object, time_interval = c("month", "day", "week")) +get_commits_stats( + gitstats_object, + time_interval = c("year", "month", "day", "week") +) } \arguments{ \item{gitstats_object}{A GitStats class object.} From bb416eb083444282928cab659ec57612836b88cf Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Tue, 10 Dec 2024 10:10:34 +0000 Subject: [PATCH 15/99] Add possibility to pass multiple grouping variables to function and customize statistics. 
--- R/GitStats.R | 27 ++++++++++++------- R/gitstats_functions.R | 17 +++++++++--- man/get_commits_stats.Rd | 15 ++++++++--- tests/testthat/test-get_commits-GitStats.R | 31 +++++++++++++++++----- 4 files changed, 67 insertions(+), 23 deletions(-) diff --git a/R/GitStats.R b/R/GitStats.R index f4567d4f..86cfd9a5 100644 --- a/R/GitStats.R +++ b/R/GitStats.R @@ -242,8 +242,14 @@ GitStats <- R6::R6Class( #' @description Prepare statistics from the pulled commits data. #' @param time_interval A character, specifying time interval to show #' statistics. + #' @param ... Other grouping variables to be passed to `dplyr::group_by()` + #' function apart from `stats_date` and `githost`. Could be: `author`, + #' `author_login`, `author_name`, `repository` or `organization`. Should be + #' passed without quotation marks. #' @return A table of `commits_stats` class. - get_commits_stats = function(time_interval = c("year", "month", "day", "week")) { + get_commits_stats = function(time_interval = c("year", "month", "day", "week"), + ..., + stats) { commits <- private$storage[["commits"]] if (is.null(commits)) { cli::cli_abort(c( @@ -256,7 +262,9 @@ GitStats <- R6::R6Class( commits_stats <- private$prepare_commits_stats( commits = commits, - time_interval = time_interval + time_interval = time_interval, + ... = ..., + stats = stats ) return(commits_stats) }, @@ -1115,19 +1123,20 @@ GitStats <- R6::R6Class( }, # Prepare stats out of commits table - prepare_commits_stats = function(commits, time_interval) { - commits_stats <- commits %>% + prepare_commits_stats = function(commits, time_interval, ..., stats) { + commits <- commits |> dplyr::mutate( stats_date = lubridate::floor_date( committed_date, unit = time_interval ), - platform = retrieve_platform(api_url) - ) %>% - dplyr::group_by(stats_date, platform, organization) %>% + githost = retrieve_platform(api_url) + ) + commits_stats <- commits |> + dplyr::group_by(stats_date, githost, ...) 
|> dplyr::summarise( - commits_n = dplyr::n() - ) %>% + stats = stats + ) |> dplyr::arrange( stats_date ) diff --git a/R/gitstats_functions.R b/R/gitstats_functions.R index 05c67bd9..64aab276 100644 --- a/R/gitstats_functions.R +++ b/R/gitstats_functions.R @@ -260,9 +260,14 @@ get_commits <- function(gitstats_object, #' @description Prepare statistics from the pulled commits data. #' @details To make function work, you need first to get commits data with #' `GitStats`. See examples section. -#' @param gitstats_object A GitStats class object. +#' @param gitstats_object A `GitStats` object. #' @param time_interval A character, specifying time interval to show #' statistics. +#' @param ... Other grouping variables to be passed to `dplyr::group_by()` +#' function apart from `stats_date` and `githost`. Could be: `author`, +#' `author_login`, `author_name`, `repository` or `organization`. Should be +#' passed without quotation marks. +#' @param stats Customize statistics. #' @return A table of `commits_stats` class. #' @examples #' \dontrun{ @@ -272,13 +277,17 @@ get_commits <- function(gitstats_object, #' repos = c("r-world-devs/GitStats", "openpharma/visR") #' ) #' get_commits(my_gitstats, since = "2022-01-01") -#' get_commits_stats(my_gitstats, time_interval = "week") +#' get_commits_stats(my_gitstats, author, time_interval = "year") #' } #' @export get_commits_stats <- function(gitstats_object, - time_interval = c("year", "month", "day", "week")) { + time_interval = c("year", "month", "day", "week"), + ..., + stats = dplyr::n()) { gitstats_object$get_commits_stats( - time_interval = time_interval + time_interval = time_interval, + ... 
= ..., + stats = stats ) } diff --git a/man/get_commits_stats.Rd b/man/get_commits_stats.Rd index 8815d0de..5be0d03e 100644 --- a/man/get_commits_stats.Rd +++ b/man/get_commits_stats.Rd @@ -6,14 +6,23 @@ \usage{ get_commits_stats( gitstats_object, - time_interval = c("year", "month", "day", "week") + time_interval = c("year", "month", "day", "week"), + ..., + stats = dplyr::n() ) } \arguments{ -\item{gitstats_object}{A GitStats class object.} +\item{gitstats_object}{A \code{GitStats} object.} \item{time_interval}{A character, specifying time interval to show statistics.} + +\item{...}{Other grouping variables to be passed to \code{dplyr::group_by()} +function apart from \code{stats_date} and \code{githost}. Could be: \code{author}, +\code{author_login}, \code{author_name}, \code{repository} or \code{organization}. Should be +passed without quotation marks.} + +\item{stats}{Customize statistics.} } \value{ A table of \code{commits_stats} class. @@ -33,6 +42,6 @@ To make function work, you need first to get commits data with repos = c("r-world-devs/GitStats", "openpharma/visR") ) get_commits(my_gitstats, since = "2022-01-01") - get_commits_stats(my_gitstats, time_interval = "week") + get_commits_stats(my_gitstats, author, time_interval = "year") } } diff --git a/tests/testthat/test-get_commits-GitStats.R b/tests/testthat/test-get_commits-GitStats.R index a28d15e0..238c11e9 100644 --- a/tests/testthat/test-get_commits-GitStats.R +++ b/tests/testthat/test-get_commits-GitStats.R @@ -74,6 +74,19 @@ test_that("get_commits() returns error when since is not defined", { ) }) +test_that("prepare_commits_stats prepares commits statistics", { + commits_stats <- test_gitstats_priv$prepare_commits_stats( + commits = test_mocker$use("commits_table"), + time_interval = "week", + author, + stats = dplyr::n() + ) + expect_equal( + colnames(commits_stats), + c("stats_date", "githost", "author", "stats") + ) +}) + test_gitstats <- create_test_gitstats( hosts = 2, inject_commits = 
"commits_table" @@ -89,35 +102,39 @@ test_that("get_commits_stats returns error when no commits", { test_that("get_commits_stats prepares table with statistics on commits", { commits_stats <- get_commits_stats( gitstats_obj = test_gitstats, - time_interval = "month" + time_interval = "month", + organization ) expect_s3_class(commits_stats, "commits_stats") expect_equal( colnames(commits_stats), - c("stats_date", "platform", "organization", "commits_n") + c("stats_date", "githost", "organization", "stats") ) expect_true( - "github" %in% commits_stats$platform + "github" %in% commits_stats$githost ) test_mocker$cache(commits_stats) commits_stats_daily <- get_commits_stats( gitstats_obj = test_gitstats, - time_interval = "day") + time_interval = "day", + organization, + ) expect_s3_class(commits_stats_daily, "commits_stats") expect_equal( colnames(commits_stats_daily), - c("stats_date", "platform", "organization", "commits_n") + c("stats_date", "githost", "organization", "stats") ) commits_stats_yearly <- get_commits_stats( gitstats_obj = test_gitstats, - time_interval = "year") + time_interval = "year" + ) expect_equal(commits_stats_yearly$stats_date, as.POSIXct(c(rep("2023-01-01", 2), "2024-01-01"), tz = 'UTC')) expect_s3_class(commits_stats_yearly, "commits_stats") expect_equal( colnames(commits_stats_yearly), - c("stats_date", "platform", "organization", "commits_n") + c("stats_date", "githost", "stats") ) }) From bf27fac13181a29b88f1b51ac4e4787182abf28c Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Tue, 10 Dec 2024 10:22:37 +0000 Subject: [PATCH 16/99] Change name of the parametr, remove superfluous docs. 
--- R/GitStats.R | 22 ++++++---------------- R/gitstats_functions.R | 6 +++--- R/utils.R | 4 ++-- man/get_commits_stats.Rd | 4 ++-- tests/testthat/test-get_commits-GitStats.R | 8 ++++---- 5 files changed, 17 insertions(+), 27 deletions(-) diff --git a/R/GitStats.R b/R/GitStats.R index 86cfd9a5..6e9f01c1 100644 --- a/R/GitStats.R +++ b/R/GitStats.R @@ -237,17 +237,7 @@ GitStats <- R6::R6Class( return(commits) }, - #' @title Get statistics on commits - #' @name get_commits_stats - #' @description Prepare statistics from the pulled commits data. - #' @param time_interval A character, specifying time interval to show - #' statistics. - #' @param ... Other grouping variables to be passed to `dplyr::group_by()` - #' function apart from `stats_date` and `githost`. Could be: `author`, - #' `author_login`, `author_name`, `repository` or `organization`. Should be - #' passed without quotation marks. - #' @return A table of `commits_stats` class. - get_commits_stats = function(time_interval = c("year", "month", "day", "week"), + get_commits_stats = function(time_aggregation = c("year", "month", "day", "week"), ..., stats) { commits <- private$storage[["commits"]] @@ -258,11 +248,11 @@ GitStats <- R6::R6Class( ), call = NULL) } - time_interval <- match.arg(time_interval) + time_aggregation <- match.arg(time_aggregation) commits_stats <- private$prepare_commits_stats( commits = commits, - time_interval = time_interval, + time_aggregation = time_aggregation, ... 
= ..., stats = stats ) @@ -1123,12 +1113,12 @@ GitStats <- R6::R6Class( }, # Prepare stats out of commits table - prepare_commits_stats = function(commits, time_interval, ..., stats) { + prepare_commits_stats = function(commits, time_aggregation, ..., stats) { commits <- commits |> dplyr::mutate( stats_date = lubridate::floor_date( committed_date, - unit = time_interval + unit = time_aggregation ), githost = retrieve_platform(api_url) ) @@ -1142,7 +1132,7 @@ GitStats <- R6::R6Class( ) commits_stats <- commits_stats( object = commits_stats, - time_interval = time_interval + time_aggregation = time_aggregation ) return(commits_stats) }, diff --git a/R/gitstats_functions.R b/R/gitstats_functions.R index 64aab276..b3dd99a2 100644 --- a/R/gitstats_functions.R +++ b/R/gitstats_functions.R @@ -261,7 +261,7 @@ get_commits <- function(gitstats_object, #' @details To make function work, you need first to get commits data with #' `GitStats`. See examples section. #' @param gitstats_object A `GitStats` object. -#' @param time_interval A character, specifying time interval to show +#' @param time_aggregation A character, specifying time aggregation of #' statistics. #' @param ... Other grouping variables to be passed to `dplyr::group_by()` #' function apart from `stats_date` and `githost`. Could be: `author`, @@ -281,11 +281,11 @@ get_commits <- function(gitstats_object, #' } #' @export get_commits_stats <- function(gitstats_object, - time_interval = c("year", "month", "day", "week"), + time_aggregation = c("year", "month", "day", "week"), ..., stats = dplyr::n()) { gitstats_object$get_commits_stats( - time_interval = time_interval, + time_aggregation = time_aggregation, ... = ..., stats = stats ) diff --git a/R/utils.R b/R/utils.R index 91919377..ed0aafcf 100644 --- a/R/utils.R +++ b/R/utils.R @@ -42,11 +42,11 @@ retrieve_platform <- function(api_url) { #' @noRd #' @description A constructor for `commits_stats` class. 
-commits_stats <- function(object, time_interval) { +commits_stats <- function(object, time_aggregation) { stopifnot(inherits(object, "grouped_df")) object <- dplyr::ungroup(object) class(object) <- append(class(object), "commits_stats") - attr(object, "time_interval") <- time_interval + attr(object, "time_aggregation") <- time_aggregation object } diff --git a/man/get_commits_stats.Rd b/man/get_commits_stats.Rd index 5be0d03e..011e63ac 100644 --- a/man/get_commits_stats.Rd +++ b/man/get_commits_stats.Rd @@ -6,7 +6,7 @@ \usage{ get_commits_stats( gitstats_object, - time_interval = c("year", "month", "day", "week"), + time_aggregation = c("year", "month", "day", "week"), ..., stats = dplyr::n() ) @@ -14,7 +14,7 @@ get_commits_stats( \arguments{ \item{gitstats_object}{A \code{GitStats} object.} -\item{time_interval}{A character, specifying time interval to show +\item{time_aggregation}{A character, specifying time aggregation of statistics.} \item{...}{Other grouping variables to be passed to \code{dplyr::group_by()} diff --git a/tests/testthat/test-get_commits-GitStats.R b/tests/testthat/test-get_commits-GitStats.R index 238c11e9..4555510d 100644 --- a/tests/testthat/test-get_commits-GitStats.R +++ b/tests/testthat/test-get_commits-GitStats.R @@ -77,7 +77,7 @@ test_that("get_commits() returns error when since is not defined", { test_that("prepare_commits_stats prepares commits statistics", { commits_stats <- test_gitstats_priv$prepare_commits_stats( commits = test_mocker$use("commits_table"), - time_interval = "week", + time_aggregation = "week", author, stats = dplyr::n() ) @@ -102,7 +102,7 @@ test_that("get_commits_stats returns error when no commits", { test_that("get_commits_stats prepares table with statistics on commits", { commits_stats <- get_commits_stats( gitstats_obj = test_gitstats, - time_interval = "month", + time_aggregation = "month", organization ) expect_s3_class(commits_stats, "commits_stats") @@ -117,7 +117,7 @@ test_that("get_commits_stats 
prepares table with statistics on commits", { commits_stats_daily <- get_commits_stats( gitstats_obj = test_gitstats, - time_interval = "day", + time_aggregation = "day", organization, ) expect_s3_class(commits_stats_daily, "commits_stats") @@ -128,7 +128,7 @@ test_that("get_commits_stats prepares table with statistics on commits", { commits_stats_yearly <- get_commits_stats( gitstats_obj = test_gitstats, - time_interval = "year" + time_aggregation = "year" ) expect_equal(commits_stats_yearly$stats_date, as.POSIXct(c(rep("2023-01-01", 2), "2024-01-01"), tz = 'UTC')) From cdcc66f33f024b79f33458e33684a1f1b1fed6c1 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Tue, 10 Dec 2024 10:31:52 +0000 Subject: [PATCH 17/99] Adds test for the GitStats public method. --- tests/testthat/test-get_commits-GitStats.R | 26 ++++++++++++---------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/tests/testthat/test-get_commits-GitStats.R b/tests/testthat/test-get_commits-GitStats.R index 4555510d..aee11c7f 100644 --- a/tests/testthat/test-get_commits-GitStats.R +++ b/tests/testthat/test-get_commits-GitStats.R @@ -92,18 +92,11 @@ test_gitstats <- create_test_gitstats( inject_commits = "commits_table" ) -test_that("get_commits_stats returns error when no commits", { - test_gitstats <- create_test_gitstats() - expect_snapshot_error( - get_commits_stats(test_gitstats) - ) -}) - -test_that("get_commits_stats prepares table with statistics on commits", { - commits_stats <- get_commits_stats( - gitstats_obj = test_gitstats, +test_that("get_commits_stats method works", { + commits_stats <- test_gitstats$get_commits_stats( time_aggregation = "month", - organization + organization, + stats = dplyr::n() ) expect_s3_class(commits_stats, "commits_stats") expect_equal( @@ -111,10 +104,19 @@ test_that("get_commits_stats prepares table with statistics on commits", { c("stats_date", "githost", "organization", "stats") ) expect_true( - "github" %in% commits_stats$githost + 
all(c("gitlab", "github") %in% commits_stats$githost) ) test_mocker$cache(commits_stats) +}) + +test_that("get_commits_stats returns error when no commits", { + test_gitstats <- create_test_gitstats() + expect_snapshot_error( + get_commits_stats(test_gitstats) + ) +}) +test_that("get_commits_stats prepares table with statistics on commits", { commits_stats_daily <- get_commits_stats( gitstats_obj = test_gitstats, time_aggregation = "day", From b209dd3110ac0d03f7edd529fd94385cdf0070cd Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Tue, 10 Dec 2024 10:38:28 +0000 Subject: [PATCH 18/99] Add NEWS. --- NEWS.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/NEWS.md b/NEWS.md index 41046e2d..bb902757 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,15 @@ # GitStats (development version) +## Features: + +- Improved `get_commits_stats()` function ([#556](https://github.com/r-world-devs/GitStats/issues/556)) with: + - giving possibility to customize grouping variables by passing them in a `dplyr` way with `...` parameter, + - adding `stats` parameter to customize statistics, + - changing name of `time_interval` parameter to `time_aggregation`, + - adding `yearly` aggregation to `time_aggregation` parameter. + +## Fixes: + - Fixed pulling commits for GitLab subgroups when repositories are set as scope to scan ([#551](https://github.com/r-world-devs/GitStats/issues/551)). - Filled more information on `author_name` and `author_login` if it was missing in `commits_table` ([#550](https://github.com/r-world-devs/GitStats/issues/550)). - Handled a `GraphQL` response error when pulling repositories with R error. Earlier, `GitStats` just returned empty table with no clue on what has happened, as errors from `GraphQL` are returned as list outputs (they do not break code). From e47113c21c883e63007878aa0220ae2fdc767a0d Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Tue, 10 Dec 2024 10:39:06 +0000 Subject: [PATCH 19/99] Update example and namespace to meet checks. 
--- NAMESPACE | 1 + R/GitStats-package.R | 1 + R/gitstats_functions.R | 7 ++++++- man/get_commits_stats.Rd | 7 ++++++- 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index e3e5e9d1..2d1a9552 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -42,6 +42,7 @@ importFrom(purrr,map) importFrom(purrr,map_chr) importFrom(rlang,"%||%") importFrom(rlang,expr) +importFrom(stats,runif) importFrom(stringr,str_length) importFrom(stringr,str_replace) importFrom(utils,URLdecode) diff --git a/R/GitStats-package.R b/R/GitStats-package.R index ed7ff2a1..8fa65150 100644 --- a/R/GitStats-package.R +++ b/R/GitStats-package.R @@ -7,4 +7,5 @@ #' @importFrom rlang expr %||% #' @importFrom httr2 request req_headers req_perform resp_body_json #' @importFrom glue glue +#' @importFrom stats runif NULL diff --git a/R/gitstats_functions.R b/R/gitstats_functions.R index b3dd99a2..8b9609ab 100644 --- a/R/gitstats_functions.R +++ b/R/gitstats_functions.R @@ -277,7 +277,12 @@ get_commits <- function(gitstats_object, #' repos = c("r-world-devs/GitStats", "openpharma/visR") #' ) #' get_commits(my_gitstats, since = "2022-01-01") -#' get_commits_stats(my_gitstats, author, time_interval = "year") +#' get_commits_stats( +#' gitstats_object = my_gitstats, +#' author, +#' organization, +#' time_interval = "year" +#' ) #' } #' @export get_commits_stats <- function(gitstats_object, diff --git a/man/get_commits_stats.Rd b/man/get_commits_stats.Rd index 011e63ac..b9b7d05e 100644 --- a/man/get_commits_stats.Rd +++ b/man/get_commits_stats.Rd @@ -42,6 +42,11 @@ To make function work, you need first to get commits data with repos = c("r-world-devs/GitStats", "openpharma/visR") ) get_commits(my_gitstats, since = "2022-01-01") - get_commits_stats(my_gitstats, author, time_interval = "year") + get_commits_stats( + gitstats_object = my_gitstats, + author, + organization, + time_interval = "year" + ) } } From ce29e53e6b41acb58b8d6b5a0410567497016665 Mon Sep 17 00:00:00 2001 From: Maciej 
Banas Date: Tue, 10 Dec 2024 13:00:37 +0000 Subject: [PATCH 20/99] Replace ... with group_var param to make workflow more straightforward (due to other parameters). --- R/GitStats.R | 24 ++++++++++++---------- R/gitstats_functions.R | 18 ++++++++-------- tests/testthat/test-get_commits-GitStats.R | 15 ++++++-------- 3 files changed, 28 insertions(+), 29 deletions(-) diff --git a/R/GitStats.R b/R/GitStats.R index 6e9f01c1..deb6176b 100644 --- a/R/GitStats.R +++ b/R/GitStats.R @@ -237,8 +237,8 @@ GitStats <- R6::R6Class( return(commits) }, - get_commits_stats = function(time_aggregation = c("year", "month", "day", "week"), - ..., + get_commits_stats = function(time_aggregation, + group_var, stats) { commits <- private$storage[["commits"]] if (is.null(commits)) { @@ -248,12 +248,10 @@ GitStats <- R6::R6Class( ), call = NULL) } - time_aggregation <- match.arg(time_aggregation) - commits_stats <- private$prepare_commits_stats( commits = commits, time_aggregation = time_aggregation, - ... = ..., + group_var = !!group_var, stats = stats ) return(commits_stats) @@ -1113,7 +1111,7 @@ GitStats <- R6::R6Class( }, # Prepare stats out of commits table - prepare_commits_stats = function(commits, time_aggregation, ..., stats) { + prepare_commits_stats = function(commits, time_aggregation, group_var, stats) { commits <- commits |> dplyr::mutate( stats_date = lubridate::floor_date( @@ -1122,11 +1120,15 @@ GitStats <- R6::R6Class( ), githost = retrieve_platform(api_url) ) - commits_stats <- commits |> - dplyr::group_by(stats_date, githost, ...) 
|> - dplyr::summarise( - stats = stats - ) |> + commits_grouped <- commits |> + dplyr::group_by(stats_date, githost, {{ group_var }}) + if (stats == "count") { + commits_stats <- commits_grouped |> + dplyr::summarise( + stats = dplyr::n() + ) + } + commits_stats <- commits_stats |> dplyr::arrange( stats_date ) diff --git a/R/gitstats_functions.R b/R/gitstats_functions.R index 8b9609ab..f91e6610 100644 --- a/R/gitstats_functions.R +++ b/R/gitstats_functions.R @@ -263,10 +263,10 @@ get_commits <- function(gitstats_object, #' @param gitstats_object A `GitStats` object. #' @param time_aggregation A character, specifying time aggregation of #' statistics. -#' @param ... Other grouping variables to be passed to `dplyr::group_by()` +#' @param group_var Other grouping variable to be passed to `dplyr::group_by()` #' function apart from `stats_date` and `githost`. Could be: `author`, -#' `author_login`, `author_name`, `repository` or `organization`. Should be -#' passed without quotation marks. +#' `author_login`, `author_name` or `organization`. Should be passed without +#' quotation marks. #' @param stats Customize statistics. #' @return A table of `commits_stats` class. #' @examples @@ -279,19 +279,19 @@ get_commits <- function(gitstats_object, #' get_commits(my_gitstats, since = "2022-01-01") #' get_commits_stats( #' gitstats_object = my_gitstats, -#' author, -#' organization, +#' group_var = author, #' time_interval = "year" #' ) #' } #' @export get_commits_stats <- function(gitstats_object, - time_aggregation = c("year", "month", "day", "week"), - ..., - stats = dplyr::n()) { + time_aggregation = c("year", "month", "week", "day"), + group_var, + stats = "count") { + time_aggregation <- match.arg(time_aggregation) gitstats_object$get_commits_stats( time_aggregation = time_aggregation, - ... 
= ..., + group_var = rlang::enquo(group_var), stats = stats ) } diff --git a/tests/testthat/test-get_commits-GitStats.R b/tests/testthat/test-get_commits-GitStats.R index aee11c7f..c3a4d05c 100644 --- a/tests/testthat/test-get_commits-GitStats.R +++ b/tests/testthat/test-get_commits-GitStats.R @@ -78,8 +78,8 @@ test_that("prepare_commits_stats prepares commits statistics", { commits_stats <- test_gitstats_priv$prepare_commits_stats( commits = test_mocker$use("commits_table"), time_aggregation = "week", - author, - stats = dplyr::n() + group_var = author, + stats = "count" ) expect_equal( colnames(commits_stats), @@ -95,14 +95,10 @@ test_gitstats <- create_test_gitstats( test_that("get_commits_stats method works", { commits_stats <- test_gitstats$get_commits_stats( time_aggregation = "month", - organization, - stats = dplyr::n() + group_var = "organization", + stats = "count" ) expect_s3_class(commits_stats, "commits_stats") - expect_equal( - colnames(commits_stats), - c("stats_date", "githost", "organization", "stats") - ) expect_true( all(c("gitlab", "github") %in% commits_stats$githost) ) @@ -120,7 +116,8 @@ test_that("get_commits_stats prepares table with statistics on commits", { commits_stats_daily <- get_commits_stats( gitstats_obj = test_gitstats, time_aggregation = "day", - organization, + group_var = organization, + stats = "count" ) expect_s3_class(commits_stats_daily, "commits_stats") expect_equal( From 2c37b82fe87851222a1ce86196aafaf2bba37535 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Tue, 10 Dec 2024 13:05:14 +0000 Subject: [PATCH 21/99] Update docs. 
--- man/get_commits_stats.Rd | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/man/get_commits_stats.Rd b/man/get_commits_stats.Rd index b9b7d05e..0e786ab6 100644 --- a/man/get_commits_stats.Rd +++ b/man/get_commits_stats.Rd @@ -6,9 +6,9 @@ \usage{ get_commits_stats( gitstats_object, - time_aggregation = c("year", "month", "day", "week"), - ..., - stats = dplyr::n() + time_aggregation = c("year", "month", "week", "day"), + group_var, + stats = "count" ) } \arguments{ @@ -17,10 +17,10 @@ get_commits_stats( \item{time_aggregation}{A character, specifying time aggregation of statistics.} -\item{...}{Other grouping variables to be passed to \code{dplyr::group_by()} +\item{group_var}{Other grouping variable to be passed to \code{dplyr::group_by()} function apart from \code{stats_date} and \code{githost}. Could be: \code{author}, -\code{author_login}, \code{author_name}, \code{repository} or \code{organization}. Should be -passed without quotation marks.} +\code{author_login}, \code{author_name} or \code{organization}. Should be passed without +quotation marks.} \item{stats}{Customize statistics.} } @@ -44,8 +44,7 @@ To make function work, you need first to get commits data with get_commits(my_gitstats, since = "2022-01-01") get_commits_stats( gitstats_object = my_gitstats, - author, - organization, + group_var = author, time_interval = "year" ) } From 154e50e6c98a746c9b3c875b11674ede10df4b40 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Tue, 10 Dec 2024 13:13:21 +0000 Subject: [PATCH 22/99] Remove `stats` parameter. 
--- NEWS.md | 5 ++--- R/GitStats.R | 22 ++++++++-------------- R/gitstats_functions.R | 7 ++----- man/get_commits_stats.Rd | 5 +---- tests/testthat/test-get_commits-GitStats.R | 9 +++------ 5 files changed, 16 insertions(+), 32 deletions(-) diff --git a/NEWS.md b/NEWS.md index bb902757..58c5b75e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,9 +3,8 @@ ## Features: - Improved `get_commits_stats()` function ([#556](https://github.com/r-world-devs/GitStats/issues/556)) with: - - giving possibility to customize grouping variables by passing them in a `dplyr` way with `...` parameter, - - adding `stats` parameter to customize statistics, - - changing name of `time_interval` parameter to `time_aggregation`, + - giving possibility to customize grouping variable by passing it with the `group_var` parameter, + - changing name of the `time_interval` parameter to `time_aggregation`, - adding `yearly` aggregation to `time_aggregation` parameter. ## Fixes: diff --git a/R/GitStats.R b/R/GitStats.R index deb6176b..c8a25461 100644 --- a/R/GitStats.R +++ b/R/GitStats.R @@ -238,8 +238,7 @@ GitStats <- R6::R6Class( }, get_commits_stats = function(time_aggregation, - group_var, - stats) { + group_var) { commits <- private$storage[["commits"]] if (is.null(commits)) { cli::cli_abort(c( @@ -251,8 +250,7 @@ GitStats <- R6::R6Class( commits_stats <- private$prepare_commits_stats( commits = commits, time_aggregation = time_aggregation, - group_var = !!group_var, - stats = stats + group_var = !!group_var ) return(commits_stats) }, @@ -1111,7 +1109,7 @@ GitStats <- R6::R6Class( }, # Prepare stats out of commits table - prepare_commits_stats = function(commits, time_aggregation, group_var, stats) { + prepare_commits_stats = function(commits, time_aggregation, group_var) { commits <- commits |> dplyr::mutate( stats_date = lubridate::floor_date( @@ -1120,15 +1118,11 @@ GitStats <- R6::R6Class( ), githost = retrieve_platform(api_url) ) - commits_grouped <- commits |> - dplyr::group_by(stats_date, 
githost, {{ group_var }}) - if (stats == "count") { - commits_stats <- commits_grouped |> - dplyr::summarise( - stats = dplyr::n() - ) - } - commits_stats <- commits_stats |> + commits_stats <- commits |> + dplyr::group_by(stats_date, githost, {{ group_var }}) |> + dplyr::summarise( + stats = dplyr::n() + ) |> dplyr::arrange( stats_date ) diff --git a/R/gitstats_functions.R b/R/gitstats_functions.R index f91e6610..7b1f0b0b 100644 --- a/R/gitstats_functions.R +++ b/R/gitstats_functions.R @@ -267,7 +267,6 @@ get_commits <- function(gitstats_object, #' function apart from `stats_date` and `githost`. Could be: `author`, #' `author_login`, `author_name` or `organization`. Should be passed without #' quotation marks. -#' @param stats Customize statistics. #' @return A table of `commits_stats` class. #' @examples #' \dontrun{ @@ -286,13 +285,11 @@ get_commits <- function(gitstats_object, #' @export get_commits_stats <- function(gitstats_object, time_aggregation = c("year", "month", "week", "day"), - group_var, - stats = "count") { + group_var) { time_aggregation <- match.arg(time_aggregation) gitstats_object$get_commits_stats( time_aggregation = time_aggregation, - group_var = rlang::enquo(group_var), - stats = stats + group_var = rlang::enquo(group_var) ) } diff --git a/man/get_commits_stats.Rd b/man/get_commits_stats.Rd index 0e786ab6..1d634a59 100644 --- a/man/get_commits_stats.Rd +++ b/man/get_commits_stats.Rd @@ -7,8 +7,7 @@ get_commits_stats( gitstats_object, time_aggregation = c("year", "month", "week", "day"), - group_var, - stats = "count" + group_var ) } \arguments{ @@ -21,8 +20,6 @@ statistics.} function apart from \code{stats_date} and \code{githost}. Could be: \code{author}, \code{author_login}, \code{author_name} or \code{organization}. Should be passed without quotation marks.} - -\item{stats}{Customize statistics.} } \value{ A table of \code{commits_stats} class. 
diff --git a/tests/testthat/test-get_commits-GitStats.R b/tests/testthat/test-get_commits-GitStats.R index c3a4d05c..003c6382 100644 --- a/tests/testthat/test-get_commits-GitStats.R +++ b/tests/testthat/test-get_commits-GitStats.R @@ -78,8 +78,7 @@ test_that("prepare_commits_stats prepares commits statistics", { commits_stats <- test_gitstats_priv$prepare_commits_stats( commits = test_mocker$use("commits_table"), time_aggregation = "week", - group_var = author, - stats = "count" + group_var = author ) expect_equal( colnames(commits_stats), @@ -95,8 +94,7 @@ test_gitstats <- create_test_gitstats( test_that("get_commits_stats method works", { commits_stats <- test_gitstats$get_commits_stats( time_aggregation = "month", - group_var = "organization", - stats = "count" + group_var = "organization" ) expect_s3_class(commits_stats, "commits_stats") expect_true( @@ -116,8 +114,7 @@ test_that("get_commits_stats prepares table with statistics on commits", { commits_stats_daily <- get_commits_stats( gitstats_obj = test_gitstats, time_aggregation = "day", - group_var = organization, - stats = "count" + group_var = organization ) expect_s3_class(commits_stats_daily, "commits_stats") expect_equal( From 0af2b3bbad92a6b91607d4d088b8b29787db50df Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Wed, 11 Dec 2024 10:11:53 +0000 Subject: [PATCH 23/99] Move get_commits_stats from GitStats, change basic input to commits_data, change name of gitstats_obj parameter to gitstats, do some cleaning of superfluous docs. 
--- DESCRIPTION | 2 +- NEWS.md | 5 +- R/GitStats.R | 179 ------------------ R/get_commits_stats.R | 68 +++++++ R/gitstats_functions.R | 156 ++++++--------- R/global.R | 2 +- R/utils.R | 10 - inst/get_commits_workflow.R | 17 ++ man/get_R_package_usage.Rd | 8 +- man/get_commits.Rd | 6 +- man/get_commits_stats.Rd | 17 +- man/get_files_content.Rd | 6 +- man/get_files_structure.Rd | 6 +- man/get_release_logs.Rd | 6 +- man/get_repos.Rd | 6 +- man/get_repos_urls.Rd | 6 +- man/get_storage.Rd | 6 +- man/get_users.Rd | 9 +- man/is_verbose.Rd | 4 +- man/set_github_host.Rd | 6 +- man/set_gitlab_host.Rd | 6 +- man/show_orgs.Rd | 4 +- man/verbose_off.Rd | 4 +- man/verbose_on.Rd | 4 +- renv.lock | 4 +- ...its-GitHub.md => 01-get_commits-GitHub.md} | 0 .../_snaps/01-get_commits-GitStats.md | 4 + tests/testthat/_snaps/get_commits-GitStats.md | 9 - tests/testthat/_snaps/get_commits_stats.md | 5 + tests/testthat/_snaps/set_host.md | 2 +- ...-GitHub.R => test-01-get_commits-GitHub.R} | 0 ...-GitLab.R => test-01-get_commits-GitLab.R} | 0 tests/testthat/test-01-get_commits-GitStats.R | 76 ++++++++ tests/testthat/test-get_commits-GitStats.R | 136 ------------- tests/testthat/test-get_commits_stats.R | 34 ++++ tests/testthat/test-get_storage.R | 2 +- tests/testthat/test-set_host.R | 4 +- 37 files changed, 323 insertions(+), 496 deletions(-) create mode 100644 R/get_commits_stats.R rename tests/testthat/_snaps/{get_commits-GitHub.md => 01-get_commits-GitHub.md} (100%) create mode 100644 tests/testthat/_snaps/01-get_commits-GitStats.md delete mode 100644 tests/testthat/_snaps/get_commits-GitStats.md create mode 100644 tests/testthat/_snaps/get_commits_stats.md rename tests/testthat/{test-get_commits-GitHub.R => test-01-get_commits-GitHub.R} (100%) rename tests/testthat/{test-get_commits-GitLab.R => test-01-get_commits-GitLab.R} (100%) create mode 100644 tests/testthat/test-01-get_commits-GitStats.R delete mode 100644 tests/testthat/test-get_commits-GitStats.R create mode 100644 
tests/testthat/test-get_commits_stats.R diff --git a/DESCRIPTION b/DESCRIPTION index 07199bdb..cdfba9aa 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: GitStats Title: Standardized Git Repository Data -Version: 2.1.2.9002 +Version: 2.1.2.9003 Authors@R: c( person(given = "Maciej", family = "Banas", email = "banasmaciek@gmail.com", role = c("aut", "cre")), person(given = "Kamil", family = "Koziej", email = "koziej.k@gmail.com", role = "aut"), diff --git a/NEWS.md b/NEWS.md index 58c5b75e..64459297 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,10 +2,11 @@ ## Features: -- Improved `get_commits_stats()` function ([#556](https://github.com/r-world-devs/GitStats/issues/556)) with: +- Improved `get_commits_stats()` function ([#556](https://github.com/r-world-devs/GitStats/issues/556), [#557](https://github.com/r-world-devs/GitStats/issues/557)) with: - giving possibility to customize grouping variable by passing it with the `group_var` parameter, - changing name of the `time_interval` parameter to `time_aggregation`, - - adding `yearly` aggregation to `time_aggregation` parameter. + - adding `yearly` aggregation to `time_aggregation` parameter, + - changing basic input from `GitStats` to `commits_data` object which allows to build workflow in one pipeline (`create_gitstats() |> set_*_host() |> get_commits() |> get_commits_stats()`). ## Fixes: diff --git a/R/GitStats.R b/R/GitStats.R index c8a25461..a58518b8 100644 --- a/R/GitStats.R +++ b/R/GitStats.R @@ -4,19 +4,6 @@ GitStats <- R6::R6Class( classname = "GitStats", public = list( - #' @description Method to set connections to Git platforms. - #' @param host A character, optional, URL name of the host. If not passed, a - #' public host will be used (api.github.com). - #' @param token A token. - #' @param orgs An optional character vector of organisations (owners of - #' repositories in case of GitHub). If you pass it, `repos` parameter - #' should stay `NULL`. 
- #' @param repos An optional character vector of repositories full names - #' (organization and repository name, e.g. "r-world-devs/GitStats"). If - #' you pass it, `orgs` parameter should stay `NULL`. - #' @param verbose A logical, `TRUE` by default. If `FALSE` messages and printing - #' output is switched off. - #' @return Nothing, puts connection information into `$hosts` slot. set_github_host = function(host, token = NULL, orgs = NULL, @@ -33,19 +20,6 @@ GitStats <- R6::R6Class( private$add_new_host(new_host) }, - #' @description Method to set connections to Git platforms. - #' @param host A character, optional, URL name of the host. If not passed, a - #' public host will be used (gitlab.com/api/v4). - #' @param token A token. - #' @param orgs An optional character vector of organisations (groups of - #' projects in case of GitLab). If you pass it, `repos` parameter should - #' stay `NULL`. - #' @param repos An optional character vector of repositories full names - #' (organization and repository name, e.g. "r-world-devs/GitStats"). If - #' you pass it, `orgs` parameter should stay `NULL`. - #' @param verbose A logical, `TRUE` by default. If `FALSE` messages and printing - #' output is switched off. - #' @return Nothing, puts connection information into `$hosts` slot. set_gitlab_host = function(host, token = NULL, orgs = NULL, @@ -62,23 +36,6 @@ GitStats <- R6::R6Class( private$add_new_host(new_host) }, - #' @description A method to list all repositories for an organization or by - #' a keyword. - #' @param add_contributors A boolean to decide whether to add contributors - #' information to repositories. - #' @param with_code A character vector, if defined, GitStats will pull - #' repositories with specified code phrases in code blobs. - #' @param in_files A character vector of file names. Works when `with_code` is - #' set - then it searches code blobs only in files passed to `in_files` - #' parameter. 
- #' @param with_files A character vector, if defined, GitStats will pull - #' repositories with specified files. - #' @param cache A logical, if set to `TRUE` GitStats will retrieve the last - #' result from its storage. - #' @param verbose A logical, `TRUE` by default. If `FALSE` messages and - #' printing output is switched off. - #' @param progress A logical, by default set to `verbose` value. If `FALSE` no - #' `cli` progress bar will be displayed. get_repos = function(add_contributors = FALSE, with_code = NULL, in_files = NULL, @@ -126,24 +83,6 @@ GitStats <- R6::R6Class( return(repositories) }, - #' @description A wrapper over search API endpoints to list repositories - #' URLS. - #' @param type A character, choose if `api` or `web` (`html`) URLs should be - #' returned. - #' @param with_code A character vector, if defined, GitStats will pull - #' repositories with specified code phrases in code blobs. - #' @param in_files A character vector of file names. Works when `with_code` - #' is set - then it searches code blobs only in files passed to `in_files` - #' parameter. - #' @param with_files A character vector, if defined, GitStats will pull - #' repositories with specified files. - #' @param cache A logical, if set to `TRUE` GitStats will retrieve the last - #' result from its storage. - #' @param verbose A logical, `TRUE` by default. If `FALSE` messages and - #' printing output is switched off. - #' @param progress A logical, by default set to `verbose` value. If `FALSE` - #' no `cli` progress bar will be displayed. - #' @return A character vector. get_repos_urls = function(type = "web", with_code = NULL, in_files = NULL, @@ -192,15 +131,6 @@ GitStats <- R6::R6Class( return(repos_urls) }, - #' @description A method to get information on commits. - #' @param since A starting date for commits. - #' @param until An end date for commits. - #' @param cache A logical, if set to `TRUE` GitStats will retrieve the last - #' result from its storage. 
- #' @param verbose A logical, `TRUE` by default. If `FALSE` messages and - #' printing output is switched off. - #' @param progress A logical, by default set to `verbose` value. If `FALSE` - #' no `cli` progress bar will be displayed. get_commits = function(since, until, cache = TRUE, @@ -237,30 +167,6 @@ GitStats <- R6::R6Class( return(commits) }, - get_commits_stats = function(time_aggregation, - group_var) { - commits <- private$storage[["commits"]] - if (is.null(commits)) { - cli::cli_abort(c( - "x" = "No commits found in GitStats storage.", - "i" = "Run first `get_commits()`." - ), - call = NULL) - } - commits_stats <- private$prepare_commits_stats( - commits = commits, - time_aggregation = time_aggregation, - group_var = !!group_var - ) - return(commits_stats) - }, - - #' @description Get information on users. - #' @param logins Character vector of logins. - #' @param cache A logical, if set to `TRUE` GitStats will retrieve the last - #' result from its storage. - #' @param verbose A logical, `TRUE` by default. If `FALSE` messages and - #' printing output is switched off. get_users = function(logins, cache = TRUE, verbose = TRUE) { private$check_for_host() args_list <- list("logins" = logins) @@ -286,26 +192,6 @@ GitStats <- R6::R6Class( return(users) }, - #' @description Pull text content of a file from all repositories. - #' @param file_path Optional. A standardized path to file(s) in - #' repositories. May be a character vector if multiple files are to be - #' pulled. If set to `NULL` and `use_files_structure` parameter is set to - #' `TRUE`, `GitStats` will try to pull data from `files_structure` (see - #' below). - #' @param use_files_structure Logical. If `TRUE` and `file_path` is set to - #' `NULL`, will iterate over `files_structure` pulled by - #' `get_files_structure()` function and kept in storage. If there is no - #' `files_structure` in storage, an error will be returned. 
If `file_path` - #' is defined, it will override `use_files_structure` parameter. - #' @param only_text_files A logical, `TRUE` by default. If set to `FALSE`, - #' apart from files with text content shows in table output also non-text - #' files with `NA` value for text content. - #' @param cache A logical, if set to `TRUE` GitStats will retrieve the last - #' result from its storage. - #' @param verbose A logical, `TRUE` by default. If `FALSE` messages and - #' printing output is switched off. - #' @param progress A logical, by default set to `verbose` value. If `FALSE` - #' no `cli` progress bar will be displayed. get_files_content = function(file_path = NULL, use_files_structure = TRUE, only_text_files = TRUE, @@ -344,21 +230,6 @@ GitStats <- R6::R6Class( return(files) }, - #' @name get_files_structure - #' @description Pulls file structure for a given repository. - #' @param gitstats_object A GitStats object. - #' @param pattern An optional regular expression. If defined, it pulls file - #' structure for a repository matching this pattern. - #' @param depth An optional integer. Defines level of directories to retrieve - #' files from. E.g. if set to `0`, it will pull files only from root, if `1`, - #' will take data from `root` directory and directories visible in `root` - #' directory. If left with no argument, will pull files from all directories. - #' @param cache A logical, if set to `TRUE` GitStats will retrieve the last - #' result from its storage. - #' @param verbose A logical, `TRUE` by default. If `FALSE` messages and printing - #' output is switched off. - #' @param progress A logical, by default set to `verbose` value. If `FALSE` - #' no `cli` progress bar will be displayed. get_files_structure = function(pattern, depth, cache = TRUE, @@ -397,15 +268,6 @@ GitStats <- R6::R6Class( return(files_structure) }, - #' @description Get release logs of repositories. - #' @param since A starting date for release logs. 
- #' @param until An end date for release logs. - #' @param cache A logical, if set to `TRUE` GitStats will retrieve the last - #' result from its storage. - #' @param verbose A logical, `TRUE` by default. If `FALSE` messages and - #' printing output is switched off. - #' @param progress A logical, by default set to `verbose` value. If `FALSE` - #' no `cli` progress bar will be displayed. get_release_logs = function(since, until = Sys.Date(), cache = TRUE, @@ -440,18 +302,6 @@ GitStats <- R6::R6Class( return(release_logs) }, - #' @description Wrapper over pulling repositories by code. - #' @param packages A character vector, names of R packages to look for. - #' @param only_loading A boolean, if `TRUE` function will check only if - #' package is loaded in repositories, not used as dependencies. - #' @param split_output Optional, a boolean. If `TRUE` will return a list of - #' tables, where every element of the list stands for the package passed to - #' `packages` parameter. If `FALSE`, will return only one table with name of - #' the package stored in first column. - #' @param cache A logical, if set to `TRUE` GitStats will retrieve the last - #' result from its storage. - #' @param verbose A logical, `TRUE` by default. If `FALSE` messages and - #' printing output is switched off. get_R_package_usage = function(packages, only_loading = FALSE, split_output = FALSE, @@ -496,7 +346,6 @@ GitStats <- R6::R6Class( return(R_package_usage) }, - #' @description Return organizations vector from GitStats. 
show_orgs = function() { purrr::map(private$hosts, function(host) { orgs <- host$.__enclos_env__$private$orgs @@ -505,12 +354,10 @@ GitStats <- R6::R6Class( unlist() }, - #' @description switch on verbose mode verbose_on = function() { private$settings$verbose <- TRUE }, - #' @description switch off verbose mode verbose_off = function() { private$settings$verbose <- FALSE }, @@ -527,7 +374,6 @@ GitStats <- R6::R6Class( } }, - #' @description A print method for a GitStats object. print = function() { cat(paste0("A ", cli::col_blue('GitStats'), " object for ", length(private$hosts), " hosts: \n")) private$print_hosts() @@ -1108,31 +954,6 @@ GitStats <- R6::R6Class( return(repos_table) }, - # Prepare stats out of commits table - prepare_commits_stats = function(commits, time_aggregation, group_var) { - commits <- commits |> - dplyr::mutate( - stats_date = lubridate::floor_date( - committed_date, - unit = time_aggregation - ), - githost = retrieve_platform(api_url) - ) - commits_stats <- commits |> - dplyr::group_by(stats_date, githost, {{ group_var }}) |> - dplyr::summarise( - stats = dplyr::n() - ) |> - dplyr::arrange( - stats_date - ) - commits_stats <- commits_stats( - object = commits_stats, - time_aggregation = time_aggregation - ) - return(commits_stats) - }, - # @description Check whether the urls do not repeat in input. # @param host An object of GitPlatform class. # @return A GitPlatform object. diff --git a/R/get_commits_stats.R b/R/get_commits_stats.R new file mode 100644 index 00000000..c73a0d08 --- /dev/null +++ b/R/get_commits_stats.R @@ -0,0 +1,68 @@ +#' @title Get commits statistics +#' @name get_commits_stats +#' @description Prepare statistics from the pulled commits data. +#' @details To make function work, you need first to get commits data with +#' `GitStats`. See examples section. +#' @param commits A `commits_data` table (output of `get_commits()`). +#' @param time_aggregation A character, specifying time aggregation of +#' statistics. 
+#' @param group_var Other grouping variable to be passed to `dplyr::group_by()` +#' function apart from `stats_date` and `githost`. Could be: `author`, +#' `author_login`, `author_name` or `organization`. Should be passed without +#' quotation marks. +#' @return A table of `commits_stats` class. +#' @examples +#' \dontrun{ +#' my_gitstats <- create_gitstats() %>% +#' set_github_host( +#' token = Sys.getenv("GITHUB_PAT"), +#' repos = c("r-world-devs/GitStats", "openpharma/visR") +#' ) |> +#' get_commits(my_gitstats, since = "2022-01-01") |> +#' get_commits_stats( +#' time_aggregation = "year", +#' group_var = author +#' ) +#' } +#' @export +get_commits_stats <- function(commits, + time_aggregation = c("year", "month", "week", "day"), + group_var) { + if (!inherits(commits, "commits_data")) { + cli::cli_abort(c( + "x" = "`commits` must be a `commits_data` object.", + "i" = "Pull first your commits with `get_commits()` function." + )) + } + commits <- commits |> + dplyr::mutate( + stats_date = lubridate::floor_date( + committed_date, + unit = time_aggregation + ), + githost = retrieve_platform(api_url) + ) + commits_stats <- commits |> + dplyr::group_by(stats_date, githost, {{ group_var }}) |> + dplyr::summarise( + stats = dplyr::n() + ) |> + dplyr::arrange( + stats_date + ) + commits_stats <- set_commits_stats_class( + object = commits_stats, + time_aggregation = time_aggregation + ) + return(commits_stats) +} + +#' @noRd +#' @description A constructor for `commits_stats` class. 
+set_commits_stats_class <- function(object, time_aggregation) { + stopifnot(inherits(object, "grouped_df")) + object <- dplyr::ungroup(object) + class(object) <- append(class(object), "commits_stats") + attr(object, "time_aggregation") <- time_aggregation + object +} diff --git a/R/gitstats_functions.R b/R/gitstats_functions.R index 7b1f0b0b..fb95e945 100644 --- a/R/gitstats_functions.R +++ b/R/gitstats_functions.R @@ -10,7 +10,7 @@ create_gitstats <- function() { #' @title Set GitHub host #' @name set_github_host -#' @param gitstats_object A GitStats object. +#' @param gitstats A GitStats object. #' @param host A character, optional, URL name of the host. If not passed, a #' public host will be used. #' @param token A token. @@ -35,13 +35,13 @@ create_gitstats <- function() { #' ) #' } #' @export -set_github_host <- function(gitstats_object, +set_github_host <- function(gitstats, host = NULL, token = NULL, orgs = NULL, repos = NULL, - verbose = is_verbose(gitstats_object)) { - gitstats_object$set_github_host( + verbose = is_verbose(gitstats)) { + gitstats$set_github_host( host = host, token = token, orgs = orgs, @@ -49,7 +49,7 @@ set_github_host <- function(gitstats_object, verbose = verbose ) - return(invisible(gitstats_object)) + return(invisible(gitstats)) } #' @title Set GitLab host @@ -70,13 +70,13 @@ set_github_host <- function(gitstats_object, #' ) #' } #' @export -set_gitlab_host <- function(gitstats_object, +set_gitlab_host <- function(gitstats, host = NULL, token = NULL, orgs = NULL, repos = NULL, - verbose = is_verbose(gitstats_object)) { - gitstats_object$set_gitlab_host( + verbose = is_verbose(gitstats)) { + gitstats$set_gitlab_host( host = host, token = token, orgs = orgs, @@ -84,7 +84,7 @@ set_gitlab_host <- function(gitstats_object, verbose = verbose ) - return(invisible(gitstats_object)) + return(invisible(gitstats)) } #' @title Get data on repositories @@ -92,7 +92,7 @@ set_gitlab_host <- function(gitstats_object, #' @description Pulls data on 
all repositories for an organization, individual #' user or those with a given text in code blobs (`with_code` parameter) or a #' file (`with_files` parameter) and parse it into table format. -#' @param gitstats_object A GitStats object. +#' @param gitstats A GitStats object. #' @param add_contributors A logical parameter to decide whether to add #' information about repositories' contributors to the repositories output #' (table). If set to `FALSE` it makes function run faster as, in the case of @@ -134,15 +134,15 @@ set_gitlab_host <- function(gitstats_object, #' get_repos(my_gitstats, with_files = "DESCRIPTION") #' } #' @export -get_repos <- function(gitstats_object, +get_repos <- function(gitstats, add_contributors = TRUE, with_code = NULL, in_files = NULL, with_files = NULL, cache = TRUE, - verbose = is_verbose(gitstats_object), + verbose = is_verbose(gitstats), progress = verbose) { - gitstats_object$get_repos( + gitstats$get_repos( add_contributors = add_contributors, with_code = with_code, in_files = in_files, @@ -158,7 +158,7 @@ get_repos <- function(gitstats_object, #' @description Pulls a vector of repositories URLs (web or API): either all for #' an organization or those with a given text in code blobs (`with_code` #' parameter) or a file (`with_files` parameter). -#' @param gitstats_object A GitStats object. +#' @param gitstats A GitStats object. #' @param type A character, choose if `api` or `web` (`html`) URLs should be #' returned. 
#' @param with_code A character vector, if defined, GitStats will pull @@ -190,15 +190,15 @@ get_repos <- function(gitstats_object, #' get_repos_urls(my_gitstats, with_files = c("DESCRIPTION", "LICENSE")) #' } #' @export -get_repos_urls <- function(gitstats_object, +get_repos_urls <- function(gitstats, type = "web", with_code = NULL, in_files = NULL, with_files = NULL, cache = TRUE, - verbose = is_verbose(gitstats_object), + verbose = is_verbose(gitstats), progress = verbose) { - gitstats_object$get_repos_urls( + gitstats$get_repos_urls( type = type, with_code = with_code, in_files = in_files, @@ -213,7 +213,7 @@ get_repos_urls <- function(gitstats_object, #' @name get_commits #' @description List all commits from all repositories for an organization or a #' vector of repositories. -#' @param gitstats_object A GitStats object. +#' @param gitstats A GitStats object. #' @param since A starting date. #' @param until An end date. #' @param cache A logical, if set to `TRUE` GitStats will retrieve the last @@ -237,16 +237,16 @@ get_repos_urls <- function(gitstats_object, #' get_commits(my_gitstats, since = "2018-01-01") #' } #' @export -get_commits <- function(gitstats_object, +get_commits <- function(gitstats, since = NULL, until = Sys.Date() + lubridate::days(1), cache = TRUE, - verbose = is_verbose(gitstats_object), + verbose = is_verbose(gitstats), progress = verbose) { if (is.null(since)) { cli::cli_abort(cli::col_red("You need to pass date to `since` parameter."), call = NULL) } - gitstats_object$get_commits( + gitstats$get_commits( since = since, until = until, cache = cache, @@ -255,47 +255,9 @@ get_commits <- function(gitstats_object, ) } -#' @title Get commits statistics -#' @name get_commits_stats -#' @description Prepare statistics from the pulled commits data. -#' @details To make function work, you need first to get commits data with -#' `GitStats`. See examples section. -#' @param gitstats_object A `GitStats` object. 
-#' @param time_aggregation A character, specifying time aggregation of -#' statistics. -#' @param group_var Other grouping variable to be passed to `dplyr::group_by()` -#' function apart from `stats_date` and `githost`. Could be: `author`, -#' `author_login`, `author_name` or `organization`. Should be passed without -#' quotation marks. -#' @return A table of `commits_stats` class. -#' @examples -#' \dontrun{ -#' my_gitstats <- create_gitstats() %>% -#' set_github_host( -#' token = Sys.getenv("GITHUB_PAT"), -#' repos = c("r-world-devs/GitStats", "openpharma/visR") -#' ) -#' get_commits(my_gitstats, since = "2022-01-01") -#' get_commits_stats( -#' gitstats_object = my_gitstats, -#' group_var = author, -#' time_interval = "year" -#' ) -#' } -#' @export -get_commits_stats <- function(gitstats_object, - time_aggregation = c("year", "month", "week", "day"), - group_var) { - time_aggregation <- match.arg(time_aggregation) - gitstats_object$get_commits_stats( - time_aggregation = time_aggregation, - group_var = rlang::enquo(group_var) - ) -} - #' @title Get users data #' @name get_users -#' @param gitstats_object A GitStats object. +#' @param gitstats A GitStats object. #' @param logins A character vector of logins. #' @param cache A logical, if set to `TRUE` GitStats will retrieve the last #' result from its storage. @@ -316,11 +278,11 @@ get_commits_stats <- function(gitstats_object, #' } #' @return A data.frame. #' @export -get_users <- function(gitstats_object, +get_users <- function(gitstats, logins, cache = TRUE, - verbose = is_verbose(gitstats_object)) { - gitstats_object$get_users( + verbose = is_verbose(gitstats)) { + gitstats$get_users( logins = logins, cache = cache, verbose = verbose @@ -331,7 +293,7 @@ get_users <- function(gitstats_object, #' @name get_files_content #' @description Pull text files content for a given scope (orgs, repos or whole #' git hosts). -#' @param gitstats_object A GitStats object. +#' @param gitstats A GitStats object. 
#' @param file_path Optional. A standardized path to file(s) in repositories. #' May be a character vector if multiple files are to be pulled. If set to #' `NULL` and `use_files_structure` parameter is set to `TRUE`, `GitStats` @@ -379,14 +341,14 @@ get_users <- function(gitstats_object, #' } #' @return A data.frame. #' @export -get_files_content <- function(gitstats_object, +get_files_content <- function(gitstats, file_path = NULL, use_files_structure = TRUE, only_text_files = TRUE, cache = TRUE, - verbose = is_verbose(gitstats_object), + verbose = is_verbose(gitstats), progress = verbose) { - gitstats_object$get_files_content( + gitstats$get_files_content( file_path = file_path, use_files_structure = use_files_structure, only_text_files = only_text_files, @@ -399,7 +361,7 @@ get_files_content <- function(gitstats_object, #' @title Get structure of files #' @name get_files_structure #' @description Pulls file structure for a given repository. -#' @param gitstats_object A GitStats object. +#' @param gitstats A GitStats object. #' @param pattern An optional regular expression. If defined, it pulls file #' structure for a repository matching this pattern. #' @param depth An optional integer. Defines level of directories to retrieve @@ -430,13 +392,13 @@ get_files_content <- function(gitstats_object, #' } #' @return A list of vectors. #' @export -get_files_structure <- function(gitstats_object, +get_files_structure <- function(gitstats, pattern = NULL, depth = Inf, cache = TRUE, - verbose = is_verbose(gitstats_object), + verbose = is_verbose(gitstats), progress = verbose) { - gitstats_object$get_files_structure( + gitstats$get_files_structure( pattern = pattern, depth = depth, cache = cache, @@ -451,7 +413,7 @@ get_files_structure <- function(gitstats_object, #' loading package (`library(package)` and `require(package)` in all files) or #' using it as a dependency (`package` in `DESCRIPTION` and `NAMESPACE` #' files). -#' @param gitstats_object A GitStats object. 
+#' @param gitstats A GitStats object. #' @param packages A character vector, names of R packages to look for. #' @param only_loading A boolean, if `TRUE` function will check only if package #' is loaded in repositories, not used as dependencies. @@ -474,19 +436,19 @@ get_files_structure <- function(gitstats_object, #' ) #' #' get_R_package_usage( -#' gitstats_object = my_gitstats, +#' gitstats = my_gitstats, #' packages = c("purrr", "shiny"), #' split_output = TRUE #' ) #' } #' @export -get_R_package_usage <- function(gitstats_object, +get_R_package_usage <- function(gitstats, packages, only_loading = FALSE, split_output = FALSE, cache = TRUE, - verbose = is_verbose(gitstats_object)) { - gitstats_object$get_R_package_usage( + verbose = is_verbose(gitstats)) { + gitstats$get_R_package_usage( packages = packages, only_loading = only_loading, split_output = split_output, @@ -510,16 +472,16 @@ get_R_package_usage <- function(gitstats_object, #' get_release_logs(my_gistats, since = "2024-01-01") #' } #' @export -get_release_logs <- function(gitstats_object, +get_release_logs <- function(gitstats, since = NULL, until = Sys.Date() + lubridate::days(1), cache = TRUE, - verbose = is_verbose(gitstats_object), + verbose = is_verbose(gitstats), progress = verbose) { if (is.null(since)) { cli::cli_abort(cli::col_red("You need to pass date to `since` parameter."), call = NULL) } - gitstats_object$get_release_logs( + gitstats$get_release_logs( since = since, until = until, cache = cache, @@ -533,47 +495,47 @@ get_release_logs <- function(gitstats_object, #' @description Retrieves organizations set or pulled by `GitStats`. Especially #' helpful when user is scanning whole git platform and wants to have a #' glimpse at organizations. -#' @param gitstats_object A GitStats object. +#' @param gitstats A GitStats object. #' @return A vector of organizations. 
#' @export -show_orgs <- function(gitstats_object) { - gitstats_object$show_orgs() +show_orgs <- function(gitstats) { + gitstats$show_orgs() } #' @title Switch on verbose mode #' @name verbose_on #' @description Print all messages and output. -#' @param gitstats_object A GitStats object. +#' @param gitstats A GitStats object. #' @return A GitStats object. #' @export -verbose_on <- function(gitstats_object) { - gitstats_object$verbose_on() - return(invisible(gitstats_object)) +verbose_on <- function(gitstats) { + gitstats$verbose_on() + return(invisible(gitstats)) } #' @title Switch off verbose mode #' @name verbose_off #' @description Stop printing messages and output. -#' @param gitstats_object A GitStats object. +#' @param gitstats A GitStats object. #' @return A GitStats object. #' @export -verbose_off <- function(gitstats_object) { - gitstats_object$verbose_off() - return(invisible(gitstats_object)) +verbose_off <- function(gitstats) { + gitstats$verbose_off() + return(invisible(gitstats)) } #' @title Is verbose mode switched on #' @name is_verbose -#' @param gitstats_object A GitStats object. -is_verbose <- function(gitstats_object) { - gitstats_object$is_verbose() +#' @param gitstats A GitStats object. +is_verbose <- function(gitstats) { + gitstats$is_verbose() } #' @title Get data from `GitStats` storage #' @name get_storage #' @description Retrieves whole or particular data (see `storage` parameter) #' pulled earlier with `GitStats`. -#' @param gitstats_object A GitStats object. +#' @param gitstats A GitStats object. #' @param storage A character, type of the data you want to get from storage: #' `commits`, `repositories`, `release_logs`, `users`, `files`, #' `files_structure`, `R_package_usage` or `release_logs`. 
@@ -590,14 +552,14 @@ is_verbose <- function(gitstats_object) { #' get_repos(my_gitstats) #' #' release_logs <- get_storage( -#' gitstats_object = my_gitstats, +#' gitstats = my_gitstats, #' storage = "release_logs" #' ) #' } #' @export -get_storage <- function(gitstats_object, +get_storage <- function(gitstats, storage = NULL) { - gitstats_object$get_storage( + gitstats$get_storage( storage = storage ) } diff --git a/R/global.R b/R/global.R index 23d1fe37..df3db824 100644 --- a/R/global.R +++ b/R/global.R @@ -4,7 +4,7 @@ globalVariables(c( "repo_name", "created_at", "last_activity_at", "last_activity", "stats_date", "committed_date", "commits_n", "api_url", "row_no", ".N", ".data", "repository", "stars", "forks", "languages", "issues_open", "issues_closed", - "contributors_n" + "contributors_n", "githost" )) non_text_files_pattern <- "\\.(png||.jpg||.jpeg||.bmp||.gif||.tiff)$" diff --git a/R/utils.R b/R/utils.R index ed0aafcf..8670874e 100644 --- a/R/utils.R +++ b/R/utils.R @@ -40,16 +40,6 @@ retrieve_platform <- function(api_url) { ) } -#' @noRd -#' @description A constructor for `commits_stats` class. 
-commits_stats <- function(object, time_aggregation) { - stopifnot(inherits(object, "grouped_df")) - object <- dplyr::ungroup(object) - class(object) <- append(class(object), "commits_stats") - attr(object, "time_aggregation") <- time_aggregation - object -} - #' @noRd standardize_dates <- function(dates) { purrr::discard(dates, is.null) %>% diff --git a/inst/get_commits_workflow.R b/inst/get_commits_workflow.R index df7fd15c..70ab4111 100644 --- a/inst/get_commits_workflow.R +++ b/inst/get_commits_workflow.R @@ -28,3 +28,20 @@ get_commits( # Check printing in storage test_gitstats + +commits_stats <- create_gitstats() %>% + set_github_host( + orgs = c("r-world-devs", "openpharma"), + token = Sys.getenv("GITHUB_PAT") + ) %>% + set_gitlab_host( + orgs = c("mbtests"), + token = Sys.getenv("GITLAB_PAT_PUBLIC") + ) |> + get_commits( + since = "2024-01-01" + ) |> + get_commits_stats( + time_aggregation = "year", + group_var = author_name + ) diff --git a/man/get_R_package_usage.Rd b/man/get_R_package_usage.Rd index 9e56c0e5..36a77be5 100644 --- a/man/get_R_package_usage.Rd +++ b/man/get_R_package_usage.Rd @@ -5,16 +5,16 @@ \title{Get data on package usage across repositories} \usage{ get_R_package_usage( - gitstats_object, + gitstats, packages, only_loading = FALSE, split_output = FALSE, cache = TRUE, - verbose = is_verbose(gitstats_object) + verbose = is_verbose(gitstats) ) } \arguments{ -\item{gitstats_object}{A GitStats object.} +\item{gitstats}{A GitStats object.} \item{packages}{A character vector, names of R packages to look for.} @@ -51,7 +51,7 @@ files). 
) get_R_package_usage( - gitstats_object = my_gitstats, + gitstats = my_gitstats, packages = c("purrr", "shiny"), split_output = TRUE ) diff --git a/man/get_commits.Rd b/man/get_commits.Rd index 181817db..6358cb43 100644 --- a/man/get_commits.Rd +++ b/man/get_commits.Rd @@ -5,16 +5,16 @@ \title{Get data on commits} \usage{ get_commits( - gitstats_object, + gitstats, since = NULL, until = Sys.Date() + lubridate::days(1), cache = TRUE, - verbose = is_verbose(gitstats_object), + verbose = is_verbose(gitstats), progress = verbose ) } \arguments{ -\item{gitstats_object}{A GitStats object.} +\item{gitstats}{A GitStats object.} \item{since}{A starting date.} diff --git a/man/get_commits_stats.Rd b/man/get_commits_stats.Rd index 1d634a59..b2a64d0d 100644 --- a/man/get_commits_stats.Rd +++ b/man/get_commits_stats.Rd @@ -1,17 +1,17 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gitstats_functions.R +% Please edit documentation in R/get_commits_stats.R \name{get_commits_stats} \alias{get_commits_stats} \title{Get commits statistics} \usage{ get_commits_stats( - gitstats_object, + commits, time_aggregation = c("year", "month", "week", "day"), group_var ) } \arguments{ -\item{gitstats_object}{A \code{GitStats} object.} +\item{commits}{A \code{commits_data} table (output of \code{get_commits()}).} \item{time_aggregation}{A character, specifying time aggregation of statistics.} @@ -37,12 +37,11 @@ To make function work, you need first to get commits data with set_github_host( token = Sys.getenv("GITHUB_PAT"), repos = c("r-world-devs/GitStats", "openpharma/visR") + ) |> + get_commits(my_gitstats, since = "2022-01-01") |> + get_commits_stats( + time_aggregation = "year", + group_var = author ) - get_commits(my_gitstats, since = "2022-01-01") - get_commits_stats( - gitstats_object = my_gitstats, - group_var = author, - time_interval = "year" - ) } } diff --git a/man/get_files_content.Rd b/man/get_files_content.Rd index 8a2b0292..1aa1a830 100644 --- 
a/man/get_files_content.Rd +++ b/man/get_files_content.Rd @@ -5,17 +5,17 @@ \title{Get content of files} \usage{ get_files_content( - gitstats_object, + gitstats, file_path = NULL, use_files_structure = TRUE, only_text_files = TRUE, cache = TRUE, - verbose = is_verbose(gitstats_object), + verbose = is_verbose(gitstats), progress = verbose ) } \arguments{ -\item{gitstats_object}{A GitStats object.} +\item{gitstats}{A GitStats object.} \item{file_path}{Optional. A standardized path to file(s) in repositories. May be a character vector if multiple files are to be pulled. If set to diff --git a/man/get_files_structure.Rd b/man/get_files_structure.Rd index ec3fd1c7..f02be215 100644 --- a/man/get_files_structure.Rd +++ b/man/get_files_structure.Rd @@ -5,16 +5,16 @@ \title{Get structure of files} \usage{ get_files_structure( - gitstats_object, + gitstats, pattern = NULL, depth = Inf, cache = TRUE, - verbose = is_verbose(gitstats_object), + verbose = is_verbose(gitstats), progress = verbose ) } \arguments{ -\item{gitstats_object}{A GitStats object.} +\item{gitstats}{A GitStats object.} \item{pattern}{An optional regular expression. 
If defined, it pulls file structure for a repository matching this pattern.} diff --git a/man/get_release_logs.Rd b/man/get_release_logs.Rd index 7fb365eb..56b7aba9 100644 --- a/man/get_release_logs.Rd +++ b/man/get_release_logs.Rd @@ -5,16 +5,16 @@ \title{Get release logs} \usage{ get_release_logs( - gitstats_object, + gitstats, since = NULL, until = Sys.Date() + lubridate::days(1), cache = TRUE, - verbose = is_verbose(gitstats_object), + verbose = is_verbose(gitstats), progress = verbose ) } \arguments{ -\item{gitstats_object}{A GitStats object.} +\item{gitstats}{A GitStats object.} \item{since}{A starting date.} diff --git a/man/get_repos.Rd b/man/get_repos.Rd index 6bf5406b..97f36a6c 100644 --- a/man/get_repos.Rd +++ b/man/get_repos.Rd @@ -5,18 +5,18 @@ \title{Get data on repositories} \usage{ get_repos( - gitstats_object, + gitstats, add_contributors = TRUE, with_code = NULL, in_files = NULL, with_files = NULL, cache = TRUE, - verbose = is_verbose(gitstats_object), + verbose = is_verbose(gitstats), progress = verbose ) } \arguments{ -\item{gitstats_object}{A GitStats object.} +\item{gitstats}{A GitStats object.} \item{add_contributors}{A logical parameter to decide whether to add information about repositories' contributors to the repositories output diff --git a/man/get_repos_urls.Rd b/man/get_repos_urls.Rd index 5dfeedbf..2b5b21bc 100644 --- a/man/get_repos_urls.Rd +++ b/man/get_repos_urls.Rd @@ -5,18 +5,18 @@ \title{Get repository URLS} \usage{ get_repos_urls( - gitstats_object, + gitstats, type = "web", with_code = NULL, in_files = NULL, with_files = NULL, cache = TRUE, - verbose = is_verbose(gitstats_object), + verbose = is_verbose(gitstats), progress = verbose ) } \arguments{ -\item{gitstats_object}{A GitStats object.} +\item{gitstats}{A GitStats object.} \item{type}{A character, choose if \code{api} or \code{web} (\code{html}) URLs should be returned.} diff --git a/man/get_storage.Rd b/man/get_storage.Rd index e2077f6e..287a1918 100644 --- 
a/man/get_storage.Rd +++ b/man/get_storage.Rd @@ -4,10 +4,10 @@ \alias{get_storage} \title{Get data from \code{GitStats} storage} \usage{ -get_storage(gitstats_object, storage = NULL) +get_storage(gitstats, storage = NULL) } \arguments{ -\item{gitstats_object}{A GitStats object.} +\item{gitstats}{A GitStats object.} \item{storage}{A character, type of the data you want to get from storage: \code{commits}, \code{repositories}, \code{release_logs}, \code{users}, \code{files}, @@ -32,7 +32,7 @@ pulled earlier with \code{GitStats}. get_repos(my_gitstats) release_logs <- get_storage( - gitstats_object = my_gitstats, + gitstats = my_gitstats, storage = "release_logs" ) } diff --git a/man/get_users.Rd b/man/get_users.Rd index e3bbdf9b..00980f94 100644 --- a/man/get_users.Rd +++ b/man/get_users.Rd @@ -4,15 +4,10 @@ \alias{get_users} \title{Get users data} \usage{ -get_users( - gitstats_object, - logins, - cache = TRUE, - verbose = is_verbose(gitstats_object) -) +get_users(gitstats, logins, cache = TRUE, verbose = is_verbose(gitstats)) } \arguments{ -\item{gitstats_object}{A GitStats object.} +\item{gitstats}{A GitStats object.} \item{logins}{A character vector of logins.} diff --git a/man/is_verbose.Rd b/man/is_verbose.Rd index 2a4a211a..b5530bb6 100644 --- a/man/is_verbose.Rd +++ b/man/is_verbose.Rd @@ -4,10 +4,10 @@ \alias{is_verbose} \title{Is verbose mode switched on} \usage{ -is_verbose(gitstats_object) +is_verbose(gitstats) } \arguments{ -\item{gitstats_object}{A GitStats object.} +\item{gitstats}{A GitStats object.} } \description{ Is verbose mode switched on diff --git a/man/set_github_host.Rd b/man/set_github_host.Rd index 899a1519..68ab860c 100644 --- a/man/set_github_host.Rd +++ b/man/set_github_host.Rd @@ -5,16 +5,16 @@ \title{Set GitHub host} \usage{ set_github_host( - gitstats_object, + gitstats, host = NULL, token = NULL, orgs = NULL, repos = NULL, - verbose = is_verbose(gitstats_object) + verbose = is_verbose(gitstats) ) } \arguments{ 
-\item{gitstats_object}{A GitStats object.} +\item{gitstats}{A GitStats object.} \item{host}{A character, optional, URL name of the host. If not passed, a public host will be used.} diff --git a/man/set_gitlab_host.Rd b/man/set_gitlab_host.Rd index 3aa58ab9..065713a3 100644 --- a/man/set_gitlab_host.Rd +++ b/man/set_gitlab_host.Rd @@ -5,16 +5,16 @@ \title{Set GitLab host} \usage{ set_gitlab_host( - gitstats_object, + gitstats, host = NULL, token = NULL, orgs = NULL, repos = NULL, - verbose = is_verbose(gitstats_object) + verbose = is_verbose(gitstats) ) } \arguments{ -\item{gitstats_object}{A GitStats object.} +\item{gitstats}{A GitStats object.} \item{host}{A character, optional, URL name of the host. If not passed, a public host will be used.} diff --git a/man/show_orgs.Rd b/man/show_orgs.Rd index fa4bdf38..dee0f6dc 100644 --- a/man/show_orgs.Rd +++ b/man/show_orgs.Rd @@ -4,10 +4,10 @@ \alias{show_orgs} \title{Show organizations set in \code{GitStats}} \usage{ -show_orgs(gitstats_object) +show_orgs(gitstats) } \arguments{ -\item{gitstats_object}{A GitStats object.} +\item{gitstats}{A GitStats object.} } \value{ A vector of organizations. diff --git a/man/verbose_off.Rd b/man/verbose_off.Rd index 932b0e1c..4af2e835 100644 --- a/man/verbose_off.Rd +++ b/man/verbose_off.Rd @@ -4,10 +4,10 @@ \alias{verbose_off} \title{Switch off verbose mode} \usage{ -verbose_off(gitstats_object) +verbose_off(gitstats) } \arguments{ -\item{gitstats_object}{A GitStats object.} +\item{gitstats}{A GitStats object.} } \value{ A GitStats object. diff --git a/man/verbose_on.Rd b/man/verbose_on.Rd index d3a4c846..a0f20ffd 100644 --- a/man/verbose_on.Rd +++ b/man/verbose_on.Rd @@ -4,10 +4,10 @@ \alias{verbose_on} \title{Switch on verbose mode} \usage{ -verbose_on(gitstats_object) +verbose_on(gitstats) } \arguments{ -\item{gitstats_object}{A GitStats object.} +\item{gitstats}{A GitStats object.} } \value{ A GitStats object. 
diff --git a/renv.lock b/renv.lock index dabdc725..e5fe7720 100644 --- a/renv.lock +++ b/renv.lock @@ -1349,7 +1349,7 @@ }, "withr": { "Package": "withr", - "Version": "3.0.0", + "Version": "3.0.2", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -1357,7 +1357,7 @@ "grDevices", "graphics" ], - "Hash": "d31b6c62c10dcf11ec530ca6b0dd5d35" + "Hash": "cc2d62c76458d425210d1eb1478b30b4" }, "xfun": { "Package": "xfun", diff --git a/tests/testthat/_snaps/get_commits-GitHub.md b/tests/testthat/_snaps/01-get_commits-GitHub.md similarity index 100% rename from tests/testthat/_snaps/get_commits-GitHub.md rename to tests/testthat/_snaps/01-get_commits-GitHub.md diff --git a/tests/testthat/_snaps/01-get_commits-GitStats.md b/tests/testthat/_snaps/01-get_commits-GitStats.md new file mode 100644 index 00000000..300457c0 --- /dev/null +++ b/tests/testthat/_snaps/01-get_commits-GitStats.md @@ -0,0 +1,4 @@ +# get_commits() returns error when since is not defined + + You need to pass date to `since` parameter. + diff --git a/tests/testthat/_snaps/get_commits-GitStats.md b/tests/testthat/_snaps/get_commits-GitStats.md deleted file mode 100644 index 0963c7f0..00000000 --- a/tests/testthat/_snaps/get_commits-GitStats.md +++ /dev/null @@ -1,9 +0,0 @@ -# get_commits() returns error when since is not defined - - You need to pass date to `since` parameter. - -# get_commits_stats returns error when no commits - - x No commits found in GitStats storage. - i Run first `get_commits()`. - diff --git a/tests/testthat/_snaps/get_commits_stats.md b/tests/testthat/_snaps/get_commits_stats.md new file mode 100644 index 00000000..a457c06b --- /dev/null +++ b/tests/testthat/_snaps/get_commits_stats.md @@ -0,0 +1,5 @@ +# get_commits_stats returns error when commits is not commits_data object + + x `commits` must be a `commits_data` object. + i Pull first your commits with `get_commits()` function. 
+ diff --git a/tests/testthat/_snaps/set_host.md b/tests/testthat/_snaps/set_host.md index 48d069fd..19211cfd 100644 --- a/tests/testthat/_snaps/set_host.md +++ b/tests/testthat/_snaps/set_host.md @@ -1,7 +1,7 @@ # Set connection returns appropriate messages Code - set_github_host(gitstats_obj = test_gitstats, token = Sys.getenv("GITHUB_PAT"), + set_github_host(gitstats = test_gitstats, token = Sys.getenv("GITHUB_PAT"), orgs = c("openpharma", "r-world-devs")) Message i Searching scope set to [org]. diff --git a/tests/testthat/test-get_commits-GitHub.R b/tests/testthat/test-01-get_commits-GitHub.R similarity index 100% rename from tests/testthat/test-get_commits-GitHub.R rename to tests/testthat/test-01-get_commits-GitHub.R diff --git a/tests/testthat/test-get_commits-GitLab.R b/tests/testthat/test-01-get_commits-GitLab.R similarity index 100% rename from tests/testthat/test-get_commits-GitLab.R rename to tests/testthat/test-01-get_commits-GitLab.R diff --git a/tests/testthat/test-01-get_commits-GitStats.R b/tests/testthat/test-01-get_commits-GitStats.R new file mode 100644 index 00000000..1a69732c --- /dev/null +++ b/tests/testthat/test-01-get_commits-GitStats.R @@ -0,0 +1,76 @@ +commits_table_from_hosts <- purrr::list_rbind( + list( + test_mocker$use("gh_commits_table"), + test_mocker$use("gl_commits_table") + ) +) + +test_that("set_object_class works for commits", { + commits_table <- test_gitstats_priv$set_object_class( + object = commits_table_from_hosts, + class = "commits_data", + attr_list = list( + "date_range" = c("2023-06-15", "2023-06-30") + ) + ) + expect_s3_class( + commits_table, + "commits_data" + ) + expect_equal( + attr(commits_table, "date_range"), + c("2023-06-15", "2023-06-30") + ) +}) + +test_that("get_commits works properly", { + mockery::stub( + test_gitstats$get_commits, + "private$get_commits_from_hosts", + commits_table_from_hosts + ) + suppressMessages( + commits_table <- test_gitstats$get_commits( + since = "2023-06-15", + until = 
"2023-06-30", + verbose = FALSE + ) + ) + expect_commits_table( + commits_table + ) + test_mocker$cache(commits_table) +}) + +test_that("get_commits() works", { + mockery::stub( + get_commits, + "gitstats_object$get_commits", + test_mocker$use("commits_table") + ) + commits_data <- get_commits( + test_gitstats, + since = "2023-06-15", + until = "2023-06-30", + verbose = FALSE + ) + expect_s3_class( + commits_data, + "commits_data" + ) + test_mocker$cache(commits_data) +}) + +test_that("get_commits() returns error when since is not defined", { + mockery::stub( + get_commits, + "gitstats_object$get_commits", + test_mocker$use("commits_table") + ) + expect_snapshot_error( + get_commits( + test_gitstats, + verbose = FALSE + ) + ) +}) diff --git a/tests/testthat/test-get_commits-GitStats.R b/tests/testthat/test-get_commits-GitStats.R deleted file mode 100644 index 003c6382..00000000 --- a/tests/testthat/test-get_commits-GitStats.R +++ /dev/null @@ -1,136 +0,0 @@ -commits_table_from_hosts <- purrr::list_rbind( - list( - test_mocker$use("gh_commits_table"), - test_mocker$use("gl_commits_table") - ) -) - -test_that("set_object_class works for commits", { - commits_table<- test_gitstats_priv$set_object_class( - object = commits_table_from_hosts, - class = "commits_data", - attr_list = list( - "date_range" = c("2023-06-15", "2023-06-30") - ) - ) - expect_s3_class( - commits_table, - "commits_data" - ) - expect_equal( - attr(commits_table, "date_range"), - c("2023-06-15", "2023-06-30") - ) -}) - -test_that("get_commits works properly", { - mockery::stub( - test_gitstats$get_commits, - "private$get_commits_from_hosts", - commits_table_from_hosts - ) - suppressMessages( - commits_table <- test_gitstats$get_commits( - since = "2023-06-15", - until = "2023-06-30", - verbose = FALSE - ) - ) - expect_commits_table( - commits_table - ) - test_mocker$cache(commits_table) -}) - -test_that("get_commits() works", { - mockery::stub( - get_commits, - "gitstats_object$get_commits", - 
test_mocker$use("commits_table") - ) - commits_table <- get_commits( - test_gitstats, - since = "2023-06-15", - until = "2023-06-30", - verbose = FALSE - ) - expect_s3_class( - commits_table, - "commits_data" - ) -}) - -test_that("get_commits() returns error when since is not defined", { - mockery::stub( - get_commits, - "gitstats_object$get_commits", - test_mocker$use("commits_table") - ) - expect_snapshot_error( - get_commits( - test_gitstats, - verbose = FALSE - ) - ) -}) - -test_that("prepare_commits_stats prepares commits statistics", { - commits_stats <- test_gitstats_priv$prepare_commits_stats( - commits = test_mocker$use("commits_table"), - time_aggregation = "week", - group_var = author - ) - expect_equal( - colnames(commits_stats), - c("stats_date", "githost", "author", "stats") - ) -}) - -test_gitstats <- create_test_gitstats( - hosts = 2, - inject_commits = "commits_table" -) - -test_that("get_commits_stats method works", { - commits_stats <- test_gitstats$get_commits_stats( - time_aggregation = "month", - group_var = "organization" - ) - expect_s3_class(commits_stats, "commits_stats") - expect_true( - all(c("gitlab", "github") %in% commits_stats$githost) - ) - test_mocker$cache(commits_stats) -}) - -test_that("get_commits_stats returns error when no commits", { - test_gitstats <- create_test_gitstats() - expect_snapshot_error( - get_commits_stats(test_gitstats) - ) -}) - -test_that("get_commits_stats prepares table with statistics on commits", { - commits_stats_daily <- get_commits_stats( - gitstats_obj = test_gitstats, - time_aggregation = "day", - group_var = organization - ) - expect_s3_class(commits_stats_daily, "commits_stats") - expect_equal( - colnames(commits_stats_daily), - c("stats_date", "githost", "organization", "stats") - ) - - commits_stats_yearly <- get_commits_stats( - gitstats_obj = test_gitstats, - time_aggregation = "year" - ) - expect_equal(commits_stats_yearly$stats_date, - as.POSIXct(c(rep("2023-01-01", 2), "2024-01-01"), tz = 
'UTC')) - expect_s3_class(commits_stats_yearly, "commits_stats") - expect_equal( - colnames(commits_stats_yearly), - c("stats_date", "githost", "stats") - ) -}) diff --git a/tests/testthat/test-get_commits_stats.R b/tests/testthat/test-get_commits_stats.R new file mode 100644 index 00000000..46cf7f45 --- /dev/null +++ b/tests/testthat/test-get_commits_stats.R @@ -0,0 +1,34 @@ +test_that("get_commits_stats method works", { + commits_stats <- get_commits_stats( + commits = test_mocker$use("commits_data"), + time_aggregation = "month", + group_var = organization + ) + expect_s3_class(commits_stats, "commits_stats") + expect_equal( + colnames(commits_stats), + c("stats_date", "githost", "organization", "stats") + ) + expect_true( + all(c("gitlab", "github") %in% commits_stats$githost) + ) + commits_stats_yearly <- get_commits_stats( + commits = test_mocker$use("commits_data"), + time_aggregation = "year" + ) + expect_equal(commits_stats_yearly$stats_date, + as.POSIXct(c(rep("2023-01-01", 2), "2024-01-01"), tz = 'UTC')) + expect_s3_class(commits_stats_yearly, "commits_stats") + expect_equal( + colnames(commits_stats_yearly), + c("stats_date", "githost", "stats") + ) +}) + +test_that("get_commits_stats returns error when commits is not commits_data object", { + expect_snapshot_error( + get_commits_stats( + commits = test_mocker$use("gh_commits_table") + ) + ) +}) diff --git a/tests/testthat/test-get_storage.R b/tests/testthat/test-get_storage.R index e350a359..d6aa50b2 100644 --- a/tests/testthat/test-get_storage.R +++ b/tests/testthat/test-get_storage.R @@ -36,7 +36,7 @@ test_that("get_storage retrieves one table", { test_that("get_storage retrieves one table", { gitstats_storage <- get_storage( - gitstats_object = test_gitstats, + gitstats = test_gitstats, storage = "files_structure" ) expect_type( diff --git a/tests/testthat/test-set_host.R b/tests/testthat/test-set_host.R index cdeaf470..956d28c5 100644 --- a/tests/testthat/test-set_host.R +++ 
b/tests/testthat/test-set_host.R @@ -4,7 +4,7 @@ test_that("Set connection returns appropriate messages", { skip_on_cran() expect_snapshot( set_github_host( - gitstats_obj = test_gitstats, + gitstats = test_gitstats, token = Sys.getenv("GITHUB_PAT"), orgs = c("openpharma", "r-world-devs") ) @@ -108,7 +108,7 @@ test_that("Error shows, when wrong input is passed when setting connection and h test_gitstats <- create_gitstats() expect_snapshot_error( set_gitlab_host( - gitstats_object = test_gitstats, + gitstats = test_gitstats, host = "https://avengers.com", token = Sys.getenv("GITLAB_PAT_PUBLIC") ) From 14d97d54419314e5c138d86143a1943d9326f6fa Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Wed, 11 Dec 2024 11:27:55 +0000 Subject: [PATCH 24/99] Update example and vignettes. --- inst/get_commits_workflow.R | 4 +-- vignettes/get_and_store_data.Rmd | 50 +++++++++++++++++++++---------- vignettes/get_files.Rmd | 12 ++++---- vignettes/get_repos_with_code.Rmd | 6 ++-- vignettes/set_hosts.Rmd | 2 +- 5 files changed, 46 insertions(+), 28 deletions(-) diff --git a/inst/get_commits_workflow.R b/inst/get_commits_workflow.R index 70ab4111..8e66b709 100644 --- a/inst/get_commits_workflow.R +++ b/inst/get_commits_workflow.R @@ -11,7 +11,7 @@ git_stats <- create_gitstats() %>% ) release_logs <- get_release_logs( - gitstats_object = git_stats, + gitstats = git_stats, since = "2024-01-01", verbose = FALSE ) @@ -22,7 +22,7 @@ release_logs test_gitstats get_commits( - git_stats, + gitstats = git_stats, since = "2024-01-01" ) diff --git a/vignettes/get_and_store_data.Rmd b/vignettes/get_and_store_data.Rmd index 92593ff0..e38ca37e 100644 --- a/vignettes/get_and_store_data.Rmd +++ b/vignettes/get_and_store_data.Rmd @@ -54,7 +54,7 @@ If messages overwhelm you, you can switch them off in the function: ```{r} release_logs <- get_release_logs( - gitstats_object = git_stats, + gitstats = git_stats, since = "2024-01-01", verbose = FALSE ) @@ -73,10 +73,10 @@ After pulling, the data is saved to 
`GitStats`. ```{r} commits <- get_commits( - gitstats_object = git_stats, - since = "2024-06-01", - until = "2024-06-30", - progress = FALSE + gitstats = git_stats, + since = "2024-06-01", + until = "2024-06-30", + progress = FALSE ) dplyr::glimpse(commits) ``` @@ -85,9 +85,9 @@ Caching feature is by default turned on. If you run the `get_*()` function once ```{r} commits <- get_commits( - gitstats_object = git_stats, - since = "2024-06-01", - until = "2024-06-30" + gitstats = git_stats, + since = "2024-06-01", + until = "2024-06-30" ) dplyr::glimpse(commits) ``` @@ -96,10 +96,10 @@ Unless, you switch off the cache: ```{r} commits <- get_commits( - gitstats_object = git_stats, - since = "2024-06-01", - until = "2024-06-30", - cache = FALSE, + gitstats = git_stats, + since = "2024-06-01", + until = "2024-06-30", + cache = FALSE, progress = FALSE ) dplyr::glimpse(commits) @@ -109,9 +109,9 @@ Or simply change the parameters for the function: ```{r} commits <- get_commits( - gitstats_object = git_stats, - since = "2024-07-01", - progress = FALSE + gitstats = git_stats, + since = "2024-07-01", + progress = FALSE ) dplyr::glimpse(commits) ``` @@ -134,7 +134,25 @@ Or particular data set: ```{r} get_storage( - gitstats_object = git_stats, + gitstats = git_stats, storage = "repositories" ) ``` + +## Commits statistics + +Pull statistics in one pipe: + +```{r} +commits_stats <- get_commits( + gitstats = git_stats, + since = "2024-06-01", + until = "2024-06-30", + verbose = FALSE +) |> + get_commits_stats( + time_aggregation = "year", + group_var = author_name + ) +dplyr::glimpse(commits_stats) +``` diff --git a/vignettes/get_files.Rmd b/vignettes/get_files.Rmd index 30a1226e..60da1ccf 100644 --- a/vignettes/get_files.Rmd +++ b/vignettes/get_files.Rmd @@ -38,10 +38,10 @@ With `GitStats` you can get the content of all text files in repo that are of yo ```{r} files_structure <- get_files_structure( - gitstats_object = git_stats, - pattern = "\\.md", - depth = 1L, - progress = 
FALSE + gitstats = git_stats, + pattern = "\\.md", + depth = 1L, + progress = FALSE ) dplyr::glimpse(files_structure) ``` @@ -50,8 +50,8 @@ Once you pull the files structure, `GitStats` will store it. If you run then `ge ```{r} files_content <- get_files_content( - gitstats_object = git_stats, - progress = FALSE + gitstats = git_stats, + progress = FALSE ) dplyr::glimpse(files_content) ``` diff --git a/vignettes/get_repos_with_code.Rmd b/vignettes/get_repos_with_code.Rmd index 71c58a26..03f298e7 100644 --- a/vignettes/get_repos_with_code.Rmd +++ b/vignettes/get_repos_with_code.Rmd @@ -39,7 +39,7 @@ You can limit your search, as it is allowed with GitLab and GitHub API search en ```{r, eval = FALSE} repos_urls <- get_repos_urls( - gitstats_object = github_stats, + gitstats = github_stats, with_code = c("purrr", "shiny"), in_files = c("DESCRIPTION", "NAMESPACE", "renv.lock") ) @@ -49,7 +49,7 @@ You can also search for repositories with certain files (do not confuse `with_fi ```{r, eval = FALSE} repos_urls <- get_repos_urls( - gitstats_object = github_stats, + gitstats = github_stats, with_files = c("renv.lock", "DESCRIPTION") ) ``` @@ -60,7 +60,7 @@ repos_urls <- get_repos_urls( ```{r, eval = FALSE} package_usage <- get_R_package_usage( - gitstats_object = github_stats, + gitstats = github_stats, packages = c("shiny", "purrr"), split_output = TRUE ) diff --git a/vignettes/set_hosts.Rmd b/vignettes/set_hosts.Rmd index 74734a71..2e4d35c9 100644 --- a/vignettes/set_hosts.Rmd +++ b/vignettes/set_hosts.Rmd @@ -17,7 +17,7 @@ knitr::opts_chunk$set( ``` -To make `GitStats` work you need to set hosts after creating `gitstats_object`. +To make `GitStats` work you need to set hosts after creating `gitstats`. You can set GitLab host with `set_gitlab_host()` and GitHub host with `set_github_host()` or both. 
From cfa5b79801cba7283d101bc0138a8dec6c55c598 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Wed, 11 Dec 2024 11:36:37 +0000 Subject: [PATCH 25/99] Try meet code coverage job. --- tests/testthat/test-01-get_repos-GitStats.R | 24 +++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-01-get_repos-GitStats.R b/tests/testthat/test-01-get_repos-GitStats.R index 90bcdd6f..d960a8f3 100644 --- a/tests/testthat/test-01-get_repos-GitStats.R +++ b/tests/testthat/test-01-get_repos-GitStats.R @@ -67,16 +67,32 @@ test_that("get_repos works properly and for the second time uses cache", { "private$get_repos_from_hosts", test_mocker$use("repos_table") ) - repos_table <- test_gitstats$get_repos(verbose = FALSE) + repos_data <- test_gitstats$get_repos(verbose = FALSE) expect_repos_table_object( - repos_object = repos_table, + repos_object = repos_data, with_cols = c("contributors", "contributors_n") ) - repos_table <- test_gitstats$get_repos( + repos_data <- test_gitstats$get_repos( verbose = FALSE ) expect_repos_table_object( - repos_object = repos_table, + repos_object = repos_data, + with_cols = c("contributors", "contributors_n") + ) + test_mocker$cache(repos_data) +}) + +test_that("get_repos works", { + mockery::stub( + get_repos, + "gitstats$get_repos", + test_mocker$use("repos_data") + ) + repos_data <- get_repos( + gitstats = test_gitstats + ) + expect_repos_table_object( + repos_object = repos_data, with_cols = c("contributors", "contributors_n") ) }) From 0eab7aad905417561860f314e237e28fae00f378 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Thu, 12 Dec 2024 14:04:14 +0000 Subject: [PATCH 26/99] Make possible set searching scope to both orgs and repos and pull repositories in both scopes, adjust tests. 
--- DESCRIPTION | 2 +- R/GitHost.R | 117 ++++++++++-------- R/GitHostGitHub.R | 2 +- R/GitHostGitLab.R | 2 +- tests/testthat/_snaps/01-get_repos-GitHub.md | 7 -- tests/testthat/_snaps/helpers.md | 10 -- tests/testthat/_snaps/set_host.md | 21 +--- tests/testthat/helper-fixtures.R | 88 ++++++------- tests/testthat/test-01-get_repos-GitHub.R | 56 +++++++-- .../test-get_files_structure-GitHub.R | 10 +- .../test-get_files_structure-GitStats.R | 2 +- tests/testthat/test-get_release-GitHub.R | 17 ++- tests/testthat/test-helpers.R | 10 -- tests/testthat/test-set_host.R | 17 --- 14 files changed, 184 insertions(+), 177 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index cdfba9aa..b6e5fb16 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: GitStats Title: Standardized Git Repository Data -Version: 2.1.2.9003 +Version: 2.1.2.9004 Authors@R: c( person(given = "Maciej", family = "Banas", email = "banasmaciek@gmail.com", role = c("aut", "cre")), person(given = "Kamil", family = "Koziej", email = "koziej.k@gmail.com", role = "aut"), diff --git a/R/GitHost.R b/R/GitHost.R index 0fb6981c..ec600e58 100644 --- a/R/GitHost.R +++ b/R/GitHost.R @@ -367,9 +367,10 @@ GitHost <- R6::R6Class( if (is.null(repos) && is.null(orgs)) { if (private$is_public) { cli::cli_abort(c( - "You need to specify `orgs` for public Git Host.", + "You need to specify `orgs` or/and `repos` for public Git Host.", "x" = "Host will not be added.", - "i" = "Add organizations to your `orgs` parameter." + "i" = "Add organizations to your `orgs` and/or repositories to + `repos` parameter." 
), call = NULL) } else { @@ -385,25 +386,11 @@ GitHost <- R6::R6Class( private$scan_all <- TRUE } } - if (!is.null(repos) && is.null(orgs)) { - if (verbose) { - cli::cli_alert_info(cli::col_grey("Searching scope set to [repo].")) - } - private$searching_scope <- "repo" - } - if (is.null(repos) && !is.null(orgs)) { - if (verbose) { - cli::cli_alert_info(cli::col_grey("Searching scope set to [org].")) - } - private$searching_scope <- "org" + if (!is.null(repos)) { + private$searching_scope <- c(private$searching_scope, "repo") } - if (!is.null(repos) && !is.null(orgs)) { - cli::cli_abort(c( - "Do not specify `orgs` while specifing `repos`.", - "x" = "Host will not be added.", - "i" = "Specify `orgs` or `repos`." - ), - call = NULL) + if (!is.null(orgs)) { + private$searching_scope <- c(private$searching_scope, "org") } }, @@ -422,8 +409,8 @@ GitHost <- R6::R6Class( verbose = verbose ) private$repos_fullnames <- repos - orgs_repos <- private$extract_repos_and_orgs(repos) - private$orgs <- private$set_owner_type( + orgs_repos <- private$extract_repos_and_orgs(private$repos_fullnames) + orgs <- private$set_owner_type( owners = names(orgs_repos) ) private$repos <- unname(unlist(orgs_repos)) @@ -589,7 +576,7 @@ GitHost <- R6::R6Class( # Set repositories set_repos = function(org) { - if (private$searching_scope == "repo") { + if ("repo" %in% private$searching_scope) { repos <- private$orgs_repos[[org]] } else { repos <- NULL @@ -605,7 +592,6 @@ GitHost <- R6::R6Class( ) }, - #' Retrieve all repositories for an organization in a table format. 
get_all_repos = function(verbose = TRUE, progress = TRUE) { if (private$scan_all && is.null(private$orgs)) { if (verbose) { @@ -617,34 +603,67 @@ GitHost <- R6::R6Class( } private$orgs <- private$engines$graphql$get_orgs() } - graphql_engine <- private$engines$graphql - repos_table <- purrr::map(private$orgs, function(org) { - type <- attr(org, "type") %||% "organization" - org <- utils::URLdecode(org) - if (!private$scan_all && verbose) { - show_message( - host = private$host_name, - engine = "graphql", - scope = org, - information = "Pulling repositories" - ) - } - repos <- private$set_repos(org) - repos_table <- graphql_engine$get_repos_from_org( - org = org, - type = type - ) %>% - graphql_engine$prepare_repos_table() - if (!is.null(repos)) { - repos_table <- repos_table %>% - dplyr::filter(repo_name %in% repos) - } - return(repos_table) - }, .progress = progress) %>% - purrr::list_rbind() + repos_table <- purrr::list_rbind( + list( + private$get_repos_from_orgs(verbose, progress), + private$get_repos_individual(verbose, progress) + ) + ) return(repos_table) }, + get_repos_from_orgs = function(verbose, progress) { + if ("org" %in% private$searching_scope) { + graphql_engine <- private$engines$graphql + purrr::map(private$orgs, function(org) { + type <- attr(org, "type") %||% "organization" + org <- utils::URLdecode(org) + if (!private$scan_all && verbose) { + show_message( + host = private$host_name, + engine = "graphql", + scope = org, + information = "Pulling repositories" + ) + } + repos_table <- graphql_engine$get_repos_from_org( + org = org, + type = type + ) |> + graphql_engine$prepare_repos_table() + return(repos_table) + }, .progress = progress) |> + purrr::list_rbind() + } + }, + + get_repos_individual = function(verbose, progress) { + if ("repo" %in% private$searching_scope) { + graphql_engine <- private$engines$graphql + orgs <- names(private$orgs_repos) + purrr::map(orgs, function(org) { + type <- attr(org, "type") %||% "organization" + org <- 
utils::URLdecode(org) + if (!private$scan_all && verbose) { + show_message( + host = private$host_name, + engine = "graphql", + scope = org, + information = "Pulling repositories" + ) + } + repos_table <- graphql_engine$get_repos_from_org( + org = org, + type = type + ) |> + graphql_engine$prepare_repos_table() |> + dplyr::filter(repo_name == private$orgs_repos[[org]]) + return(repos_table) + }, .progress = progress) |> + purrr::list_rbind() + } + }, + # Pull repositories with specific code get_repos_with_code = function(code, in_files = NULL, diff --git a/R/GitHostGitHub.R b/R/GitHostGitHub.R index 4b0089c3..ab38aa91 100644 --- a/R/GitHostGitHub.R +++ b/R/GitHostGitHub.R @@ -203,7 +203,7 @@ GitHostGitHub <- R6::R6Class( # Use repositories either from parameter or, if not set, pull them from API set_repositories = function(org) { - if (private$searching_scope == "repo") { + if ("repo" %in% private$searching_scope) { repos_names <- private$orgs_repos[[org]] } else { repos_table <- private$get_all_repos( diff --git a/R/GitHostGitLab.R b/R/GitHostGitLab.R index 61e40006..d8d129e3 100644 --- a/R/GitHostGitLab.R +++ b/R/GitHostGitLab.R @@ -266,7 +266,7 @@ GitHostGitLab <- R6::R6Class("GitHostGitLab", # Use repositories either from parameter or, if not set, pull them from API set_repositories = function(org, settings) { - if (private$searching_scope == "repo") { + if ("repo" %in% private$searching_scope) { repos <- private$orgs_repos[[org]] repos_names <- paste0(utils::URLencode(org, reserved = TRUE), "%2f", repos) } else { diff --git a/tests/testthat/_snaps/01-get_repos-GitHub.md b/tests/testthat/_snaps/01-get_repos-GitHub.md index e51ab31c..2edba7cf 100644 --- a/tests/testthat/_snaps/01-get_repos-GitHub.md +++ b/tests/testthat/_snaps/01-get_repos-GitHub.md @@ -5,10 +5,3 @@ Output [1] "\n query GetReposByOrg($login: String!) {\n repositoryOwner(login: $login) {\n ... 
on Organization {\n \n repositories(first: 100) {\n totalCount\n pageInfo {\n endCursor\n hasNextPage\n }\n nodes {\n repo_id: id\n repo_name: name\n default_branch: defaultBranchRef {\n name\n }\n stars: stargazerCount\n forks: forkCount\n created_at: createdAt\n last_activity_at: pushedAt\n languages (first: 5) { nodes {name} }\n issues_open: issues (first: 100 states: [OPEN]) {\n totalCount\n }\n issues_closed: issues (first: 100 states: [CLOSED]) {\n totalCount\n }\n organization: owner {\n login\n }\n repo_url: url\n }\n }\n \n }\n }\n }" -# `get_all_repos()` prints proper message - - Code - gh_repos_table <- github_testhost_priv$get_all_repos(verbose = TRUE) - Message - i [Host:GitHub][Engine:GraphQl][Scope:test-org] Pulling repositories... - diff --git a/tests/testthat/_snaps/helpers.md b/tests/testthat/_snaps/helpers.md index aec3df67..d1167c04 100644 --- a/tests/testthat/_snaps/helpers.md +++ b/tests/testthat/_snaps/helpers.md @@ -3,16 +3,12 @@ Code gitlab_testhost_priv$set_searching_scope(orgs = "mbtests", repos = NULL, verbose = TRUE) - Message - i Searching scope set to [org]. --- Code gitlab_testhost_priv$set_searching_scope(orgs = NULL, repos = "mbtests/GitStatsTesting", verbose = TRUE) - Message - i Searching scope set to [repo]. # When token is empty throw error @@ -50,9 +46,3 @@ Message i Using PAT from GITLAB_PAT envar. -# `set_searching_scope` throws error when both `orgs` and `repos` are defined - - Do not specify `orgs` while specifing `repos`. - x Host will not be added. - i Specify `orgs` or `repos`. - diff --git a/tests/testthat/_snaps/set_host.md b/tests/testthat/_snaps/set_host.md index 19211cfd..0cbab877 100644 --- a/tests/testthat/_snaps/set_host.md +++ b/tests/testthat/_snaps/set_host.md @@ -4,7 +4,6 @@ set_github_host(gitstats = test_gitstats, token = Sys.getenv("GITHUB_PAT"), orgs = c("openpharma", "r-world-devs")) Message - i Searching scope set to [org]. i Checking organizations... v Set connection to GitHub. 
@@ -14,7 +13,6 @@ test_gitstats %>% set_gitlab_host(token = Sys.getenv("GITLAB_PAT_PUBLIC"), orgs = c("mbtests")) Message - i Searching scope set to [org]. i Checking organizations... v Set connection to GitLab. @@ -25,7 +23,6 @@ "r-world-devs")) Message i Using PAT from GITHUB_PAT envar. - i Searching scope set to [org]. i Checking organizations... v Set connection to GitHub. @@ -37,7 +34,6 @@ }) Message i Using PAT from GITLAB_PAT envar. - i Searching scope set to [org]. i Checking organizations... v Set connection to GitLab. @@ -48,7 +44,6 @@ "r-world-devs/GitStats", "r-world-devs/shinyCohortBuilder", "openpharma/GithubMetrics", "openpharma/DataFakeR")) Message - i Searching scope set to [repo]. i Checking repositories... v Set connection to GitHub. @@ -58,21 +53,14 @@ test_gitstats %>% set_gitlab_host(token = Sys.getenv("GITLAB_PAT_PUBLIC"), repos = c("mbtests/gitstatstesting", "mbtests/gitstats-testing-2")) Message - i Searching scope set to [repo]. i Checking repositories... v Set connection to GitLab. -# Set host prints error when repos and orgs are defined and host is not passed to GitStats - - Do not specify `orgs` while specifing `repos`. - x Host will not be added. - i Specify `orgs` or `repos`. - # Error shows if organizations are not specified and host is not passed - You need to specify `orgs` for public Git Host. + You need to specify `orgs` or/and `repos` for public Git Host. x Host will not be added. - i Add organizations to your `orgs` parameter. + i Add organizations to your `orgs` and/or repositories to `repos` parameter. # Error shows, when wrong input is passed when setting connection and host is not passed @@ -86,10 +74,8 @@ test_gitstats %>% set_github_host(token = Sys.getenv("GITHUB_PAT"), orgs = "pharmaverse") %>% set_github_host(token = Sys.getenv("GITHUB_PAT"), orgs = "openpharma") Message - i Searching scope set to [org]. i Checking organizations... v Set connection to GitHub. - i Searching scope set to [org]. 
i Checking organizations... v Set connection to GitHub. Condition @@ -102,7 +88,6 @@ test_gitstats <- create_gitstats() %>% set_github_host(token = Sys.getenv( "GITHUB_PAT"), orgs = c("openparma")) Message - i Searching scope set to [org]. i Checking organizations... Condition Error in `purrr::map()`: @@ -118,7 +103,6 @@ test_gitstats <- create_gitstats() %>% set_gitlab_host(token = Sys.getenv( "GITLAB_PAT_PUBLIC"), orgs = c("openparma", "mbtests")) Message - i Searching scope set to [org]. i Checking organizations... Condition Error in `purrr::map()`: @@ -134,7 +118,6 @@ test_gitstats <- create_gitstats() %>% set_github_host(token = Sys.getenv( "GITHUB_PAT"), orgs = c("openpharma", "r_world_devs")) Message - i Searching scope set to [org]. i Checking organizations... Condition Error in `purrr::map()`: diff --git a/tests/testthat/helper-fixtures.R b/tests/testthat/helper-fixtures.R index 873d30e2..e1773d7f 100644 --- a/tests/testthat/helper-fixtures.R +++ b/tests/testthat/helper-fixtures.R @@ -96,40 +96,42 @@ test_fixtures$gitlab_repositories_rest_response <- list( ) ) -github_repository_node <- list( - "repo_id" = "xyz", - "repo_name" = "TestRepo", - "default_branch" = list( - "name" = "main" - ), - "stars" = 10, - "forks" = 2, - "created_at" = "2022-04-20T00:00:00Z", - "last_activity_at" = "2023-04-20T00:00:00Z", - "languages" = list( - "nodes" = list( - list( - "name" = "R" - ), - list( - "name" = "CSS" - ), - list( - "name" = "JavaScript" +github_repository_node <- function(repo_name) { + list( + "repo_id" = "xyz", + "repo_name" = repo_name, + "default_branch" = list( + "name" = "main" + ), + "stars" = 10, + "forks" = 2, + "created_at" = "2022-04-20T00:00:00Z", + "last_activity_at" = "2023-04-20T00:00:00Z", + "languages" = list( + "nodes" = list( + list( + "name" = "R" + ), + list( + "name" = "CSS" + ), + list( + "name" = "JavaScript" + ) ) - ) - ), - "issues_open" = list( - "totalCount" = 10 - ), - "issues_closed" = list( - "totalCount" = 5 - ), - 
"organization" = list( - "login" = "test_org" - ), - "repo_url" = "https://test_url" -) + ), + "issues_open" = list( + "totalCount" = 10 + ), + "issues_closed" = list( + "totalCount" = 5 + ), + "organization" = list( + "login" = "test_org" + ), + "repo_url" = "https://test_url" + ) +} test_fixtures$github_repos_by_org_response <- list( "data" = list( @@ -141,11 +143,11 @@ test_fixtures$github_repos_by_org_response <- list( "hasNextPage" = FALSE ), "nodes" = list( - github_repository_node, - github_repository_node, - github_repository_node, - github_repository_node, - github_repository_node + github_repository_node("TestRepo"), + github_repository_node("TestRepo1"), + github_repository_node("TestRepo2"), + github_repository_node("TestRepo3"), + github_repository_node("TestRepo4") ) ) ) @@ -162,11 +164,11 @@ test_fixtures$github_repos_by_user_response <- list( "hasNextPage" = FALSE ), "nodes" = list( - github_repository_node, - github_repository_node, - github_repository_node, - github_repository_node, - github_repository_node + github_repository_node("TestRepo"), + github_repository_node("TestRepo1"), + github_repository_node("TestRepo2"), + github_repository_node("TestRepo3"), + github_repository_node("TestRepo4") ) ) ) diff --git a/tests/testthat/test-01-get_repos-GitHub.R b/tests/testthat/test-01-get_repos-GitHub.R index 6fca8d8c..43c50f81 100644 --- a/tests/testthat/test-01-get_repos-GitHub.R +++ b/tests/testthat/test-01-get_repos-GitHub.R @@ -391,32 +391,64 @@ test_that("GitHub prepares repos table from repositories response", { test_mocker$cache(gh_repos_table) }) -test_that("`get_all_repos()` works as expected", { +test_that("get_repos_from_org works", { mockery::stub( - github_testhost_priv$get_all_repos, + github_testhost_priv$get_repos_from_orgs, "graphql_engine$prepare_repos_table", test_mocker$use("gh_repos_table") ) - gh_repos_table <- github_testhost_priv$get_all_repos( - verbose = FALSE + gh_repos_from_orgs <- github_testhost_priv$get_repos_from_orgs( 
+ verbose = FALSE, + progress = FALSE ) expect_repos_table( - gh_repos_table + gh_repos_from_orgs ) - test_mocker$cache(gh_repos_table) + test_mocker$cache(gh_repos_from_orgs) }) -test_that("`get_all_repos()` prints proper message", { +test_that("get_repos_individual works", { mockery::stub( - github_testhost_priv$get_all_repos, + github_testhost_priv$get_repos_individual, "graphql_engine$prepare_repos_table", test_mocker$use("gh_repos_table") ) - expect_snapshot( - gh_repos_table <- github_testhost_priv$get_all_repos( - verbose = TRUE - ) + github_testhost_priv$searching_scope <- c("org", "repo") + github_testhost_priv$repos_fullnames <- c("test_org/TestRepo") + github_testhost_priv$orgs_repos <- list("test_org" = "TestRepo") + test_org <- "test_org" + attr(test_org, "type") <- "organization" + github_testhost_priv$orgs <- test_org + + gh_repos_individual <- github_testhost_priv$get_repos_individual( + verbose = FALSE, + progress = FALSE + ) + expect_repos_table( + gh_repos_individual ) + test_mocker$cache(gh_repos_individual) +}) + +test_that("`get_all_repos()` works as expected", { + mockery::stub( + github_testhost_priv$get_all_repos, + "private$get_repos_from_orgs", + test_mocker$use("gh_repos_from_orgs") + ) + mockery::stub( + github_testhost_priv$get_all_repos, + "private$get_repos_individual", + test_mocker$use("gh_repos_individual") + ) + gh_repos_table <- github_testhost_priv$get_all_repos( + verbose = FALSE, + progress = FALSE + ) + expect_repos_table( + gh_repos_table + ) + test_mocker$cache(gh_repos_table) }) test_that("GitHost adds `repo_api_url` column to GitHub repos table", { diff --git a/tests/testthat/test-get_files_structure-GitHub.R b/tests/testthat/test-get_files_structure-GitHub.R index 0c63da20..3c632d55 100644 --- a/tests/testthat/test-get_files_structure-GitHub.R +++ b/tests/testthat/test-get_files_structure-GitHub.R @@ -127,12 +127,12 @@ test_that("GitHub GraphQL Engine pulls files structure from repositories", { ) gh_files_structure <- 
test_graphql_github$get_files_structure_from_org( org = "test_org", - repos = rep("TestRepo", 5) + repos = c("TestRepo", "TestRepo1", "TestRepo2", "TestRepo3", "TestRepo4") ) purrr::walk(gh_files_structure, ~ expect_true(length(.) > 0)) expect_equal( names(gh_files_structure), - rep("TestRepo", 5) + c("TestRepo", "TestRepo1", "TestRepo2", "TestRepo3", "TestRepo4") ) test_mocker$cache(gh_files_structure) }) @@ -174,7 +174,7 @@ test_that("get_files_structure_from_orgs", { ) expect_equal( names(gh_files_structure_from_orgs), - "test-org" + "test_org" ) test_mocker$cache(gh_files_structure_from_orgs) }) @@ -219,7 +219,7 @@ test_that("get_path_from_files_structure gets file path from files structure", { file_path <- test_graphql_github$get_path_from_files_structure( host_files_structure = test_mocker$use("gh_files_structure_from_orgs"), only_text_files = FALSE, - org = "test-org", + org = "test_org", repo = "TestRepo" ) expect_equal(typeof(file_path), "character") @@ -239,7 +239,7 @@ test_that("get_files_structure pulls files structure for repositories in orgs", ) expect_equal( names(gh_files_structure_from_orgs), - "test-org" + "test_org" ) purrr::walk(gh_files_structure_from_orgs[[1]], function(repo_files) { expect_true(any(grepl("\\.md|\\.Rmd", repo_files))) diff --git a/tests/testthat/test-get_files_structure-GitStats.R b/tests/testthat/test-get_files_structure-GitStats.R index 280f5873..50d6c46c 100644 --- a/tests/testthat/test-get_files_structure-GitStats.R +++ b/tests/testthat/test-get_files_structure-GitStats.R @@ -11,7 +11,7 @@ test_that("get_files_structure_from_hosts works as expected", { ) expect_equal(names(files_structure_from_hosts), c("github.com", "gitlab.com")) - expect_equal(names(files_structure_from_hosts[[1]]), c("test-org")) + expect_equal(names(files_structure_from_hosts[[1]]), c("test_org")) files_structure_from_hosts[[2]] <- test_mocker$use("gl_files_structure_from_orgs") test_mocker$cache(files_structure_from_hosts) }) diff --git 
a/tests/testthat/test-get_release-GitHub.R b/tests/testthat/test-get_release-GitHub.R index 60a0e56b..88fcb586 100644 --- a/tests/testthat/test-get_release-GitHub.R +++ b/tests/testthat/test-get_release-GitHub.R @@ -33,12 +33,27 @@ test_that("`prepare_releases_table()` prepares releases table", { test_mocker$cache(releases_table) }) -test_that("`set_repositories` works", { +test_that("`set_repositories` works when searching scope set to repo", { mockery::stub( github_testhost_priv$set_repositories, "private$get_all_repos", test_mocker$use("gh_repos_table") ) + github_testhost_priv$searching_scope <- "repo" + github_testhost_priv$orgs_repos <- list("test_org" = "TestRepo") + repos_names <- github_testhost_priv$set_repositories(org = "test_org") + expect_type(repos_names, "character") + expect_gt(length(repos_names), 0) +}) + +test_that("`set_repositories` works for whole orgs", { + mockery::stub( + github_testhost_priv$set_repositories, + "private$get_all_repos", + test_mocker$use("gh_repos_table") + ) + github_testhost_priv$orgs_repos <- list("test_org" = "TestRepo") + github_testhost_priv$searching_scope <- "org" repos_names <- github_testhost_priv$set_repositories() expect_type(repos_names, "character") expect_gt(length(repos_names), 0) diff --git a/tests/testthat/test-helpers.R b/tests/testthat/test-helpers.R index 09bb8488..d95052ac 100644 --- a/tests/testthat/test-helpers.R +++ b/tests/testthat/test-helpers.R @@ -192,13 +192,3 @@ test_that("`set_default_token` sets default token for GitLab", { 200 ) }) - -test_that("`set_searching_scope` throws error when both `orgs` and `repos` are defined", { - expect_snapshot_error( - gitlab_testhost_priv$set_searching_scope( - orgs = "mbtests", - repos = "mbtests/GitStatsTesting", - verbose = TRUE - ) - ) -}) diff --git a/tests/testthat/test-set_host.R b/tests/testthat/test-set_host.R index 956d28c5..c26f8308 100644 --- a/tests/testthat/test-set_host.R +++ b/tests/testthat/test-set_host.R @@ -71,23 +71,6 @@ test_that("Set 
GitLab host with particular repos vector instead of orgs", { ) }) -test_that("Set host prints error when repos and orgs are defined and host is not passed to GitStats", { - skip_on_cran() - test_gitstats <- create_gitstats() - expect_snapshot_error( - test_gitstats %>% - set_github_host( - token = Sys.getenv("GITHUB_PAT"), - orgs = c('r-world-devs', "openpharma"), - repos = c("r-world-devs/GitStats", "r-world-devs/shinyCohortBuilder", "openpharma/GithubMetrics", "openpharma/DataFakeR") - ) - ) - expect_length( - test_gitstats$.__enclos_env__$private$hosts, - 0 - ) -}) - test_that("Error shows if organizations are not specified and host is not passed", { skip_on_cran() test_gitstats <- create_gitstats() From 4b3c5a59465029edd5b050d91281619f0721dc53 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Thu, 12 Dec 2024 14:06:49 +0000 Subject: [PATCH 27/99] Fix/adjust workflows. --- inst/example_workflow.R | 77 +++++++++++++++++++++++++++++++++++++ inst/get_files_workflow.R | 8 ++-- inst/get_storage_workflow.R | 6 +-- 3 files changed, 84 insertions(+), 7 deletions(-) create mode 100644 inst/example_workflow.R diff --git a/inst/example_workflow.R b/inst/example_workflow.R new file mode 100644 index 00000000..2b17f83f --- /dev/null +++ b/inst/example_workflow.R @@ -0,0 +1,77 @@ +git_stats <- create_gitstats() |> + set_github_host( + orgs = "r-world-devs", + repos = "openpharma/DataFakeR" + ) + +get_repos(git_stats) + +get_repos_urls(git_stats, + with_files = "project_metadata.yaml", + progress = FALSE) + +get_repos_urls(git_stats, + with_files = "project_metadata.yaml", + cache = FALSE, + verbose = FALSE, + progress = TRUE) + +get_repos_urls(git_stats, + with_code = "Shiny", + in_files = "DESCRIPTION", + cache = FALSE, + verbose = FALSE) + +get_repos(git_stats) + +get_repos(git_stats, + cache = FALSE, + verbose = FALSE, + progress = TRUE) + +get_repos(git_stats, + cache = FALSE, + verbose = FALSE) + +get_repos(git_stats, + with_code = "Shiny") + +get_repos(git_stats, + 
with_code = "Shiny", + cache = FALSE, + verbose = FALSE) + +get_repos(git_stats, + with_code = "Shiny", + cache = FALSE, + verbose = FALSE, + progress = TRUE) + +get_repos(git_stats, + with_code = "Shiny", + in_files = "DESCRIPTION", + cache = FALSE) + +get_repos(git_stats, + with_code = c("shiny", "purrr"), + in_files = c("DESCRIPTION", "NAMESPACE"), + verbose = FALSE) + +get_commits(git_stats, since = "2024-06-01") + +get_commits(git_stats, + since = "2024-06-02", + verbose = FALSE, + progress = TRUE) + +get_release_logs( + gitstats = git_stats, + since = "2024-06-02", + verbose = FALSE +) + +get_release_logs( + gitstats = git_stats, + sinces = "2024-06-01", + verbose = TRUE +) diff --git a/inst/get_files_workflow.R b/inst/get_files_workflow.R index 960e7427..8b7d2eff 100644 --- a/inst/get_files_workflow.R +++ b/inst/get_files_workflow.R @@ -9,12 +9,12 @@ test_gitstats <- create_gitstats() |> ) get_files_content( - gitstats_obj = test_gitstats, + gitstats = test_gitstats, file_path = c("LICENSE", "DESCRIPTION") ) md_files_structure <- get_files_structure( - gitstats_obj = test_gitstats, + gitstats = test_gitstats, pattern = "\\.md|.R", depth = 2L ) @@ -22,8 +22,8 @@ md_files_structure <- get_files_structure( get_files_content(test_gitstats) md_files_structure <- get_files_structure( - gitstats_obj = test_gitstats, - pattern = "\\.md|\\.qmd|\\.Rmd", + gitstats = test_gitstats, + pattern = "DESCRIPTION|\\.md|\\.qmd|\\.Rmd", depth = 2L, verbose = FALSE ) diff --git a/inst/get_storage_workflow.R b/inst/get_storage_workflow.R index 10bc2221..c8ebb4ce 100644 --- a/inst/get_storage_workflow.R +++ b/inst/get_storage_workflow.R @@ -7,17 +7,17 @@ git_stats <- create_gitstats() %>% ) release_logs <- get_release_logs( - gitstats_object = git_stats, + gitstats = git_stats, since = "2024-01-01" ) repos_urls <- get_repos_urls( - gitstats_object = git_stats, + gitstats = git_stats, with_code = "shiny" ) files_structure <- get_files_structure( - gitstats_object = git_stats, + 
gitstats = git_stats, pattern = "\\.md", depth = 1L ) From 138f2aa02b4025c0a52ef55c5e161e2cc11f0e55 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Thu, 12 Dec 2024 15:26:44 +0000 Subject: [PATCH 28/99] Make possible pulling commits with new approach to setting scope. --- R/GitHost.R | 13 ++++- R/GitHostGitHub.R | 115 ++++++++++++++++++++++++++-------------- R/GitHostGitLab.R | 130 ++++++++++++++++++++++++++++++---------------- 3 files changed, 172 insertions(+), 86 deletions(-) diff --git a/R/GitHost.R b/R/GitHost.R index ec600e58..d851b6c8 100644 --- a/R/GitHost.R +++ b/R/GitHost.R @@ -133,12 +133,23 @@ GitHost <- R6::R6Class( cli::cli_alert_info("[{private$host_name}][Engine:{cli::col_yellow('GraphQL')}] Pulling all organizations...") private$orgs <- private$engines$graphql$get_orgs() } - commits_table <- private$get_commits_from_orgs( + commits_from_orgs <- private$get_commits_from_orgs( since = since, until = until, verbose = verbose, progress = progress ) + commits_from_repos <- private$get_commits_from_repos( + since = since, + until = until, + verbose = verbose, + progress = progress + ) + commits_table <- list( + commits_from_orgs, + commits_from_repos + ) |> + purrr::list_rbind() return(commits_table) }, diff --git a/R/GitHostGitHub.R b/R/GitHostGitHub.R index ab38aa91..0b8abdbc 100644 --- a/R/GitHostGitHub.R +++ b/R/GitHostGitHub.R @@ -167,50 +167,85 @@ GitHostGitHub <- R6::R6Class( # Pull commits from GitHub get_commits_from_orgs = function(since, until, verbose, progress) { - graphql_engine <- private$engines$graphql - commits_table <- purrr::map(private$orgs, function(org) { - commits_table_org <- NULL - if (!private$scan_all && verbose) { - show_message( - host = private$host_name, - engine = "graphql", - scope = org, - information = "Pulling commits" - ) - } - repos_names <- private$set_repositories( - org = org - ) - commits_table_org <- graphql_engine$get_commits_from_repos( - org = org, - repos_names = repos_names, - since = since, - until = 
until, - progress = progress - ) %>% - graphql_engine$prepare_commits_table( - org = org - ) - return(commits_table_org) - }, .progress = if (private$scan_all && progress) { - "[GitHost:GitHub] Pulling commits..." - } else { - FALSE - }) %>% - purrr::list_rbind() - return(commits_table) + if ("org" %in% private$searching_scope) { + graphql_engine <- private$engines$graphql + commits_table <- purrr::map(private$orgs, function(org) { + commits_table_org <- NULL + if (!private$scan_all && verbose) { + show_message( + host = private$host_name, + engine = "graphql", + scope = org, + information = "Pulling commits" + ) + } + commits_table_org <- graphql_engine$get_commits_from_repos( + org = org, + repos_names = private$get_repos_names(org), + since = since, + until = until, + progress = progress + ) %>% + graphql_engine$prepare_commits_table( + org = org + ) + return(commits_table_org) + }, .progress = if (private$scan_all && progress) { + "[GitHost:GitHub] Pulling commits..." + } else { + FALSE + }) %>% + purrr::list_rbind() + return(commits_table) + } }, - # Use repositories either from parameter or, if not set, pull them from API - set_repositories = function(org) { + # Pull commits from GitHub + get_commits_from_repos = function(since, until, verbose, progress) { if ("repo" %in% private$searching_scope) { - repos_names <- private$orgs_repos[[org]] - } else { - repos_table <- private$get_all_repos( - verbose = FALSE - ) - repos_names <- repos_table$repo_name + graphql_engine <- private$engines$graphql + orgs <- names(private$orgs_repos) + commits_table <- purrr::map(orgs, function(org) { + commits_table_org <- NULL + if (!private$scan_all && verbose) { + show_message( + host = private$host_name, + engine = "graphql", + scope = org, + information = "Pulling commits" + ) + } + commits_table_org <- graphql_engine$get_commits_from_repos( + org = org, + repos_names = private$orgs_repos[[org]], + since = since, + until = until, + progress = progress + ) %>% + 
graphql_engine$prepare_commits_table( + org = org + ) + return(commits_table_org) + }, .progress = if (private$scan_all && progress) { + "[GitHost:GitHub] Pulling commits..." + } else { + FALSE + }) %>% + purrr::list_rbind() + return(commits_table) } + }, + + # Use repositories either from parameter or, if not set, pull them from API + get_repos_names = function(org) { + type <- attr(org, "type") %||% "organization" + org <- utils::URLdecode(org) + graphql_engine <- private$engines$graphql + repos_names <- graphql_engine$get_repos_from_org( + org = org, + type = type + ) |> + purrr::map_vec(~ .$repo_name) return(repos_names) }, diff --git a/R/GitHostGitLab.R b/R/GitHostGitLab.R index d8d129e3..05884359 100644 --- a/R/GitHostGitLab.R +++ b/R/GitHostGitLab.R @@ -223,61 +223,101 @@ GitHostGitLab <- R6::R6Class("GitHostGitLab", }) }, - # Pull commits from GitHub get_commits_from_orgs = function(since, until, verbose = TRUE, progress = verbose) { - rest_engine <- private$engines$rest - commits_table <- purrr::map(private$orgs, function(org) { - commits_table_org <- NULL - if (!private$scan_all && verbose) { - show_message( - host = private$host_name, - engine = "rest", - scope = utils::URLdecode(org), - information = "Pulling commits" + if ("org" %in% private$searching_scope) { + rest_engine <- private$engines$rest + commits_table <- purrr::map(private$orgs, function(org) { + commits_table_org <- NULL + if (!private$scan_all && verbose) { + show_message( + host = private$host_name, + engine = "rest", + scope = utils::URLdecode(org), + information = "Pulling commits" + ) + } + repos_names <- private$get_repos_names( + org = org ) - } - repos_names <- private$set_repositories( - org = org - ) - commits_table_org <- rest_engine$get_commits_from_repos( - repos_names = repos_names, - since = since, - until = until, - progress = progress - ) %>% - rest_engine$tailor_commits_info(org = org) %>% - rest_engine$prepare_commits_table() %>% - 
rest_engine$get_commits_authors_handles_and_names( - verbose = verbose, + commits_table_org <- rest_engine$get_commits_from_repos( + repos_names = repos_names, + since = since, + until = until, progress = progress - ) - return(commits_table_org) - }, .progress = if (private$scan_all && progress) { - "[GitHost:GitLab] Pulling commits..." - } else { - FALSE - }) %>% - purrr::list_rbind() - return(commits_table) + ) %>% + rest_engine$tailor_commits_info(org = org) %>% + rest_engine$prepare_commits_table() %>% + rest_engine$get_commits_authors_handles_and_names( + verbose = verbose, + progress = progress + ) + return(commits_table_org) + }, .progress = if (private$scan_all && progress) { + "[GitHost:GitLab] Pulling commits..." + } else { + FALSE + }) %>% + purrr::list_rbind() + return(commits_table) + } }, - # Use repositories either from parameter or, if not set, pull them from API - set_repositories = function(org, settings) { + get_commits_from_repos = function(since, + until, + verbose = TRUE, + progress = verbose) { if ("repo" %in% private$searching_scope) { - repos <- private$orgs_repos[[org]] - repos_names <- paste0(utils::URLencode(org, reserved = TRUE), "%2f", repos) - } else { - repos_table <- private$get_all_repos( - verbose = FALSE - ) - gitlab_web_url <- stringr::str_extract(private$api_url, "^.*?(?=api)") - repos <- stringr::str_remove(repos_table$repo_url, gitlab_web_url) - repos_names <- utils::URLencode(repos, reserved = TRUE) + rest_engine <- private$engines$rest + orgs <- names(private$orgs_repos) + commits_table <- purrr::map(orgs, function(org) { + commits_table_org <- NULL + if (!private$scan_all && verbose) { + show_message( + host = private$host_name, + engine = "rest", + scope = utils::URLdecode(org), + information = "Pulling commits" + ) + } + repos <- private$orgs_repos[[org]] + repos_names <- paste0(utils::URLencode(org, reserved = TRUE), "%2f", repos) + commits_table_org <- rest_engine$get_commits_from_repos( + repos_names = repos_names, + 
since = since, + until = until, + progress = progress + ) %>% + rest_engine$tailor_commits_info(org = org) %>% + rest_engine$prepare_commits_table() %>% + rest_engine$get_commits_authors_handles_and_names( + verbose = verbose, + progress = progress + ) + return(commits_table_org) + }, .progress = if (private$scan_all && progress) { + "[GitHost:GitLab] Pulling commits..." + } else { + FALSE + }) %>% + purrr::list_rbind() + return(commits_table) } - return(repos_names) + }, + + # Use repositories either from parameter or, if not set, pull them from API + get_repos_names = function(org) { + graphql_engine <- private$engines$graphql + type <- attr(org, "type") %||% "organization" + repos_names <- graphql_engine$get_repos_from_org( + org = utils::URLdecode(org), + type = type + ) |> + purrr::map_vec(~ .$node$repo_path) + org <- utils::URLencode(org, reserved = TRUE) + return(paste0(org, "%2f", repos_names)) }, are_non_text_files = function(file_path, host_files_structure) { From 39e90245cfc280ccde14596fe59163e13895d1ea Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Fri, 13 Dec 2024 10:11:28 +0000 Subject: [PATCH 29/99] Adjust releases to pull from both scopes, adjust tests. 
--- R/GitHost.R | 122 +++++++++++++++----- tests/testthat/_snaps/get_release-GitHub.md | 8 -- tests/testthat/test-01-get_commits-GitHub.R | 50 ++++++-- tests/testthat/test-get_release-GitHub.R | 84 ++++++++------ tests/testthat/test-get_release-GitLab.R | 65 ++++++++++- 5 files changed, 239 insertions(+), 90 deletions(-) diff --git a/R/GitHost.R b/R/GitHost.R index d851b6c8..5b3647e9 100644 --- a/R/GitHost.R +++ b/R/GitHost.R @@ -218,36 +218,22 @@ GitHost <- R6::R6Class( private$orgs <- private$engines$graphql$get_orgs() } until <- until %||% Sys.time() - release_logs_table <- purrr::map(private$orgs, function(org) { - org <- utils::URLdecode(org) - release_logs_table_org <- NULL - if (!private$scan_all && verbose) { - show_message( - host = private$host_name, - engine = "graphql", - scope = org, - information = "Pulling release logs" - ) - } - repos_names <- private$set_repositories( - org = org - ) - graphql_engine <- private$engines$graphql - if (length(repos_names) > 0) { - release_logs_table_org <- graphql_engine$get_release_logs_from_org( - org = org, - repos_names = repos_names - ) %>% - graphql_engine$prepare_releases_table(org, since, until) - } else { - releases_logs_table_org <- NULL - } - return(release_logs_table_org) - }, .progress = if (progress) { - glue::glue("[GitHost:{private$host_name}] Pulling release logs...") - } else { - FALSE - }) %>% + release_logs_from_orgs <- private$get_release_logs_from_orgs( + since = since, + until = until, + verbose = verbose, + progress= progress + ) + release_logs_from_repos <- private$get_release_logs_from_repos( + since = since, + until = until, + verbose = verbose, + progress= progress + ) + release_logs_table <- list( + release_logs_from_orgs, + release_logs_from_repos + ) |> purrr::list_rbind() return(release_logs_table) } @@ -1020,6 +1006,82 @@ GitHost <- R6::R6Class( private$prepare_files_table_from_rest() %>% private$add_repo_api_url() return(files_table) + }, + + get_release_logs_from_orgs = 
function(since, until, verbose, progress) { + if ("org" %in% private$searching_scope) { + release_logs_table <- purrr::map(private$orgs, function(org) { + org <- utils::URLdecode(org) + release_logs_table_org <- NULL + if (!private$scan_all && verbose) { + show_message( + host = private$host_name, + engine = "graphql", + scope = org, + information = "Pulling release logs" + ) + } + repos_names <- private$get_repos_names( + org = org + ) + graphql_engine <- private$engines$graphql + if (length(repos_names) > 0) { + release_logs_table_org <- graphql_engine$get_release_logs_from_org( + org = org, + repos_names = repos_names + ) %>% + graphql_engine$prepare_releases_table( + org = org, + since = since, + until = until + ) + } else { + releases_logs_table_org <- NULL + } + return(release_logs_table_org) + }, .progress = if (progress) { + glue::glue("[GitHost:{private$host_name}] Pulling release logs...") + } else { + FALSE + }) %>% + purrr::list_rbind() + return(release_logs_table) + } + }, + + get_release_logs_from_repos = function(since, until, verbose, progress) { + if ("repo" %in% private$searching_scope) { + orgs <- names(private$orgs_repos) + release_logs_table <- purrr::map(orgs, function(org) { + org <- utils::URLdecode(org) + release_logs_table_org <- NULL + if (!private$scan_all && verbose) { + show_message( + host = private$host_name, + engine = "graphql", + scope = org, + information = "Pulling release logs" + ) + } + graphql_engine <- private$engines$graphql + release_logs_table_org <- graphql_engine$get_release_logs_from_org( + org = org, + repos_names = private$orgs_repos[[org]] + ) %>% + graphql_engine$prepare_releases_table( + org = org, + since = since, + until = until + ) + return(release_logs_table_org) + }, .progress = if (progress) { + glue::glue("[GitHost:{private$host_name}] Pulling release logs...") + } else { + FALSE + }) %>% + purrr::list_rbind() + return(release_logs_table) + } } ) ) diff --git a/tests/testthat/_snaps/get_release-GitHub.md 
b/tests/testthat/_snaps/get_release-GitHub.md index a46f4267..116312b0 100644 --- a/tests/testthat/_snaps/get_release-GitHub.md +++ b/tests/testthat/_snaps/get_release-GitHub.md @@ -5,11 +5,3 @@ Output [1] "query GetReleasesFromRepo ($org: String!, $repo: String!) {\n repository(owner:$org, name:$repo){\n name\n url\n releases (last: 100) {\n nodes {\n name\n tagName\n publishedAt\n url\n description\n }\n }\n }\n }" -# `get_release_logs()` prints proper message when running - - Code - releases_table <- github_testhost$get_release_logs(since = "2023-05-01", until = "2023-09-30", - verbose = TRUE, progress = FALSE) - Message - i [Host:GitHub][Engine:GraphQl][Scope:test-org] Pulling release logs... - diff --git a/tests/testthat/test-01-get_commits-GitHub.R b/tests/testthat/test-01-get_commits-GitHub.R index 91140a8f..7765ae92 100644 --- a/tests/testthat/test-01-get_commits-GitHub.R +++ b/tests/testthat/test-01-get_commits-GitHub.R @@ -104,28 +104,53 @@ test_that("get_commits_from_orgs for GitHub works", { "graphql_engine$prepare_commits_table", test_mocker$use("gh_commits_table") ) - suppressMessages( - gh_commits_table <- github_testhost_repos_priv$get_commits_from_orgs( - since = "2023-03-01", - until = "2023-04-01", - verbose = FALSE, - progress = FALSE - ) + github_testhost_repos_priv$searching_scope <- "org" + gh_commits_from_orgs <- github_testhost_repos_priv$get_commits_from_orgs( + since = "2023-03-01", + until = "2023-04-01", + verbose = FALSE, + progress = FALSE ) expect_commits_table( - gh_commits_table + gh_commits_from_orgs ) - test_mocker$cache(gh_commits_table) + test_mocker$cache(gh_commits_from_orgs) +}) + + +test_that("get_commits_from_repos for GitHub works", { + mockery::stub( + github_testhost_repos_priv$get_commits_from_repos, + "graphql_engine$prepare_commits_table", + test_mocker$use("gh_commits_table") + ) + github_testhost_repos_priv$searching_scope <- "repo" + github_testhost_repos_priv$orgs_repos <- list("test_org" = "TestRepo") + 
gh_commits_from_repos <- github_testhost_repos_priv$get_commits_from_repos( + since = "2023-03-01", + until = "2023-04-01", + verbose = FALSE, + progress = FALSE + ) + expect_commits_table( + gh_commits_from_repos + ) + test_mocker$cache(gh_commits_from_repos) }) test_that("`get_commits()` retrieves commits in the table format", { mockery::stub( github_testhost$get_commits, "private$get_commits_from_orgs", - test_mocker$use("gh_commits_table") + test_mocker$use("gh_commits_from_orgs") + ) + mockery::stub( + github_testhost$get_commits, + "private$get_commits_from_repos", + test_mocker$use("gh_commits_from_repos") ) suppressMessages( - commits_table <- github_testhost$get_commits( + gh_commits_table <- github_testhost$get_commits( since = "2023-01-01", until = "2023-02-28", verbose = FALSE, @@ -133,8 +158,9 @@ test_that("`get_commits()` retrieves commits in the table format", { ) ) expect_commits_table( - commits_table + gh_commits_table ) + test_mocker$cache(gh_commits_table) }) test_that("get_commits for GitHub repositories works", { diff --git a/tests/testthat/test-get_release-GitHub.R b/tests/testthat/test-get_release-GitHub.R index 88fcb586..b23c8f56 100644 --- a/tests/testthat/test-get_release-GitHub.R +++ b/tests/testthat/test-get_release-GitHub.R @@ -33,73 +33,81 @@ test_that("`prepare_releases_table()` prepares releases table", { test_mocker$cache(releases_table) }) -test_that("`set_repositories` works when searching scope set to repo", { +test_that("`get_repos_names` works", { mockery::stub( - github_testhost_priv$set_repositories, - "private$get_all_repos", - test_mocker$use("gh_repos_table") - ) - github_testhost_priv$searching_scope <- "repo" - github_testhost_priv$orgs_repos <- list("test_org" = "TestRepo") - repos_names <- github_testhost_priv$set_repositories(org = "test_org") - expect_type(repos_names, "character") - expect_gt(length(repos_names), 0) -}) - -test_that("`set_repositories` works for whole orgs", { - mockery::stub( - 
github_testhost_priv$set_repositories, - "private$get_all_repos", - test_mocker$use("gh_repos_table") + github_testhost_priv$get_repos_names, + "graphql_engine$get_repos_from_org", + test_mocker$use("gh_repos_from_org") ) github_testhost_priv$orgs_repos <- list("test_org" = "TestRepo") github_testhost_priv$searching_scope <- "org" - repos_names <- github_testhost_priv$set_repositories() + repos_names <- github_testhost_priv$get_repos_names( + org = "test_org" + ) expect_type(repos_names, "character") expect_gt(length(repos_names), 0) test_mocker$cache(repos_names) }) -test_that("`get_release_logs()` pulls release logs in the table format", { +test_that("`get_release_logs_from_orgs()` works", { mockery::stub( - github_testhost$get_release_logs, + github_testhost_priv$get_release_logs_from_orgs, "graphql_engine$prepare_releases_table", test_mocker$use("releases_table") ) mockery::stub( - github_testhost$get_release_logs, - "private$set_repositories", + github_testhost_priv$get_release_logs_from_orgs, + "private$get_repos_names", test_mocker$use("repos_names") ) - releases_table <- github_testhost$get_release_logs( + github_testhost_priv$searching_scope <- "org" + releases_from_orgs <- github_testhost_priv$get_release_logs_from_orgs( since = "2023-05-01", until = "2023-09-30", verbose = FALSE, progress = FALSE ) - expect_releases_table(releases_table) - expect_gt(min(releases_table$published_at), as.POSIXct("2023-05-01")) - expect_lt(max(releases_table$published_at), as.POSIXct("2023-09-30")) - test_mocker$cache(releases_table) + expect_releases_table(releases_from_orgs) + test_mocker$cache(releases_from_orgs) }) -test_that("`get_release_logs()` prints proper message when running", { +test_that("`get_release_logs_from_repos()` works", { mockery::stub( - github_testhost$get_release_logs, + github_testhost_priv$get_release_logs_from_repos, "graphql_engine$prepare_releases_table", test_mocker$use("releases_table") ) + github_testhost_priv$searching_scope <- "repo" + 
github_testhost_priv$orgs_repos <- list("test_org" = "TestRepo") + releases_from_repos <- github_testhost_priv$get_release_logs_from_repos( + since = "2023-05-01", + until = "2023-09-30", + verbose = FALSE, + progress = FALSE + ) + expect_releases_table(releases_from_repos) + test_mocker$cache(releases_from_repos) +}) + +test_that("`get_release_logs()` pulls release logs in the table format", { mockery::stub( github_testhost$get_release_logs, - "private$set_repositories", - test_mocker$use("repos_names") + "private$get_release_logs_from_repos", + test_mocker$use("releases_from_repos") ) - expect_snapshot( - releases_table <- github_testhost$get_release_logs( - since = "2023-05-01", - until = "2023-09-30", - verbose = TRUE, - progress = FALSE - ) + mockery::stub( + github_testhost$get_release_logs, + "private$get_release_logs_from_orgs", + test_mocker$use("releases_from_orgs") ) + releases_table <- github_testhost$get_release_logs( + since = "2023-05-01", + until = "2023-09-30", + verbose = FALSE, + progress = FALSE + ) + expect_releases_table(releases_table) + expect_gt(min(releases_table$published_at), as.POSIXct("2023-05-01")) + expect_lt(max(releases_table$published_at), as.POSIXct("2023-09-30")) + test_mocker$cache(releases_table) }) diff --git a/tests/testthat/test-get_release-GitLab.R b/tests/testthat/test-get_release-GitLab.R index fe79a434..97f64ed3 100644 --- a/tests/testthat/test-get_release-GitLab.R +++ b/tests/testthat/test-get_release-GitLab.R @@ -34,12 +34,73 @@ test_that("`prepare_releases_table()` prepares releases table", { test_mocker$cache(releases_table) }) -test_that("`get_release_logs()` pulls release logs in the table format", { +test_that("`get_repos_names` works", { mockery::stub( - gitlab_testhost$get_release_logs, + gitlab_testhost_priv$get_repos_names, + "graphql_engine$get_repos_from_org", + test_mocker$use("gl_repos_from_org") + ) + gitlab_testhost_priv$orgs_repos <- list("test_org" = "TestRepo") + gitlab_testhost_priv$searching_scope 
<- "org" + repos_names <- gitlab_testhost_priv$get_repos_names( + org = "test_org" + ) + expect_type(repos_names, "character") + expect_gt(length(repos_names), 0) + test_mocker$cache(repos_names) +}) + +test_that("`get_release_logs_from_orgs()` works", { + mockery::stub( + gitlab_testhost_priv$get_release_logs_from_orgs, + "graphql_engine$prepare_releases_table", + test_mocker$use("releases_table") + ) + mockery::stub( + gitlab_testhost_priv$get_release_logs_from_orgs, + "private$get_repos_names", + test_mocker$use("repos_names") + ) + gitlab_testhost_priv$searching_scope <- "org" + releases_from_orgs <- gitlab_testhost_priv$get_release_logs_from_orgs( + since = "2023-05-01", + until = "2023-09-30", + verbose = FALSE, + progress = FALSE + ) + expect_releases_table(releases_from_orgs) + test_mocker$cache(releases_from_orgs) +}) + +test_that("`get_release_logs_from_repos()` works", { + mockery::stub( + gitlab_testhost_priv$get_release_logs_from_repos, "graphql_engine$prepare_releases_table", test_mocker$use("releases_table") ) + gitlab_testhost_priv$searching_scope <- "repo" + gitlab_testhost_priv$orgs_repos <- list("test_org" = "TestRepo") + releases_from_repos <- gitlab_testhost_priv$get_release_logs_from_repos( + since = "2023-05-01", + until = "2023-09-30", + verbose = FALSE, + progress = FALSE + ) + expect_releases_table(releases_from_repos) + test_mocker$cache(releases_from_repos) +}) + +test_that("`get_release_logs()` pulls release logs in the table format", { + mockery::stub( + gitlab_testhost$get_release_logs, + "private$get_release_logs_from_repos", + test_mocker$use("releases_from_repos") + ) + mockery::stub( + gitlab_testhost$get_release_logs, + "private$get_release_logs_from_orgs", + test_mocker$use("releases_from_orgs") + ) releases_table <- gitlab_testhost$get_release_logs( since = "2023-08-01", until = "2024-06-30", From abc6d7ba1090d6c13d90ca9899117a3dd35f05aa Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Fri, 13 Dec 2024 13:46:08 +0000 Subject: 
[PATCH 30/99] Fixes in function parameters and in passing properly repos names to GraphQL requests, fixing and adjusting tests. In tests there was a need to change order of test calls: first should go repos, then commits, as get_repos_names was moved to tests for commits (from releases). --- R/EngineGraphQLGitHub.R | 12 +++++----- R/EngineGraphQLGitLab.R | 10 ++++---- R/GitHost.R | 10 ++++---- R/GitHostGitHub.R | 5 ++-- R/GitHostGitLab.R | 11 ++++----- ...its-GitHub.md => 02-get_commits-GitHub.md} | 0 ...GitStats.md => 02-get_commits-GitStats.md} | 0 ...-GitHub.R => test-02-get_commits-GitHub.R} | 21 ++++++++++++++++ ...-GitLab.R => test-02-get_commits-GitLab.R} | 0 ...Stats.R => test-02-get_commits-GitStats.R} | 0 tests/testthat/test-get_release-GitHub.R | 24 ++++--------------- tests/testthat/test-get_release-GitLab.R | 6 ++--- 12 files changed, 52 insertions(+), 47 deletions(-) rename tests/testthat/_snaps/{01-get_commits-GitHub.md => 02-get_commits-GitHub.md} (100%) rename tests/testthat/_snaps/{01-get_commits-GitStats.md => 02-get_commits-GitStats.md} (100%) rename tests/testthat/{test-01-get_commits-GitHub.R => test-02-get_commits-GitHub.R} (88%) rename tests/testthat/{test-01-get_commits-GitLab.R => test-02-get_commits-GitLab.R} (100%) rename tests/testthat/{test-01-get_commits-GitStats.R => test-02-get_commits-GitStats.R} (100%) diff --git a/R/EngineGraphQLGitHub.R b/R/EngineGraphQLGitHub.R index 5ce4ddae..b944ae05 100644 --- a/R/EngineGraphQLGitHub.R +++ b/R/EngineGraphQLGitHub.R @@ -297,7 +297,7 @@ EngineGraphQLGitHub <- R6::R6Class( }, # Prepare releases table. 
- prepare_releases_table = function(releases_response, org, date_from, date_until) { + prepare_releases_table = function(releases_response, org, since, until) { if (!is.null(releases_response)) { releases_table <- purrr::map(releases_response, function(release) { @@ -310,7 +310,7 @@ EngineGraphQLGitHub <- R6::R6Class( release_log = node$description ) }) %>% - purrr::list_rbind() %>% + purrr::list_rbind() |> dplyr::mutate( repo_name = release$data$repository$name, repo_url = release$data$repository$url @@ -321,14 +321,14 @@ EngineGraphQLGitHub <- R6::R6Class( ) return(release_table) }) %>% - purrr::list_rbind() %>% + purrr::list_rbind() |> dplyr::filter( - published_at <= as.POSIXct(date_until) + published_at <= as.POSIXct(until) ) - if (!is.null(date_from)) { + if (!is.null(since)) { releases_table <- releases_table %>% dplyr::filter( - published_at >= as.POSIXct(date_from) + published_at >= as.POSIXct(since) ) } } else { diff --git a/R/EngineGraphQLGitLab.R b/R/EngineGraphQLGitLab.R index 3f75ef19..13667721 100644 --- a/R/EngineGraphQLGitLab.R +++ b/R/EngineGraphQLGitLab.R @@ -368,7 +368,7 @@ EngineGraphQLGitLab <- R6::R6Class( response <- self$gql_response( gql_query = releases_from_repo_query, vars = list( - "project_path" = utils::URLdecode(repository) + "project_path" = paste0(org, "/", utils::URLdecode(repository)) ) ) return(response) @@ -378,7 +378,7 @@ EngineGraphQLGitLab <- R6::R6Class( }, # Prepare releases table. 
- prepare_releases_table = function(releases_response, org, date_from, date_until) { + prepare_releases_table = function(releases_response, org, since, until) { if (length(releases_response) > 0) { releases_table <- purrr::map(releases_response, function(release) { @@ -404,12 +404,12 @@ EngineGraphQLGitLab <- R6::R6Class( }) %>% purrr::list_rbind() %>% dplyr::filter( - published_at <= as.POSIXct(date_until) + published_at <= as.POSIXct(until) ) - if (!is.null(date_from)) { + if (!is.null(since)) { releases_table <- releases_table %>% dplyr::filter( - published_at >= as.POSIXct(date_from) + published_at >= as.POSIXct(since) ) } } else { diff --git a/R/GitHost.R b/R/GitHost.R index 5b3647e9..153ac788 100644 --- a/R/GitHost.R +++ b/R/GitHost.R @@ -1027,8 +1027,8 @@ GitHost <- R6::R6Class( graphql_engine <- private$engines$graphql if (length(repos_names) > 0) { release_logs_table_org <- graphql_engine$get_release_logs_from_org( - org = org, - repos_names = repos_names + repos_names = repos_names, + org = org ) %>% graphql_engine$prepare_releases_table( org = org, @@ -1059,14 +1059,14 @@ GitHost <- R6::R6Class( show_message( host = private$host_name, engine = "graphql", - scope = org, + scope = paste0(org, "/", private$orgs_repos[[org]]), information = "Pulling release logs" ) } graphql_engine <- private$engines$graphql release_logs_table_org <- graphql_engine$get_release_logs_from_org( - org = org, - repos_names = private$orgs_repos[[org]] + repos_names = private$orgs_repos[[org]], + org = org ) %>% graphql_engine$prepare_releases_table( org = org, diff --git a/R/GitHostGitHub.R b/R/GitHostGitHub.R index 0b8abdbc..34998ccf 100644 --- a/R/GitHostGitHub.R +++ b/R/GitHostGitHub.R @@ -179,9 +179,10 @@ GitHostGitHub <- R6::R6Class( information = "Pulling commits" ) } + repos_names <- private$get_repos_names(org) commits_table_org <- graphql_engine$get_commits_from_repos( org = org, - repos_names = private$get_repos_names(org), + repos_names = repos_names, since = since, 
until = until, progress = progress @@ -211,7 +212,7 @@ GitHostGitHub <- R6::R6Class( show_message( host = private$host_name, engine = "graphql", - scope = org, + scope = paste0(org, "/", private$orgs_repos[[org]], collapse = ", "), information = "Pulling commits" ) } diff --git a/R/GitHostGitLab.R b/R/GitHostGitLab.R index 05884359..11c48118 100644 --- a/R/GitHostGitLab.R +++ b/R/GitHostGitLab.R @@ -243,7 +243,7 @@ GitHostGitLab <- R6::R6Class("GitHostGitLab", org = org ) commits_table_org <- rest_engine$get_commits_from_repos( - repos_names = repos_names, + repos_names = paste0(org, "%2f", repos_names), since = since, until = until, progress = progress @@ -274,16 +274,16 @@ GitHostGitLab <- R6::R6Class("GitHostGitLab", orgs <- names(private$orgs_repos) commits_table <- purrr::map(orgs, function(org) { commits_table_org <- NULL + repos <- private$orgs_repos[[org]] + repos_names <- paste0(utils::URLencode(org, reserved = TRUE), "%2f", repos) if (!private$scan_all && verbose) { show_message( host = private$host_name, engine = "rest", - scope = utils::URLdecode(org), + scope = utils::URLdecode(paste0(repos_names, collapse = "|")), information = "Pulling commits" ) } - repos <- private$orgs_repos[[org]] - repos_names <- paste0(utils::URLencode(org, reserved = TRUE), "%2f", repos) commits_table_org <- rest_engine$get_commits_from_repos( repos_names = repos_names, since = since, @@ -316,8 +316,7 @@ GitHostGitLab <- R6::R6Class("GitHostGitLab", type = type ) |> purrr::map_vec(~ .$node$repo_path) - org <- utils::URLencode(org, reserved = TRUE) - return(paste0(org, "%2f", repos_names)) + return(repos_names) }, are_non_text_files = function(file_path, host_files_structure) { diff --git a/tests/testthat/_snaps/01-get_commits-GitHub.md b/tests/testthat/_snaps/02-get_commits-GitHub.md similarity index 100% rename from tests/testthat/_snaps/01-get_commits-GitHub.md rename to tests/testthat/_snaps/02-get_commits-GitHub.md diff --git 
a/tests/testthat/_snaps/01-get_commits-GitStats.md b/tests/testthat/_snaps/02-get_commits-GitStats.md similarity index 100% rename from tests/testthat/_snaps/01-get_commits-GitStats.md rename to tests/testthat/_snaps/02-get_commits-GitStats.md diff --git a/tests/testthat/test-01-get_commits-GitHub.R b/tests/testthat/test-02-get_commits-GitHub.R similarity index 88% rename from tests/testthat/test-01-get_commits-GitHub.R rename to tests/testthat/test-02-get_commits-GitHub.R index 7765ae92..e0ba88e6 100644 --- a/tests/testthat/test-01-get_commits-GitHub.R +++ b/tests/testthat/test-02-get_commits-GitHub.R @@ -98,12 +98,33 @@ test_that("fill_empty_authors() works as expected", { ) }) +test_that("`get_repos_names` works", { + mockery::stub( + github_testhost_priv$get_repos_names, + "graphql_engine$get_repos_from_org", + test_mocker$use("gh_repos_from_org") + ) + github_testhost_priv$orgs_repos <- list("test_org" = "TestRepo") + github_testhost_priv$searching_scope <- "org" + gh_repos_names <- github_testhost_priv$get_repos_names( + org = "test_org" + ) + expect_type(gh_repos_names, "character") + expect_gt(length(gh_repos_names), 0) + test_mocker$cache(gh_repos_names) +}) + test_that("get_commits_from_orgs for GitHub works", { mockery::stub( github_testhost_repos_priv$get_commits_from_orgs, "graphql_engine$prepare_commits_table", test_mocker$use("gh_commits_table") ) + mockery::stub( + github_testhost_repos_priv$get_commits_from_orgs, + "private$get_repos_names", + test_mocker$use("gh_repos_names") + ) github_testhost_repos_priv$searching_scope <- "org" gh_commits_from_orgs <- github_testhost_repos_priv$get_commits_from_orgs( since = "2023-03-01", diff --git a/tests/testthat/test-01-get_commits-GitLab.R b/tests/testthat/test-02-get_commits-GitLab.R similarity index 100% rename from tests/testthat/test-01-get_commits-GitLab.R rename to tests/testthat/test-02-get_commits-GitLab.R diff --git a/tests/testthat/test-01-get_commits-GitStats.R 
b/tests/testthat/test-02-get_commits-GitStats.R similarity index 100% rename from tests/testthat/test-01-get_commits-GitStats.R rename to tests/testthat/test-02-get_commits-GitStats.R diff --git a/tests/testthat/test-get_release-GitHub.R b/tests/testthat/test-get_release-GitHub.R index b23c8f56..3596902d 100644 --- a/tests/testthat/test-get_release-GitHub.R +++ b/tests/testthat/test-get_release-GitHub.R @@ -23,9 +23,9 @@ test_that("`get_releases_from_org()` pulls releases from the repositories", { test_that("`prepare_releases_table()` prepares releases table", { releases_table <- test_graphql_github$prepare_releases_table( releases_response = test_mocker$use("releases_from_repos"), - org = "r-world-devs", - date_from = "2023-05-01", - date_until = "2023-09-30" + org = "r-world-devs", + since = "2023-05-01", + until = "2023-09-30" ) expect_releases_table(releases_table) expect_gt(min(releases_table$published_at), as.POSIXct("2023-05-01")) @@ -33,22 +33,6 @@ test_that("`prepare_releases_table()` prepares releases table", { test_mocker$cache(releases_table) }) -test_that("`get_repos_names` works", { - mockery::stub( - github_testhost_priv$get_repos_names, - "graphql_engine$get_repos_from_org", - test_mocker$use("gh_repos_from_org") - ) - github_testhost_priv$orgs_repos <- list("test_org" = "TestRepo") - github_testhost_priv$searching_scope <- "org" - repos_names <- github_testhost_priv$get_repos_names( - org = "test_org" - ) - expect_type(repos_names, "character") - expect_gt(length(repos_names), 0) - test_mocker$cache(repos_names) -}) - test_that("`get_release_logs_from_orgs()` works", { mockery::stub( github_testhost_priv$get_release_logs_from_orgs, @@ -58,7 +42,7 @@ test_that("`get_release_logs_from_orgs()` works", { mockery::stub( github_testhost_priv$get_release_logs_from_orgs, "private$get_repos_names", - test_mocker$use("repos_names") + test_mocker$use("gh_repos_names") ) github_testhost_priv$searching_scope <- "org" releases_from_orgs <- 
github_testhost_priv$get_release_logs_from_orgs( diff --git a/tests/testthat/test-get_release-GitLab.R b/tests/testthat/test-get_release-GitLab.R index 97f64ed3..06e73b5d 100644 --- a/tests/testthat/test-get_release-GitLab.R +++ b/tests/testthat/test-get_release-GitLab.R @@ -24,9 +24,9 @@ test_that("`get_releases_from_org()` pulls releases from the repositories", { test_that("`prepare_releases_table()` prepares releases table", { releases_table <- test_graphql_gitlab$prepare_releases_table( releases_response = test_mocker$use("releases_from_repos"), - org = "test_org", - date_from = "2023-08-01", - date_until = "2024-06-30" + org = "test_org", + since = "2023-08-01", + until = "2024-06-30" ) expect_releases_table(releases_table) expect_gt(min(releases_table$published_at), as.POSIXct("2023-08-01")) From 93ad9dd75aec36aca34edae6188924719e995f4f Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Fri, 13 Dec 2024 13:46:56 +0000 Subject: [PATCH 31/99] Fix lint. --- R/GitHost.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/GitHost.R b/R/GitHost.R index 153ac788..e9ffc4e2 100644 --- a/R/GitHost.R +++ b/R/GitHost.R @@ -222,13 +222,13 @@ GitHost <- R6::R6Class( since = since, until = until, verbose = verbose, - progress= progress + progress = progress ) release_logs_from_repos <- private$get_release_logs_from_repos( since = since, until = until, verbose = verbose, - progress= progress + progress = progress ) release_logs_table <- list( release_logs_from_orgs, From 469573a2a76faf61a50797dc2a5ffd2379022793 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Fri, 13 Dec 2024 16:23:56 +0000 Subject: [PATCH 32/99] Adjust get_repos_urls to new scope approach, fix setting orgs when pulling data from repos (when org is a user), change name of get_repos_individual to get_repos_from_repos, fix pulling releases when empty. 
--- R/EngineGraphQLGitHub.R | 2 +- R/EngineRestGitLab.R | 6 +- R/GitHost.R | 97 ++++++++++++++--------- R/GitHostGitHub.R | 4 +- R/GitHostGitLab.R | 4 +- tests/testthat/test-01-get_repos-GitHub.R | 8 +- 6 files changed, 76 insertions(+), 45 deletions(-) diff --git a/R/EngineGraphQLGitHub.R b/R/EngineGraphQLGitHub.R index b944ae05..9c2a653f 100644 --- a/R/EngineGraphQLGitHub.R +++ b/R/EngineGraphQLGitHub.R @@ -298,7 +298,7 @@ EngineGraphQLGitHub <- R6::R6Class( # Prepare releases table. prepare_releases_table = function(releases_response, org, since, until) { - if (!is.null(releases_response)) { + if (length(releases_response) > 0) { releases_table <- purrr::map(releases_response, function(release) { release_table <- purrr::map(release$data$repository$releases$nodes, function(node) { diff --git a/R/EngineRestGitLab.R b/R/EngineRestGitLab.R index 2ce2455d..c7887a14 100644 --- a/R/EngineRestGitLab.R +++ b/R/EngineRestGitLab.R @@ -168,8 +168,10 @@ EngineRestGitLab <- R6::R6Class( # Pull all repositories URLs from organization get_repos_urls = function(type, org, repos) { - repos_response <- self$response( - endpoint = paste0(private$endpoints[["organizations"]], utils::URLencode(org, reserved = TRUE), "/projects") + repos_response <- private$paginate_results( + endpoint = paste0(private$endpoints[["organizations"]], + utils::URLencode(org, reserved = TRUE), + "/projects") ) if (!is.null(repos)) { repos_response <- repos_response %>% diff --git a/R/GitHost.R b/R/GitHost.R index e9ffc4e2..ffad9966 100644 --- a/R/GitHost.R +++ b/R/GitHost.R @@ -407,9 +407,6 @@ GitHost <- R6::R6Class( ) private$repos_fullnames <- repos orgs_repos <- private$extract_repos_and_orgs(private$repos_fullnames) - orgs <- private$set_owner_type( - owners = names(orgs_repos) - ) private$repos <- unname(unlist(orgs_repos)) private$orgs_repos <- orgs_repos } @@ -571,16 +568,6 @@ GitHost <- R6::R6Class( return(orgs_repo_list) }, - # Set repositories - set_repos = function(org) { - if ("repo" %in% 
private$searching_scope) { - repos <- private$orgs_repos[[org]] - } else { - repos <- NULL - } - return(repos) - }, - # Filter repositories table by host filter_repos_by_host = function(repos_table) { dplyr::filter( @@ -603,7 +590,7 @@ GitHost <- R6::R6Class( repos_table <- purrr::list_rbind( list( private$get_repos_from_orgs(verbose, progress), - private$get_repos_individual(verbose, progress) + private$get_repos_from_repos(verbose, progress) ) ) return(repos_table) @@ -634,10 +621,12 @@ GitHost <- R6::R6Class( } }, - get_repos_individual = function(verbose, progress) { + get_repos_from_repos = function(verbose, progress) { if ("repo" %in% private$searching_scope) { graphql_engine <- private$engines$graphql - orgs <- names(private$orgs_repos) + orgs <- private$set_owner_type( + owners = names(private$orgs_repos) + ) purrr::map(orgs, function(org) { type <- attr(org, "type") %||% "organization" org <- utils::URLdecode(org) @@ -703,28 +692,62 @@ GitHost <- R6::R6Class( } private$orgs <- private$engines$graphql$get_orgs() } - rest_engine <- private$engines$rest - repos_vector <- purrr::map(private$orgs, function(org) { - org <- utils::URLdecode(org) - if (!private$scan_all && verbose) { - show_message( - host = private$host_name, - engine = "rest", - scope = org, - information = "Pulling repositories (URLS)" - ) - } - repos_urls <- rest_engine$get_repos_urls( - type = type, - org = org, - repos = private$set_repos(org) - ) - return(repos_urls) - }, .progress = progress) %>% - unlist() + repos_vector <- c( + private$get_repos_urls_from_orgs(type, verbose, progress), + private$get_repos_urls_from_repos(type, verbose, progress) + ) return(repos_vector) }, + get_repos_urls_from_orgs = function(type, verbose, progress) { + if ("org" %in% private$searching_scope) { + rest_engine <- private$engines$rest + repos_vector <- purrr::map(private$orgs, function(org) { + org <- utils::URLdecode(org) + if (!private$scan_all && verbose) { + show_message( + host = private$host_name, + 
engine = "rest", + scope = org, + information = "Pulling repositories (URLS)" + ) + } + repos_urls <- rest_engine$get_repos_urls( + type = type, + org = org, + repos = NULL + ) + return(repos_urls) + }, .progress = progress) %>% + unlist() + } + }, + + get_repos_urls_from_repos = function(type, verbose, progress) { + if ("repo" %in% private$searching_scope) { + rest_engine <- private$engines$rest + orgs <- names(private$orgs_repos) + repos_vector <- purrr::map(orgs, function(org) { + org <- utils::URLdecode(org) + if (!private$scan_all && verbose) { + show_message( + host = private$host_name, + engine = "rest", + scope = org, + information = "Pulling repositories (URLS)" + ) + } + repos_urls <- rest_engine$get_repos_urls( + type = type, + org = org, + repos = private$orgs_repos[[org]] + ) + return(repos_urls) + }, .progress = progress) %>% + unlist() + } + }, + # Pull repositories with code from whole Git Host get_repos_with_code_from_host = function(code, in_files = NULL, @@ -1051,7 +1074,9 @@ GitHost <- R6::R6Class( get_release_logs_from_repos = function(since, until, verbose, progress) { if ("repo" %in% private$searching_scope) { - orgs <- names(private$orgs_repos) + orgs <- private$set_owner_type( + owners = names(private$orgs_repos) + ) release_logs_table <- purrr::map(orgs, function(org) { org <- utils::URLdecode(org) release_logs_table_org <- NULL diff --git a/R/GitHostGitHub.R b/R/GitHostGitHub.R index 34998ccf..71e1c42a 100644 --- a/R/GitHostGitHub.R +++ b/R/GitHostGitHub.R @@ -205,7 +205,9 @@ GitHostGitHub <- R6::R6Class( get_commits_from_repos = function(since, until, verbose, progress) { if ("repo" %in% private$searching_scope) { graphql_engine <- private$engines$graphql - orgs <- names(private$orgs_repos) + orgs <- private$set_owner_type( + owners = names(private$orgs_repos) + ) commits_table <- purrr::map(orgs, function(org) { commits_table_org <- NULL if (!private$scan_all && verbose) { diff --git a/R/GitHostGitLab.R b/R/GitHostGitLab.R index 
11c48118..8ebbdcc5 100644 --- a/R/GitHostGitLab.R +++ b/R/GitHostGitLab.R @@ -271,7 +271,9 @@ GitHostGitLab <- R6::R6Class("GitHostGitLab", progress = verbose) { if ("repo" %in% private$searching_scope) { rest_engine <- private$engines$rest - orgs <- names(private$orgs_repos) + orgs <- private$set_owner_type( + owners = names(private$orgs_repos) + ) commits_table <- purrr::map(orgs, function(org) { commits_table_org <- NULL repos <- private$orgs_repos[[org]] diff --git a/tests/testthat/test-01-get_repos-GitHub.R b/tests/testthat/test-01-get_repos-GitHub.R index 43c50f81..af001cb9 100644 --- a/tests/testthat/test-01-get_repos-GitHub.R +++ b/tests/testthat/test-01-get_repos-GitHub.R @@ -407,9 +407,9 @@ test_that("get_repos_from_org works", { test_mocker$cache(gh_repos_from_orgs) }) -test_that("get_repos_individual works", { +test_that("get_repos_from_repos works", { mockery::stub( - github_testhost_priv$get_repos_individual, + github_testhost_priv$get_repos_from_repos, "graphql_engine$prepare_repos_table", test_mocker$use("gh_repos_table") ) @@ -420,7 +420,7 @@ test_that("get_repos_individual works", { attr(test_org, "type") <- "organization" github_testhost_priv$orgs <- test_org - gh_repos_individual <- github_testhost_priv$get_repos_individual( + gh_repos_individual <- github_testhost_priv$get_repos_from_repos( verbose = FALSE, progress = FALSE ) @@ -438,7 +438,7 @@ test_that("`get_all_repos()` works as expected", { ) mockery::stub( github_testhost_priv$get_all_repos, - "private$get_repos_individual", + "private$get_repos_from_repos", test_mocker$use("gh_repos_individual") ) gh_repos_table <- github_testhost_priv$get_all_repos( From 2fa819bec6e8be59e7b7b8131e2535ac5df5372f Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Fri, 13 Dec 2024 16:24:11 +0000 Subject: [PATCH 33/99] Add NEWS. 
--- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 64459297..eaa5c2d8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,7 @@ ## Features: +- From now on it is possible to pass `orgs` and `repos` in `set_*_host()` functions ([#400](https://github.com/r-world-devs/GitStats/issues/400)). - Improved `get_commits_stats()` function ([#556](https://github.com/r-world-devs/GitStats/issues/556), [#557](https://github.com/r-world-devs/GitStats/issues/557)) with: - giving possibility to customize grouping variable by passing it with the `group_var` parameter, - changing name of the `time_interval` parameter to `time_aggregation`, From 06c713f14ba6fedd1087930fd15ae4c7d7430b5f Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Mon, 16 Dec 2024 08:04:42 +0000 Subject: [PATCH 34/99] Typo. --- R/GitHost.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/GitHost.R b/R/GitHost.R index ffad9966..bb85651a 100644 --- a/R/GitHost.R +++ b/R/GitHost.R @@ -709,7 +709,7 @@ GitHost <- R6::R6Class( host = private$host_name, engine = "rest", scope = org, - information = "Pulling repositories (URLS)" + information = "Pulling repositories (URLs)" ) } repos_urls <- rest_engine$get_repos_urls( @@ -734,7 +734,7 @@ GitHost <- R6::R6Class( host = private$host_name, engine = "rest", scope = org, - information = "Pulling repositories (URLS)" + information = "Pulling repositories (URLs)" ) } repos_urls <- rest_engine$get_repos_urls( From 23b10ecd94b333b1714681363c9548837b70700b Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Mon, 16 Dec 2024 10:58:27 +0000 Subject: [PATCH 35/99] Fix tests. 
--- R/GitHost.R | 14 ++++- tests/testthat/helper-fixtures.R | 6 +- tests/testthat/setup.R | 18 +----- tests/testthat/test-01-get_repos-GitHub.R | 12 ++-- tests/testthat/test-02-get_commits-GitHub.R | 42 +++++--------- tests/testthat/test-02-get_commits-GitLab.R | 16 ------ .../test-get_files_structure-GitHub.R | 10 ++-- tests/testthat/test-get_release-GitHub.R | 7 +++ tests/testthat/test-get_urls_repos-GitHub.R | 56 ++++++++++++++++++- tests/testthat/test-get_urls_repos-GitLab.R | 54 +++++++++++------- 10 files changed, 139 insertions(+), 96 deletions(-) diff --git a/R/GitHost.R b/R/GitHost.R index bb85651a..fd81880c 100644 --- a/R/GitHost.R +++ b/R/GitHost.R @@ -692,9 +692,19 @@ GitHost <- R6::R6Class( } private$orgs <- private$engines$graphql$get_orgs() } + repos_urls_from_orgs <- private$get_repos_urls_from_orgs( + type = type, + verbose = verbose, + progress = progress + ) + repos_urls_from_repos <- private$get_repos_urls_from_repos( + type = type, + verbose = verbose, + progress = progress + ) repos_vector <- c( - private$get_repos_urls_from_orgs(type, verbose, progress), - private$get_repos_urls_from_repos(type, verbose, progress) + repos_urls_from_orgs, + repos_urls_from_repos ) return(repos_vector) }, diff --git a/tests/testthat/helper-fixtures.R b/tests/testthat/helper-fixtures.R index e1773d7f..e80d3e7f 100644 --- a/tests/testthat/helper-fixtures.R +++ b/tests/testthat/helper-fixtures.R @@ -34,10 +34,10 @@ test_fixtures$github_repository_rest_response <- list( "id" = 627452680, "node_id" = "R_kgDOJWYrCA", "name" = "testRepo", - "full_name" = "test-org/TestRepo", + "full_name" = "test_org/TestRepo", "private" = FALSE, "owner" = list( - "login" = "test-org", + "login" = "test_org", "id" = 103638913, "node_id" = "O_kgDOBi1ngQ", "avatar_url" = "https://avatars.githubusercontent.com/u/103638913?v=4" @@ -529,7 +529,7 @@ test_fixtures$github_files_tree_response <- list( "repository" = list( "id" = "R_kgD0Ivtxsg", "name" = "TestRepo", - "url" = 
"https://github.com/test-org/TestRepo", + "url" = "https://github.com/test_org/TestRepo", "object" = list( "entries" = list( list( diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index 9704d42a..69811c67 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -30,24 +30,10 @@ test_graphql_gitlab <- EngineGraphQLGitLab$new( ) test_graphql_gitlab_priv <- environment(test_graphql_gitlab$initialize)$private -github_testhost <- create_github_testhost(orgs = "test-org") +github_testhost <- create_github_testhost(orgs = "test_org") -github_testhost_priv <- create_github_testhost(orgs = "test-org", mode = "private") - -github_testhost_repos <- create_github_testhost( - repos = c("openpharma/DataFakeR", "r-world-devs/GitStats", "r-world-devs/cohortBuilder") -) - -github_testhost_repos_priv <- create_github_testhost( - repos = c("openpharma/DataFakeR", "r-world-devs/GitStats", "r-world-devs/cohortBuilder"), - mode = "private" -) +github_testhost_priv <- create_github_testhost(orgs = "test_org", mode = "private") gitlab_testhost <- create_gitlab_testhost(orgs = "mbtests") gitlab_testhost_priv <- create_gitlab_testhost(orgs = "mbtests", mode = "private") - -gitlab_testhost_repos <- create_gitlab_testhost( - repos = c("mbtests/gitstatstesting", "mbtests/gitstats-testing-2") -) - diff --git a/tests/testthat/test-01-get_repos-GitHub.R b/tests/testthat/test-01-get_repos-GitHub.R index af001cb9..6c01c398 100644 --- a/tests/testthat/test-01-get_repos-GitHub.R +++ b/tests/testthat/test-01-get_repos-GitHub.R @@ -408,18 +408,20 @@ test_that("get_repos_from_org works", { }) test_that("get_repos_from_repos works", { + test_org <- "test_org" + attr(test_org, "type") <- "organization" + mockery::stub( + github_testhost_priv$get_repos_from_repos, + "private$set_owner_type", + test_org + ) mockery::stub( github_testhost_priv$get_repos_from_repos, "graphql_engine$prepare_repos_table", test_mocker$use("gh_repos_table") ) github_testhost_priv$searching_scope <- c("org", 
"repo") - github_testhost_priv$repos_fullnames <- c("test_org/TestRepo") github_testhost_priv$orgs_repos <- list("test_org" = "TestRepo") - test_org <- "test_org" - attr(test_org, "type") <- "organization" - github_testhost_priv$orgs <- test_org - gh_repos_individual <- github_testhost_priv$get_repos_from_repos( verbose = FALSE, progress = FALSE diff --git a/tests/testthat/test-02-get_commits-GitHub.R b/tests/testthat/test-02-get_commits-GitHub.R index e0ba88e6..a42025e5 100644 --- a/tests/testthat/test-02-get_commits-GitHub.R +++ b/tests/testthat/test-02-get_commits-GitHub.R @@ -116,17 +116,17 @@ test_that("`get_repos_names` works", { test_that("get_commits_from_orgs for GitHub works", { mockery::stub( - github_testhost_repos_priv$get_commits_from_orgs, + github_testhost_priv$get_commits_from_orgs, "graphql_engine$prepare_commits_table", test_mocker$use("gh_commits_table") ) mockery::stub( - github_testhost_repos_priv$get_commits_from_orgs, + github_testhost_priv$get_commits_from_orgs, "private$get_repos_names", test_mocker$use("gh_repos_names") ) - github_testhost_repos_priv$searching_scope <- "org" - gh_commits_from_orgs <- github_testhost_repos_priv$get_commits_from_orgs( + github_testhost_priv$searching_scope <- "org" + gh_commits_from_orgs <- github_testhost_priv$get_commits_from_orgs( since = "2023-03-01", until = "2023-04-01", verbose = FALSE, @@ -141,13 +141,20 @@ test_that("get_commits_from_orgs for GitHub works", { test_that("get_commits_from_repos for GitHub works", { mockery::stub( - github_testhost_repos_priv$get_commits_from_repos, + github_testhost_priv$get_commits_from_repos, "graphql_engine$prepare_commits_table", test_mocker$use("gh_commits_table") ) - github_testhost_repos_priv$searching_scope <- "repo" - github_testhost_repos_priv$orgs_repos <- list("test_org" = "TestRepo") - gh_commits_from_repos <- github_testhost_repos_priv$get_commits_from_repos( + github_testhost_priv$searching_scope <- "repo" + github_testhost_priv$orgs_repos <- 
list("test_org" = "TestRepo") + test_org <- "test_org" + attr(test_org, "type") <- "organization" + mockery::stub( + github_testhost_priv$get_commits_from_repos, + "private$set_owner_type", + test_org + ) + gh_commits_from_repos <- github_testhost_priv$get_commits_from_repos( since = "2023-03-01", until = "2023-04-01", verbose = FALSE, @@ -183,22 +190,3 @@ test_that("`get_commits()` retrieves commits in the table format", { ) test_mocker$cache(gh_commits_table) }) - -test_that("get_commits for GitHub repositories works", { - mockery::stub( - github_testhost_repos$get_commits, - "private$get_commits_from_orgs", - test_mocker$use("gh_commits_table") - ) - suppressMessages( - gh_commits_table <- github_testhost_repos$get_commits( - since = "2023-03-01", - until = "2023-04-01", - verbose = FALSE, - progress = FALSE - ) - ) - expect_commits_table( - gh_commits_table - ) -}) diff --git a/tests/testthat/test-02-get_commits-GitLab.R b/tests/testthat/test-02-get_commits-GitLab.R index 5f904133..e3e45d37 100644 --- a/tests/testthat/test-02-get_commits-GitLab.R +++ b/tests/testthat/test-02-get_commits-GitLab.R @@ -139,19 +139,3 @@ test_that("get_commits_from_orgs works", { ) test_mocker$cache(gl_commits_table) }) - -test_that("get_commits for GitLab works with repos implied", { - mockery::stub( - gitlab_testhost_repos$get_commits, - "private$get_commits_from_orgs", - test_mocker$use("gl_commits_table") - ) - gl_commits_table <- gitlab_testhost_repos$get_commits( - since = "2023-01-01", - until = "2023-06-01", - verbose = FALSE - ) - expect_commits_table( - gl_commits_table - ) -}) diff --git a/tests/testthat/test-get_files_structure-GitHub.R b/tests/testthat/test-get_files_structure-GitHub.R index 3c632d55..51dfad59 100644 --- a/tests/testthat/test-get_files_structure-GitHub.R +++ b/tests/testthat/test-get_files_structure-GitHub.R @@ -14,7 +14,7 @@ test_that("get_file_response works", { test_fixtures$github_files_tree_response ) gh_files_tree_response <- 
test_graphql_github_priv$get_file_response( - org = "test-org", + org = "test_org", repo = "TestRepo", def_branch = "master", file_path = "", @@ -55,7 +55,7 @@ test_that("get_files_structure_from_repo returns list with files and dirs vector files_and_dirs ) files_structure <- test_graphql_github_priv$get_files_structure_from_repo( - org = "test-org", + org = "test_org", repo = "TestRepo", def_branch = "master" ) @@ -77,13 +77,13 @@ test_that("get_files_structure_from_repo returns list of files up to 2 tier of d files_and_dirs <- test_mocker$use("files_and_dirs_list") ) files_structure_very_shallow <- test_graphql_github_priv$get_files_structure_from_repo( - org = "test-org", + org = "test_org", repo = "TestRepo", def_branch = "master", depth = 1L ) files_structure_shallow <- test_graphql_github_priv$get_files_structure_from_repo( - org = "test-org", + org = "test_org", repo = "TestRepo", def_branch = "master", depth = 2L @@ -149,7 +149,7 @@ test_that("GitHub GraphQL Engine pulls files structure with pattern from reposit test_mocker$use("md_files_structure") ) gh_md_files_structure <- test_graphql_github$get_files_structure_from_org( - org = "test-org", + org = "test_org", repos = "TestRepo", pattern = "\\.md|\\.qmd|\\.Rmd" ) diff --git a/tests/testthat/test-get_release-GitHub.R b/tests/testthat/test-get_release-GitHub.R index 3596902d..a576cfd6 100644 --- a/tests/testthat/test-get_release-GitHub.R +++ b/tests/testthat/test-get_release-GitHub.R @@ -61,6 +61,13 @@ test_that("`get_release_logs_from_repos()` works", { "graphql_engine$prepare_releases_table", test_mocker$use("releases_table") ) + test_org <- "test_org" + attr(test_org, "type") <- "organization" + mockery::stub( + github_testhost_priv$get_release_logs_from_repos, + "private$set_owner_type", + test_org + ) github_testhost_priv$searching_scope <- "repo" github_testhost_priv$orgs_repos <- list("test_org" = "TestRepo") releases_from_repos <- github_testhost_priv$get_release_logs_from_repos( diff --git 
a/tests/testthat/test-get_urls_repos-GitHub.R b/tests/testthat/test-get_urls_repos-GitHub.R index fd0c2e75..a0f379b3 100644 --- a/tests/testthat/test-get_urls_repos-GitHub.R +++ b/tests/testthat/test-get_urls_repos-GitHub.R @@ -34,7 +34,7 @@ test_that("get_repos_urls() works for individual repos", { test_mocker$cache(gh_repos_urls) }) -test_that("get_all_repos_urls prepares api repo_urls vector", { +test_that("get_repos_urls prepares api repo_urls vector", { github_testhost_priv <- create_github_testhost(orgs = "test-org", mode = "private") mockery::stub( @@ -52,13 +52,63 @@ test_that("get_all_repos_urls prepares api repo_urls vector", { test_mocker$cache(gh_api_repos_urls) }) -test_that("get_all_repos_urls prepares web repo_urls vector", { +test_that("get_repos_urls_from_orgs prepares web repo_urls vector", { mockery::stub( - github_testhost_priv$get_all_repos_urls, + github_testhost_priv$get_repos_urls_from_orgs, + "rest_engine$get_repos_urls", + test_mocker$use("gh_repos_urls") + ) + github_testhost_priv$searching_scope <- "org" + github_testhost_priv$orgs <- "test_org" + gh_repos_urls_from_orgs <- github_testhost_priv$get_repos_urls_from_orgs( + type = "web", + verbose = FALSE, + progress = FALSE + ) + expect_gt(length(gh_repos_urls_from_orgs), 0) + expect_true(any(grepl("test-org", gh_repos_urls_from_orgs))) + expect_true(all(grepl("https://testhost.com/", gh_repos_urls_from_orgs))) + test_mocker$cache(gh_repos_urls_from_orgs) +}) + +test_that("get_repos_urls_from_repos prepares web repo_urls vector", { + test_org <- "test_org" + attr(test_org, "type") <- "organization" + mockery::stub( + github_testhost_priv$get_repos_urls_from_repos, + "private$set_owner_type", + test_org + ) + mockery::stub( + github_testhost_priv$get_repos_urls_from_repos, "rest_engine$get_repos_urls", test_mocker$use("gh_repos_urls"), depth = 2L ) + github_testhost_priv$searching_scope <- c("repo") + github_testhost_priv$orgs_repos <- list("test_org" = "TestRepo") + gh_repos_urls <- 
github_testhost_priv$get_repos_urls_from_repos( + type = "web", + verbose = FALSE, + progress = FALSE + ) + expect_gt(length(gh_repos_urls), 0) + expect_true(any(grepl("test-org", gh_repos_urls))) + expect_true(all(grepl("https://testhost.com/", gh_repos_urls))) + test_mocker$cache(gh_repos_urls) +}) + +test_that("get_all_repos_urls prepares web repo_urls vector", { + mockery::stub( + github_testhost_priv$get_all_repos_urls, + "private$get_repos_urls_from_orgs", + test_mocker$use("gh_repos_urls_from_orgs") + ) + mockery::stub( + github_testhost_priv$get_all_repos_urls, + "private$get_repos_urls_from_repos", + test_mocker$use("gh_repos_urls") + ) gh_repos_urls <- github_testhost_priv$get_all_repos_urls( type = "web", verbose = FALSE diff --git a/tests/testthat/test-get_urls_repos-GitLab.R b/tests/testthat/test-get_urls_repos-GitLab.R index 75f28141..17d4589b 100644 --- a/tests/testthat/test-get_urls_repos-GitLab.R +++ b/tests/testthat/test-get_urls_repos-GitLab.R @@ -1,12 +1,12 @@ test_that("get_repos_urls() works for org", { mockery::stub( test_rest_gitlab$get_repos_urls, - "self$response", + "private$paginate_results", test_fixtures$gitlab_repositories_rest_response ) gl_api_repos_urls <- test_rest_gitlab$get_repos_urls( type = "api", - org = "mbtests", + org = "test_org", repos = NULL ) expect_length( @@ -16,7 +16,7 @@ test_that("get_repos_urls() works for org", { test_mocker$cache(gl_api_repos_urls) gl_web_repos_urls <- test_rest_gitlab$get_repos_urls( type = "web", - org = "mbtests", + org = "test_org", repos = NULL ) expect_length( @@ -29,12 +29,12 @@ test_that("get_repos_urls() works for org", { test_that("get_repos_urls() works for individual repos", { mockery::stub( test_rest_gitlab$get_repos_urls, - "self$response", + "private$paginate_results", test_fixtures$gitlab_repositories_rest_response ) gl_api_repos_urls <- test_rest_gitlab$get_repos_urls( type = "api", - org = "mbtests", + org = "test_org", repos = c("testRepo1", "testRepo2") ) expect_length( @@ 
-54,44 +54,60 @@ test_that("get_repos_urls() works for individual repos", { test_mocker$cache(gl_web_repos_urls) }) -test_that("get_all_repos_urls prepares api repo_urls vector", { +test_that("get_repos_urls_from_orgs prepares api repo_urls vector", { mockery::stub( - gitlab_testhost_priv$get_all_repos_urls, + gitlab_testhost_priv$get_repos_urls_from_orgs, "rest_engine$get_repos_urls", test_mocker$use("gl_api_repos_urls") ) - gl_api_repos_urls <- gitlab_testhost_priv$get_all_repos_urls( + gitlab_testhost_priv$searching_scope <- "org" + gitlab_testhost_priv$orgs <- "test_org" + gl_api_repos_urls <- gitlab_testhost_priv$get_repos_urls_from_orgs( type = "api", - verbose = FALSE + verbose = FALSE, + progress = FALSE ) expect_gt(length(gl_api_repos_urls), 0) expect_true(all(grepl("api", gl_api_repos_urls))) test_mocker$cache(gl_api_repos_urls) +}) + +test_that("get_repos_urls_from_repos prepares api repo_urls vector", { mockery::stub( - gitlab_testhost_priv$get_all_repos_urls, + gitlab_testhost_priv$get_repos_urls_from_repos, "rest_engine$get_repos_urls", test_mocker$use("gl_web_repos_urls") ) - gl_web_repos_urls <- gitlab_testhost_priv$get_all_repos_urls( + gitlab_testhost_priv$searching_scope <- c("repo") + gitlab_testhost_priv$orgs_repos <- list("test_org" = "TestRepo") + gl_web_repos_urls <- gitlab_testhost_priv$get_repos_urls_from_repos( type = "web", - verbose = FALSE + verbose = FALSE, + progress = FALSE ) expect_gt(length(gl_web_repos_urls), 0) test_mocker$cache(gl_web_repos_urls) }) -test_that("get_all_repos_urls prepares web repo_urls vector", { + +test_that("get_all_repos_urls prepares api repo_urls vector", { mockery::stub( gitlab_testhost_priv$get_all_repos_urls, - "rest_engine$get_repos_urls", - test_mocker$use("gl_web_repos_urls") + "private$get_repos_urls_from_orgs", + test_mocker$use("gl_api_repos_urls") ) - gl_repos_urls <- gitlab_testhost_priv$get_all_repos_urls( - type = "web", + mockery::stub( + gitlab_testhost_priv$get_all_repos_urls, + 
"private$get_repos_urls_from_repos", + NULL + ) + gl_api_repos_urls <- gitlab_testhost_priv$get_all_repos_urls( + type = "api", verbose = FALSE ) - expect_gt(length(gl_repos_urls), 0) - expect_true(all(!grepl("api", gl_repos_urls))) + expect_true(all(grepl("api", gl_api_repos_urls))) + expect_gt(length(gl_api_repos_urls), 0) + test_mocker$cache(gl_api_repos_urls) }) test_that("`get_repo_url_from_response()` works", { From e470ace8ebd2917877ac31dd6bd783f1fdbfefbf Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Tue, 17 Dec 2024 12:16:33 +0000 Subject: [PATCH 36/99] Adjust get_files_content() to changes in scanning scope, remove possibility to pull non-text files, fix pagination when pulling GitLab files from org, some changes in style. In case of pagination NULL values were assigned to hasNextPage and endCursor. In case of removing possibility to pull content of non-text files - it seemed superfluous, made code very complex with custom methods for GitLab nota bene new REST method only for pulling content of non-text files (!). Pulling content should not end in pulling no content. 
--- R/EngineGraphQL.R | 2 +- R/EngineGraphQLGitHub.R | 20 +- R/EngineGraphQLGitLab.R | 26 +- R/GitHost.R | 302 +++++++++++++----- R/GitHostGitLab.R | 162 ++++------ R/GitStats.R | 17 +- R/gitstats_functions.R | 50 ++- R/global.R | 5 +- inst/get_files_workflow.R | 26 ++ man/get_files_content.Rd | 14 +- .../_snaps/get_files_structure-GitHub.md | 6 +- .../_snaps/get_files_structure-GitLab.md | 2 +- tests/testthat/helper-fixtures.R | 14 - .../testthat/test-get_files_content-GitHub.R | 88 +++-- .../testthat/test-get_files_content-GitLab.R | 37 +-- .../test-get_files_structure-GitHub.R | 11 +- .../test-get_files_structure-GitLab.R | 5 +- 17 files changed, 438 insertions(+), 349 deletions(-) diff --git a/R/EngineGraphQL.R b/R/EngineGraphQL.R index 7036b23a..c462180d 100644 --- a/R/EngineGraphQL.R +++ b/R/EngineGraphQL.R @@ -86,7 +86,7 @@ EngineGraphQL <- R6::R6Class( file_path <- host_files_structure[[org]][[repo]] } if (only_text_files) { - file_path <- file_path[!grepl(non_text_files_pattern, file_path)] + file_path <- file_path[grepl(text_files_pattern, file_path)] } return(file_path) } diff --git a/R/EngineGraphQLGitHub.R b/R/EngineGraphQLGitHub.R index 9c2a653f..fd596d15 100644 --- a/R/EngineGraphQLGitHub.R +++ b/R/EngineGraphQLGitHub.R @@ -175,9 +175,8 @@ EngineGraphQLGitHub <- R6::R6Class( get_files_from_org = function(org, type, repos, - file_paths, - host_files_structure, - only_text_files, + file_paths = NULL, + host_files_structure = NULL, verbose = TRUE, progress = TRUE) { repo_data <- private$get_repos_data( @@ -193,7 +192,6 @@ EngineGraphQLGitHub <- R6::R6Class( org = org, file_paths = file_paths, host_files_structure = host_files_structure, - only_text_files = only_text_files, progress = progress ) names(org_files_list) <- repositories @@ -230,10 +228,10 @@ EngineGraphQLGitHub <- R6::R6Class( # Pull all files from all repositories of an organization. 
get_files_structure_from_org = function(org, type, - repos, - pattern = NULL, - depth = Inf, - verbose = FALSE, + repos = NULL, + pattern = NULL, + depth = Inf, + verbose = FALSE, progress = TRUE) { repo_data <- private$get_repos_data( org = org, @@ -453,19 +451,17 @@ EngineGraphQLGitHub <- R6::R6Class( def_branches, org, host_files_structure, - only_text_files, file_paths, progress) { purrr::map2(repositories, def_branches, function(repo, def_branch) { if (!is.null(host_files_structure)) { file_paths <- private$get_path_from_files_structure( host_files_structure = host_files_structure, - only_text_files = only_text_files, org = org, repo = repo ) - } else if (is.null(host_files_structure) && only_text_files) { - file_paths <- file_paths[!grepl(non_text_files_pattern, file_paths)] + } else if (is.null(host_files_structure)) { + file_paths <- file_paths[grepl(text_files_pattern, file_paths)] } repo_files_list <- purrr::map(file_paths, function(file_path) { private$get_file_response( diff --git a/R/EngineGraphQLGitLab.R b/R/EngineGraphQLGitLab.R index 13667721..7b23c6e6 100644 --- a/R/EngineGraphQLGitLab.R +++ b/R/EngineGraphQLGitLab.R @@ -124,8 +124,8 @@ EngineGraphQLGitLab <- R6::R6Class( get_files_from_org = function(org, type, repos, - file_paths, - host_files_structure, + file_paths = NULL, + host_files_structure = NULL, only_text_files, verbose = FALSE, progress = FALSE) { @@ -139,8 +139,8 @@ EngineGraphQLGitLab <- R6::R6Class( only_text_files = only_text_files, org = org ) - } else if (is.null(host_files_structure) && only_text_files) { - file_paths <- file_paths[!grepl(non_text_files_pattern, file_paths)] + } else if (only_text_files) { + file_paths <- file_paths[grepl(text_files_pattern, file_paths)] } if (type == "organization") { while (next_page) { @@ -194,13 +194,13 @@ EngineGraphQLGitLab <- R6::R6Class( purrr::discard(~ length(.$repository$blobs$nodes) == 0) if (is.null(files_list)) files_list <- list() if (length(files_list) > 0) { - next_page <- 
files_response$pageInfo$hasNextPage + next_page <- projects$pageInfo$hasNextPage } else { next_page <- FALSE } if (is.null(next_page)) next_page <- FALSE if (next_page) { - end_cursor <- files_response$pageInfo$endCursor + end_cursor <- projects$pageInfo$endCursor } else { end_cursor <- "" } @@ -250,16 +250,16 @@ EngineGraphQLGitLab <- R6::R6Class( if (!is.null(host_files_structure)) { file_paths <- private$get_path_from_files_structure( host_files_structure = host_files_structure, - only_text_files = only_text_files, - org = org, - repo = repo + only_text_files = only_text_files, + org = org, + repo = repo ) } files_response <- tryCatch( { private$get_file_blobs_response( - org = org, - repo = repo, + org = org, + repo = repo, file_paths = file_paths ) }, @@ -272,7 +272,7 @@ EngineGraphQLGitLab <- R6::R6Class( }, # Prepare files table. - prepare_files_table = function(files_response, org, file_path) { + prepare_files_table = function(files_response, org) { if (!is.null(files_response)) { if (private$response_prepared_by_iteration(files_response)) { files_table <- purrr::map(files_response, function(response_data) { @@ -315,7 +315,7 @@ EngineGraphQLGitLab <- R6::R6Class( get_files_structure_from_org = function(org, type, - repos, + repos = NULL, pattern = NULL, depth = Inf, verbose = TRUE, diff --git a/R/GitHost.R b/R/GitHost.R index fd81880c..023f2a99 100644 --- a/R/GitHost.R +++ b/R/GitHost.R @@ -168,20 +168,35 @@ GitHost <- R6::R6Class( #' a table format. 
get_files_content = function(file_path, host_files_structure = NULL, - only_text_files = TRUE, - verbose = TRUE, - progress = TRUE) { - files_table <- if (!private$scan_all) { - private$get_files_content_from_orgs( - file_path = file_path, + verbose = TRUE, + progress = TRUE) { + if (is.null(host_files_structure)) { + if (!private$scan_all) { + files_content_from_orgs <- private$get_files_content_from_orgs( + file_path = file_path, + verbose = verbose, + progress = progress + ) + files_content_from_repos <- private$get_files_content_from_repos( + file_path = file_path, + verbose = verbose, + progress = progress + ) + files_table <- rbind( + files_content_from_orgs, + files_content_from_repos + ) + } else { + files_table <- private$get_files_content_from_host( + file_path = file_path, + verbose = verbose, + progress = progress + ) + } + } + if (!is.null(host_files_structure)) { + files_table <- private$get_files_content_from_files_structure( host_files_structure = host_files_structure, - only_text_files = only_text_files, - verbose = verbose, - progress = progress - ) - } else { - private$get_files_content_from_host( - file_path = file_path, verbose = verbose, progress = progress ) @@ -200,12 +215,22 @@ GitHost <- R6::R6Class( "i" = "Set `orgs` or `repos` arguments in `set_*_host()` if you wish to run this function." 
), call = NULL) } - files_structure <- private$get_files_structure_from_orgs( - pattern = pattern, - depth = depth, - verbose = verbose, + files_structure_from_orgs <- private$get_files_structure_from_orgs( + pattern = pattern, + depth = depth, + verbose = verbose, + progress = progress + ) + files_structure_from_repos <- private$get_files_structure_from_repos( + pattern = pattern, + depth = depth, + verbose = verbose, progress = progress ) + files_structure <- append( + files_structure_from_orgs %||% list(), + files_structure_from_repos %||% list() + ) return(files_structure) }, @@ -473,7 +498,7 @@ GitHost <- R6::R6Class( cli::cli_abort( c( "x" = "{type} you provided does not exist or its name was passed - in a wrong way: {cli::col_red({endpoint})}", + in a wrong way: {cli::col_red({utils::URLdecode(endpoint)})}", "!" = "Please type your {tolower(type)} name as you see it in web URL.", "i" = "E.g. do not use spaces. {type} names as you see on the @@ -915,55 +940,115 @@ GitHost <- R6::R6Class( # Pull files content from organizations get_files_content_from_orgs = function(file_path, - host_files_structure = NULL, - only_text_files = TRUE, - verbose = TRUE, - progress = TRUE) { + verbose = TRUE, + progress = TRUE) { + if ("org" %in% private$searching_scope) { + graphql_engine <- private$engines$graphql + files_table <- purrr::map(private$orgs, function(org) { + if (verbose) { + show_message( + host = private$host_name, + engine = "graphql", + scope = org, + information = glue::glue("Pulling files content: [{paste0(file_path, collapse = ', ')}]") + ) + } + type <- attr(org, "type") %||% "organization" + graphql_engine$get_files_from_org( + org = org, + type = type, + repos = NULL, + file_paths = file_path, + verbose = verbose, + progress = progress + ) |> + graphql_engine$prepare_files_table( + org = org, + file_path = file_path + ) + }) |> + purrr::list_rbind() |> + private$add_repo_api_url() + return(files_table) + } + }, + + # Pull files content from organizations 
+ get_files_content_from_repos = function(file_path, + verbose = TRUE, + progress = TRUE) { + if ("repo" %in% private$searching_scope) { + graphql_engine <- private$engines$graphql + orgs <- private$set_owner_type( + owners = names(private$orgs_repos) + ) + files_table <- purrr::map(orgs, function(org) { + if (verbose) { + show_message( + host = private$host_name, + engine = "graphql", + scope = org, + information = glue::glue("Pulling files content: [{paste0(file_path, collapse = ', ')}]") + ) + } + type <- attr(org, "type") %||% "organization" + graphql_engine$get_files_from_org( + org = org, + type = type, + repos = private$orgs_repos[[org]], + file_paths = file_path, + verbose = verbose, + progress = progress + ) |> + graphql_engine$prepare_files_table( + org = org, + file_path = file_path + ) + }) |> + purrr::list_rbind() |> + private$add_repo_api_url() + return(files_table) + } + }, + + get_files_content_from_files_structure = function(host_files_structure, + verbose = TRUE, + progress = TRUE) { graphql_engine <- private$engines$graphql - if (!is.null(host_files_structure)) { - if (verbose) { - cli::cli_alert_info(cli::col_green("I will make use of files structure stored in GitStats.")) - } - result <- private$get_orgs_and_repos_from_files_structure( - host_files_structure = host_files_structure + if (verbose) { + cli::cli_alert_info( + cli::col_green("I will make use of files structure stored in GitStats.") ) - orgs <- result$orgs - repos <- result$repos - } else { - orgs <- private$orgs - repos <- private$repos } + result <- private$get_orgs_and_repos_from_files_structure( + host_files_structure = host_files_structure + ) + orgs <- result$orgs + repos <- result$repos files_table <- purrr::map(orgs, function(org) { if (verbose) { - user_msg <- if (!is.null(host_files_structure)) { - "Pulling files from files structure" - } else { - glue::glue("Pulling files content: [{paste0(file_path, collapse = ', ')}]") - } show_message( - host = private$host_name, - 
engine = "graphql", - scope = org, - information = user_msg + host = private$host_name, + engine = "graphql", + scope = org, + information = "Pulling files from files structure" ) } type <- attr(org, "type") %||% "organization" graphql_engine$get_files_from_org( - org = org, - type = type, - repos = repos, - file_paths = file_path, + org = org, + type = type, + repos = repos, host_files_structure = host_files_structure, - only_text_files = only_text_files, - verbose = verbose, - progress = progress - ) %>% + verbose = verbose, + progress = progress + ) |> graphql_engine$prepare_files_table( - org = org, + org = org, file_path = file_path ) - }) %>% - purrr::list_rbind() %>% + }) |> + purrr::list_rbind() |> private$add_repo_api_url() return(files_table) }, @@ -980,43 +1065,92 @@ GitHost <- R6::R6Class( depth, verbose = TRUE, progress = TRUE) { - graphql_engine <- private$engines$graphql - files_structure_list <- purrr::map(private$orgs, function(org) { - if (verbose) { - user_info <- if (!is.null(pattern)) { - glue::glue("Pulling files structure...[files matching pattern: '{pattern}']") - } else { - glue::glue("Pulling files structure...") + if ("org" %in% private$searching_scope) { + graphql_engine <- private$engines$graphql + files_structure_list <- purrr::map(private$orgs, function(org) { + if (verbose) { + user_info <- if (!is.null(pattern)) { + glue::glue("Pulling files structure...[files matching pattern: '{pattern}']") + } else { + glue::glue("Pulling files structure...") + } + show_message( + host = private$host_name, + engine = "graphql", + scope = org, + information = user_info + ) } - show_message( - host = private$host_name, - engine = "graphql", - scope = org, - information = user_info + type <- attr(org, "type") %||% "organization" + graphql_engine$get_files_structure_from_org( + org = org, + type = type, + pattern = pattern, + depth = depth, + verbose = verbose, + progress = progress + ) + }) + names(files_structure_list) <- private$orgs + 
files_structure_list <- files_structure_list %>% + purrr::discard(~ length(.) == 0) + if (length(files_structure_list) == 0 && verbose) { + cli::cli_alert_warning( + cli::col_yellow( + "For {private$host_name} no files structure found." + ) ) } - type <- attr(org, "type") %||% "organization" - graphql_engine$get_files_structure_from_org( - org = org, - type = type, - repos = private$repos, - pattern = pattern, - depth = depth, - verbose = verbose, - progress = progress + return(files_structure_list) + } + }, + + get_files_structure_from_repos = function(pattern, + depth, + verbose = TRUE, + progress = TRUE) { + if ("repo" %in% private$searching_scope) { + graphql_engine <- private$engines$graphql + orgs <- private$set_owner_type( + owners = names(private$orgs_repos) ) - }) - names(files_structure_list) <- private$orgs - files_structure_list <- files_structure_list %>% - purrr::discard(~ length(.) == 0) - if (length(files_structure_list) == 0 && verbose) { - cli::cli_alert_warning( - cli::col_yellow( - "For {private$host_name} no files structure found." + files_structure_list <- purrr::map(orgs, function(org) { + if (verbose) { + user_info <- if (!is.null(pattern)) { + glue::glue("Pulling files structure...[files matching pattern: '{pattern}']") + } else { + glue::glue("Pulling files structure...") + } + show_message( + host = private$host_name, + engine = "graphql", + scope = org, + information = user_info + ) + } + type <- attr(org, "type") %||% "organization" + graphql_engine$get_files_structure_from_org( + org = org, + type = type, + repos = private$repos, + pattern = pattern, + depth = depth, + verbose = verbose, + progress = progress ) - ) + }) + names(files_structure_list) <- orgs + files_structure_list <- files_structure_list %>% + purrr::discard(~ length(.) == 0) + if (length(files_structure_list) == 0 && verbose) { + cli::cli_alert_warning( + cli::col_yellow( + "For {private$host_name} no files structure found." 
+ ) + ) + } + return(files_structure_list) } - return(files_structure_list) }, # Pull files from host diff --git a/R/GitHostGitLab.R b/R/GitHostGitLab.R index 8ebbdcc5..e5370724 100644 --- a/R/GitHostGitLab.R +++ b/R/GitHostGitLab.R @@ -21,56 +21,6 @@ GitHostGitLab <- R6::R6Class("GitHostGitLab", if (verbose) { cli::cli_alert_success("Set connection to GitLab.") } - }, - - # Retrieve content of given text files from all repositories for a host in - # a table format. - get_files_content = function(file_path, - host_files_structure = NULL, - only_text_files = TRUE, - verbose = TRUE, - progress = TRUE) { - if (!private$scan_all && private$are_non_text_files(file_path, host_files_structure)) { - if (only_text_files) { - files_table <- private$get_files_content_from_orgs( - file_path = file_path, - host_files_structure = host_files_structure, - only_text_files = only_text_files, - verbose = verbose, - progress = progress - ) - } else { - text_files_table <- private$get_files_content_from_orgs( - file_path = file_path, - host_files_structure = host_files_structure, - only_text_files = TRUE, - verbose = verbose, - progress = progress - ) - non_text_files_table <- private$get_files_content_from_orgs_via_rest( - file_path = file_path, - host_files_structure = host_files_structure, - clean_files_content = FALSE, - only_non_text_files = TRUE, - verbose = verbose, - progress = progress - ) - files_table <- purrr::list_rbind( - list( - text_files_table, - non_text_files_table - ) - ) - } - } else { - files_table <- super$get_files_content( - file_path = file_path, - host_files_structure = host_files_structure, - verbose = verbose, - progress = progress - ) - } - return(files_table) } ), private = list( @@ -331,56 +281,82 @@ GitHostGitLab <- R6::R6Class("GitHostGitLab", } }, - # Pull files from orgs via rest - get_files_content_from_orgs_via_rest = function(file_path, - host_files_structure, - only_non_text_files, - clean_files_content, - verbose, - progress) { - rest_engine <- 
private$engines$rest - if (!is.null(host_files_structure)) { - if (verbose) { - cli::cli_alert_info(cli::col_green("I will make use of files structure stored in GitStats.")) - } - result <- private$get_orgs_and_repos_from_files_structure( - host_files_structure = host_files_structure + # Pull files content from organizations + get_files_content_from_repos = function(file_path, + verbose = TRUE, + progress = TRUE) { + if ("repo" %in% private$searching_scope) { + graphql_engine <- private$engines$graphql + orgs <- private$set_owner_type( + owners = names(private$orgs_repos) ) - orgs <- result$orgs - repos <- result$repos - } else { - orgs <- private$orgs - repos <- private$repos + files_table <- purrr::map(orgs, function(org) { + if (verbose) { + show_message( + host = private$host_name, + engine = "graphql", + scope = org, + information = glue::glue("Pulling files content: [{paste0(file_path, collapse = ', ')}]") + ) + } + type <- attr(org, "type") %||% "organization" + graphql_engine$get_files_from_org_per_repo( + org = org, + type = type, + repos = private$orgs_repos[[org]], + file_paths = file_path, + verbose = verbose, + progress = progress + ) |> + graphql_engine$prepare_files_table( + org = org, + file_path = file_path + ) + }) |> + purrr::list_rbind() |> + private$add_repo_api_url() + return(files_table) } + }, + + get_files_content_from_files_structure = function(host_files_structure, + verbose = TRUE, + progress = TRUE) { + graphql_engine <- private$engines$graphql if (verbose) { - user_msg <- if (!is.null(host_files_structure)) { - "Pulling files from files structure" - } else { - glue::glue("Pulling files content: [{paste0(file_path, collapse = ', ')}]") - } - show_message( - host = private$host_name, - engine = "rest", - information = user_msg + cli::cli_alert_info( + cli::col_green("I will make use of files structure stored in GitStats.") ) } + result <- private$get_orgs_and_repos_from_files_structure( + host_files_structure = host_files_structure + ) + 
orgs <- result$orgs + repos <- result$repos files_table <- purrr::map(orgs, function(org) { - if (!is.null(host_files_structure)) { - file_path <- host_files_structure[[org]] %>% unlist(use.names = FALSE) %>% unique() - } - if (only_non_text_files) { - file_path <- file_path[grepl(non_text_files_pattern, file_path)] + if (verbose) { + show_message( + host = private$host_name, + engine = "graphql", + scope = org, + information = "Pulling files from files structure" + ) } - files_table <- rest_engine$get_files( - file_paths = file_path, - clean_files_content = clean_files_content, - org = org, - verbose = FALSE, - progress = progress - ) %>% - rest_engine$prepare_files_table() - }, .progress = progress) %>% - purrr::list_rbind() %>% + type <- attr(org, "type") %||% "organization" + graphql_engine$get_files_from_org_per_repo( + org = org, + type = type, + repos = repos, + host_files_structure = host_files_structure, + verbose = verbose, + progress = progress + ) |> + graphql_engine$prepare_files_table( + org = org, + file_path = file_path + ) + }) |> + purrr::list_rbind() |> private$add_repo_api_url() return(files_table) } diff --git a/R/GitStats.R b/R/GitStats.R index a58518b8..58dd8070 100644 --- a/R/GitStats.R +++ b/R/GitStats.R @@ -194,14 +194,12 @@ GitStats <- R6::R6Class( get_files_content = function(file_path = NULL, use_files_structure = TRUE, - only_text_files = TRUE, cache = TRUE, verbose = TRUE, progress = verbose) { private$check_for_host() args_list <- list("file_path" = file_path, - "use_files_structure" = use_files_structure, - "only_text_files" = only_text_files) + "use_files_structure" = use_files_structure) trigger <- private$trigger_pulling( cache = cache, storage = "files", @@ -212,7 +210,6 @@ GitStats <- R6::R6Class( files <- private$get_files_content_from_hosts( file_path = file_path, use_files_structure = use_files_structure, - only_text_files = only_text_files, verbose = verbose, progress = progress ) %>% @@ -707,7 +704,6 @@ GitStats <- 
R6::R6Class( # Pull content of a text file in a table form get_files_content_from_hosts = function(file_path, use_files_structure, - only_text_files, verbose, progress) { purrr::map(private$hosts, function(host) { @@ -729,11 +725,10 @@ GitStats <- R6::R6Class( NULL } else { host$get_files_content( - file_path = file_path, + file_path = file_path, host_files_structure = host_files_structure, - only_text_files = only_text_files, - verbose = verbose, - progress = progress + verbose = verbose, + progress = progress ) } }) %>% @@ -1021,13 +1016,13 @@ GitStats <- R6::R6Class( print_orgs_and_repos = function() { orgs <- purrr::map(private$hosts, function(host) { host_priv <- environment(host$initialize)$private - if (host_priv$searching_scope == "org") { + if ("org" %in% host_priv$searching_scope) { orgs <- host_priv$orgs } }) repos <- purrr::map(private$hosts, function(host) { host_priv <- environment(host$initialize)$private - if (host_priv$searching_scope == "repo") { + if ("repo" %in% host_priv$searching_scope) { repos <- host_priv$repos_fullnames } }) diff --git a/R/gitstats_functions.R b/R/gitstats_functions.R index fb95e945..f858f05d 100644 --- a/R/gitstats_functions.R +++ b/R/gitstats_functions.R @@ -283,16 +283,15 @@ get_users <- function(gitstats, cache = TRUE, verbose = is_verbose(gitstats)) { gitstats$get_users( - logins = logins, - cache = cache, + logins = logins, + cache = cache, verbose = verbose ) } #' @title Get content of files #' @name get_files_content -#' @description Pull text files content for a given scope (orgs, repos or whole -#' git hosts). +#' @description Pulls content of text files. #' @param gitstats A GitStats object. #' @param file_path Optional. A standardized path to file(s) in repositories. #' May be a character vector if multiple files are to be pulled. If set to @@ -303,9 +302,6 @@ get_users <- function(gitstats, #' `get_files_structure()` function and kept in storage. 
If there is no #' `files_structure` in storage, an error will be returned. If `file_path` is #' defined, it will override `use_files_structure` parameter. -#' @param only_text_files A logical, `TRUE` by default. If set to `FALSE`, apart -#' from files with text content shows in table output also non-text files with -#' `NA` value for text content. #' @param cache A logical, if set to `TRUE` GitStats will retrieve the last #' result from its storage. #' @param verbose A logical, `TRUE` by default. If `FALSE` messages and printing @@ -324,37 +320,35 @@ get_users <- function(gitstats, #' orgs = "mbtests" #' ) #' get_files_content( -#' gitstats_obj = my_gitstats, +#' gitstats = my_gitstats, #' file_path = c("LICENSE", "DESCRIPTION") #' ) #' #' # example with files structure #' files_structure <- get_files_structure( -#' gitstats_obj = my_gitstats, +#' gitstats = my_gitstats, #' pattern = "\\.Rmd", #' depth = 2L #' ) #' # get_files_content() will make use of pulled earlier files structure #' files_content <- get_files_content( -#' gitstats_obj = my_gitstats +#' gitstats = my_gitstats #' ) #' } #' @return A data.frame. #' @export get_files_content <- function(gitstats, - file_path = NULL, + file_path = NULL, use_files_structure = TRUE, - only_text_files = TRUE, - cache = TRUE, - verbose = is_verbose(gitstats), - progress = verbose) { + cache = TRUE, + verbose = is_verbose(gitstats), + progress = verbose) { gitstats$get_files_content( - file_path = file_path, + file_path = file_path, use_files_structure = use_files_structure, - only_text_files = only_text_files, - cache = cache, - verbose = verbose, - progress = progress + cache = cache, + verbose = verbose, + progress = progress ) } @@ -393,16 +387,16 @@ get_files_content <- function(gitstats, #' @return A list of vectors. 
#' @export get_files_structure <- function(gitstats, - pattern = NULL, - depth = Inf, - cache = TRUE, - verbose = is_verbose(gitstats), + pattern = NULL, + depth = Inf, + cache = TRUE, + verbose = is_verbose(gitstats), progress = verbose) { gitstats$get_files_structure( - pattern = pattern, - depth = depth, - cache = cache, - verbose = verbose, + pattern = pattern, + depth = depth, + cache = cache, + verbose = verbose, progress = progress ) } diff --git a/R/global.R b/R/global.R index df3db824..26726851 100644 --- a/R/global.R +++ b/R/global.R @@ -7,4 +7,7 @@ globalVariables(c( "contributors_n", "githost" )) -non_text_files_pattern <- "\\.(png||.jpg||.jpeg||.bmp||.gif||.tiff)$" +text_ext_files <- "\\.(txt|md|qmd|Rmd|markdown|yaml|yml|csv|json|xml|html|htm|css|js|r|py|sh|bat|ini|conf|log|sql|tsv|mdx)$" +no_ext_files <- "^[^\\.]+$" + +text_files_pattern <- paste0("(", text_ext_files, "|", no_ext_files, ")") diff --git a/inst/get_files_workflow.R b/inst/get_files_workflow.R index 8b7d2eff..6aa09d4d 100644 --- a/inst/get_files_workflow.R +++ b/inst/get_files_workflow.R @@ -13,6 +13,32 @@ get_files_content( file_path = c("LICENSE", "DESCRIPTION") ) +github_stats <- create_gitstats() |> + set_github_host( + orgs = c("r-world-devs"), + repos = "openpharma/DataFakeR" + ) |> + set_gitlab_host( + repos = "mbtests/graphql_tests" + ) + +github_stats + +get_files_content( + gitstats = github_stats, + file_path = "DESCRIPTION" +) + +datafaker_stats <- create_gitstats() |> + set_github_host( + repos = "openpharma/DataFakeR" + ) + +get_files_content( + gitstats = datafaker_stats, + file_path = "DESCRIPTION" +) + md_files_structure <- get_files_structure( gitstats = test_gitstats, pattern = "\\.md|.R", diff --git a/man/get_files_content.Rd b/man/get_files_content.Rd index 1aa1a830..f2acc20e 100644 --- a/man/get_files_content.Rd +++ b/man/get_files_content.Rd @@ -8,7 +8,6 @@ get_files_content( gitstats, file_path = NULL, use_files_structure = TRUE, - only_text_files = TRUE, cache = 
TRUE, verbose = is_verbose(gitstats), progress = verbose @@ -28,10 +27,6 @@ will try to pull data from \code{files_structure} (see below).} \code{files_structure} in storage, an error will be returned. If \code{file_path} is defined, it will override \code{use_files_structure} parameter.} -\item{only_text_files}{A logical, \code{TRUE} by default. If set to \code{FALSE}, apart -from files with text content shows in table output also non-text files with -\code{NA} value for text content.} - \item{cache}{A logical, if set to \code{TRUE} GitStats will retrieve the last result from its storage.} @@ -45,8 +40,7 @@ output is switched off.} A data.frame. } \description{ -Pull text files content for a given scope (orgs, repos or whole -git hosts). +Pulls content of text files. } \examples{ \dontrun{ @@ -60,19 +54,19 @@ git hosts). orgs = "mbtests" ) get_files_content( - gitstats_obj = my_gitstats, + gitstats = my_gitstats, file_path = c("LICENSE", "DESCRIPTION") ) # example with files structure files_structure <- get_files_structure( - gitstats_obj = my_gitstats, + gitstats = my_gitstats, pattern = "\\\\.Rmd", depth = 2L ) # get_files_content() will make use of pulled earlier files structure files_content <- get_files_content( - gitstats_obj = my_gitstats + gitstats = my_gitstats ) } } diff --git a/tests/testthat/_snaps/get_files_structure-GitHub.md b/tests/testthat/_snaps/get_files_structure-GitHub.md index 9a747432..1a29ddd2 100644 --- a/tests/testthat/_snaps/get_files_structure-GitHub.md +++ b/tests/testthat/_snaps/get_files_structure-GitHub.md @@ -8,11 +8,9 @@ # when files_structure is empty, appropriate message is returned Code - github_testhost_priv$get_files_structure_from_orgs(pattern = "\\.png", depth = 1L, + github_testhost_priv$get_files_structure_from_repos(pattern = "\\.png", depth = 1L, verbose = TRUE) Message - i [Host:GitHub][Engine:GraphQl][Scope:r-world-devs] Pulling files structure...[files matching pattern: '\.png']... 
- i [Host:GitHub][Engine:GraphQl][Scope:openpharma] Pulling files structure...[files matching pattern: '\.png']... ! For GitHub no files structure found. Output named list() @@ -20,7 +18,7 @@ # get_files_content makes use of files_structure Code - files_content <- github_testhost_priv$get_files_content_from_orgs(file_path = NULL, + files_content <- github_testhost_priv$get_files_content_from_files_structure( host_files_structure = test_mocker$use("gh_files_structure_from_orgs")) Message i I will make use of files structure stored in GitStats. diff --git a/tests/testthat/_snaps/get_files_structure-GitLab.md b/tests/testthat/_snaps/get_files_structure-GitLab.md index 342d0ead..6f4c26ad 100644 --- a/tests/testthat/_snaps/get_files_structure-GitLab.md +++ b/tests/testthat/_snaps/get_files_structure-GitLab.md @@ -8,7 +8,7 @@ # get_files_content makes use of files_structure Code - files_content <- gitlab_testhost_priv$get_files_content_from_orgs(file_path = NULL, + files_content <- gitlab_testhost_priv$get_files_content_from_files_structure( host_files_structure = test_mocker$use("gl_files_structure_from_orgs")) Message i I will make use of files structure stored in GitStats. 
diff --git a/tests/testthat/helper-fixtures.R b/tests/testthat/helper-fixtures.R index e80d3e7f..8d7e1ee8 100644 --- a/tests/testthat/helper-fixtures.R +++ b/tests/testthat/helper-fixtures.R @@ -419,20 +419,6 @@ test_fixtures$gitlab_file_repo_response <- list( ) ) -test_fixtures$github_png_file_response <- list( - "data" = list( - "repository" = list( - "repo_id" = "01010101", - "repo_name" = "TestProject", - "repo_url" = "https://github.com/r-world-devs/GitStats", - "file" = list( - "text" = NULL, - "byteSize" = 50L - ) - ) - ) -) - test_fixtures$gitlab_search_response <- list( list( "basename" = "test", diff --git a/tests/testthat/test-get_files_content-GitHub.R b/tests/testthat/test-get_files_content-GitHub.R index ec9183c8..886eb3bb 100644 --- a/tests/testthat/test-get_files_content-GitHub.R +++ b/tests/testthat/test-get_files_content-GitHub.R @@ -47,23 +47,6 @@ test_that("GitHub GraphQL Engine pulls file response", { test_mocker$cache(github_file_response) }) -test_that("GitHub GraphQL Engine pulls png file response", { - mockery::stub( - test_graphql_github_priv$get_file_response, - "self$gql_response", - test_fixtures$github_png_file_response - ) - github_png_file_response <- test_graphql_github_priv$get_file_response( - org = "r-world-devs", - repo = "GitStats", - def_branch = "master", - file_path = "man/figures/logo.png", - files_query = test_mocker$use("gh_file_blobs_from_repo_query") - ) - expect_github_files_response(github_png_file_response) - test_mocker$cache(github_png_file_response) -}) - test_that("get_repositories_with_files works", { mockery::stub( test_graphql_github_priv$get_repositories_with_files, @@ -73,11 +56,10 @@ test_that("get_repositories_with_files works", { gh_repositories_with_files <- test_graphql_github_priv$get_repositories_with_files( repositories = c("test_repo_1", "test_repo_2", "test_repo_3", "test_repo_4", "test_repo_5"), def_branches = c("test_branch_1", "test_branch_2", "test_branch_3", "test_branch_4", "test_branch_5"), 
- org = "test-org", - file_paths = "test_files", - only_text_files = TRUE, + org = "test_org", + file_paths = "test_files.txt", host_files_structure = NULL, - progress = FALSE + progress = FALSE ) expect_type(gh_repositories_with_files, "list") test_mocker$cache(gh_repositories_with_files) @@ -95,13 +77,12 @@ test_that("GitHub GraphQL Engine pulls files from organization", { test_mocker$use("gh_repositories_with_files") ) github_files_response <- test_graphql_github$get_files_from_org( - org = "test_org", - repos = NULL, - file_paths = "test_files", - only_text_files = TRUE, + org = "test_org", + repos = NULL, + file_paths = "test_files.txt", host_files_structure = NULL, - verbose = FALSE, - progress = FALSE + verbose = FALSE, + progress = FALSE ) expect_github_files_response(github_files_response) test_mocker$cache(github_files_response) @@ -117,44 +98,59 @@ test_that("GitHubHost prepares table from files response", { test_mocker$cache(gh_files_table) }) -test_that("GitHubHost prepares table from files with no content", { - empty_files_response <- test_mocker$use("github_files_response") %>% - purrr::map(function(test_repo) { - test_repo$test_files$file$text <- NULL - return(test_repo) - }) - gh_empty_files_table <- test_graphql_github$prepare_files_table( - files_response = empty_files_response, - org = "test_org", - file_path = "test_files" +test_that("get_files_content_from_orgs for GitHub works", { + mockery::stub( + github_testhost_priv$get_files_content_from_orgs, + "graphql_engine$prepare_files_table", + test_mocker$use("gh_files_table") + ) + github_testhost_priv$searching_scope <- "org" + gh_files_from_orgs <- github_testhost_priv$get_files_content_from_orgs( + file_path = "DESCRIPTION", + verbose = FALSE ) - expect_files_table(gh_empty_files_table) - expect_true(all(is.na(gh_empty_files_table$file_content))) - test_mocker$cache(gh_empty_files_table) + expect_files_table( + gh_files_from_orgs, + with_cols = "api_url" + ) + 
test_mocker$cache(gh_files_from_orgs) }) -test_that("get_files_content_from_orgs for GitHub works", { +test_that("get_files_content_from_repos for GitHub works", { + test_org <- "test_org" + attr(test_org, "type") <- "organization" mockery::stub( - github_testhost_priv$get_files_content_from_orgs, + github_testhost_priv$get_files_content_from_repos, + "private$set_owner_type", + test_org + ) + mockery::stub( + github_testhost_priv$get_files_content_from_repos, "graphql_engine$prepare_files_table", test_mocker$use("gh_files_table") ) - gh_files_table <- github_testhost_priv$get_files_content_from_orgs( + github_testhost_priv$searching_scope <- "repo" + gh_files_from_repos <- github_testhost_priv$get_files_content_from_repos( file_path = "DESCRIPTION", verbose = FALSE ) expect_files_table( - gh_files_table, + gh_files_from_repos, with_cols = "api_url" ) - test_mocker$cache(gh_files_table) + test_mocker$cache(gh_files_from_repos) }) test_that("`get_files_content()` pulls files in the table format", { mockery::stub( github_testhost$get_files_content, "private$get_files_content_from_orgs", - test_mocker$use("gh_files_table") + test_mocker$use("gh_files_from_orgs") + ) + mockery::stub( + github_testhost$get_files_content, + "private$get_files_content_from_repos", + test_mocker$use("gh_files_from_repos") ) gh_files_table <- github_testhost$get_files_content( file_path = "DESCRIPTION" diff --git a/tests/testthat/test-get_files_content-GitLab.R b/tests/testthat/test-get_files_content-GitLab.R index 1c84606a..3aeeff65 100644 --- a/tests/testthat/test-get_files_content-GitLab.R +++ b/tests/testthat/test-get_files_content-GitLab.R @@ -78,15 +78,16 @@ test_that("GitLab GraphQL Engine pulls files from org by iterating over repos", "private$get_file_blobs_response", test_mocker$use("gl_file_blobs_response") ) - gl_files_from_org <- test_graphql_gitlab$get_files_from_org_per_repo( - org = "mbtests", - repos = "graphql_tests", + gl_files_from_org_per_repo <- 
test_graphql_gitlab$get_files_from_org_per_repo( + org = "test_org", + repos = "TestProject", file_paths = c("project_metadata.yaml", "README.md") ) expect_gitlab_files_from_org_by_repos_response( - response = gl_files_from_org, + response = gl_files_from_org_per_repo, expected_files = c("project_metadata.yaml", "README.md") ) + test_mocker$cache(gl_files_from_org_per_repo) }) test_that("is query error is FALSE when response is empty (non query error)", { @@ -111,11 +112,16 @@ test_that("Gitlab GraphQL switches to pulling files per repositories when query "private$is_complexity_error", TRUE ) + mockery::stub( + test_graphql_gitlab$get_files_from_org, + "self$get_files_from_org_per_repo", + test_mocker$use("gl_files_from_org_per_repo") + ) gitlab_files_response_by_repos <- test_graphql_gitlab$get_files_from_org( org = "mbtests", type = "organization", repos = NULL, - file_paths = c("DESCRIPTION", "project_metadata.yaml", "README.md"), + file_paths = c("project_metadata.yaml", "README.md"), host_files_structure = NULL, only_text_files = TRUE, verbose = FALSE, @@ -123,7 +129,7 @@ test_that("Gitlab GraphQL switches to pulling files per repositories when query ) expect_gitlab_files_from_org_by_repos_response( response = gitlab_files_response_by_repos, - expected_files = c("DESCRIPTION", "project_metadata.yaml", "README.md") + expected_files = c("project_metadata.yaml", "README.md") ) test_mocker$cache(gitlab_files_response_by_repos) }) @@ -144,8 +150,7 @@ test_that("checker properly identifies gitlab files responses", { test_that("GitLab prepares table from files response", { gl_files_table <- test_graphql_gitlab$prepare_files_table( files_response = test_mocker$use("gitlab_files_response"), - org = "mbtests", - file_path = "meta_data.yaml" + org = "mbtests" ) expect_files_table(gl_files_table) test_mocker$cache(gl_files_table) @@ -154,8 +159,7 @@ test_that("GitLab prepares table from files response", { test_that("GitLab prepares table from files response prepared in 
alternative way", { gl_files_table <- test_graphql_gitlab$prepare_files_table( files_response = test_mocker$use("gitlab_files_response_by_repos"), - org = "mbtests", - file_path = "meta_data.yaml" + org = "mbtests" ) expect_files_table(gl_files_table) }) @@ -177,16 +181,3 @@ test_that("get_files_content_from_orgs for GitLab works", { ) test_mocker$cache(gl_files_table) }) - -test_that("`get_files_content()` pulls files in the table format", { - mockery::stub( - gitlab_testhost$get_files_content, - "super$get_files_content", - test_mocker$use("gl_files_table") - ) - gl_files_table <- gitlab_testhost$get_files_content( - file_path = "README.md" - ) - expect_files_table(gl_files_table, with_cols = "api_url") - test_mocker$cache(gl_files_table) -}) diff --git a/tests/testthat/test-get_files_structure-GitHub.R b/tests/testthat/test-get_files_structure-GitHub.R index 51dfad59..ed0ba9f8 100644 --- a/tests/testthat/test-get_files_structure-GitHub.R +++ b/tests/testthat/test-get_files_structure-GitHub.R @@ -163,6 +163,7 @@ test_that("get_files_structure_from_orgs", { "graphql_engine$get_files_structure_from_org", test_mocker$use("gh_md_files_structure") ) + github_testhost_priv$searching_scope <- "org" gh_files_structure_from_orgs <- github_testhost_priv$get_files_structure_from_orgs( pattern = "\\.md|\\.qmd|\\.Rmd", depth = Inf, @@ -196,13 +197,14 @@ test_that("when files_structure is empty, appropriate message is returned", { mode = "private" ) mockery::stub( - github_testhost_priv$get_files_structure_from_orgs, + github_testhost_priv$get_files_structure_from_repos, "graphql_engine$get_files_structure_from_org", list() |> purrr::set_names() ) + github_testhost_priv$searching_scope <- "repo" expect_snapshot( - github_testhost_priv$get_files_structure_from_orgs( + github_testhost_priv$get_files_structure_from_repos( pattern = "\\.png", depth = 1L, verbose = TRUE @@ -249,13 +251,12 @@ test_that("get_files_structure pulls files structure for repositories in orgs", 
test_that("get_files_content makes use of files_structure", { mockery::stub( - github_testhost_priv$get_files_content_from_orgs, + github_testhost_priv$get_files_content_from_files_structure, "private$add_repo_api_url", test_mocker$use("gh_files_table") ) expect_snapshot( - files_content <- github_testhost_priv$get_files_content_from_orgs( - file_path = NULL, + files_content <- github_testhost_priv$get_files_content_from_files_structure( host_files_structure = test_mocker$use("gh_files_structure_from_orgs") ) ) diff --git a/tests/testthat/test-get_files_structure-GitLab.R b/tests/testthat/test-get_files_structure-GitLab.R index e2bf6450..1a8e3401 100644 --- a/tests/testthat/test-get_files_structure-GitLab.R +++ b/tests/testthat/test-get_files_structure-GitLab.R @@ -229,13 +229,12 @@ test_that("get_files_structure pulls files structure for repositories in orgs", test_that("get_files_content makes use of files_structure", { mockery::stub( - gitlab_testhost_priv$get_files_content_from_orgs, + gitlab_testhost_priv$get_files_content_from_files_structure, "private$add_repo_api_url", test_mocker$use("gl_files_table") ) expect_snapshot( - files_content <- gitlab_testhost_priv$get_files_content_from_orgs( - file_path = NULL, + files_content <- gitlab_testhost_priv$get_files_content_from_files_structure( host_files_structure = test_mocker$use("gl_files_structure_from_orgs") ) ) From 8eb451650aee0b4c15f2d246be9ff59fe57a02d6 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Tue, 17 Dec 2024 13:25:34 +0000 Subject: [PATCH 37/99] Fixes. 
--- R/EngineGraphQL.R | 5 +--- R/EngineGraphQLGitHub.R | 2 +- R/EngineGraphQLGitLab.R | 2 -- R/EngineRestGitHub.R | 29 +++++++++++++++++++ R/GitHost.R | 11 +++---- R/GitHostGitHub.R | 29 ------------------- R/GitHostGitLab.R | 6 ++-- R/gitstats_functions.R | 2 +- man/get_files_structure.Rd | 2 +- .../testthat/test-get_files_content-GitHub.R | 3 +- .../test-get_files_structure-GitHub.R | 1 - .../test-get_files_structure-GitLab.R | 1 - 12 files changed, 40 insertions(+), 53 deletions(-) diff --git a/R/EngineGraphQL.R b/R/EngineGraphQL.R index c462180d..99d7f7aa 100644 --- a/R/EngineGraphQL.R +++ b/R/EngineGraphQL.R @@ -75,7 +75,6 @@ EngineGraphQL <- R6::R6Class( }, get_path_from_files_structure = function(host_files_structure, - only_text_files, org, repo = NULL) { if (is.null(repo)) { @@ -85,9 +84,7 @@ EngineGraphQL <- R6::R6Class( } else { file_path <- host_files_structure[[org]][[repo]] } - if (only_text_files) { - file_path <- file_path[grepl(text_files_pattern, file_path)] - } + file_path <- file_path[grepl(text_files_pattern, file_path)] return(file_path) } ) diff --git a/R/EngineGraphQLGitHub.R b/R/EngineGraphQLGitHub.R index fd596d15..19220e49 100644 --- a/R/EngineGraphQLGitHub.R +++ b/R/EngineGraphQLGitHub.R @@ -202,7 +202,7 @@ EngineGraphQLGitHub <- R6::R6Class( }, # Prepare files table. 
- prepare_files_table = function(files_response, org, file_path) { + prepare_files_table = function(files_response, org) { if (!is.null(files_response)) { files_table <- purrr::map(files_response, function(repository) { purrr::imap(repository, function(file_data, file_name) { diff --git a/R/EngineGraphQLGitLab.R b/R/EngineGraphQLGitLab.R index 7b23c6e6..1c3e60f4 100644 --- a/R/EngineGraphQLGitLab.R +++ b/R/EngineGraphQLGitLab.R @@ -136,7 +136,6 @@ EngineGraphQLGitLab <- R6::R6Class( if (!is.null(host_files_structure)) { file_paths <- private$get_path_from_files_structure( host_files_structure = host_files_structure, - only_text_files = only_text_files, org = org ) } else if (only_text_files) { @@ -250,7 +249,6 @@ EngineGraphQLGitLab <- R6::R6Class( if (!is.null(host_files_structure)) { file_paths <- private$get_path_from_files_structure( host_files_structure = host_files_structure, - only_text_files = only_text_files, org = org, repo = repo ) diff --git a/R/EngineRestGitHub.R b/R/EngineRestGitHub.R index 86ec5e91..2b07e4eb 100644 --- a/R/EngineRestGitHub.R +++ b/R/EngineRestGitHub.R @@ -24,6 +24,28 @@ EngineRestGitHub <- R6::R6Class( return(files_list) }, + # Prepare files table from REST API. 
+ prepare_files_table = function(files_list) { + files_table <- NULL + if (!is.null(files_list)) { + files_table <- purrr::map(files_list, function(file_data) { + repo_fullname <- private$get_repo_fullname(file_data$url) + org_repo <- stringr::str_split_1(repo_fullname, "/") + data.frame( + "repo_name" = org_repo[2], + "repo_id" = NA_character_, + "organization" = org_repo[1], + "file_path" = file_data$path, + "file_content" = file_data$content, + "file_size" = file_data$size, + "repo_url" = private$set_repo_url(file_data$url) + ) + }) %>% + purrr::list_rbind() + } + return(files_table) + }, + # Pulling repositories where code appears get_repos_by_code = function(code, org = NULL, @@ -302,6 +324,13 @@ EngineRestGitHub <- R6::R6Class( purrr::map(search_result, ~ self$response(.$url), .progress = glue::glue("Adding file [{filename}] info...")) %>% unique() + }, + + # Get repository full name + get_repo_fullname = function(file_url) { + stringr::str_remove_all(file_url, + paste0(private$endpoints$repositories, "/")) %>% + stringr::str_replace_all("/contents.*", "") } ) ) diff --git a/R/GitHost.R b/R/GitHost.R index 023f2a99..493bb4f5 100644 --- a/R/GitHost.R +++ b/R/GitHost.R @@ -963,8 +963,7 @@ GitHost <- R6::R6Class( progress = progress ) |> graphql_engine$prepare_files_table( - org = org, - file_path = file_path + org = org ) }) |> purrr::list_rbind() |> @@ -1001,8 +1000,7 @@ GitHost <- R6::R6Class( progress = progress ) |> graphql_engine$prepare_files_table( - org = org, - file_path = file_path + org = org ) }) |> purrr::list_rbind() |> @@ -1044,8 +1042,7 @@ GitHost <- R6::R6Class( progress = progress ) |> graphql_engine$prepare_files_table( - org = org, - file_path = file_path + org = org ) }) |> purrr::list_rbind() |> @@ -1170,7 +1167,7 @@ GitHost <- R6::R6Class( verbose = verbose, progress = progress ) %>% - private$prepare_files_table_from_rest() %>% + rest_engine$prepare_files_table() %>% private$add_repo_api_url() return(files_table) }, diff --git 
a/R/GitHostGitHub.R b/R/GitHostGitHub.R index 71e1c42a..84e0b1d7 100644 --- a/R/GitHostGitHub.R +++ b/R/GitHostGitHub.R @@ -252,35 +252,6 @@ GitHostGitHub <- R6::R6Class( return(repos_names) }, - # Prepare files table from REST API. - prepare_files_table_from_rest = function(files_list) { - files_table <- NULL - if (!is.null(files_list)) { - files_table <- purrr::map(files_list, function(file_data) { - repo_fullname <- private$get_repo_fullname(file_data$url) - org_repo <- stringr::str_split_1(repo_fullname, "/") - data.frame( - "repo_name" = org_repo[2], - "repo_id" = NA_character_, - "organization" = org_repo[1], - "file_path" = file_data$path, - "file_content" = file_data$content, - "file_size" = file_data$size, - "repo_url" = private$set_repo_url(file_data$url) - ) - }) %>% - purrr::list_rbind() - } - return(files_table) - }, - - # Get repository full name - get_repo_fullname = function(file_url) { - stringr::str_remove_all(file_url, - paste0(private$endpoints$repositories, "/")) %>% - stringr::str_replace_all("/contents.*", "") - }, - # Get repository url set_repo_url = function(repo_fullname) { paste0(private$endpoints$repositories, "/", repo_fullname) diff --git a/R/GitHostGitLab.R b/R/GitHostGitLab.R index e5370724..ff4f041d 100644 --- a/R/GitHostGitLab.R +++ b/R/GitHostGitLab.R @@ -309,8 +309,7 @@ GitHostGitLab <- R6::R6Class("GitHostGitLab", progress = progress ) |> graphql_engine$prepare_files_table( - org = org, - file_path = file_path + org = org ) }) |> purrr::list_rbind() |> @@ -352,8 +351,7 @@ GitHostGitLab <- R6::R6Class("GitHostGitLab", progress = progress ) |> graphql_engine$prepare_files_table( - org = org, - file_path = file_path + org = org ) }) |> purrr::list_rbind() |> diff --git a/R/gitstats_functions.R b/R/gitstats_functions.R index f858f05d..e38fce10 100644 --- a/R/gitstats_functions.R +++ b/R/gitstats_functions.R @@ -380,7 +380,7 @@ get_files_content <- function(gitstats, #' orgs = "mbtests" #' ) #' get_files_structure( -#' gitstats_obj 
= my_gitstats, +#' gitstats = my_gitstats, #' pattern = "\\.md" #' ) #' } diff --git a/man/get_files_structure.Rd b/man/get_files_structure.Rd index f02be215..db33b9a4 100644 --- a/man/get_files_structure.Rd +++ b/man/get_files_structure.Rd @@ -51,7 +51,7 @@ Pulls file structure for a given repository. orgs = "mbtests" ) get_files_structure( - gitstats_obj = my_gitstats, + gitstats = my_gitstats, pattern = "\\\\.md" ) } diff --git a/tests/testthat/test-get_files_content-GitHub.R b/tests/testthat/test-get_files_content-GitHub.R index 886eb3bb..54c896ce 100644 --- a/tests/testthat/test-get_files_content-GitHub.R +++ b/tests/testthat/test-get_files_content-GitHub.R @@ -91,8 +91,7 @@ test_that("GitHub GraphQL Engine pulls files from organization", { test_that("GitHubHost prepares table from files response", { gh_files_table <- test_graphql_github$prepare_files_table( files_response = test_mocker$use("github_files_response"), - org = "r-world-devs", - file_path = "LICENSE" + org = "r-world-devs" ) expect_files_table(gh_files_table) test_mocker$cache(gh_files_table) diff --git a/tests/testthat/test-get_files_structure-GitHub.R b/tests/testthat/test-get_files_structure-GitHub.R index ed0ba9f8..0c300350 100644 --- a/tests/testthat/test-get_files_structure-GitHub.R +++ b/tests/testthat/test-get_files_structure-GitHub.R @@ -220,7 +220,6 @@ test_that("get_path_from_files_structure gets file path from files structure", { test_graphql_github <- environment(test_graphql_github$initialize)$private file_path <- test_graphql_github$get_path_from_files_structure( host_files_structure = test_mocker$use("gh_files_structure_from_orgs"), - only_text_files = FALSE, org = "test_org", repo = "TestRepo" ) diff --git a/tests/testthat/test-get_files_structure-GitLab.R b/tests/testthat/test-get_files_structure-GitLab.R index 1a8e3401..64f08825 100644 --- a/tests/testthat/test-get_files_structure-GitLab.R +++ b/tests/testthat/test-get_files_structure-GitLab.R @@ -198,7 +198,6 @@ 
test_that("get_path_from_files_structure gets file path from files structure", { test_graphql_gitlab <- environment(test_graphql_gitlab$initialize)$private file_path <- test_graphql_gitlab$get_path_from_files_structure( host_files_structure = test_mocker$use("gl_files_structure_from_orgs"), - only_text_files = TRUE, org = "mbtests" # this will need fixing and repo parameter must come back ) expect_equal(typeof(file_path), "character") From 831b15bb5678617fa96dadff27ff3e52140b4d49 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Thu, 19 Dec 2024 08:37:12 +0000 Subject: [PATCH 38/99] Lint. --- R/EngineGraphQLGitHub.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/EngineGraphQLGitHub.R b/R/EngineGraphQLGitHub.R index 19220e49..64a52c1b 100644 --- a/R/EngineGraphQLGitHub.R +++ b/R/EngineGraphQLGitHub.R @@ -49,7 +49,7 @@ EngineGraphQLGitHub <- R6::R6Class( while (next_page) { repos_response <- private$get_repos_page( login = org, - type = type, + type = type, repo_cursor = repo_cursor ) repositories <- if (type == "organization") { From d6f271b05e6d850b412c417d78d416cbe49a7360 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Thu, 19 Dec 2024 08:38:33 +0000 Subject: [PATCH 39/99] Fix printing ID of the GitLab repositories in table, style to make code prettier. 
--- R/EngineGraphQLGitLab.R | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/R/EngineGraphQLGitLab.R b/R/EngineGraphQLGitLab.R index 1c3e60f4..a4010946 100644 --- a/R/EngineGraphQLGitLab.R +++ b/R/EngineGraphQLGitLab.R @@ -82,23 +82,24 @@ EngineGraphQLGitLab <- R6::R6Class( if (length(repos_list) > 0) { repos_table <- purrr::map(repos_list, function(repo) { repo <- repo$node - repo$default_branch <- repo$repository$rootRef %||% "" + repo[["repo_id"]] <- sub(".*/(\\d+)$", "\\1", repo$repo_id) + repo[["default_branch"]] <- repo$repository$rootRef %||% "" repo$repository <- NULL - repo$languages <- if (length(repo$languages) > 0) { + repo[["languages"]] <- if (length(repo$languages) > 0) { purrr::map_chr(repo$languages, ~ .$name) %>% paste0(collapse = ", ") } else { "" } - repo$created_at <- gts_to_posixt(repo$created_at) - repo$issues_open <- repo$issues$opened - repo$issues_closed <- repo$issues$closed + repo[["created_at"]] <- gts_to_posixt(repo$created_at) + repo[["issues_open"]] <- repo$issues$opened + repo[["issues_closed"]] <- repo$issues$closed repo$issues <- NULL - repo$last_activity_at <- as.POSIXct(repo$last_activity_at) - repo$organization <- repo$namespace$path + repo[["last_activity_at"]] <- as.POSIXct(repo$last_activity_at) + repo[["organization"]] <- repo$namespace$path repo$namespace <- NULL repo$repo_path <- NULL # temporary to close issue 338 - data.frame(repo) + return(data.frame(repo)) }) %>% purrr::list_rbind() %>% dplyr::relocate( From 22f077b9110bee381ba7b21a3831ce6e6c2b4023 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Thu, 19 Dec 2024 08:40:37 +0000 Subject: [PATCH 40/99] Apply searching for code both by orgs and repositories. 
--- R/EngineRestGitHub.R | 131 ++++++++++++++++----- R/EngineRestGitLab.R | 100 ++++++++++++---- R/GitHost.R | 263 +++++++++++++++++++++++++++++++------------ R/GitHostGitHub.R | 6 +- R/GitHostGitLab.R | 5 +- R/GitStats.R | 17 ++- 6 files changed, 396 insertions(+), 126 deletions(-) diff --git a/R/EngineRestGitHub.R b/R/EngineRestGitHub.R index 2b07e4eb..0add30e4 100644 --- a/R/EngineRestGitHub.R +++ b/R/EngineRestGitHub.R @@ -48,33 +48,34 @@ EngineRestGitHub <- R6::R6Class( # Pulling repositories where code appears get_repos_by_code = function(code, - org = NULL, - filename = NULL, - in_path = FALSE, - output = "table_full", - verbose = TRUE, - progress = TRUE) { - user_query <- if (!is.null(org)) { - paste0('+user:', org) - } else { - '' - } - query <- if (!in_path) { - paste0('"', code, '"', user_query) - } else { - paste0('"', code, '"+in:path', user_query) - } - if (!is.null(filename)) { - query <- paste0(query, '+in:file+filename:', filename) + org = NULL, + repos = NULL, + filename = NULL, + in_path = FALSE, + output = "table_full", + verbose = TRUE, + progress = TRUE) { + if (!is.null(org)) { + search_result <- private$search_for_code( + code = code, + org = org, + filename = filename, + in_path = in_path, + verbose = verbose, + progress = progress + ) } - search_endpoint <- paste0(private$endpoints[["search"]], query) - if (verbose) cli::cli_alert_info("Searching for code [{code}]...") - total_n <- self$response(search_endpoint)[["total_count"]] - if (length(total_n) > 0) { - search_result <- private$search_response( - search_endpoint = search_endpoint, - total_n = total_n + if (!is.null(repos)) { + search_result <- private$search_repos_for_code( + code = code, + repos = repos, + filename = filename, + in_path = in_path, + verbose = verbose, + progress = progress ) + } + if (length(search_result) > 0) { if (output == "table_full" || output == "table_min") { search_output <- private$map_search_into_repos( search_response = search_result, @@ -129,8 +130,14 
@@ EngineRestGitHub <- R6::R6Class( #' Pull all repositories URLS from organization get_repos_urls = function(type, org, repos) { - repos_response <- self$response( - endpoint = paste0(private$endpoints[["organizations"]], org, "/repos") + owner_type <- attr(org, "type") %||% "organization" + if (owner_type == "user") { + repo_endpoint <- paste0(private$endpoints[["users"]], org, "/repos") + } else { + repo_endpoint <- paste0(private$endpoints[["organizations"]], org, "/repos") + } + repos_response <- private$paginate_results( + endpoint = repo_endpoint ) if (!is.null(repos)) { repos_response <- repos_response %>% @@ -216,12 +223,82 @@ EngineRestGitHub <- R6::R6Class( self$rest_api_url, "/orgs/" ) + private$endpoints[["users"]] <- paste0( + self$rest_api_url, + "/users/" + ) private$endpoints[["repositories"]] <- paste0( self$rest_api_url, "/repos/" ) }, + search_for_code = function(code, + org, + filename, + in_path, + verbose, + progress) { + user_query <- if (!is.null(org)) { + paste0('+user:', org) + } else { + '' + } + query <- if (!in_path) { + paste0('"', code, '"', user_query) + } else { + paste0('"', code, '"+in:path', user_query) + } + if (!is.null(filename)) { + query <- paste0(query, '+in:file+filename:', filename) + } + search_endpoint <- paste0(private$endpoints[["search"]], query) + if (verbose) cli::cli_alert_info("Searching for code [{code}]...") + total_n <- self$response(search_endpoint)[["total_count"]] + search_result <- if (length(total_n) > 0) { + private$search_response( + search_endpoint = search_endpoint, + total_n = total_n + ) + } else { + list() + } + return(search_result) + }, + + search_repos_for_code = function(code, + repos, + filename, + in_path, + verbose, + progress) { + if (verbose) cli::cli_alert_info("Searching for code [{code}]...") + search_result <- purrr::map(repos, function(repo) { + repo_query <- paste0('+repo:', repo) + query <- if (!in_path) { + paste0('"', code, '"', repo_query) + } else { + paste0('"', code, 
'"+in:path', repo_query) + } + if (!is.null(filename)) { + query <- paste0(query, '+in:file+filename:', filename) + } + search_endpoint <- paste0(private$endpoints[["search"]], query) + total_n <- self$response(search_endpoint)[["total_count"]] + result <- if (length(total_n) > 0) { + private$search_response( + search_endpoint = search_endpoint, + total_n = total_n + ) + } else { + list() + } + return(result) + }) |> + purrr::list_flatten() + return(search_result) + }, + # A wrapper for proper pagination of GitHub search REST API # @param search_endpoint A character, a search endpoint # @param total_n Number of results diff --git a/R/EngineRestGitLab.R b/R/EngineRestGitLab.R index c7887a14..791e7b1e 100644 --- a/R/EngineRestGitLab.R +++ b/R/EngineRestGitLab.R @@ -62,19 +62,31 @@ EngineRestGitLab <- R6::R6Class( # filtering by language. For more information look here: # https://gitlab.com/gitlab-org/gitlab/-/issues/340333 get_repos_by_code = function(code, - org = NULL, + org = NULL, + repos = NULL, filename = NULL, - in_path = FALSE, - output = "table_full", - verbose = TRUE, + in_path = FALSE, + output = "table_full", + verbose = TRUE, progress = TRUE) { - search_response <- private$search_for_code( - code = code, - filename = filename, - in_path = in_path, - org = org, - verbose = verbose - ) + if (!is.null(org)) { + search_response <- private$search_for_code( + code = code, + filename = filename, + in_path = in_path, + org = utils::URLencode(org, reserved = TRUE), + verbose = verbose + ) + } + if (!is.null(repos)) { + search_response <- private$search_repos_for_code( + code = code, + filename = filename, + in_path = in_path, + repos = repos, + verbose = verbose + ) + } if (output == "raw") { search_output <- search_response } else if (output == "table_full" || output == "table_min") { @@ -319,21 +331,25 @@ EngineRestGitLab <- R6::R6Class( # Set search endpoint set_search_endpoint = function(org = NULL) { - groups_search <- if (!private$scan_all) { - 
private$set_groups_search_endpoint(org) + scope_endpoint <- if (!is.null(org)) { + paste0("/groups/", private$get_group_id(org)) } else { "" } - private$endpoints[["search"]] <- paste0( + paste0( self$rest_api_url, - groups_search, + scope_endpoint, "/search?scope=blobs&search=" ) }, - # set groups search endpoint - set_groups_search_endpoint = function(org) { - paste0("/groups/", private$get_group_id(org)) + set_projects_search_endpoint = function(repo) { + paste0( + self$rest_api_url, + "/projects/", + utils::URLencode(repo, reserved = TRUE), + "/search?scope=blobs&search=" + ) }, # Iterator over pulling pages of repositories. @@ -359,7 +375,7 @@ EngineRestGitLab <- R6::R6Class( page <- 1 still_more_hits <- TRUE full_repos_list <- list() - private$set_search_endpoint(org) + search_endpoint <- private$set_search_endpoint(org) if (verbose) cli::cli_alert_info("Searching for code [{code}]...") if (!in_path) { query <- paste0("%22", code, "%22") @@ -372,7 +388,7 @@ EngineRestGitLab <- R6::R6Class( while (still_more_hits | page < page_max) { search_result <- self$response( paste0( - private$endpoints[["search"]], + search_endpoint, query, "&per_page=100&page=", page @@ -389,11 +405,53 @@ EngineRestGitLab <- R6::R6Class( return(full_repos_list) }, + search_repos_for_code = function(code, + repos, + filename = NULL, + in_path = FALSE, + page_max = 1e6, + verbose = TRUE) { + if (verbose) cli::cli_alert_info("Searching for code [{code}]...") + if (!in_path) { + query <- paste0("%22", code, "%22") + } else { + query <- paste0("path:", code) + } + if (!is.null(filename)) { + query <- paste0(query, "%20filename:", filename) + } + search_response <- purrr::map(repos, function(repo) { + page <- 1 + still_more_hits <- TRUE + full_repos_list <- list() + search_endpoint <- private$set_projects_search_endpoint(repo) + while (still_more_hits | page < page_max) { + search_result <- self$response( + paste0( + search_endpoint, + query, + "&per_page=100&page=", + page + ) + ) + if 
(length(search_result) == 0) { + still_more_hits <- FALSE + break() + } else { + full_repos_list <- append(full_repos_list, search_result) + page <- page + 1 + } + } + return(full_repos_list) + }) |> + purrr::list_flatten() + return(search_response) + }, + # Parse search response into repositories output map_search_into_repos = function(search_response, progress) { repos_ids <- purrr::map_chr(search_response, ~ as.character(.$project_id)) %>% unique() - repos_list <- purrr::map(repos_ids, function(repo_id) { content <- self$response( endpoint = paste0(private$endpoints[["projects"]], repo_id) diff --git a/R/GitHost.R b/R/GitHost.R index 493bb4f5..3f6a496e 100644 --- a/R/GitHost.R +++ b/R/GitHost.R @@ -86,38 +86,32 @@ GitHost <- R6::R6Class( }, # Get repositories URLS from the Git host - get_repos_urls = function(type = "web", + get_repos_urls = function(type = "web", with_code = NULL, - in_files = NULL, + in_files = NULL, with_file = NULL, - verbose = TRUE, - progress = TRUE) { + verbose = TRUE, + progress = TRUE) { if (!is.null(with_code)) { - repo_urls <- private$get_repos_with_code( - code = with_code, + repo_urls <- private$get_repos_urls_with_code( + type = type, + code = with_code, in_files = in_files, - output = "raw", - verbose = verbose - ) %>% - private$get_repo_url_from_response( - type = type, - progress = progress - ) + verbose = verbose, + progress = progress + ) } else if (!is.null(with_file)) { - repo_urls <- private$get_repos_with_code( - code = with_file, + repo_urls <- private$get_repos_urls_with_code( + type = type, + code = with_file, in_path = TRUE, - output = "raw", - verbose = verbose - ) %>% - private$get_repo_url_from_response( - type = type, - progress = progress - ) + verbose = verbose, + progress = progress + ) } else { repo_urls <- private$get_all_repos_urls( - type = type, - verbose = verbose, + type = type, + verbose = verbose, progress = progress ) } @@ -453,7 +447,7 @@ GitHost <- R6::R6Class( repo <- NULL } return(repo) - }) %>% + 
}, .progress = verbose) %>% purrr::keep(~ length(.) > 0) %>% unlist() if (length(repos) == 0) { @@ -668,7 +662,7 @@ GitHost <- R6::R6Class( type = type ) |> graphql_engine$prepare_repos_table() |> - dplyr::filter(repo_name == private$orgs_repos[[org]]) + dplyr::filter(repo_name %in% private$orgs_repos[[org]]) return(repos_table) }, .progress = progress) |> purrr::list_rbind() @@ -677,11 +671,11 @@ GitHost <- R6::R6Class( # Pull repositories with specific code get_repos_with_code = function(code, - in_files = NULL, - in_path = FALSE, - output = "table_full", - verbose = TRUE, - progress = TRUE) { + in_files = NULL, + in_path = FALSE, + output = "table_full", + verbose = TRUE, + progress = TRUE) { if (private$scan_all) { repos_table <- private$get_repos_with_code_from_host( code = code, @@ -693,7 +687,15 @@ GitHost <- R6::R6Class( ) } if (!private$scan_all) { - repos_table <- private$get_repos_with_code_from_orgs( + repos_from_org <- private$get_repos_with_code_from_orgs( + code = code, + in_files = in_files, + in_path = in_path, + output = output, + verbose = verbose, + progress = progress + ) + repos_from_repos <- private$get_repos_with_code_from_repos( code = code, in_files = in_files, in_path = in_path, @@ -701,6 +703,7 @@ GitHost <- R6::R6Class( verbose = verbose, progress = progress ) + repos_table <- rbind(repos_from_org, repos_from_repos) } return(repos_table) }, @@ -710,8 +713,8 @@ GitHost <- R6::R6Class( if (private$scan_all && is.null(private$orgs)) { if (verbose) { show_message( - host = private$host_name, - engine = "graphql", + host = private$host_name, + engine = "graphql", information = "Pulling all organizations" ) } @@ -741,15 +744,15 @@ GitHost <- R6::R6Class( org <- utils::URLdecode(org) if (!private$scan_all && verbose) { show_message( - host = private$host_name, - engine = "rest", - scope = org, + host = private$host_name, + engine = "rest", + scope = org, information = "Pulling repositories (URLs)" ) } repos_urls <- rest_engine$get_repos_urls( 
type = type, - org = org, + org = org, repos = NULL ) return(repos_urls) @@ -761,20 +764,21 @@ GitHost <- R6::R6Class( get_repos_urls_from_repos = function(type, verbose, progress) { if ("repo" %in% private$searching_scope) { rest_engine <- private$engines$rest - orgs <- names(private$orgs_repos) + orgs <- private$set_owner_type( + owners = names(private$orgs_repos) + ) repos_vector <- purrr::map(orgs, function(org) { - org <- utils::URLdecode(org) if (!private$scan_all && verbose) { show_message( - host = private$host_name, - engine = "rest", - scope = org, + host = private$host_name, + engine = "rest", + scope = org, information = "Pulling repositories (URLs)" ) } repos_urls <- rest_engine$get_repos_urls( type = type, - org = org, + org = org, repos = private$orgs_repos[[org]] ) return(repos_urls) @@ -847,66 +851,185 @@ GitHost <- R6::R6Class( output = "table_full", verbose = TRUE, progress = TRUE) { - repos_list <- purrr::map(private$orgs, function(org) { + if ("org" %in% private$searching_scope) { + repos_list <- purrr::map(private$orgs, function(org) { + if (verbose) { + show_message( + host = private$host_name, + engine = "rest", + scope = utils::URLdecode(org), + code = code, + information = "Pulling repositories" + ) + } + rest_engine <- private$engines$rest + if (is.null(in_files)) { + repos_response <- rest_engine$get_repos_by_code( + org = org, + code = code, + in_path = in_path, + output = output, + verbose = verbose, + progress = progress + ) + } else { + repos_response <- purrr::map(in_files, function(filename) { + rest_engine$get_repos_by_code( + org = org, + code = code, + filename = filename, + in_path = in_path, + output = output, + verbose = verbose, + progress = progress + ) + }) %>% + purrr::list_flatten() + } + if (output != "raw") { + repos_table <- repos_response %>% + rest_engine$tailor_repos_response( + output = output + ) %>% + rest_engine$prepare_repos_table( + output = output, + verbose = verbose + ) + if (output == "table_full") { + 
repos_table <- repos_table %>% + rest_engine$get_repos_issues( + progress = progress + ) + } + return(repos_table) + } else { + return(repos_response) + } + }, .progress = progress) + if (output != "raw") { + repos_output <- purrr::list_rbind(repos_list) + } else { + repos_output <- purrr::list_flatten(repos_list) + } + return(repos_output) + } + }, + + # Pull repositories with code from given repositories + get_repos_with_code_from_repos = function(code, + in_files = NULL, + in_path = FALSE, + output = "table_full", + verbose = TRUE, + progress = TRUE) { + orgs <- names(private$orgs_repos) + if ("repo" %in% private$searching_scope) { if (verbose) { show_message( host = private$host_name, engine = "rest", - scope = utils::URLdecode(org), + scope = utils::URLdecode(paste0(orgs, collapse = "|")), code = code, information = "Pulling repositories" ) } rest_engine <- private$engines$rest if (is.null(in_files)) { - repos_response <- rest_engine$get_repos_by_code( - org = org, - code = code, - in_path = in_path, - output = output, - verbose = verbose, + repos_output <- rest_engine$get_repos_by_code( + repos = private$repos_fullnames, + code = code, + in_path = in_path, + output = output, + verbose = verbose, progress = progress ) } else { - repos_response <- purrr::map(in_files, function(filename) { + repos_output <- purrr::map(in_files, function(filename) { rest_engine$get_repos_by_code( - org = org, - code = code, + repos = private$repos_fullnames, + code = code, filename = filename, - in_path = in_path, - output = output, - verbose = verbose, + in_path = in_path, + output = output, + verbose = verbose, progress = progress ) }) %>% purrr::list_flatten() } if (output != "raw") { - repos_table <- repos_response %>% + repos_output <- repos_output %>% rest_engine$tailor_repos_response( output = output ) %>% rest_engine$prepare_repos_table( - output = output, + output = output, verbose = verbose ) if (output == "table_full") { - repos_table <- repos_table %>% + repos_output
<- repos_output %>% rest_engine$get_repos_issues( progress = progress ) } - return(repos_table) - } else { - return(repos_response) } - }, .progress = progress) - if (output != "raw") { - repos_output <- purrr::list_rbind(repos_list) - } else { - repos_output <- purrr::list_flatten(repos_list) + return(repos_output) } - return(repos_output) + }, + + get_repos_urls_with_code = function(type, + code, + in_files = NULL, + in_path = FALSE, + verbose, + progress) { + repos_urls_from_orgs <- private$get_repos_urls_with_code_from_orgs( + type = type, + code = code, + in_files = in_files, + in_path = in_path, + verbose = verbose, + progress = progress + ) + repos_urls_from_repos <- private$get_repos_urls_with_code_from_repos( + type = type, + code = code, + in_files = in_files, + in_path = in_path, + verbose = verbose, + progress = progress + ) + repos_urls <- c(repos_urls_from_orgs, repos_urls_from_repos) + return(repos_urls) + }, + + get_repos_urls_with_code_from_orgs = function(type, code, in_files, in_path, verbose, progress) { + private$get_repos_with_code_from_orgs( + code = code, + in_files = in_files, + in_path = in_path, + output = "raw", + verbose = verbose + ) |> + private$get_repo_url_from_response( + type = type, + progress = progress + ) + }, + + get_repos_urls_with_code_from_repos = function(type, code, in_files, in_path, verbose, progress) { + private$get_repos_with_code_from_repos( + code = code, + in_files = in_files, + in_path = in_path, + output = "raw", + verbose = verbose + ) |> + private$get_repo_url_from_response( + type = type, + repos_fullnames = private$repos_fullnames, + progress = progress + ) }, add_platform = function(repos_table) { diff --git a/R/GitHostGitHub.R b/R/GitHostGitHub.R index 84e0b1d7..f0ac14fc 100644 --- a/R/GitHostGitHub.R +++ b/R/GitHostGitHub.R @@ -155,7 +155,11 @@ GitHostGitHub <- R6::R6Class( }, # Get projects URL from search response - get_repo_url_from_response = function(search_response, type, progress = TRUE) { + 
get_repo_url_from_response = function(search_response, repos_fullnames = NULL, type, progress = TRUE) { + if (!is.null(repos_fullnames)) { + search_response <- search_response |> + purrr::keep(~ .$repository$full_name %in% repos_fullnames) + } purrr::map_vec(search_response, function(project) { if (type == "api") { project$repository$url diff --git a/R/GitHostGitLab.R b/R/GitHostGitLab.R index ff4f041d..4f2f0957 100644 --- a/R/GitHostGitLab.R +++ b/R/GitHostGitLab.R @@ -153,8 +153,8 @@ GitHostGitLab <- R6::R6Class("GitHostGitLab", }, # Get projects API URL from search response - get_repo_url_from_response = function(search_response, type, progress = TRUE) { - purrr::map_vec(search_response, function(response) { + get_repo_url_from_response = function(search_response, type, repos_fullnames = NULL, progress = TRUE) { + repo_urls <- purrr::map_vec(search_response, function(response) { api_url <- paste0(private$api_url, "/projects/", response$project_id) if (type == "api") { return(api_url) @@ -171,6 +171,7 @@ GitHostGitLab <- R6::R6Class("GitHostGitLab", } else { FALSE }) + return(repo_urls) }, get_commits_from_orgs = function(since, diff --git a/R/GitStats.R b/R/GitStats.R index 58dd8070..b4fa288e 100644 --- a/R/GitStats.R +++ b/R/GitStats.R @@ -114,14 +114,21 @@ GitStats <- R6::R6Class( with_files = with_files, verbose = verbose, progress = progress - ) %>% - private$set_object_class( + ) + if (!is.null(repos_urls)) { + repos_urls <- private$set_object_class( + object = repos_urls, class = "repos_urls", attr_list = args_list ) - private$save_to_storage( - table = repos_urls - ) + private$save_to_storage( + table = repos_urls + ) + } else if (verbose) { + cli::cli_alert_warning( + cli::col_yellow("No findings.") + ) + } } else { repos_urls <- private$get_from_storage( table = "repos_urls", From 99fded804a63d30b78a0d7e86342da8dc442a167 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Thu, 19 Dec 2024 08:41:27 +0000 Subject: [PATCH 41/99] Fix printing GitLab org in 
repo table when it is a subgroup. Earlier it was printing only the parent group. --- R/GQLQueryGitLab.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/GQLQueryGitLab.R b/R/GQLQueryGitLab.R index 6f6270bf..fa3def62 100644 --- a/R/GQLQueryGitLab.R +++ b/R/GQLQueryGitLab.R @@ -238,7 +238,7 @@ GQLQueryGitLab <- R6::R6Class("GQLQueryGitLab", opened } namespace { - path + path: fullPath } repo_url: webUrl } From 58540b02458ba2784a8c44ddbb18990d50c1bf3f Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Thu, 19 Dec 2024 09:00:38 +0000 Subject: [PATCH 42/99] Update vignettes. --- vignettes/set_hosts.Rmd | 49 +++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/vignettes/set_hosts.Rmd b/vignettes/set_hosts.Rmd index 2e4d35c9..ec7386e8 100644 --- a/vignettes/set_hosts.Rmd +++ b/vignettes/set_hosts.Rmd @@ -21,40 +21,40 @@ To make `GitStats` work you need to set hosts after creating `gitstats`. You can set GitLab host with `set_gitlab_host()` and GitHub host with `set_github_host()` or both. -When setting hosts you need to take into account: +When setting hosts you need to consider: * Do you wish to connect to `private` or `public` hosts? -* What `scanning scope` do you wish to set? Do you want to scan specific `organizations` or `repositories` or maybe whole git platforms? +* What `scanning scope` do you wish to set? Do you want to scan specific `organizations` and/or `repositories` or maybe whole git platforms? * Do you have `tokens` set up and stored in your environment variables that grant you access to APIs? ## Public and private hosts -If you connect to public hosts you simply call `set_github_host()` or `set_gitlab_host()` function without specifying `host` parameter. +If you connect to **public** hosts you simply call `set_github_host()` or `set_gitlab_host()` function without specifying `host` parameter. 
```{r, eval = FALSE} library(GitStats) -git_stats <- create_gitstats() %>% +git_stats <- create_gitstats() |> set_github_host( orgs = c("r-world-devs", "openpharma"), token = Sys.getenv("GITHUB_PAT") - ) %>% + ) |> set_gitlab_host( orgs = c("mbtests"), token = Sys.getenv("GITLAB_PAT_PUBLIC") ) ``` -If you wish to connect to internal GitHub or GitLab, you need to pass names of the hosts to `host` parameter. Remember also to have tokens set up properly for these hosts (on tokens read below). +If you wish to connect to **internal** GitHub or GitLab, you need to pass names of the hosts to `host` parameter. Remember also to have tokens set up properly for these hosts (on tokens read below). ```r -git_stats <- create_gitstats() %>% +git_stats <- create_gitstats() |> set_github_host( host = "github.internal.com", orgs = c("org_1", "org_2", "org_3"), token = Sys.getenv("YOUR_GITHUB_PAT") - ) %>% + ) |> set_gitlab_host( host = "internal.host.com", orgs = c("internal_org"), @@ -64,20 +64,20 @@ git_stats <- create_gitstats() %>% ### API versions -GitStats is configured to connect to GitHub API (version 3) and GitLab API (version 4). +`GitStats` is configured to connect to GitHub API (version 3) and GitLab API (version 4). ## Scanning scope -When setting hosts you choose what scanning scope of your GitStats will be: +When setting hosts you choose what scanning scope of your `GitStats` will be: * `organizations/groups` - in this case you need to pass character arguments (names of organizations (in case of GitHub) or groups (in case of GitLab)) to `orgs` parameter. 
```{r, eval = FALSE} -git_stats <- create_gitstats() %>% +git_stats <- create_gitstats() |> set_github_host( orgs = c("r-world-devs", "openpharma"), token = Sys.getenv("GITHUB_PAT") - ) %>% + ) |> set_gitlab_host( orgs = c("mbtests"), token = Sys.getenv("GITLAB_PAT_PUBLIC") @@ -87,25 +87,36 @@ git_stats <- create_gitstats() %>% * `repositories` - in this case you need to pass full names of repositories (`{org_name}/{repo_name}`) to the `repos` parameter. ```{r, eval = FALSE} -git_stats <- create_gitstats() %>% +git_stats <- create_gitstats() |> set_github_host( repos = c("r-world-devs/GitStats", "r-world-devs/shinyCohortBuilder", "openpharma/DataFakeR"), token = Sys.getenv("GITHUB_PAT") - ) %>% + ) |> set_gitlab_host( repos = "mbtests/gitstatstesting", token = Sys.getenv("GITLAB_PAT_PUBLIC") ) ``` -* `whole hosts` - this is possible for the time being only in case of private hosts, as public ones are deemed to be too large. To set whole Git platform to be scanned just set hosts without specifying `orgs` or `repos`. On the other hand, remember that to connect with internal host, you need to pass argument to `host` parameter. +* `organizations/groups` and `repositories` - you can define both at the same time: + +```{r, eval = FALSE} +git_stats <- create_gitstats() |> + set_github_host( + orgs = "openpharma", + repos = c("r-world-devs/GitStats", "r-world-devs/shinyCohortBuilder"), + token = Sys.getenv("GITHUB_PAT") + ) +``` + +* `whole hosts` - this is possible for the time being only in case of private hosts, as public ones are deemed to be too large. To set whole Git platform to be scanned just set hosts **without specifying** `orgs` or `repos`. On the other hand, remember that to connect with internal host, you need to pass argument to `host` parameter. 
```r -git_stats <- create_gitstats() %>% +git_stats <- create_gitstats() |> set_github_host( host = "github.internal.com", token = Sys.getenv("YOUR_GITHUB_PAT") - ) %>% + ) |> set_gitlab_host( host = "internal.host.com", token = Sys.getenv("YOUR_GITLAB_PAT") @@ -128,10 +139,10 @@ When creating tokens you will be asked to set access scopes of the tokens. For ` If you have your access tokens stored in environment variables with such names as `GITHUB_PAT` or `GITHUB_PAT_*` and `GITLAB_PAT` or `GITLAB_PAT_*` you do not need to specify them in `set_*_host()` functions, `GitStats` will automatically find them. ```{r, eval = FALSE} -git_stats <- create_gitstats() %>% +git_stats <- create_gitstats() |> set_github_host( orgs = c("r-world-devs", "openpharma") - ) %>% + ) |> set_gitlab_host( orgs = c("mbtests") ) From ef59a17f9e17331e5fbd36dd3c9aa661bf14ca0f Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Thu, 19 Dec 2024 10:19:11 +0000 Subject: [PATCH 43/99] Update workflow. --- inst/package_usage_workflow.R | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/inst/package_usage_workflow.R b/inst/package_usage_workflow.R index d7cf27e8..15db6f50 100644 --- a/inst/package_usage_workflow.R +++ b/inst/package_usage_workflow.R @@ -1,19 +1,36 @@ devtools::load_all(".") -test_gitstats <- create_gitstats() %>% +openpharma_stats <- create_gitstats() %>% set_github_host( orgs = "openpharma" ) -get_R_package_usage(test_gitstats, packages = "no_such_package") +get_R_package_usage(openpharma_stats, packages = "no_such_package") get_R_package_usage( - test_gitstats, + openpharma_stats, packages = c("purrr", "shiny") ) get_R_package_usage( - test_gitstats, + openpharma_stats, packages = c("dplyr", "shiny"), split_output = TRUE ) + +pharmaverse_stats <- create_gitstats() %>% + set_github_host( + orgs = "pharmaverse" + ) + +get_R_package_usage(pharmaverse_stats, + packages = c("purrr", "shiny")) + +rwd_stats <- create_gitstats() %>% + 
set_github_host( + repos = "openpharma/DataFakeR", + orgs = "r-world-devs" + ) + +get_R_package_usage(rwd_stats, + packages = c("purrr", "shiny")) From dd6321a1af6c7bed6c63eeb7c409742bb314ba89 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Thu, 19 Dec 2024 10:19:44 +0000 Subject: [PATCH 44/99] Fix tests. --- tests/testthat/_snaps/01-get_repos-GitLab.md | 2 +- tests/testthat/test-01-get_repos-GitHub.R | 75 ++++++++++++++++---- tests/testthat/test-get_urls_repos-GitHub.R | 68 ++++++++++++++---- tests/testthat/test-get_urls_repos-GitLab.R | 55 ++++++++++++-- 4 files changed, 170 insertions(+), 30 deletions(-) diff --git a/tests/testthat/_snaps/01-get_repos-GitLab.md b/tests/testthat/_snaps/01-get_repos-GitLab.md index 632c8019..8b4158e8 100644 --- a/tests/testthat/_snaps/01-get_repos-GitLab.md +++ b/tests/testthat/_snaps/01-get_repos-GitLab.md @@ -3,5 +3,5 @@ Code gl_repos_by_org_query Output - [1] "\n query GetReposByOrg($org: ID! $repo_cursor: String!) {\n group(fullPath: $org) {\n projects(first: 100 after: $repo_cursor) {\n \n count\n pageInfo {\n hasNextPage\n endCursor\n }\n edges {\n node {\n repo_id: id\n repo_name: name\n repo_path: path\n ... on Project {\n repository {\n rootRef\n }\n }\n stars: starCount\n forks: forksCount\n created_at: createdAt\n last_activity_at: lastActivityAt\n languages {\n name\n }\n issues: issueStatusCounts {\n all\n closed\n opened\n }\n namespace {\n path\n }\n repo_url: webUrl\n }\n }\n }\n }\n }" + [1] "\n query GetReposByOrg($org: ID! $repo_cursor: String!) {\n group(fullPath: $org) {\n projects(first: 100 after: $repo_cursor) {\n \n count\n pageInfo {\n hasNextPage\n endCursor\n }\n edges {\n node {\n repo_id: id\n repo_name: name\n repo_path: path\n ... 
on Project {\n repository {\n rootRef\n }\n }\n stars: starCount\n forks: forksCount\n created_at: createdAt\n last_activity_at: lastActivityAt\n languages {\n name\n }\n issues: issueStatusCounts {\n all\n closed\n opened\n }\n namespace {\n path: fullPath\n }\n repo_url: webUrl\n }\n }\n }\n }\n }" diff --git a/tests/testthat/test-01-get_repos-GitHub.R b/tests/testthat/test-01-get_repos-GitHub.R index 6c01c398..54afe647 100644 --- a/tests/testthat/test-01-get_repos-GitHub.R +++ b/tests/testthat/test-01-get_repos-GitHub.R @@ -128,48 +128,73 @@ test_that("Mapping search result to repositories works", { test_mocker$cache(gh_mapped_repos) }) -test_that("`get_repos_by_code()` returns repos output for code search in files", { +test_that("`search_for_code()` returns repos output for code search in files", { mockery::stub( - test_rest_github$get_repos_by_code, + test_rest_github_priv$search_for_code, "self$response", list("total_count" = 3L) ) mockery::stub( - test_rest_github$get_repos_by_code, + test_rest_github_priv$search_for_code, "private$search_response", test_fixtures$github_search_response ) mockery::stub( - test_rest_github$get_repos_by_code, + test_rest_github_priv$search_for_code, "private$map_search_into_repos", test_mocker$use("gh_mapped_repos") ) - gh_repos_by_code <- test_rest_github$get_repos_by_code( + gh_search_for_code <- test_rest_github_priv$search_for_code( code = "test_code", filename = "test_file", + in_path = FALSE, org = "test_org", - verbose = FALSE + verbose = FALSE, + progress = FALSE ) - expect_gh_repos_rest_response(gh_repos_by_code) - test_mocker$cache(gh_repos_by_code) + expect_gh_search_response(gh_search_for_code$items) + test_mocker$cache(gh_search_for_code) }) -test_that("`get_repos_by_code()` for GitHub prepares a raw search response", { +test_that("`search_repos_for_code()` returns repos output for code search in files", { mockery::stub( - test_rest_github$get_repos_by_code, + test_rest_github_priv$search_repos_for_code, 
"self$response", list("total_count" = 3L) ) mockery::stub( - test_rest_github$get_repos_by_code, + test_rest_github_priv$search_repos_for_code, "private$search_response", test_fixtures$github_search_response ) mockery::stub( - test_rest_github$get_repos_by_code, + test_rest_github_priv$search_repos_for_code, "private$map_search_into_repos", test_mocker$use("gh_mapped_repos") ) + gh_search_repos_for_code <- test_rest_github_priv$search_repos_for_code( + code = "test_code", + filename = "test_file", + in_path = FALSE, + repos = c("TestRepo", "TestRepo1"), + verbose = FALSE, + progress = FALSE + ) + expect_gh_search_response(gh_search_repos_for_code$items) + test_mocker$cache(gh_search_repos_for_code) +}) + +test_that("`get_repos_by_code()` for GitHub prepares a raw search response", { + mockery::stub( + test_rest_github$get_repos_by_code, + "private$search_for_code", + test_mocker$use("gh_search_for_code") + ) + mockery::stub( + test_rest_github$get_repos_by_code, + "private$search_repos_for_code", + test_mocker$use("gh_search_repos_for_code") + ) gh_repos_by_code_raw <- test_rest_github$get_repos_by_code( code = "test_code", org = "test_org", @@ -180,6 +205,32 @@ test_that("`get_repos_by_code()` for GitHub prepares a raw search response", { test_mocker$cache(gh_repos_by_code_raw) }) +test_that("`get_repos_by_code()` for GitHub prepares a repository output", { + mockery::stub( + test_rest_github$get_repos_by_code, + "private$search_for_code", + test_mocker$use("gh_search_for_code") + ) + mockery::stub( + test_rest_github$get_repos_by_code, + "private$search_repos_for_code", + test_mocker$use("gh_search_repos_for_code") + ) + mockery::stub( + test_rest_github$get_repos_by_code, + "private$map_search_into_repos", + test_mocker$use("gh_mapped_repos") + ) + gh_repos_by_code <- test_rest_github$get_repos_by_code( + code = "test_code", + org = "test_org", + output = "table_min", + verbose = FALSE + ) + expect_gh_repos_rest_response(gh_repos_by_code) + 
test_mocker$cache(gh_repos_by_code) +}) + test_that("GitHub tailors precisely `repos_list`", { gh_repos_by_code <- test_mocker$use("gh_repos_by_code") gh_repos_by_code_tailored <- diff --git a/tests/testthat/test-get_urls_repos-GitHub.R b/tests/testthat/test-get_urls_repos-GitHub.R index a0f379b3..72c0a3ab 100644 --- a/tests/testthat/test-get_urls_repos-GitHub.R +++ b/tests/testthat/test-get_urls_repos-GitHub.R @@ -1,12 +1,12 @@ test_that("get_repos_urls() works for whole orgs", { mockery::stub( test_rest_github$get_repos_urls, - "self$response", + "private$paginate_results", test_fixtures$github_repositories_rest_response ) gh_repos_urls <- test_rest_github$get_repos_urls( type = "web", - org = "test-org", + org = "test_org", repos = NULL ) expect_length( @@ -19,12 +19,12 @@ test_that("get_repos_urls() works for whole orgs", { test_that("get_repos_urls() works for individual repos", { mockery::stub( test_rest_github$get_repos_urls, - "self$response", + "private$paginate_results", test_fixtures$github_repositories_rest_response ) gh_repos_urls <- test_rest_github$get_repos_urls( type = "web", - org = "test-org", + org = "test_org", repos = c("testRepo", "testRepo2") ) expect_length( @@ -35,14 +35,13 @@ test_that("get_repos_urls() works for individual repos", { }) test_that("get_repos_urls prepares api repo_urls vector", { - github_testhost_priv <- create_github_testhost(orgs = "test-org", - mode = "private") mockery::stub( test_rest_github$get_repos_urls, - "self$response", + "private$paginate_results", test_fixtures$github_repositories_rest_response ) gh_api_repos_urls <- test_rest_github$get_repos_urls( + org = "test_org", repos = NULL, type = "api" ) @@ -136,17 +135,62 @@ test_that("get_repo_url_from_response retrieves repositories URLS", { test_mocker$cache(gh_repo_web_urls) }) -test_that("get_repos_urls returns repositories URLS", { +test_that("get_repos_urls_with_code_from_orgs returns repositories URLS", { mockery::stub( - github_testhost$get_repos_urls, + 
github_testhost_priv$get_repos_urls_with_code_from_orgs, "private$get_repo_url_from_response", test_mocker$use("gh_repo_web_urls") ) - gh_repos_urls_with_code_in_files <- github_testhost$get_repos_urls( + gh_repos_urls_with_code_from_orgs <- github_testhost_priv$get_repos_urls_with_code_from_orgs( type = "web", - with_code = "shiny", + code = "shiny", in_files = "DESCRIPTION", - verbose = FALSE + in_path = FALSE, + verbose = FALSE, + progress= FALSE + ) + expect_type(gh_repos_urls_with_code_from_orgs, "character") + expect_gt(length(gh_repos_urls_with_code_from_orgs), 0) + test_mocker$cache(gh_repos_urls_with_code_from_orgs) +}) + +test_that("get_repos_urls_with_code_from_repos returns repositories URLS", { + mockery::stub( + github_testhost_priv$get_repos_urls_with_code_from_repos, + "private$get_repo_url_from_response", + test_mocker$use("gh_repo_web_urls") + ) + gh_repos_urls_with_code_from_repos <- github_testhost_priv$get_repos_urls_with_code_from_repos( + type = "web", + code = "shiny", + in_files = "DESCRIPTION", + in_path = FALSE, + verbose = FALSE, + progress = FALSE + ) + expect_type(gh_repos_urls_with_code_from_repos, "character") + expect_gt(length(gh_repos_urls_with_code_from_repos), 0) + test_mocker$cache(gh_repos_urls_with_code_from_repos) +}) + +test_that("get_repos_urls_with_code_from_repos returns repositories URLS", { + mockery::stub( + github_testhost_priv$get_repos_urls_with_code, + "private$get_repos_urls_with_code_from_orgs", + test_mocker$use("gh_repos_urls_with_code_from_orgs") + ) + mockery::stub( + github_testhost_priv$get_repos_urls_with_code, + "private$get_repos_urls_with_code_from_repos", + test_mocker$use("gh_repos_urls_with_code_from_repos") + ) + gh_repos_urls_with_code_in_files <- github_testhost_priv$get_repos_urls_with_code( + type = "web", + code = "shiny", + in_files = "DESCRIPTION", + in_path = FALSE, + verbose = FALSE, + progress = FALSE ) expect_type(gh_repos_urls_with_code_in_files, "character") 
expect_gt(length(gh_repos_urls_with_code_in_files), 0) diff --git a/tests/testthat/test-get_urls_repos-GitLab.R b/tests/testthat/test-get_urls_repos-GitLab.R index 17d4589b..91fb72dc 100644 --- a/tests/testthat/test-get_urls_repos-GitLab.R +++ b/tests/testthat/test-get_urls_repos-GitLab.R @@ -121,17 +121,62 @@ test_that("`get_repo_url_from_response()` works", { test_mocker$cache(gl_repo_web_urls) }) -test_that("get_repos_urls returns repositories URLS", { +test_that("get_repos_urls_with_code_from_orgs returns repositories URLS", { mockery::stub( - gitlab_testhost$get_repos_urls, + gitlab_testhost_priv$get_repos_urls_with_code_from_orgs, "private$get_repo_url_from_response", test_mocker$use("gl_repo_web_urls") ) - gl_repos_urls_with_code_in_files <- gitlab_testhost$get_repos_urls( + gl_repos_urls_with_code_from_orgs <- gitlab_testhost_priv$get_repos_urls_with_code_from_orgs( type = "web", - with_code = "shiny", + code = "shiny", in_files = "DESCRIPTION", - verbose = FALSE + in_path = FALSE, + verbose = FALSE, + progress = FALSE + ) + expect_type(gl_repos_urls_with_code_from_orgs, "character") + expect_gt(length(gl_repos_urls_with_code_from_orgs), 0) + test_mocker$cache(gl_repos_urls_with_code_from_orgs) +}) + +test_that("get_repos_urls_with_code_from_repos returns repositories URLS", { + mockery::stub( + gitlab_testhost_priv$get_repos_urls_with_code_from_repos, + "private$get_repo_url_from_response", + test_mocker$use("gl_repo_web_urls") + ) + gl_repos_urls_with_code_from_repos <- gitlab_testhost_priv$get_repos_urls_with_code_from_repos( + type = "web", + code = "shiny", + in_files = "DESCRIPTION", + in_path = FALSE, + verbose = FALSE, + progress = FALSE + ) + expect_type(gl_repos_urls_with_code_from_repos, "character") + expect_gt(length(gl_repos_urls_with_code_from_repos), 0) + test_mocker$cache(gl_repos_urls_with_code_from_repos) +}) + +test_that("get_repos_urls_with_code_from_repos returns repositories URLS", { + mockery::stub( + 
gitlab_testhost_priv$get_repos_urls_with_code, + "private$get_repos_urls_with_code_from_orgs", + test_mocker$use("gl_repos_urls_with_code_from_orgs") + ) + mockery::stub( + gitlab_testhost_priv$get_repos_urls_with_code, + "private$get_repos_urls_with_code_from_repos", + test_mocker$use("gl_repos_urls_with_code_from_repos") + ) + gl_repos_urls_with_code_in_files <- gitlab_testhost_priv$get_repos_urls_with_code( + type = "web", + code = "shiny", + in_files = "DESCRIPTION", + in_path = FALSE, + verbose = FALSE, + progress = FALSE ) expect_type(gl_repos_urls_with_code_in_files, "character") expect_gt(length(gl_repos_urls_with_code_in_files), 0) From 200669dfbd571e4314a895da9cb02c9d95b3656a Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Thu, 19 Dec 2024 11:06:24 +0000 Subject: [PATCH 45/99] Add test. --- tests/testthat/test-get_urls_repos-GitHub.R | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/testthat/test-get_urls_repos-GitHub.R b/tests/testthat/test-get_urls_repos-GitHub.R index 72c0a3ab..8d6e95f2 100644 --- a/tests/testthat/test-get_urls_repos-GitHub.R +++ b/tests/testthat/test-get_urls_repos-GitHub.R @@ -196,3 +196,22 @@ test_that("get_repos_urls_with_code_from_repos returns repositories URLS", { expect_gt(length(gh_repos_urls_with_code_in_files), 0) test_mocker$cache(gh_repos_urls_with_code_in_files) }) + +test_that("get_repos_urls_with_code_from_repos returns repositories URLS", { + mockery::stub( + github_testhost$get_repos_urls, + "private$get_repos_urls_with_code", + test_mocker$use("gh_repos_urls_with_code_in_files") + ) + gh_repos_urls_with_code_in_files <- github_testhost$get_repos_urls( + type = "web", + with_code = "shiny", + in_files = "DESCRIPTION", + with_file = NULL, + verbose = FALSE, + progress = FALSE + ) + expect_type(gh_repos_urls_with_code_in_files, "character") + expect_gt(length(gh_repos_urls_with_code_in_files), 0) + test_mocker$cache(gh_repos_urls_with_code_in_files) +}) From 
d2b243766e2d600085b97d32c2643baaab9e1c73 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Thu, 19 Dec 2024 11:24:45 +0000 Subject: [PATCH 46/99] Add test to fix coverage. --- tests/testthat/test-01-get_repos-GitHub.R | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/testthat/test-01-get_repos-GitHub.R b/tests/testthat/test-01-get_repos-GitHub.R index 54afe647..0e34136e 100644 --- a/tests/testthat/test-01-get_repos-GitHub.R +++ b/tests/testthat/test-01-get_repos-GitHub.R @@ -637,5 +637,16 @@ test_that("`get_repos()` pulls table in minimalist version", { repo_cols = repo_min_colnames, with_cols = c("api_url", "platform") ) + gh_repos_table_min <- github_testhost$get_repos( + add_contributors = FALSE, + with_file = "test_file", + output = "table_min", + verbose = FALSE + ) + expect_repos_table( + gh_repos_table_min, + repo_cols = repo_min_colnames, + with_cols = c("api_url", "platform") + ) test_mocker$cache(gh_repos_table_min) }) From 59a34d2309a744b4fba2afc66fd0d585e5dfc41a Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Thu, 19 Dec 2024 11:56:53 +0000 Subject: [PATCH 47/99] Add tests to meet coverage threshold. --- tests/testthat/_snaps/01-get_repos-GitLab.md | 14 +++++++ .../_snaps/get_files_structure-GitHub.md | 5 +++ tests/testthat/_snaps/helpers.md | 8 ++++ tests/testthat/test-01-get_repos-GitLab.R | 18 +++++++++ .../test-get_files_structure-GitHub.R | 11 ++++++ tests/testthat/test-get_urls_repos-GitHub.R | 37 ++++++++++++++++++- tests/testthat/test-helpers.R | 10 +++++ 7 files changed, 101 insertions(+), 2 deletions(-) diff --git a/tests/testthat/_snaps/01-get_repos-GitLab.md b/tests/testthat/_snaps/01-get_repos-GitLab.md index 8b4158e8..9e412a4f 100644 --- a/tests/testthat/_snaps/01-get_repos-GitLab.md +++ b/tests/testthat/_snaps/01-get_repos-GitLab.md @@ -5,3 +5,17 @@ Output [1] "\n query GetReposByOrg($org: ID! $repo_cursor: String!) 
{\n group(fullPath: $org) {\n projects(first: 100 after: $repo_cursor) {\n \n count\n pageInfo {\n hasNextPage\n endCursor\n }\n edges {\n node {\n repo_id: id\n repo_name: name\n repo_path: path\n ... on Project {\n repository {\n rootRef\n }\n }\n stars: starCount\n forks: forksCount\n created_at: createdAt\n last_activity_at: lastActivityAt\n languages {\n name\n }\n issues: issueStatusCounts {\n all\n closed\n opened\n }\n namespace {\n path: fullPath\n }\n repo_url: webUrl\n }\n }\n }\n }\n }" +# get_repos_from_org prints proper message + + Code + gl_repos_from_orgs <- gitlab_testhost_priv$get_repos_from_orgs(verbose = TRUE, + progress = FALSE) + Message + i [Host:GitLab][Engine:GraphQl][Scope:mbtests] Pulling repositories... + +# get_repos_from_repos prints proper message + + Code + gl_repos_from_repos <- gitlab_testhost_priv$get_repos_from_repos(verbose = TRUE, + progress = FALSE) + diff --git a/tests/testthat/_snaps/get_files_structure-GitHub.md b/tests/testthat/_snaps/get_files_structure-GitHub.md index 1a29ddd2..e00cecfe 100644 --- a/tests/testthat/_snaps/get_files_structure-GitHub.md +++ b/tests/testthat/_snaps/get_files_structure-GitHub.md @@ -15,6 +15,11 @@ Output named list() +# get_files_structure aborts when scope to scan whole host + + x This feature is not applicable to scan whole Git Host (time consuming). + i Set `orgs` or `repos` arguments in `set_*_host()` if you wish to run this function. + # get_files_content makes use of files_structure Code diff --git a/tests/testthat/_snaps/helpers.md b/tests/testthat/_snaps/helpers.md index d1167c04..896199b3 100644 --- a/tests/testthat/_snaps/helpers.md +++ b/tests/testthat/_snaps/helpers.md @@ -10,6 +10,14 @@ gitlab_testhost_priv$set_searching_scope(orgs = NULL, repos = "mbtests/GitStatsTesting", verbose = TRUE) +# `set_searching_scope` sets scope to whole host + + Code + gitlab_testhost_priv$set_searching_scope(orgs = NULL, repos = NULL, verbose = TRUE) + Message + i No `orgs` nor `repos` specified. 
+ i Searching scope set to [all]. + # When token is empty throw error Code diff --git a/tests/testthat/test-01-get_repos-GitLab.R b/tests/testthat/test-01-get_repos-GitLab.R index fffd5153..f4c40890 100644 --- a/tests/testthat/test-01-get_repos-GitLab.R +++ b/tests/testthat/test-01-get_repos-GitLab.R @@ -158,6 +158,24 @@ test_that("`prepare_repos_table()` prepares repos table", { test_mocker$cache(gl_repos_table) }) +test_that("get_repos_from_org prints proper message", { + mockery::stub( + gitlab_testhost_priv$get_repos_from_orgs, + "graphql_engine$prepare_repos_table", + test_mocker$use("gl_repos_table") + ) + expect_snapshot( + gl_repos_from_orgs <- gitlab_testhost_priv$get_repos_from_orgs( + verbose = TRUE, + progress = FALSE + ) + ) + expect_repos_table( + gl_repos_from_orgs + ) + test_mocker$cache(gl_repos_from_orgs) +}) + test_that("GitHost adds `repo_api_url` column to GitLab repos table", { repos_table <- test_mocker$use("gl_repos_table") gl_repos_table_with_api_url <- gitlab_testhost_priv$add_repo_api_url(repos_table) diff --git a/tests/testthat/test-get_files_structure-GitHub.R b/tests/testthat/test-get_files_structure-GitHub.R index 0c300350..cc6fc82c 100644 --- a/tests/testthat/test-get_files_structure-GitHub.R +++ b/tests/testthat/test-get_files_structure-GitHub.R @@ -248,6 +248,17 @@ test_that("get_files_structure pulls files structure for repositories in orgs", test_mocker$cache(gh_files_structure_from_orgs) }) +test_that("get_files_structure aborts when scope to scan whole host", { + github_testhost$.__enclos_env__$private$scan_all <- TRUE + expect_snapshot_error( + github_testhost$get_files_structure( + pattern = "\\.md|\\.qmd", + depth = 1L, + verbose = FALSE + ) + ) +}) + test_that("get_files_content makes use of files_structure", { mockery::stub( github_testhost_priv$get_files_content_from_files_structure, diff --git a/tests/testthat/test-get_urls_repos-GitHub.R b/tests/testthat/test-get_urls_repos-GitHub.R index 8d6e95f2..613fe956 100644 --- 
a/tests/testthat/test-get_urls_repos-GitHub.R +++ b/tests/testthat/test-get_urls_repos-GitHub.R @@ -173,7 +173,7 @@ test_that("get_repos_urls_with_code_from_repos returns repositories URLS", { test_mocker$cache(gh_repos_urls_with_code_from_repos) }) -test_that("get_repos_urls_with_code_from_repos returns repositories URLS", { +test_that("get_repos_urls_with_code returns repositories URLS", { mockery::stub( github_testhost_priv$get_repos_urls_with_code, "private$get_repos_urls_with_code_from_orgs", @@ -197,7 +197,7 @@ test_that("get_repos_urls_with_code_from_repos returns repositories URLS", { test_mocker$cache(gh_repos_urls_with_code_in_files) }) -test_that("get_repos_urls_with_code_from_repos returns repositories URLS", { +test_that("get_repos_urls returns repositories URLS", { mockery::stub( github_testhost$get_repos_urls, "private$get_repos_urls_with_code", @@ -215,3 +215,36 @@ test_that("get_repos_urls_with_code_from_repos returns repositories URLS", { expect_gt(length(gh_repos_urls_with_code_in_files), 0) test_mocker$cache(gh_repos_urls_with_code_in_files) }) + +test_that("get_repos_urls returns repositories URLS", { + mockery::stub( + github_testhost$get_repos_urls, + "private$get_repos_urls_with_code", + test_mocker$use("gh_repos_urls_with_code_in_files") + ) + gh_repos_urls_with_code_in_files <- github_testhost$get_repos_urls( + type = "web", + with_file = "DESCRIPTION", + verbose = FALSE, + progress = FALSE + ) + expect_type(gh_repos_urls_with_code_in_files, "character") + expect_gt(length(gh_repos_urls_with_code_in_files), 0) + test_mocker$cache(gh_repos_urls_with_code_in_files) +}) + +test_that("get_repos_urls_with_code_from_repos returns repositories URLS", { + mockery::stub( + github_testhost$get_repos_urls, + "private$get_all_repos_urls", + test_mocker$use("gh_repos_urls") + ) + gh_repos_urls <- github_testhost$get_repos_urls( + type = "web", + verbose = FALSE, + progress = FALSE + ) + expect_type(gh_repos_urls, "character") + 
expect_gt(length(gh_repos_urls), 0) + test_mocker$cache(gh_repos_urls) +}) diff --git a/tests/testthat/test-helpers.R b/tests/testthat/test-helpers.R index d95052ac..827bcd41 100644 --- a/tests/testthat/test-helpers.R +++ b/tests/testthat/test-helpers.R @@ -7,6 +7,16 @@ test_that("`set_searching_scope` does not throw error when `orgs` or `repos` are ) }) +test_that("`set_searching_scope` sets scope to whole host", { + gitlab_testhost_priv$is_public <- FALSE + expect_snapshot( + gitlab_testhost_priv$set_searching_scope(orgs = NULL, repos = NULL, verbose = TRUE) + ) + expect_true( + gitlab_testhost_priv$scan_all + ) +}) + test_that("`extract_repos_and_orgs` extracts fullnames vector into a list of GitLab organizations with assigned repositories", { repos_fullnames <- c( "mbtests/gitstatstesting", "mbtests/gitstats-testing-2", "mbtests/subgroup/test-project-in-subgroup" From b621cab388c2a18ddc73b3acb853d36ff1335ceb Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Thu, 19 Dec 2024 12:40:55 +0000 Subject: [PATCH 48/99] Add tests, adjust code. 
--- R/GitHost.R | 9 ++-- R/test_helpers.R | 22 +++++++++ tests/testthat/_snaps/01-get_repos-GitHub.md | 7 +++ tests/testthat/_snaps/01-get_repos-GitLab.md | 6 --- .../testthat/_snaps/02-get_commits-GitHub.md | 8 ++++ tests/testthat/_snaps/get_release-GitHub.md | 8 ++++ tests/testthat/test-01-get_repos-GitHub.R | 48 +++++++++++++++++++ tests/testthat/test-02-get_commits-GitHub.R | 39 ++++++++++++--- tests/testthat/test-get_release-GitHub.R | 27 +++++++++++ 9 files changed, 158 insertions(+), 16 deletions(-) diff --git a/R/GitHost.R b/R/GitHost.R index 3f6a496e..c7aa46b2 100644 --- a/R/GitHost.R +++ b/R/GitHost.R @@ -125,7 +125,8 @@ GitHost <- R6::R6Class( progress = TRUE) { if (private$scan_all && is.null(private$orgs) && verbose) { cli::cli_alert_info("[{private$host_name}][Engine:{cli::col_yellow('GraphQL')}] Pulling all organizations...") - private$orgs <- private$engines$graphql$get_orgs() + graphql_engine <- private$engines$graphql + private$orgs <- graphql_engine$get_orgs() } commits_from_orgs <- private$get_commits_from_orgs( since = since, @@ -234,7 +235,8 @@ GitHost <- R6::R6Class( if (verbose) { cli::cli_alert_info("[{private$host_name}][Engine:{cli::col_yellow('GraphQL')}] Pulling all organizations...") } - private$orgs <- private$engines$graphql$get_orgs() + graphql_engine <- private$engines$graphql + private$orgs <- graphql_engine$get_orgs() } until <- until %||% Sys.time() release_logs_from_orgs <- private$get_release_logs_from_orgs( @@ -604,7 +606,8 @@ GitHost <- R6::R6Class( information = "Pulling all organizations" ) } - private$orgs <- private$engines$graphql$get_orgs() + graphql_engine <- private$engines$graphql + private$orgs <- graphql_engine$get_orgs() } repos_table <- purrr::list_rbind( list( diff --git a/R/test_helpers.R b/R/test_helpers.R index 056e2a86..18afac35 100644 --- a/R/test_helpers.R +++ b/R/test_helpers.R @@ -127,6 +127,28 @@ create_github_testhost <- function(host = NULL, return(test_host) } +#' @noRd +create_github_testhost_all 
<- function(host = NULL, + orgs = NULL, + repos = NULL, + token = NULL, + mode = "") { + suppressMessages( + test_host <- GitHostGitHubTest$new( + host = NULL, + token = token, + orgs = orgs, + repos = repos + ) + ) + test_host$.__enclos_env__$private$orgs <- NULL + test_host$.__enclos_env__$private$scan_all <- TRUE + if (mode == "private") { + test_host <- environment(test_host$initialize)$private + } + return(test_host) +} + #' @noRd create_gitlab_testhost <- function(host = NULL, orgs = NULL, diff --git a/tests/testthat/_snaps/01-get_repos-GitHub.md b/tests/testthat/_snaps/01-get_repos-GitHub.md index 2edba7cf..bafe0009 100644 --- a/tests/testthat/_snaps/01-get_repos-GitHub.md +++ b/tests/testthat/_snaps/01-get_repos-GitHub.md @@ -5,3 +5,10 @@ Output [1] "\n query GetReposByOrg($login: String!) {\n repositoryOwner(login: $login) {\n ... on Organization {\n \n repositories(first: 100) {\n totalCount\n pageInfo {\n endCursor\n hasNextPage\n }\n nodes {\n repo_id: id\n repo_name: name\n default_branch: defaultBranchRef {\n name\n }\n stars: stargazerCount\n forks: forkCount\n created_at: createdAt\n last_activity_at: pushedAt\n languages (first: 5) { nodes {name} }\n issues_open: issues (first: 100 states: [OPEN]) {\n totalCount\n }\n issues_closed: issues (first: 100 states: [CLOSED]) {\n totalCount\n }\n organization: owner {\n login\n }\n repo_url: url\n }\n }\n \n }\n }\n }" +# `get_all_repos()` is set to scan whole git host + + Code + gh_repos <- github_testhost_all_priv$get_all_repos(verbose = TRUE, progress = FALSE) + Message + i [Host:GitHub][Engine:GraphQl] Pulling all organizations... + diff --git a/tests/testthat/_snaps/01-get_repos-GitLab.md b/tests/testthat/_snaps/01-get_repos-GitLab.md index 9e412a4f..d84ced03 100644 --- a/tests/testthat/_snaps/01-get_repos-GitLab.md +++ b/tests/testthat/_snaps/01-get_repos-GitLab.md @@ -13,9 +13,3 @@ Message i [Host:GitLab][Engine:GraphQl][Scope:mbtests] Pulling repositories... 
-# get_repos_from_repos prints proper message - - Code - gl_repos_from_repos <- gitlab_testhost_priv$get_repos_from_repos(verbose = TRUE, - progress = FALSE) - diff --git a/tests/testthat/_snaps/02-get_commits-GitHub.md b/tests/testthat/_snaps/02-get_commits-GitHub.md index 5fe49647..8c59addf 100644 --- a/tests/testthat/_snaps/02-get_commits-GitHub.md +++ b/tests/testthat/_snaps/02-get_commits-GitHub.md @@ -9,3 +9,11 @@ i GraphQL response error +# `get_commits()` is set to scan whole git host + + Code + gh_commits_table <- github_testhost_all$get_commits(since = "2023-01-01", + until = "2023-02-28", verbose = TRUE, progress = FALSE) + Message + i [GitHub][Engine:GraphQL] Pulling all organizations... + diff --git a/tests/testthat/_snaps/get_release-GitHub.md b/tests/testthat/_snaps/get_release-GitHub.md index 116312b0..94bc9e8e 100644 --- a/tests/testthat/_snaps/get_release-GitHub.md +++ b/tests/testthat/_snaps/get_release-GitHub.md @@ -5,3 +5,11 @@ Output [1] "query GetReleasesFromRepo ($org: String!, $repo: String!) {\n repository(owner:$org, name:$repo){\n name\n url\n releases (last: 100) {\n nodes {\n name\n tagName\n publishedAt\n url\n description\n }\n }\n }\n }" +# `get_release_logs()` is set to scan whole git host + + Code + gh_releases_table <- github_testhost_all$get_release_logs(since = "2023-01-01", + until = "2023-02-28", verbose = TRUE, progress = FALSE) + Message + i [GitHub][Engine:GraphQL] Pulling all organizations... 
+ diff --git a/tests/testthat/test-01-get_repos-GitHub.R b/tests/testthat/test-01-get_repos-GitHub.R index 0e34136e..e86c31bf 100644 --- a/tests/testthat/test-01-get_repos-GitHub.R +++ b/tests/testthat/test-01-get_repos-GitHub.R @@ -395,6 +395,7 @@ test_that("`get_repos_with_code_from_host()` pulls raw response", { ) expect_type(repos_with_code_from_host_raw, "list") expect_gt(length(repos_with_code_from_host_raw), 0) + test_mocker$cache(repos_with_code_from_host_raw) }) test_that("get_repos_with_code() works", { @@ -432,6 +433,25 @@ test_that("get_repos_with_code() works", { test_mocker$cache(github_repos_with_code_min) }) + +test_that("get_repos_with_code() scans whole host", { + mockery::stub( + github_testhost_priv$get_repos_with_code, + "private$get_repos_with_code_from_host", + test_mocker$use("repos_with_code_from_host_raw") + ) + github_testhost_priv$scan_all <- TRUE + github_repos_with_code_raw <- github_testhost_priv$get_repos_with_code( + code = "test-code", + output = "raw", + verbose = FALSE, + progress = FALSE + ) + expect_type(github_repos_with_code_raw, "list") + expect_gt(length(github_repos_with_code_raw), 0) + github_testhost_priv$scan_all <- FALSE +}) + test_that("GitHub prepares repos table from repositories response", { gh_repos_table <- test_graphql_github$prepare_repos_table( repos_list = test_mocker$use("gh_repos_from_org") @@ -504,6 +524,34 @@ test_that("`get_all_repos()` works as expected", { test_mocker$cache(gh_repos_table) }) +test_that("`get_all_repos()` is set to scan whole git host", { + github_testhost_all_priv <- create_github_testhost_all( + orgs = "test_org", + mode = "private" + ) + mockery::stub( + github_testhost_all_priv$get_all_repos, + "graphql_engine$get_orgs", + "test_org" + ) + mockery::stub( + github_testhost_all_priv$get_all_repos, + "private$get_repos_from_orgs", + test_mocker$use("gh_repos_from_orgs") + ) + mockery::stub( + github_testhost_all_priv$get_all_repos, + "private$get_repos_from_repos", + 
test_mocker$use("gh_repos_individual") + ) + expect_snapshot( + gh_repos <- github_testhost_all_priv$get_all_repos( + verbose = TRUE, + progress = FALSE + ) + ) +}) + test_that("GitHost adds `repo_api_url` column to GitHub repos table", { repos_table <- test_mocker$use("gh_repos_table") gh_repos_table_with_api_url <- github_testhost_priv$add_repo_api_url(repos_table) diff --git a/tests/testthat/test-02-get_commits-GitHub.R b/tests/testthat/test-02-get_commits-GitHub.R index a42025e5..38f0c5e1 100644 --- a/tests/testthat/test-02-get_commits-GitHub.R +++ b/tests/testthat/test-02-get_commits-GitHub.R @@ -177,16 +177,41 @@ test_that("`get_commits()` retrieves commits in the table format", { "private$get_commits_from_repos", test_mocker$use("gh_commits_from_repos") ) - suppressMessages( - gh_commits_table <- github_testhost$get_commits( - since = "2023-01-01", - until = "2023-02-28", - verbose = FALSE, - progress = FALSE - ) + gh_commits_table <- github_testhost$get_commits( + since = "2023-01-01", + until = "2023-02-28", + verbose = FALSE, + progress = FALSE ) expect_commits_table( gh_commits_table ) test_mocker$cache(gh_commits_table) }) + +test_that("`get_commits()` is set to scan whole git host", { + github_testhost_all <- create_github_testhost_all(orgs = "test_org") + mockery::stub( + github_testhost_all$get_commits, + "graphql_engine$get_orgs", + "test_org" + ) + mockery::stub( + github_testhost_all$get_commits, + "private$get_commits_from_orgs", + test_mocker$use("gh_commits_from_orgs") + ) + mockery::stub( + github_testhost_all$get_commits, + "private$get_commits_from_repos", + test_mocker$use("gh_commits_from_repos") + ) + expect_snapshot( + gh_commits_table <- github_testhost_all$get_commits( + since = "2023-01-01", + until = "2023-02-28", + verbose = TRUE, + progress = FALSE + ) + ) +}) diff --git a/tests/testthat/test-get_release-GitHub.R b/tests/testthat/test-get_release-GitHub.R index a576cfd6..19b8f49a 100644 --- 
a/tests/testthat/test-get_release-GitHub.R +++ b/tests/testthat/test-get_release-GitHub.R @@ -102,3 +102,30 @@ test_that("`get_release_logs()` pulls release logs in the table format", { expect_lt(max(releases_table$published_at), as.POSIXct("2023-09-30")) test_mocker$cache(releases_table) }) + +test_that("`get_release_logs()` is set to scan whole git host", { + github_testhost_all <- create_github_testhost_all(orgs = "test_org") + mockery::stub( + github_testhost_all$get_release_logs, + "graphql_engine$get_orgs", + "test_org" + ) + mockery::stub( + github_testhost_all$get_release_logs, + "private$get_release_logs_from_repos", + test_mocker$use("releases_from_repos") + ) + mockery::stub( + github_testhost_all$get_release_logs, + "private$get_release_logs_from_orgs", + test_mocker$use("releases_from_orgs") + ) + expect_snapshot( + gh_releases_table <- github_testhost_all$get_release_logs( + since = "2023-01-01", + until = "2023-02-28", + verbose = TRUE, + progress = FALSE + ) + ) +}) From b85ed501a3f25acdf646372992f8c6f3fb149a6f Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Thu, 19 Dec 2024 15:34:42 +0000 Subject: [PATCH 49/99] Fix after code review. 
--- R/EngineGraphQLGitLab.R | 6 +----- tests/testthat/test-get_files_content-GitLab.R | 2 -- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/R/EngineGraphQLGitLab.R b/R/EngineGraphQLGitLab.R index a4010946..5251b0f8 100644 --- a/R/EngineGraphQLGitLab.R +++ b/R/EngineGraphQLGitLab.R @@ -127,7 +127,6 @@ EngineGraphQLGitLab <- R6::R6Class( repos, file_paths = NULL, host_files_structure = NULL, - only_text_files, verbose = FALSE, progress = FALSE) { org <- URLdecode(org) @@ -139,7 +138,7 @@ EngineGraphQLGitLab <- R6::R6Class( host_files_structure = host_files_structure, org = org ) - } else if (only_text_files) { + } else { file_paths <- file_paths[grepl(text_files_pattern, file_paths)] } if (type == "organization") { @@ -177,7 +176,6 @@ EngineGraphQLGitLab <- R6::R6Class( repos = repos, file_paths = file_paths, host_files_structure = host_files_structure, - only_text_files = only_text_files, verbose = verbose, progress = progress ) @@ -219,7 +217,6 @@ EngineGraphQLGitLab <- R6::R6Class( repos = repos, file_paths = file_paths, host_files_structure = host_files_structure, - only_text_files = only_text_files, verbose = verbose, progress = progress ) @@ -235,7 +232,6 @@ EngineGraphQLGitLab <- R6::R6Class( repos, file_paths = NULL, host_files_structure = NULL, - only_text_files = TRUE, verbose = FALSE, progress = FALSE) { if (is.null(repos)) { diff --git a/tests/testthat/test-get_files_content-GitLab.R b/tests/testthat/test-get_files_content-GitLab.R index 3aeeff65..acacb91e 100644 --- a/tests/testthat/test-get_files_content-GitLab.R +++ b/tests/testthat/test-get_files_content-GitLab.R @@ -60,7 +60,6 @@ test_that("GitLab GraphQL Engine pulls files from a group", { type = "organization", repos = NULL, file_paths = "meta_data.yaml", - only_text_files = TRUE, host_files_structure = NULL ) expect_gitlab_files_from_org_response(gitlab_files_response) @@ -123,7 +122,6 @@ test_that("Gitlab GraphQL switches to pulling files per repositories when query repos = NULL, 
file_paths = c("project_metadata.yaml", "README.md"), host_files_structure = NULL, - only_text_files = TRUE, verbose = FALSE, progress = FALSE ) From 4f357b5233506a525dbc397db85af7db7360362f Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Fri, 20 Dec 2024 08:03:10 +0000 Subject: [PATCH 50/99] Add tests. --- R/GitHost.R | 7 +- tests/testthat/_snaps/01-get_repos-GitHub.md | 47 ++++++++ .../testthat/_snaps/get_urls_repos-GitHub.md | 8 ++ tests/testthat/test-01-get_repos-GitHub.R | 105 ++++++++++++++++-- tests/testthat/test-get_urls_repos-GitHub.R | 39 ++++++- 5 files changed, 189 insertions(+), 17 deletions(-) create mode 100644 tests/testthat/_snaps/get_urls_repos-GitHub.md diff --git a/R/GitHost.R b/R/GitHost.R index c7aa46b2..d373f1c4 100644 --- a/R/GitHost.R +++ b/R/GitHost.R @@ -721,7 +721,8 @@ GitHost <- R6::R6Class( information = "Pulling all organizations" ) } - private$orgs <- private$engines$graphql$get_orgs() + graphql_engine <- private$engines$graphql + private$orgs <- graphql_engine$get_orgs() } repos_urls_from_orgs <- private$get_repos_urls_from_orgs( type = type, @@ -823,14 +824,14 @@ GitHost <- R6::R6Class( verbose = verbose, progress = progress ) - }) %>% + }) |> purrr::list_flatten() } if (output != "raw") { repos_table <- repos_response %>% rest_engine$tailor_repos_response( output = output - ) %>% + ) |> rest_engine$prepare_repos_table( output = output, verbose = verbose diff --git a/tests/testthat/_snaps/01-get_repos-GitHub.md b/tests/testthat/_snaps/01-get_repos-GitHub.md index bafe0009..42efdf6e 100644 --- a/tests/testthat/_snaps/01-get_repos-GitHub.md +++ b/tests/testthat/_snaps/01-get_repos-GitHub.md @@ -5,6 +5,53 @@ Output [1] "\n query GetReposByOrg($login: String!) {\n repositoryOwner(login: $login) {\n ... 
on Organization {\n \n repositories(first: 100) {\n totalCount\n pageInfo {\n endCursor\n hasNextPage\n }\n nodes {\n repo_id: id\n repo_name: name\n default_branch: defaultBranchRef {\n name\n }\n stars: stargazerCount\n forks: forkCount\n created_at: createdAt\n last_activity_at: pushedAt\n languages (first: 5) { nodes {name} }\n issues_open: issues (first: 100 states: [OPEN]) {\n totalCount\n }\n issues_closed: issues (first: 100 states: [CLOSED]) {\n totalCount\n }\n organization: owner {\n login\n }\n repo_url: url\n }\n }\n \n }\n }\n }" +# `get_repos_with_code_from_orgs()` pulls raw response + + Code + repos_with_code_from_orgs_raw <- github_testhost_priv$ + get_repos_with_code_from_orgs(code = "shiny", in_files = c("DESCRIPTION", + "NAMESPACE"), output = "raw", verbose = TRUE) + Message + i [Host:GitHub][Engine:REST][Scope:test_org] Pulling repositories... + +# `get_repos_with_code_from_host()` pulls and parses output into table + + Code + repos_with_code_from_host_table <- github_testhost_priv$ + get_repos_with_code_from_host(code = "DESCRIPTION", in_path = TRUE, output = "table_full", + verbose = TRUE) + Message + i [Host:GitHub][Engine:REST] Pulling repositories... + +# `get_repos_with_code_from_repos()` works + + Code + repos_with_code_from_repos_full <- github_testhost_priv$ + get_repos_with_code_from_repos(code = "tests", output = "table_full", + verbose = TRUE) + Message + i [Host:GitHub][Engine:REST][Scope:] Pulling repositories... + i Preparing repositories table... + +# `get_repos_with_code_from_orgs()` pulls minimum version of table + + Code + repos_with_code_from_repos_min <- github_testhost_priv$ + get_repos_with_code_from_repos(code = "tests", in_files = "DESCRIPTION", + output = "table_min", verbose = TRUE) + Message + i [Host:GitHub][Engine:REST][Scope:] Pulling repositories... + i Preparing repositories table... 
+ +# `get_repos_with_code_from_host()` pulls raw response + + Code + repos_with_code_from_host_raw <- github_testhost_priv$ + get_repos_with_code_from_host(code = "shiny", in_files = c("DESCRIPTION", + "NAMESPACE"), output = "raw", verbose = TRUE) + Message + i [Host:GitHub][Engine:REST] Pulling repositories... + # `get_all_repos()` is set to scan whole git host Code diff --git a/tests/testthat/_snaps/get_urls_repos-GitHub.md b/tests/testthat/_snaps/get_urls_repos-GitHub.md new file mode 100644 index 00000000..aeb16a31 --- /dev/null +++ b/tests/testthat/_snaps/get_urls_repos-GitHub.md @@ -0,0 +1,8 @@ +# get_repos_urls_from_orgs prepares web repo_urls vector + + Code + gh_repos_urls_from_orgs <- github_testhost_priv$get_repos_urls_from_orgs(type = "web", + verbose = TRUE, progress = FALSE) + Message + i [Host:GitHub][Engine:REST][Scope:test_org] Pulling repositories (URLs)... + diff --git a/tests/testthat/test-01-get_repos-GitHub.R b/tests/testthat/test-01-get_repos-GitHub.R index e86c31bf..3036fb20 100644 --- a/tests/testthat/test-01-get_repos-GitHub.R +++ b/tests/testthat/test-01-get_repos-GitHub.R @@ -371,27 +371,112 @@ test_that("`get_repos_with_code_from_orgs()` pulls raw response", { "rest_engine$get_repos_by_code", test_mocker$use("gh_repos_by_code_raw") ) - repos_with_code_from_orgs_raw <- github_testhost_priv$get_repos_with_code_from_orgs( - code = "shiny", - in_files = c("DESCRIPTION", "NAMESPACE"), - output = "raw", - verbose = FALSE + expect_snapshot( + repos_with_code_from_orgs_raw <- github_testhost_priv$get_repos_with_code_from_orgs( + code = "shiny", + in_files = c("DESCRIPTION", "NAMESPACE"), + output = "raw", + verbose = TRUE + ) ) expect_type(repos_with_code_from_orgs_raw, "list") expect_gt(length(repos_with_code_from_orgs_raw), 0) }) +test_that("`get_repos_with_code_from_host()` pulls and parses output into table", { + mockery::stub( + github_testhost_priv$get_repos_with_code_from_host, + "rest_engine$get_repos_by_code", + 
test_mocker$use("gh_repos_by_code_raw") + ) + mockery::stub( + github_testhost_priv$get_repos_with_code_from_host, + "rest_engine$prepare_repos_table", + test_mocker$use("gh_repos_by_code_table") + ) + mockery::stub( + github_testhost_priv$get_repos_with_code_from_host, + "rest_engine$get_repos_issues", + test_mocker$use("gh_repos_by_code_table") + ) + expect_snapshot( + repos_with_code_from_host_table <- github_testhost_priv$get_repos_with_code_from_host( + code = "DESCRIPTION", + in_path = TRUE, + output = "table_full", + verbose = TRUE + ) + ) + expect_repos_table(repos_with_code_from_host_table) +}) + +test_that("`get_repos_with_code_from_repos()` works", { + github_testhost_priv <- create_github_testhost( + repos = c("TestRepo1", "TestRepo2"), + mode = "private" + ) + mockery::stub( + github_testhost_priv$get_repos_with_code_from_repos, + "rest_engine$get_repos_by_code", + test_mocker$use("gh_repos_by_code") + ) + mockery::stub( + github_testhost_priv$get_repos_with_code_from_repos, + "rest_engine$get_repos_issues", + test_mocker$use("gh_repos_by_code_table") + ) + github_testhost_priv$searching_scope <- c("repo") + expect_snapshot( + repos_with_code_from_repos_full <- github_testhost_priv$get_repos_with_code_from_repos( + code = "tests", + output = "table_full", + verbose = TRUE + ) + ) + expect_repos_table(repos_with_code_from_repos_full) +}) + +test_that("`get_repos_with_code_from_repos()` pulls minimum version of table", { + github_testhost_priv <- create_github_testhost( + repos = c("TestRepo1", "TestRepo2"), + mode = "private" + ) + mockery::stub( + github_testhost_priv$get_repos_with_code_from_repos, + "rest_engine$get_repos_by_code", + test_mocker$use("gh_repos_by_code") + ) + mockery::stub( + github_testhost_priv$get_repos_with_code_from_repos, + "rest_engine$get_repos_issues", + test_mocker$use("gh_repos_by_code_table") + ) + github_testhost_priv$searching_scope <- c("org", "repo") + expect_snapshot( + repos_with_code_from_repos_min <- 
github_testhost_priv$get_repos_with_code_from_repos( + code = "tests", + in_files = "DESCRIPTION", + output = "table_min", + verbose = TRUE + ) + ) + expect_repos_table(repos_with_code_from_repos_min, + repo_cols = repo_min_colnames) +}) + test_that("`get_repos_with_code_from_host()` pulls raw response", { mockery::stub( github_testhost_priv$get_repos_with_code_from_host, "rest_engine$get_repos_by_code", test_mocker$use("gh_repos_by_code_raw") ) - repos_with_code_from_host_raw <- github_testhost_priv$get_repos_with_code_from_host( - code = "shiny", - in_files = c("DESCRIPTION", "NAMESPACE"), - output = "raw", - verbose = FALSE + expect_snapshot( + repos_with_code_from_host_raw <- github_testhost_priv$get_repos_with_code_from_host( + code = "shiny", + in_files = c("DESCRIPTION", "NAMESPACE"), + output = "raw", + verbose = TRUE + ) ) expect_type(repos_with_code_from_host_raw, "list") expect_gt(length(repos_with_code_from_host_raw), 0) diff --git a/tests/testthat/test-get_urls_repos-GitHub.R b/tests/testthat/test-get_urls_repos-GitHub.R index 613fe956..0a9c008e 100644 --- a/tests/testthat/test-get_urls_repos-GitHub.R +++ b/tests/testthat/test-get_urls_repos-GitHub.R @@ -59,10 +59,12 @@ test_that("get_repos_urls_from_orgs prepares web repo_urls vector", { ) github_testhost_priv$searching_scope <- "org" github_testhost_priv$orgs <- "test_org" - gh_repos_urls_from_orgs <- github_testhost_priv$get_repos_urls_from_orgs( - type = "web", - verbose = FALSE, - progress = FALSE + expect_snapshot( + gh_repos_urls_from_orgs <- github_testhost_priv$get_repos_urls_from_orgs( + type = "web", + verbose = TRUE, + progress = FALSE + ) ) expect_gt(length(gh_repos_urls_from_orgs), 0) expect_true(any(grepl("test-org", gh_repos_urls_from_orgs))) @@ -118,6 +120,35 @@ test_that("get_all_repos_urls prepares web repo_urls vector", { test_mocker$cache(gh_repos_urls) }) +test_that("get_all_repos_urls is set to scan whole host", { + github_testhost_all_priv <- create_github_testhost_all( + orgs = 
"test_org", + mode = "private" + ) + mockery::stub( + github_testhost_all_priv$get_all_repos_urls, + "graphql_engine$get_orgs", + "test_org" + ) + mockery::stub( + github_testhost_all_priv$get_all_repos_urls, + "private$get_repos_urls_from_orgs", + test_mocker$use("gh_repos_urls_from_orgs") + ) + mockery::stub( + github_testhost_all_priv$get_all_repos_urls, + "private$get_repos_urls_from_repos", + test_mocker$use("gh_repos_urls") + ) + gh_repos_urls <- github_testhost_all_priv$get_all_repos_urls( + type = "web", + verbose = FALSE + ) + expect_gt(length(gh_repos_urls), 0) + expect_true(any(grepl("test-org", gh_repos_urls))) + expect_true(all(grepl("https://testhost.com/", gh_repos_urls))) +}) + test_that("get_repo_url_from_response retrieves repositories URLS", { gh_repo_api_urls <- github_testhost_priv$get_repo_url_from_response( search_response = test_mocker$use("gh_search_repos_response"), From 065b6f6c8bdb1055f764d75819ef36590be1aa29 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Fri, 20 Dec 2024 12:04:28 +0000 Subject: [PATCH 51/99] Add tests, fix GitLab commits tests. 
--- tests/testthat/_snaps/01-get_repos-GitHub.md | 19 ++- tests/testthat/_snaps/01-get_repos-GitLab.md | 16 +++ .../testthat/_snaps/02-get_commits-GitLab.md | 16 +++ tests/testthat/_snaps/get_release-GitLab.md | 8 ++ .../testthat/_snaps/get_urls_repos-GitHub.md | 13 ++ tests/testthat/helper-expect-responses.R | 2 +- tests/testthat/helper-fixtures.R | 4 +- tests/testthat/test-01-get_repos-GitHub.R | 20 +-- tests/testthat/test-01-get_repos-GitLab.R | 118 +++++++++++++++++- tests/testthat/test-02-get_commits-GitLab.R | 61 ++++++++- tests/testthat/test-get_release-GitLab.R | 12 +- tests/testthat/test-get_urls_repos-GitHub.R | 18 +-- 12 files changed, 271 insertions(+), 36 deletions(-) create mode 100644 tests/testthat/_snaps/02-get_commits-GitLab.md diff --git a/tests/testthat/_snaps/01-get_repos-GitHub.md b/tests/testthat/_snaps/01-get_repos-GitHub.md index 42efdf6e..043a672d 100644 --- a/tests/testthat/_snaps/01-get_repos-GitHub.md +++ b/tests/testthat/_snaps/01-get_repos-GitHub.md @@ -33,7 +33,7 @@ i [Host:GitHub][Engine:REST][Scope:] Pulling repositories... i Preparing repositories table... -# `get_repos_with_code_from_orgs()` pulls minimum version of table +# `get_repos_with_code_from_repos()` pulls minimum version of table Code repos_with_code_from_repos_min <- github_testhost_priv$ @@ -52,6 +52,14 @@ Message i [Host:GitHub][Engine:REST] Pulling repositories... +# get_repos_from_repos works + + Code + gh_repos_individual <- github_testhost_priv$get_repos_from_repos(verbose = TRUE, + progress = FALSE) + Message + i [Host:GitHub][Engine:GraphQl][Scope:test_org] Pulling repositories... + # `get_all_repos()` is set to scan whole git host Code @@ -59,3 +67,12 @@ Message i [Host:GitHub][Engine:GraphQl] Pulling all organizations... 
+# `get_repos_contributors()` works on GitHost level + + Code + gh_repos_with_contributors <- github_testhost_priv$get_repos_contributors( + repos_table = test_mocker$use("gh_repos_table_with_platform"), verbose = TRUE, + progress = FALSE) + Message + i [Host:GitHub][Engine:REST] Pulling contributors... + diff --git a/tests/testthat/_snaps/01-get_repos-GitLab.md b/tests/testthat/_snaps/01-get_repos-GitLab.md index d84ced03..f13ca202 100644 --- a/tests/testthat/_snaps/01-get_repos-GitLab.md +++ b/tests/testthat/_snaps/01-get_repos-GitLab.md @@ -5,6 +5,22 @@ Output [1] "\n query GetReposByOrg($org: ID! $repo_cursor: String!) {\n group(fullPath: $org) {\n projects(first: 100 after: $repo_cursor) {\n \n count\n pageInfo {\n hasNextPage\n endCursor\n }\n edges {\n node {\n repo_id: id\n repo_name: name\n repo_path: path\n ... on Project {\n repository {\n rootRef\n }\n }\n stars: starCount\n forks: forksCount\n created_at: createdAt\n last_activity_at: lastActivityAt\n languages {\n name\n }\n issues: issueStatusCounts {\n all\n closed\n opened\n }\n namespace {\n path: fullPath\n }\n repo_url: webUrl\n }\n }\n }\n }\n }" +# `search_for_code()` works + + Code + gl_search_repos_by_code <- test_rest_gitlab_priv$search_for_code(code = "test", + filename = "TESTFILE", verbose = TRUE, page_max = 2) + Message + i Searching for code [test]... + +# `search_repos_for_code()` works + + Code + gl_search_repos_by_code <- test_rest_gitlab_priv$search_repos_for_code(code = "test", + repos = "TestRepo", filename = "TESTFILE", verbose = TRUE, page_max = 2) + Message + i Searching for code [test]... 
+ # get_repos_from_org prints proper message Code diff --git a/tests/testthat/_snaps/02-get_commits-GitLab.md b/tests/testthat/_snaps/02-get_commits-GitLab.md new file mode 100644 index 00000000..13418dda --- /dev/null +++ b/tests/testthat/_snaps/02-get_commits-GitLab.md @@ -0,0 +1,16 @@ +# get_commits_from_orgs works + + Code + gl_commits_table <- gitlab_testhost_priv$get_commits_from_orgs(since = "2023-03-01", + until = "2023-04-01", verbose = TRUE, progress = FALSE) + Message + i [Host:GitLab][Engine:REST][Scope:mbtests] Pulling commits... + +# get_commits_from_repos works + + Code + gl_commits_table <- gitlab_testhost_priv$get_commits_from_repos(since = "2023-03-01", + until = "2023-04-01", verbose = TRUE, progress = FALSE) + Message + i [Host:GitLab][Engine:REST][Scope:test_org/TestRepo] Pulling commits... + diff --git a/tests/testthat/_snaps/get_release-GitLab.md b/tests/testthat/_snaps/get_release-GitLab.md index 7c4fb7a8..b456d7ca 100644 --- a/tests/testthat/_snaps/get_release-GitLab.md +++ b/tests/testthat/_snaps/get_release-GitLab.md @@ -5,3 +5,11 @@ Output [1] "query GetReleasesFromRepo($project_path: ID!) {\n project(fullPath: $project_path) {\n name\n webUrl\n \t\t\t\t\t\treleases {\n nodes{\n name\n tagName\n releasedAt\n links {\n selfUrl\n }\n description\n }\n }\n }\n }" +# `get_release_logs_from_repos()` works + + Code + releases_from_repos <- gitlab_testhost_priv$get_release_logs_from_repos(since = "2023-05-01", + until = "2023-09-30", verbose = TRUE, progress = FALSE) + Message + i [Host:GitLab][Engine:GraphQl][Scope:test_org/TestRepo] Pulling release logs... + diff --git a/tests/testthat/_snaps/get_urls_repos-GitHub.md b/tests/testthat/_snaps/get_urls_repos-GitHub.md index aeb16a31..34365484 100644 --- a/tests/testthat/_snaps/get_urls_repos-GitHub.md +++ b/tests/testthat/_snaps/get_urls_repos-GitHub.md @@ -6,3 +6,16 @@ Message i [Host:GitHub][Engine:REST][Scope:test_org] Pulling repositories (URLs)... 
+# get_repos_urls_from_repos prepares web repo_urls vector + + Code + gh_repos_urls <- github_testhost_priv$get_repos_urls_from_repos(type = "web", + verbose = TRUE, progress = FALSE) + Message + i [Host:GitHub][Engine:REST][Scope:test_org] Pulling repositories (URLs)... + +# get_all_repos_urls prepares web repo_urls vector + + Code + gh_repos_urls <- github_testhost_priv$get_all_repos_urls(type = "web", verbose = TRUE) + diff --git a/tests/testthat/helper-expect-responses.R b/tests/testthat/helper-expect-responses.R index 32dba117..acffaad3 100644 --- a/tests/testthat/helper-expect-responses.R +++ b/tests/testthat/helper-expect-responses.R @@ -96,7 +96,7 @@ expect_gl_commit_rest_response <- function(object) { "list" ) expect_list_contains( - object[[1]], + object, c( "id", "short_id", "created_at", "parent_ids", "title", "message", "author_name", "author_email", "authored_date", "committer_name", diff --git a/tests/testthat/helper-fixtures.R b/tests/testthat/helper-fixtures.R index 8d7e1ee8..a464927c 100644 --- a/tests/testthat/helper-fixtures.R +++ b/tests/testthat/helper-fixtures.R @@ -311,9 +311,7 @@ gitlab_commit <- list( ) ) -test_fixtures$gitlab_commits_response <- list( - rep(gitlab_commit, 5) -) +test_fixtures$gitlab_commits_response <- rep(list(gitlab_commit), 5) test_fixtures$github_file_response <- list( "data" = list( diff --git a/tests/testthat/test-01-get_repos-GitHub.R b/tests/testthat/test-01-get_repos-GitHub.R index 3036fb20..ee7798f6 100644 --- a/tests/testthat/test-01-get_repos-GitHub.R +++ b/tests/testthat/test-01-get_repos-GitHub.R @@ -578,9 +578,11 @@ test_that("get_repos_from_repos works", { ) github_testhost_priv$searching_scope <- c("org", "repo") github_testhost_priv$orgs_repos <- list("test_org" = "TestRepo") - gh_repos_individual <- github_testhost_priv$get_repos_from_repos( - verbose = FALSE, - progress = FALSE + expect_snapshot( + gh_repos_individual <- github_testhost_priv$get_repos_from_repos( + verbose = TRUE, + progress = FALSE + 
) ) expect_repos_table( gh_repos_individual @@ -679,7 +681,7 @@ test_that("`get_repos_contributors()` adds contributors to repos table", { ) gh_repos_with_contributors <- test_rest_github$get_repos_contributors( repos_table = test_mocker$use("gh_repos_table_with_platform"), - progress = FALSE + progress = FALSE ) expect_repos_table( gh_repos_with_contributors, @@ -698,10 +700,12 @@ test_that("`get_repos_contributors()` works on GitHost level", { "rest_engine$get_repos_contributors", test_mocker$use("gh_repos_with_contributors") ) - gh_repos_with_contributors <- github_testhost_priv$get_repos_contributors( - repos_table = test_mocker$use("gh_repos_table_with_platform"), - verbose = FALSE, - progress = FALSE + expect_snapshot( + gh_repos_with_contributors <- github_testhost_priv$get_repos_contributors( + repos_table = test_mocker$use("gh_repos_table_with_platform"), + verbose = TRUE, + progress = FALSE + ) ) expect_repos_table( gh_repos_with_contributors, diff --git a/tests/testthat/test-01-get_repos-GitLab.R b/tests/testthat/test-01-get_repos-GitLab.R index f4c40890..695eb38d 100644 --- a/tests/testthat/test-01-get_repos-GitLab.R +++ b/tests/testthat/test-01-get_repos-GitLab.R @@ -119,6 +119,39 @@ test_that("`get_repos_from_org()` does not fail when GraphQL response is not com ) }) +test_that("`search_for_code()` works", { + mockery::stub( + test_rest_gitlab_priv$search_for_code, + "self$response", + list() + ) + expect_snapshot( + gl_search_repos_by_code <- test_rest_gitlab_priv$search_for_code( + code = "test", + filename = "TESTFILE", + verbose = TRUE, + page_max = 2 + ) + ) +}) + +test_that("`search_repos_for_code()` works", { + mockery::stub( + test_rest_gitlab_priv$search_repos_for_code, + "self$response", + list() + ) + expect_snapshot( + gl_search_repos_by_code <- test_rest_gitlab_priv$search_repos_for_code( + code = "test", + repos = "TestRepo", + filename = "TESTFILE", + verbose = TRUE, + page_max = 2 + ) + ) +}) + test_that("`map_search_into_repos()` 
works", { gl_search_response <- test_fixtures$gitlab_search_response test_mocker$cache(gl_search_response) @@ -132,7 +165,7 @@ test_that("`map_search_into_repos()` works", { test_mocker$cache(gl_search_repos_by_code) }) -test_that("`get_repos_languages` works", { +test_that("`get_repos_languages()` works", { repos_list <- test_mocker$use("gl_search_repos_by_code") repos_list[[1]]$id <- "45300912" mockery::stub( @@ -140,12 +173,63 @@ test_that("`get_repos_languages` works", { "self$response", test_fixtures$gitlab_languages_response ) - repos_list_with_languages <- test_rest_gitlab_priv$get_repos_languages( + gl_repos_list_with_languages <- test_rest_gitlab_priv$get_repos_languages( repos_list = repos_list, progress = FALSE ) - purrr::walk(repos_list_with_languages, ~ expect_list_contains(., "languages")) - expect_equal(repos_list_with_languages[[1]]$languages, c("Python", "R")) + purrr::walk(gl_repos_list_with_languages, ~ expect_list_contains(., "languages")) + expect_equal(gl_repos_list_with_languages[[1]]$languages, c("Python", "R")) + test_mocker$cache(gl_repos_list_with_languages) +}) + +test_that("`get_repos_by_code()` works", { + mockery::stub( + test_rest_gitlab$get_repos_by_code, + "private$search_for_code", + test_fixtures$gitlab_search_response + ) + mockery::stub( + test_rest_gitlab$get_repos_by_code, + "private$map_search_into_repos", + test_mocker$use("gl_search_repos_by_code") + ) + mockery::stub( + test_rest_gitlab$get_repos_by_code, + "private$get_repos_languages", + test_mocker$use("gl_repos_list_with_languages") + ) + gl_repos_by_code <- test_rest_gitlab$get_repos_by_code( + code = "test", + org = "test_org" + ) + expect_gl_repos_rest_response( + gl_repos_by_code + ) +}) + +test_that("`get_repos_by_code()` works", { + mockery::stub( + test_rest_gitlab$get_repos_by_code, + "private$search_repos_for_code", + test_fixtures$gitlab_search_response + ) + mockery::stub( + test_rest_gitlab$get_repos_by_code, + "private$map_search_into_repos", + 
test_mocker$use("gl_search_repos_by_code") + ) + mockery::stub( + test_rest_gitlab$get_repos_by_code, + "private$get_repos_languages", + test_mocker$use("gl_repos_list_with_languages") + ) + gl_repos_by_code <- test_rest_gitlab$get_repos_by_code( + code = "test", + repos = c("TestRepo1", "TestRepo2") + ) + expect_gl_repos_rest_response( + gl_repos_by_code + ) }) test_that("`prepare_repos_table()` prepares repos table", { @@ -186,7 +270,10 @@ test_that("GitHost adds `repo_api_url` column to GitLab repos table", { test_that("`tailor_repos_response()` tailors precisely `repos_list`", { gl_repos_by_code <- test_mocker$use("gl_search_repos_by_code") gl_repos_by_code_tailored <- - test_rest_gitlab$tailor_repos_response(gl_repos_by_code) + test_rest_gitlab$tailor_repos_response( + repos_response = gl_repos_by_code, + output = "table_full" + ) gl_repos_by_code_tailored %>% expect_type("list") %>% expect_length(length(gl_repos_by_code)) @@ -206,6 +293,27 @@ test_that("`tailor_repos_response()` tailors precisely `repos_list`", { test_mocker$cache(gl_repos_by_code_tailored) }) +test_that("`tailor_repos_response()` tailors precisely to minimal `repos_list`", { + gl_repos_by_code <- test_mocker$use("gl_search_repos_by_code") + gl_repos_by_code_tailored <- + test_rest_gitlab$tailor_repos_response( + repos_response = gl_repos_by_code, + output = "table_min" + ) + gl_repos_by_code_tailored %>% + expect_type("list") %>% + expect_length(length(gl_repos_by_code)) + + expect_list_contains_only( + gl_repos_by_code_tailored[[1]], + c("repo_id", "repo_name", "created_at", "default_branch", "organization") + ) + expect_lt( + length(gl_repos_by_code_tailored[[1]]), + length(gl_repos_by_code[[1]]) + ) +}) + test_that("REST client prepares table from GitLab repositories response", { gl_repos_by_code_table <- test_rest_gitlab$prepare_repos_table( repos_list = test_mocker$use("gl_repos_by_code_tailored"), diff --git a/tests/testthat/test-02-get_commits-GitLab.R 
b/tests/testthat/test-02-get_commits-GitLab.R index e3e45d37..8ee44a3f 100644 --- a/tests/testthat/test-02-get_commits-GitLab.R +++ b/tests/testthat/test-02-get_commits-GitLab.R @@ -1,17 +1,34 @@ -test_that("`get_commits_from_repos()` pulls commits from repo", { +test_that("`get_commits_from_one_repo()` pulls commits from repository", { + mockery::stub( + test_rest_gitlab_priv$get_commits_from_one_repo, + "private$paginate_results", + test_fixtures$gitlab_commits_response + ) + gl_commits_repo <- test_rest_gitlab_priv$get_commits_from_one_repo( + repo_path = "TestRepo", + since = "2023-01-01", + until = "2023-04-20" + ) + expect_gt(length(gl_commits_repo), 1) + purrr::walk(gl_commits_repo, ~ expect_gl_commit_rest_response(.)) + test_mocker$cache(gl_commits_repo) +}) + +test_that("`get_commits_from_repos()` pulls commits from repositories", { mockery::stub( test_rest_gitlab$get_commits_from_repos, "private$get_commits_from_one_repo", - test_fixtures$gitlab_commits_response + test_mocker$use("gl_commits_repo") ) - repos_names <- c("mbtests%2Fgitstatstesting", "mbtests%2Fgitstats-testing-2") + repos_names <- c("test_org/TestRepo1", "test_org/TestRepo2") gl_commits_org <- test_rest_gitlab$get_commits_from_repos( repos_names = repos_names, since = "2023-01-01", until = "2023-04-20", progress = FALSE ) - purrr::walk(gl_commits_org, ~ expect_gl_commit_rest_response(.)) + expect_equal(names(gl_commits_org), c("test_org/TestRepo1", "test_org/TestRepo2")) + purrr::walk(gl_commits_org[[1]], ~ expect_gl_commit_rest_response(.)) test_mocker$cache(gl_commits_org) }) @@ -126,11 +143,11 @@ test_that("get_commits_from_orgs works", { "rest_engine$get_commits_authors_handles_and_names", test_mocker$use("gl_commits_table") ) - suppressMessages( + expect_snapshot( gl_commits_table <- gitlab_testhost_priv$get_commits_from_orgs( since = "2023-03-01", until = "2023-04-01", - verbose = FALSE, + verbose = TRUE, progress = FALSE ) ) @@ -139,3 +156,35 @@ test_that("get_commits_from_orgs 
works", { ) test_mocker$cache(gl_commits_table) }) + +test_that("get_commits_from_repos works", { + gitlab_testhost_priv <- create_gitlab_testhost( + repos = "TestRepo", + mode = "private" + ) + test_org <- "test_org" + attr(test_org, "type") <- "organization" + mockery::stub( + gitlab_testhost_priv$get_repos_from_repos, + "private$set_owner_type", + test_org + ) + mockery::stub( + gitlab_testhost_priv$get_commits_from_repos, + "rest_engine$get_commits_authors_handles_and_names", + test_mocker$use("gl_commits_table") + ) + gitlab_testhost_priv$searching_scope <- "repo" + gitlab_testhost_priv$orgs_repos <- list("test_org" = "TestRepo") + expect_snapshot( + gl_commits_table <- gitlab_testhost_priv$get_commits_from_repos( + since = "2023-03-01", + until = "2023-04-01", + verbose = TRUE, + progress = FALSE + ) + ) + expect_commits_table( + gl_commits_table + ) +}) diff --git a/tests/testthat/test-get_release-GitLab.R b/tests/testthat/test-get_release-GitLab.R index 06e73b5d..05d8eb4f 100644 --- a/tests/testthat/test-get_release-GitLab.R +++ b/tests/testthat/test-get_release-GitLab.R @@ -80,11 +80,13 @@ test_that("`get_release_logs_from_repos()` works", { ) gitlab_testhost_priv$searching_scope <- "repo" gitlab_testhost_priv$orgs_repos <- list("test_org" = "TestRepo") - releases_from_repos <- gitlab_testhost_priv$get_release_logs_from_repos( - since = "2023-05-01", - until = "2023-09-30", - verbose = FALSE, - progress = FALSE + expect_snapshot( + releases_from_repos <- gitlab_testhost_priv$get_release_logs_from_repos( + since = "2023-05-01", + until = "2023-09-30", + verbose = TRUE, + progress = FALSE + ) ) expect_releases_table(releases_from_repos) test_mocker$cache(releases_from_repos) diff --git a/tests/testthat/test-get_urls_repos-GitHub.R b/tests/testthat/test-get_urls_repos-GitHub.R index 0a9c008e..e474b445 100644 --- a/tests/testthat/test-get_urls_repos-GitHub.R +++ b/tests/testthat/test-get_urls_repos-GitHub.R @@ -88,10 +88,12 @@ 
test_that("get_repos_urls_from_repos prepares web repo_urls vector", { ) github_testhost_priv$searching_scope <- c("repo") github_testhost_priv$orgs_repos <- list("test_org" = "TestRepo") - gh_repos_urls <- github_testhost_priv$get_repos_urls_from_repos( - type = "web", - verbose = FALSE, - progress = FALSE + expect_snapshot( + gh_repos_urls <- github_testhost_priv$get_repos_urls_from_repos( + type = "web", + verbose = TRUE, + progress = FALSE + ) ) expect_gt(length(gh_repos_urls), 0) expect_true(any(grepl("test-org", gh_repos_urls))) @@ -110,9 +112,11 @@ test_that("get_all_repos_urls prepares web repo_urls vector", { "private$get_repos_urls_from_repos", test_mocker$use("gh_repos_urls") ) - gh_repos_urls <- github_testhost_priv$get_all_repos_urls( - type = "web", - verbose = FALSE + expect_snapshot( + gh_repos_urls <- github_testhost_priv$get_all_repos_urls( + type = "web", + verbose = TRUE + ) ) expect_gt(length(gh_repos_urls), 0) expect_true(any(grepl("test-org", gh_repos_urls))) From 88ed0d6840e7f039f06ee55bcfc45744d45ba842 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Fri, 20 Dec 2024 12:07:14 +0000 Subject: [PATCH 52/99] Add NEWS. --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index eaa5c2d8..62178505 100644 --- a/NEWS.md +++ b/NEWS.md @@ -14,6 +14,7 @@ - Fixed pulling commits for GitLab subgroups when repositories are set as scope to scan ([#551](https://github.com/r-world-devs/GitStats/issues/551)). - Filled more information on `author_name` and `author_login` if it was missing in `commits_table` ([#550](https://github.com/r-world-devs/GitStats/issues/550)). - Handled a `GraphQL` response error when pulling repositories with R error. Earlier, `GitStats` just returned empty table with no clue on what has happened, as errors from `GraphQL` are returned as list outputs (they do not break code). +- Fixed getting R package usage when repositories are set ([#548](https://github.com/r-world-devs/GitStats/issues/548)). 
# GitStats 2.1.2 From d3f47a7200506cdbb3d77036418dd93c026eb568 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Mon, 23 Dec 2024 13:27:11 +0000 Subject: [PATCH 53/99] Add parameter to control error when wrong repos/orgs are passed. --- DESCRIPTION | 2 +- NEWS.md | 1 + R/GitHost.R | 56 +++++++++---- R/GitHostGitHub.R | 6 +- R/GitHostGitLab.R | 6 +- R/GitStats.R | 12 ++- R/gitstats_functions.R | 79 ----------------- R/set_host.R | 84 +++++++++++++++++++ inst/set_hosts.R | 14 ++++ man/set_github_host.Rd | 8 +- man/set_gitlab_host.Rd | 8 +- tests/testthat/_snaps/set_host.md | 14 ++++ tests/testthat/test-02-get_commits-GitStats.R | 4 +- tests/testthat/test-helpers.R | 4 +- tests/testthat/test-set_host.R | 13 +++ vignettes/get_repos_with_code.Rmd | 2 +- 16 files changed, 199 insertions(+), 114 deletions(-) create mode 100644 R/set_host.R create mode 100644 inst/set_hosts.R diff --git a/DESCRIPTION b/DESCRIPTION index b6e5fb16..07993a2c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: GitStats Title: Standardized Git Repository Data -Version: 2.1.2.9004 +Version: 2.1.2.9005 Authors@R: c( person(given = "Maciej", family = "Banas", email = "banasmaciek@gmail.com", role = c("aut", "cre")), person(given = "Kamil", family = "Koziej", email = "koziej.k@gmail.com", role = "aut"), diff --git a/NEWS.md b/NEWS.md index 62178505..d81d3c30 100644 --- a/NEWS.md +++ b/NEWS.md @@ -8,6 +8,7 @@ - changing name of the `time_interval` parameter to `time_aggregation`, - adding `yearly` aggregation to `time_aggregation` parameter, - changing basic input from `GitStats` to `commits_data` object which allows to build workflow in one pipeline (`create_gitstats() |> set_*_host() |> get_commits() |> get_commits_stats()`). +- Add `.show_error` parameter to the `set_*_host()` functins to control if error should pop up when wrong input is passed ([#547](https://github.com/r-world-devs/GitStats/issues/547)). 
## Fixes: diff --git a/R/GitHost.R b/R/GitHost.R index d373f1c4..71cd508e 100644 --- a/R/GitHost.R +++ b/R/GitHost.R @@ -17,7 +17,8 @@ GitHost <- R6::R6Class( repos = NA, token = NA, host = NA, - verbose = NA) { + verbose = NA, + .error = TRUE) { private$set_api_url(host) private$set_web_url(host) private$set_endpoints() @@ -36,7 +37,8 @@ GitHost <- R6::R6Class( private$set_orgs_and_repos( orgs = orgs, repos = repos, - verbose = verbose + verbose = verbose, + .error = .error ) }, @@ -413,18 +415,20 @@ GitHost <- R6::R6Class( }, # Set organization or repositories - set_orgs_and_repos = function(orgs, repos, verbose) { + set_orgs_and_repos = function(orgs, repos, verbose, .error) { if (!private$scan_all) { if (!is.null(orgs)) { private$orgs <- private$check_organizations( orgs = orgs, - verbose = verbose + verbose = verbose, + .error = .error ) } if (!is.null(repos)) { repos <- private$check_repositories( repos = repos, - verbose = verbose + verbose = verbose, + .error = .error ) private$repos_fullnames <- repos orgs_repos <- private$extract_repos_and_orgs(private$repos_fullnames) @@ -435,7 +439,7 @@ GitHost <- R6::R6Class( }, # Check if repositories exist - check_repositories = function(repos, verbose) { + check_repositories = function(repos, verbose, .error) { if (verbose) { cli::cli_alert_info(cli::col_grey("Checking repositories...")) } @@ -443,7 +447,9 @@ GitHost <- R6::R6Class( repo_endpoint <- glue::glue("{private$endpoints$repositories}/{repo}") check <- private$check_endpoint( endpoint = repo_endpoint, - type = "Repository" + type = "Repository", + verbose = verbose, + .error = .error ) if (!check) { repo <- NULL @@ -459,7 +465,7 @@ GitHost <- R6::R6Class( }, # Check if organizations exist - check_organizations = function(orgs, verbose) { + check_organizations = function(orgs, verbose, .error) { if (verbose) { cli::cli_alert_info(cli::col_grey("Checking organizations...")) } @@ -467,7 +473,9 @@ GitHost <- R6::R6Class( org_endpoint <- 
glue::glue("{private$endpoints$orgs}/{org}") check <- private$check_endpoint( endpoint = org_endpoint, - type = "Organization" + type = "Organization", + verbose = verbose, + .error = .error ) if (!check) { org <- NULL @@ -483,7 +491,7 @@ GitHost <- R6::R6Class( }, # Check whether the endpoint exists. - check_endpoint = function(endpoint, type) { + check_endpoint = function(endpoint, type, verbose, .error) { check <- TRUE tryCatch( { @@ -491,17 +499,29 @@ GitHost <- R6::R6Class( }, error = function(e) { if (grepl("404", e)) { - cli::cli_abort( - c( - "x" = "{type} you provided does not exist or its name was passed + if (.error) { + cli::cli_abort( + c( + "x" = "{type} you provided does not exist or its name was passed in a wrong way: {cli::col_red({utils::URLdecode(endpoint)})}", - "!" = "Please type your {tolower(type)} name as you see it in + "!" = "Please type your {tolower(type)} name as you see it in web URL.", - "i" = "E.g. do not use spaces. {type} names as you see on the + "i" = "E.g. do not use spaces. {type} names as you see on the page may differ from their web 'address'." 
- ), - call = NULL - ) + ), + call = NULL + ) + } else { + if (verbose) { + cli::cli_alert_warning( + cli::col_yellow( + "{type} you provided does not exist: {cli::col_red({utils::URLdecode(endpoint)})}" + ) + ) + } + check <<- FALSE + } + } } ) diff --git a/R/GitHostGitHub.R b/R/GitHostGitHub.R index f0ac14fc..29463b49 100644 --- a/R/GitHostGitHub.R +++ b/R/GitHostGitHub.R @@ -7,12 +7,14 @@ GitHostGitHub <- R6::R6Class( repos = NA, token = NA, host = NA, - verbose = NA) { + verbose = NA, + .error = TRUE) { super$initialize(orgs = orgs, repos = repos, token = token, host = host, - verbose = verbose) + verbose = verbose, + .error = .error) if (verbose) { cli::cli_alert_success("Set connection to GitHub.") } diff --git a/R/GitHostGitLab.R b/R/GitHostGitLab.R index 4f2f0957..39a28c1d 100644 --- a/R/GitHostGitLab.R +++ b/R/GitHostGitLab.R @@ -6,7 +6,8 @@ GitHostGitLab <- R6::R6Class("GitHostGitLab", repos = NA, token = NA, host = NA, - verbose = NA) { + verbose = NA, + .error = TRUE) { repos <- if (!is.null(repos)) { url_encode(repos) } @@ -17,7 +18,8 @@ GitHostGitLab <- R6::R6Class("GitHostGitLab", repos = repos, token = token, host = host, - verbose = verbose) + verbose = verbose, + .error = .error) if (verbose) { cli::cli_alert_success("Set connection to GitLab.") } diff --git a/R/GitStats.R b/R/GitStats.R index b4fa288e..bc8c91bb 100644 --- a/R/GitStats.R +++ b/R/GitStats.R @@ -8,14 +8,16 @@ GitStats <- R6::R6Class( token = NULL, orgs = NULL, repos = NULL, - verbose = TRUE) { + verbose = TRUE, + .show_error = TRUE) { new_host <- NULL new_host <- GitHostGitHub$new( orgs = orgs, repos = repos, token = token, host = host, - verbose = verbose + verbose = verbose, + .error = .show_error ) private$add_new_host(new_host) }, @@ -24,14 +26,16 @@ GitStats <- R6::R6Class( token = NULL, orgs = NULL, repos = NULL, - verbose = TRUE) { + verbose = TRUE, + .show_error = TRUE) { new_host <- NULL new_host <- GitHostGitLab$new( orgs = orgs, repos = repos, token = token, host = host, 
- verbose = verbose + verbose = verbose, + .error = .show_error ) private$add_new_host(new_host) }, diff --git a/R/gitstats_functions.R b/R/gitstats_functions.R index e38fce10..0397c072 100644 --- a/R/gitstats_functions.R +++ b/R/gitstats_functions.R @@ -8,85 +8,6 @@ create_gitstats <- function() { GitStats$new() } -#' @title Set GitHub host -#' @name set_github_host -#' @param gitstats A GitStats object. -#' @param host A character, optional, URL name of the host. If not passed, a -#' public host will be used. -#' @param token A token. -#' @param orgs An optional character vector of organisations. If you pass it, -#' `repos` parameter should stay `NULL`. -#' @param repos An optional character vector of repositories full names -#' (organization and repository name, e.g. "r-world-devs/GitStats"). If you -#' pass it, `orgs` parameter should stay `NULL`. -#' @param verbose A logical, `TRUE` by default. If `FALSE` messages and printing -#' output is switched off. -#' @details If you do not define `orgs` and `repos`, `GitStats` will be set to -#' scan whole Git platform (such as enterprise version of GitHub or GitLab), -#' unless it is a public platform. In case of a public one (like GitHub) you -#' need to define `orgs` or `repos` as scanning through all organizations may -#' take large amount of time. -#' @return A `GitStats` object with added information on host. 
-#' @examples -#' \dontrun{ -#' my_gitstats <- create_gitstats() %>% -#' set_github_host( -#' orgs = c("r-world-devs", "openpharma", "pharmaverse") -#' ) -#' } -#' @export -set_github_host <- function(gitstats, - host = NULL, - token = NULL, - orgs = NULL, - repos = NULL, - verbose = is_verbose(gitstats)) { - gitstats$set_github_host( - host = host, - token = token, - orgs = orgs, - repos = repos, - verbose = verbose - ) - - return(invisible(gitstats)) -} - -#' @title Set GitLab host -#' @name set_gitlab_host -#' @inheritParams set_github_host -#' @details If you do not define `orgs` and `repos`, `GitStats` will be set to -#' scan whole Git platform (such as enterprise version of GitHub or GitLab), -#' unless it is a public platform. In case of a public one (like GitHub) you -#' need to define `orgs` or `repos` as scanning through all organizations may -#' take large amount of time. -#' @return A `GitStats` object with added information on host. -#' @examples -#' \dontrun{ -#' my_gitstats <- create_gitstats() %>% -#' set_gitlab_host( -#' token = Sys.getenv("GITLAB_PAT_PUBLIC"), -#' orgs = "mbtests" -#' ) -#' } -#' @export -set_gitlab_host <- function(gitstats, - host = NULL, - token = NULL, - orgs = NULL, - repos = NULL, - verbose = is_verbose(gitstats)) { - gitstats$set_gitlab_host( - host = host, - token = token, - orgs = orgs, - repos = repos, - verbose = verbose - ) - - return(invisible(gitstats)) -} - #' @title Get data on repositories #' @name get_repos #' @description Pulls data on all repositories for an organization, individual diff --git a/R/set_host.R b/R/set_host.R new file mode 100644 index 00000000..9bab0d3f --- /dev/null +++ b/R/set_host.R @@ -0,0 +1,84 @@ +#' @title Set GitHub host +#' @name set_github_host +#' @param gitstats A GitStats object. +#' @param host A character, optional, URL name of the host. If not passed, a +#' public host will be used. +#' @param token A token. +#' @param orgs An optional character vector of organisations. 
If you pass it, +#' `repos` parameter should stay `NULL`. +#' @param repos An optional character vector of repositories full names +#' (organization and repository name, e.g. "r-world-devs/GitStats"). If you +#' pass it, `orgs` parameter should stay `NULL`. +#' @param verbose A logical, `TRUE` by default. If `FALSE` messages and printing +#' output is switched off. +#' @param .show_error A logical to control if passing wrong input +#' (`repositories` and `organizations`) should end with an error or not. +#' @details If you do not define `orgs` and `repos`, `GitStats` will be set to +#' scan whole Git platform (such as enterprise version of GitHub or GitLab), +#' unless it is a public platform. In case of a public one (like GitHub) you +#' need to define `orgs` or `repos` as scanning through all organizations may +#' take large amount of time. +#' @return A `GitStats` object with added information on host. +#' @examples +#' \dontrun{ +#' my_gitstats <- create_gitstats() %>% +#' set_github_host( +#' orgs = c("r-world-devs", "openpharma", "pharmaverse") +#' ) +#' } +#' @export +set_github_host <- function(gitstats, + host = NULL, + token = NULL, + orgs = NULL, + repos = NULL, + verbose = is_verbose(gitstats), + .show_error = TRUE) { + gitstats$set_github_host( + host = host, + token = token, + orgs = orgs, + repos = repos, + verbose = verbose, + .show_error = .show_error + ) + + return(invisible(gitstats)) +} + +#' @title Set GitLab host +#' @name set_gitlab_host +#' @inheritParams set_github_host +#' @details If you do not define `orgs` and `repos`, `GitStats` will be set to +#' scan whole Git platform (such as enterprise version of GitHub or GitLab), +#' unless it is a public platform. In case of a public one (like GitHub) you +#' need to define `orgs` or `repos` as scanning through all organizations may +#' take large amount of time. +#' @return A `GitStats` object with added information on host. 
+#' @examples +#' \dontrun{ +#' my_gitstats <- create_gitstats() %>% +#' set_gitlab_host( +#' token = Sys.getenv("GITLAB_PAT_PUBLIC"), +#' orgs = "mbtests" +#' ) +#' } +#' @export +set_gitlab_host <- function(gitstats, + host = NULL, + token = NULL, + orgs = NULL, + repos = NULL, + verbose = is_verbose(gitstats), + .show_error = TRUE) { + gitstats$set_gitlab_host( + host = host, + token = token, + orgs = orgs, + repos = repos, + verbose = verbose, + .show_error = .show_error + ) + + return(invisible(gitstats)) +} diff --git a/inst/set_hosts.R b/inst/set_hosts.R new file mode 100644 index 00000000..02eabe81 --- /dev/null +++ b/inst/set_hosts.R @@ -0,0 +1,14 @@ +git_stats <- create_gitstats() |> + set_github_host( + orgs = c("r-world-devs"), + repos = c("openpharma/DataFakR"), + token = Sys.getenv("GITHUB_PAT"), + .show_error = FALSE + ) |> + set_gitlab_host( + orgs = c("mbtests", "makbest"), + token = Sys.getenv("GITLAB_PAT_PUBLIC"), + .show_error = FALSE + ) + +git_stats diff --git a/man/set_github_host.Rd b/man/set_github_host.Rd index 68ab860c..6d10db94 100644 --- a/man/set_github_host.Rd +++ b/man/set_github_host.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gitstats_functions.R +% Please edit documentation in R/set_host.R \name{set_github_host} \alias{set_github_host} \title{Set GitHub host} @@ -10,7 +10,8 @@ set_github_host( token = NULL, orgs = NULL, repos = NULL, - verbose = is_verbose(gitstats) + verbose = is_verbose(gitstats), + .show_error = TRUE ) } \arguments{ @@ -30,6 +31,9 @@ pass it, \code{orgs} parameter should stay \code{NULL}.} \item{verbose}{A logical, \code{TRUE} by default. If \code{FALSE} messages and printing output is switched off.} + +\item{.show_error}{A logical to control if passing wrong input +(\code{repositories} and \code{organizations}) should end with an error or not.} } \value{ A \code{GitStats} object with added information on host. 
diff --git a/man/set_gitlab_host.Rd b/man/set_gitlab_host.Rd index 065713a3..c9df9756 100644 --- a/man/set_gitlab_host.Rd +++ b/man/set_gitlab_host.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gitstats_functions.R +% Please edit documentation in R/set_host.R \name{set_gitlab_host} \alias{set_gitlab_host} \title{Set GitLab host} @@ -10,7 +10,8 @@ set_gitlab_host( token = NULL, orgs = NULL, repos = NULL, - verbose = is_verbose(gitstats) + verbose = is_verbose(gitstats), + .show_error = TRUE ) } \arguments{ @@ -30,6 +31,9 @@ pass it, \code{orgs} parameter should stay \code{NULL}.} \item{verbose}{A logical, \code{TRUE} by default. If \code{FALSE} messages and printing output is switched off.} + +\item{.show_error}{A logical to control if passing wrong input +(\code{repositories} and \code{organizations}) should end with an error or not.} } \value{ A \code{GitStats} object with added information on host. diff --git a/tests/testthat/_snaps/set_host.md b/tests/testthat/_snaps/set_host.md index 0cbab877..9dc12052 100644 --- a/tests/testthat/_snaps/set_host.md +++ b/tests/testthat/_snaps/set_host.md @@ -127,3 +127,17 @@ ! Please type your organization name as you see it in web URL. i E.g. do not use spaces. Organization names as you see on the page may differ from their web 'address'. +# When wrong orgs and repos are passed they are excluded but host is created + + Code + test_gitstats <- create_gitstats() %>% set_github_host(orgs = c("openpharma", + "r_world_devs"), repos = c("r-world-devs/GitStats", "r-world-devs/GitMetrics"), + verbose = TRUE, .show_error = FALSE) + Message + i Using PAT from GITHUB_PAT envar. + i Checking organizations... + ! Organization you provided does not exist: https://api.github.com/orgs/r_world_devs + i Checking repositories... + ! Repository you provided does not exist: https://api.github.com/repos/r-world-devs/GitMetrics + v Set connection to GitHub. 
+ diff --git a/tests/testthat/test-02-get_commits-GitStats.R b/tests/testthat/test-02-get_commits-GitStats.R index 1a69732c..63474844 100644 --- a/tests/testthat/test-02-get_commits-GitStats.R +++ b/tests/testthat/test-02-get_commits-GitStats.R @@ -45,7 +45,7 @@ test_that("get_commits works properly", { test_that("get_commits() works", { mockery::stub( get_commits, - "gitstats_object$get_commits", + "gitstats$get_commits", test_mocker$use("commits_table") ) commits_data <- get_commits( @@ -64,7 +64,7 @@ test_that("get_commits() works", { test_that("get_commits() returns error when since is not defined", { mockery::stub( get_commits, - "gitstats_object$get_commits", + "gitstats$get_commits", test_mocker$use("commits_table") ) expect_snapshot_error( diff --git a/tests/testthat/test-helpers.R b/tests/testthat/test-helpers.R index 827bcd41..2cb15eb4 100644 --- a/tests/testthat/test-helpers.R +++ b/tests/testthat/test-helpers.R @@ -106,7 +106,9 @@ test_that("check_endpoint returns error if they are not correct", { expect_snapshot_error( check <- github_testhost_priv$check_endpoint( endpoint = "https://api.github.com/repos/r-worlddevs/GitStats", - type = "Repository" + type = "Repository", + verbose = TRUE, + .error = TRUE ) ) }) diff --git a/tests/testthat/test-set_host.R b/tests/testthat/test-set_host.R index c26f8308..17b91c99 100644 --- a/tests/testthat/test-set_host.R +++ b/tests/testthat/test-set_host.R @@ -157,6 +157,19 @@ test_that("Error pops out when `org` does not exist", { ) }) +test_that("When wrong orgs and repos are passed they are excluded but host is created", { + skip_on_cran() + expect_snapshot( + test_gitstats <- create_gitstats() %>% + set_github_host( + orgs = c("openpharma", "r_world_devs"), + repos = c("r-world-devs/GitStats", "r-world-devs/GitMetrics"), + verbose = TRUE, + .show_error = FALSE + ) + ) +}) + test_that("Setting verbose for set_*_host() to FALSE works fine", { skip_on_cran() expect_no_error( diff --git 
a/vignettes/get_repos_with_code.Rmd b/vignettes/get_repos_with_code.Rmd index 03f298e7..231345ff 100644 --- a/vignettes/get_repos_with_code.Rmd +++ b/vignettes/get_repos_with_code.Rmd @@ -30,7 +30,7 @@ github_stats <- create_gitstats() %>% verbose_off() repos_urls <- get_repos_urls( - gitstats_object = github_stats, + gitstats = github_stats, with_code = "shiny" ) ``` From ed83adbe99e5d82df1d3e8b3c368f932f679a3c1 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Mon, 30 Dec 2024 14:52:53 +0000 Subject: [PATCH 54/99] Bump version. --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 07993a2c..e0781a5a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: GitStats Title: Standardized Git Repository Data -Version: 2.1.2.9005 +Version: 2.1.2.9006 Authors@R: c( person(given = "Maciej", family = "Banas", email = "banasmaciek@gmail.com", role = c("aut", "cre")), person(given = "Kamil", family = "Koziej", email = "koziej.k@gmail.com", role = "aut"), From 8f91ba7535287a3cc2d80be59ad97d2b9394d4fe Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Mon, 30 Dec 2024 14:57:41 +0000 Subject: [PATCH 55/99] Add possibility to pass users to GitHub `orgs` parameters. 
--- R/GitHost.R | 13 ++++++++++--- R/GitHostGitHub.R | 1 + R/GitHostGitLab.R | 1 + inst/set_hosts.R | 8 ++++++++ 4 files changed, 20 insertions(+), 3 deletions(-) diff --git a/R/GitHost.R b/R/GitHost.R index 71cd508e..e546e01d 100644 --- a/R/GitHost.R +++ b/R/GitHost.R @@ -469,11 +469,18 @@ GitHost <- R6::R6Class( if (verbose) { cli::cli_alert_info(cli::col_grey("Checking organizations...")) } + orgs <- private$set_owner_type( + owners = orgs + ) orgs <- purrr::map(orgs, function(org) { - org_endpoint <- glue::glue("{private$endpoints$orgs}/{org}") + owner_endpoint <- if (attr(org, "type") == "organization") { + private$endpoints$orgs + } else { + private$endpoints$users + } check <- private$check_endpoint( - endpoint = org_endpoint, - type = "Organization", + endpoint = glue::glue("{owner_endpoint}/{org}"), + type = attr(org, "type"), verbose = verbose, .error = .error ) diff --git a/R/GitHostGitHub.R b/R/GitHostGitHub.R index 29463b49..b61890f3 100644 --- a/R/GitHostGitHub.R +++ b/R/GitHostGitHub.R @@ -93,6 +93,7 @@ GitHostGitHub <- R6::R6Class( # Set groups endpoint set_orgs_endpoint = function() { private$endpoints$orgs <- glue::glue("{private$api_url}/orgs") + private$endpoints$users <- glue::glue("{private$api_url}/users") }, # Set projects endpoint diff --git a/R/GitHostGitLab.R b/R/GitHostGitLab.R index 39a28c1d..cbe3ac42 100644 --- a/R/GitHostGitLab.R +++ b/R/GitHostGitLab.R @@ -95,6 +95,7 @@ GitHostGitLab <- R6::R6Class("GitHostGitLab", # Set groups endpoint set_orgs_endpoint = function() { private$endpoints$orgs <- glue::glue("{private$api_url}/groups") + private$endpoints$users <- glue::glue("{private$api_url}/groups") }, # Set projects endpoint diff --git a/inst/set_hosts.R b/inst/set_hosts.R index 02eabe81..49facd19 100644 --- a/inst/set_hosts.R +++ b/inst/set_hosts.R @@ -12,3 +12,11 @@ git_stats <- create_gitstats() |> ) git_stats + +# Setting users instead of orgs + +git_stats <- create_gitstats() |> + set_github_host( + orgs = c("maciekbanas"), + 
token = Sys.getenv("GITHUB_PAT") + ) From 4f1d6b216ea157dc76586240cffd756414636d52 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Tue, 31 Dec 2024 15:44:03 +0000 Subject: [PATCH 56/99] Add NEWS. --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index d81d3c30..950aa687 100644 --- a/NEWS.md +++ b/NEWS.md @@ -16,6 +16,7 @@ - Filled more information on `author_name` and `author_login` if it was missing in `commits_table` ([#550](https://github.com/r-world-devs/GitStats/issues/550)). - Handled a `GraphQL` response error when pulling repositories with R error. Earlier, `GitStats` just returned empty table with no clue on what has happened, as errors from `GraphQL` are returned as list outputs (they do not break code). - Fixed getting R package usage when repositories are set ([#548](https://github.com/r-world-devs/GitStats/issues/548)). +- Added possibility to pass GitHub users to `orgs` parameter in `set_github_host()` ([#562](https://github.com/r-world-devs/GitStats/issues/562)). # GitStats 2.1.2 From 4c330b3b1ee0adece1eb1878a8ba7c4322697204 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Tue, 31 Dec 2024 15:55:27 +0000 Subject: [PATCH 57/99] Fixes for pulling repos and repos urls when user set instead of organization & optimize checking organizations: leave only GraphQL method, remove REST method. The latter was made also with handling errors on the GraphQL response side. 
--- R/EngineGraphQLGitHub.R | 25 +++++- R/EngineGraphQLGitLab.R | 38 +++++++-- R/EngineRestGitHub.R | 4 +- R/EngineRestGitLab.R | 12 ++- R/GitHost.R | 80 +++++++++++-------- R/GitHostGitHub.R | 21 +---- R/GitHostGitLab.R | 27 +------ inst/set_hosts.R | 20 ++++- tests/testthat/_snaps/01-get_repos-GitLab.md | 2 +- .../testthat/_snaps/02-get_commits-GitLab.md | 2 +- tests/testthat/_snaps/set_host.md | 37 ++++----- tests/testthat/setup.R | 4 +- tests/testthat/test-01-get_repos-GitHub.R | 2 +- tests/testthat/test-01-get_repos-GitLab.R | 3 +- tests/testthat/test-02-get_commits-GitHub.R | 2 +- tests/testthat/test-02-get_commits-GitLab.R | 2 +- tests/testthat/test-GitHost-helpers.R | 12 +-- .../testthat/test-get_files_content-GitHub.R | 2 +- .../test-get_files_structure-GitLab.R | 6 +- tests/testthat/test-get_release-GitHub.R | 2 +- tests/testthat/test-get_urls_repos-GitHub.R | 2 +- tests/testthat/test-get_urls_repos-GitLab.R | 16 +++- 22 files changed, 186 insertions(+), 135 deletions(-) diff --git a/R/EngineGraphQLGitHub.R b/R/EngineGraphQLGitHub.R index 64a52c1b..5a1f536a 100644 --- a/R/EngineGraphQLGitHub.R +++ b/R/EngineGraphQLGitHub.R @@ -15,6 +15,28 @@ EngineGraphQLGitHub <- R6::R6Class( self$gql_query <- GQLQueryGitHub$new() }, + # Set owner type + set_owner_type = function(owners) { + user_or_org_query <- self$gql_query$user_or_org_query + login_types <- purrr::map(owners, function(owner) { + response <- self$gql_response( + gql_query = user_or_org_query, + vars = list( + "login" = owner + ) + ) + if (length(response$errors) < 2) { + type <- purrr::discard(response$data, is.null) |> + names() + attr(owner, "type") <- type + } else { + attr(owner, "type") <- "not found" + } + return(owner) + }) + return(login_types) + }, + #' Get all orgs from GitHub. get_orgs = function() { end_cursor <- NULL @@ -72,7 +94,8 @@ EngineGraphQLGitHub <- R6::R6Class( }, # Parses repositories list into table. 
- prepare_repos_table = function(repos_list) { + # org parameter is empty for GitHub but is needed for GitLab class. + prepare_repos_table = function(repos_list, org) { if (length(repos_list) > 0) { repos_table <- purrr::map(repos_list, function(repo) { repo$default_branch <- if (!is.null(repo$default_branch)) { diff --git a/R/EngineGraphQLGitLab.R b/R/EngineGraphQLGitLab.R index 5251b0f8..349b7b9f 100644 --- a/R/EngineGraphQLGitLab.R +++ b/R/EngineGraphQLGitLab.R @@ -17,6 +17,29 @@ EngineGraphQLGitLab <- R6::R6Class( self$gql_query <- GQLQueryGitLab$new() }, + # Set owner type + set_owner_type = function(owners) { + user_or_org_query <- self$gql_query$user_or_org_query + login_types <- purrr::map(owners, function(owner) { + response <- self$gql_response( + gql_query = user_or_org_query, + vars = list( + "username" = owner, + "grouppath" = owner + ) + ) + if (!all(purrr::map_lgl(response$data, is.null))) { + type <- purrr::discard(response$data, is.null) |> + names() + attr(owner, "type") <- type + } else { + attr(owner, "type") <- "not found" + } + return(owner) + }) + return(login_types) + }, + #' Get all groups from GitLab. get_orgs = function() { group_cursor <- "" @@ -78,7 +101,7 @@ EngineGraphQLGitLab <- R6::R6Class( }, # Parses repositories list into table. 
- prepare_repos_table = function(repos_list) { + prepare_repos_table = function(repos_list, org) { if (length(repos_list) > 0) { repos_table <- purrr::map(repos_list, function(repo) { repo <- repo$node @@ -86,7 +109,7 @@ EngineGraphQLGitLab <- R6::R6Class( repo[["default_branch"]] <- repo$repository$rootRef %||% "" repo$repository <- NULL repo[["languages"]] <- if (length(repo$languages) > 0) { - purrr::map_chr(repo$languages, ~ .$name) %>% + purrr::map_chr(repo$languages, ~ .$name) |> paste0(collapse = ", ") } else { "" @@ -96,16 +119,19 @@ EngineGraphQLGitLab <- R6::R6Class( repo[["issues_closed"]] <- repo$issues$closed repo$issues <- NULL repo[["last_activity_at"]] <- as.POSIXct(repo$last_activity_at) - repo[["organization"]] <- repo$namespace$path + if (!is.null(repo$namespace)) { + org <- repo$namespace$path + } + repo[["organization"]] <- org repo$namespace <- NULL repo$repo_path <- NULL # temporary to close issue 338 return(data.frame(repo)) - }) %>% - purrr::list_rbind() %>% + }) |> + purrr::list_rbind() |> dplyr::relocate( repo_url, .after = organization - ) %>% + ) |> dplyr::relocate( default_branch, .after = repo_name diff --git a/R/EngineRestGitHub.R b/R/EngineRestGitHub.R index 0add30e4..db6d95dd 100644 --- a/R/EngineRestGitHub.R +++ b/R/EngineRestGitHub.R @@ -132,9 +132,9 @@ EngineRestGitHub <- R6::R6Class( get_repos_urls = function(type, org, repos) { owner_type <- attr(org, "type") %||% "organization" if (owner_type == "user") { - repo_endpoint <- paste0(private$endpoints[["users"]], org, "/repos") + repo_endpoint <- paste0(private$endpoints[["users"]], utils::URLdecode(org), "/repos") } else { - repo_endpoint <- paste0(private$endpoints[["organizations"]], org, "/repos") + repo_endpoint <- paste0(private$endpoints[["organizations"]], utils::URLdecode(org), "/repos") } repos_response <- private$paginate_results( endpoint = repo_endpoint diff --git a/R/EngineRestGitLab.R b/R/EngineRestGitLab.R index 791e7b1e..fcccc21c 100644 --- 
a/R/EngineRestGitLab.R +++ b/R/EngineRestGitLab.R @@ -180,8 +180,14 @@ EngineRestGitLab <- R6::R6Class( # Pull all repositories URLs from organization get_repos_urls = function(type, org, repos) { + owner_type <- attr(org, "type") + owner_endpoint <- if (owner_type == "organization") { + private$endpoints[["organizations"]] + } else { + private$endpoints[["users"]] + } repos_response <- private$paginate_results( - endpoint = paste0(private$endpoints[["organizations"]], + endpoint = paste0(owner_endpoint, utils::URLencode(org, reserved = TRUE), "/projects") ) @@ -327,6 +333,10 @@ EngineRestGitLab <- R6::R6Class( self$rest_api_url, "/groups/" ) + private$endpoints[["users"]] <- paste0( + self$rest_api_url, + "/users/" + ) }, # Set search endpoint diff --git a/R/GitHost.R b/R/GitHost.R index e546e01d..ae76e91a 100644 --- a/R/GitHost.R +++ b/R/GitHost.R @@ -455,8 +455,8 @@ GitHost <- R6::R6Class( repo <- NULL } return(repo) - }, .progress = verbose) %>% - purrr::keep(~ length(.) > 0) %>% + }, .progress = verbose) |> + purrr::keep(~ length(.) > 0) |> unlist() if (length(repos) == 0) { return(NULL) @@ -464,33 +464,40 @@ GitHost <- R6::R6Class( repos }, - # Check if organizations exist + # Check if organizations or users exist check_organizations = function(orgs, verbose, .error) { if (verbose) { - cli::cli_alert_info(cli::col_grey("Checking organizations...")) + cli::cli_alert_info(cli::col_grey("Checking owners...")) } - orgs <- private$set_owner_type( - owners = orgs - ) - orgs <- purrr::map(orgs, function(org) { - owner_endpoint <- if (attr(org, "type") == "organization") { - private$endpoints$orgs - } else { - private$endpoints$users - } - check <- private$check_endpoint( - endpoint = glue::glue("{owner_endpoint}/{org}"), - type = attr(org, "type"), - verbose = verbose, - .error = .error - ) - if (!check) { - org <- NULL - } - return(org) - }) %>% - purrr::keep(~ length(.) 
> 0) %>% - unlist() + orgs <- private$engines$graphql$set_owner_type( + owners = utils::URLdecode(orgs) + ) |> + purrr::map(function(org) { + if (attr(org, "type") == "not found") { + if (.error) { + cli::cli_abort( + c( + "x" = "Org/user you provided does not exist or its name was passed + in a wrong way: {cli::col_red({utils::URLdecode(org)})}", + "!" = "Please type your org/user name the way you see it in + web URL." + ), + call = NULL + ) + } else { + if (verbose) { + cli::cli_alert_warning( + cli::col_yellow( + "Org/user you provided does not exist: {cli::col_red({org})}" + ) + ) + } + org <- NULL + } + } + return(org) + }) |> + purrr::keep(~ length(.) > 0) if (length(orgs) == 0) { return(NULL) } @@ -663,7 +670,10 @@ GitHost <- R6::R6Class( org = org, type = type ) |> - graphql_engine$prepare_repos_table() + graphql_engine$prepare_repos_table( + org = unclass(org) + ) |> + dplyr::filter(organization == unclass(org)) return(repos_table) }, .progress = progress) |> purrr::list_rbind() @@ -673,7 +683,7 @@ GitHost <- R6::R6Class( get_repos_from_repos = function(verbose, progress) { if ("repo" %in% private$searching_scope) { graphql_engine <- private$engines$graphql - orgs <- private$set_owner_type( + orgs <- graphql_engine$set_owner_type( owners = names(private$orgs_repos) ) purrr::map(orgs, function(org) { @@ -772,12 +782,11 @@ GitHost <- R6::R6Class( if ("org" %in% private$searching_scope) { rest_engine <- private$engines$rest repos_vector <- purrr::map(private$orgs, function(org) { - org <- utils::URLdecode(org) if (!private$scan_all && verbose) { show_message( host = private$host_name, engine = "rest", - scope = org, + scope = utils::URLdecode(org), information = "Pulling repositories (URLs)" ) } @@ -795,7 +804,8 @@ GitHost <- R6::R6Class( get_repos_urls_from_repos = function(type, verbose, progress) { if ("repo" %in% private$searching_scope) { rest_engine <- private$engines$rest - orgs <- private$set_owner_type( + graphql_engine <- private$engines$graphql + 
orgs <- graphql_engine$set_owner_type( owners = names(private$orgs_repos) ) repos_vector <- purrr::map(orgs, function(org) { @@ -1132,7 +1142,7 @@ GitHost <- R6::R6Class( progress = TRUE) { if ("repo" %in% private$searching_scope) { graphql_engine <- private$engines$graphql - orgs <- private$set_owner_type( + orgs <- graphql_engine$set_owner_type( owners = names(private$orgs_repos) ) files_table <- purrr::map(orgs, function(org) { @@ -1262,7 +1272,7 @@ GitHost <- R6::R6Class( progress = TRUE) { if ("repo" %in% private$searching_scope) { graphql_engine <- private$engines$graphql - orgs <- private$set_owner_type( + orgs <- graphql_engine$set_owner_type( owners = names(private$orgs_repos) ) files_structure_list <- purrr::map(orgs, function(org) { @@ -1369,7 +1379,8 @@ GitHost <- R6::R6Class( get_release_logs_from_repos = function(since, until, verbose, progress) { if ("repo" %in% private$searching_scope) { - orgs <- private$set_owner_type( + graphql_engine <- private$engines$graphql + orgs <- graphql_engine$set_owner_type( owners = names(private$orgs_repos) ) release_logs_table <- purrr::map(orgs, function(org) { @@ -1383,7 +1394,6 @@ GitHost <- R6::R6Class( information = "Pulling release logs" ) } - graphql_engine <- private$engines$graphql release_logs_table_org <- graphql_engine$get_release_logs_from_org( repos_names = private$orgs_repos[[org]], org = org diff --git a/R/GitHostGitHub.R b/R/GitHostGitHub.R index b61890f3..df4c2bf9 100644 --- a/R/GitHostGitHub.R +++ b/R/GitHostGitHub.R @@ -101,25 +101,6 @@ GitHostGitHub <- R6::R6Class( private$endpoints$repositories <- glue::glue("{private$api_url}/repos") }, - # Set owner type - set_owner_type = function(owners) { - graphql_engine <- private$engines$graphql - user_or_org_query <- graphql_engine$gql_query$user_or_org_query - login_types <- purrr::map(owners, function(owner) { - response <- graphql_engine$gql_response( - gql_query = user_or_org_query, - vars = list( - "login" = owner - ) - ) - type <- 
purrr::discard(response$data, is.null) %>% - names() - attr(owner, "type") <- type - return(owner) - }) - return(login_types) - }, - # Setup REST and GraphQL engines setup_engines = function() { private$engines$rest <- EngineRestGitHub$new( @@ -212,7 +193,7 @@ GitHostGitHub <- R6::R6Class( get_commits_from_repos = function(since, until, verbose, progress) { if ("repo" %in% private$searching_scope) { graphql_engine <- private$engines$graphql - orgs <- private$set_owner_type( + orgs <- graphql_engine$set_owner_type( owners = names(private$orgs_repos) ) commits_table <- purrr::map(orgs, function(org) { diff --git a/R/GitHostGitLab.R b/R/GitHostGitLab.R index cbe3ac42..0727293e 100644 --- a/R/GitHostGitLab.R +++ b/R/GitHostGitLab.R @@ -95,7 +95,7 @@ GitHostGitLab <- R6::R6Class("GitHostGitLab", # Set groups endpoint set_orgs_endpoint = function() { private$endpoints$orgs <- glue::glue("{private$api_url}/groups") - private$endpoints$users <- glue::glue("{private$api_url}/groups") + private$endpoints$users <- glue::glue("{private$api_url}/users?username=") }, # Set projects endpoint @@ -103,26 +103,6 @@ GitHostGitLab <- R6::R6Class("GitHostGitLab", private$endpoints$repositories <- glue::glue("{private$api_url}/projects") }, - # Set owner type - set_owner_type = function(owners) { - graphql_engine <- private$engines$graphql - user_or_org_query <- graphql_engine$gql_query$user_or_org_query - login_types <- purrr::map(owners, function(owner) { - response <- graphql_engine$gql_response( - gql_query = user_or_org_query, - vars = list( - "username" = owner, - "grouppath" = owner - ) - ) - type <- purrr::discard(response$data, is.null) %>% - names() - attr(owner, "type") <- type - return(owner) - }) - return(login_types) - }, - # Setup REST and GraphQL engines setup_engines = function() { private$engines$rest <- EngineRestGitLab$new( @@ -225,7 +205,8 @@ GitHostGitLab <- R6::R6Class("GitHostGitLab", progress = verbose) { if ("repo" %in% private$searching_scope) { rest_engine <- 
private$engines$rest - orgs <- private$set_owner_type( + graphql_engine <- private$engines$graphql + orgs <- graphql_engine$set_owner_type( owners = names(private$orgs_repos) ) commits_table <- purrr::map(orgs, function(org) { @@ -291,7 +272,7 @@ GitHostGitLab <- R6::R6Class("GitHostGitLab", progress = TRUE) { if ("repo" %in% private$searching_scope) { graphql_engine <- private$engines$graphql - orgs <- private$set_owner_type( + orgs <- graphql_engine$set_owner_type( owners = names(private$orgs_repos) ) files_table <- purrr::map(orgs, function(org) { diff --git a/inst/set_hosts.R b/inst/set_hosts.R index 49facd19..80819bc0 100644 --- a/inst/set_hosts.R +++ b/inst/set_hosts.R @@ -1,12 +1,15 @@ +devtools::load_all() + git_stats <- create_gitstats() |> set_github_host( - orgs = c("r-world-devs"), - repos = c("openpharma/DataFakR"), + orgs = c("r-world-devs", "openparma"), + repos = c("openpharma/DataFakR", "r-world-devs/GitAI"), token = Sys.getenv("GITHUB_PAT"), .show_error = FALSE ) |> set_gitlab_host( orgs = c("mbtests", "makbest"), + repos = c("makbest/something", "mbtests/gitstatstesting", "krystianigras/gitlab-test"), token = Sys.getenv("GITLAB_PAT_PUBLIC"), .show_error = FALSE ) @@ -17,6 +20,17 @@ git_stats git_stats <- create_gitstats() |> set_github_host( - orgs = c("maciekbanas"), + orgs = c("maciekbanas", "ddsjoberg", "mattsecrest"), token = Sys.getenv("GITHUB_PAT") ) +get_repos(git_stats) +get_repos_urls(git_stats) + +git_stats <- create_gitstats() |> + set_gitlab_host( + orgs = c("krystianigras"), + token = Sys.getenv("GITLAB_PAT_PUBLIC") + ) + +get_repos(git_stats, add_contributors = FALSE) +get_repos_urls(git_stats) diff --git a/tests/testthat/_snaps/01-get_repos-GitLab.md b/tests/testthat/_snaps/01-get_repos-GitLab.md index f13ca202..767febf3 100644 --- a/tests/testthat/_snaps/01-get_repos-GitLab.md +++ b/tests/testthat/_snaps/01-get_repos-GitLab.md @@ -27,5 +27,5 @@ gl_repos_from_orgs <- gitlab_testhost_priv$get_repos_from_orgs(verbose = TRUE, progress 
= FALSE) Message - i [Host:GitLab][Engine:GraphQl][Scope:mbtests] Pulling repositories... + i [Host:GitLab][Engine:GraphQl][Scope:test_group] Pulling repositories... diff --git a/tests/testthat/_snaps/02-get_commits-GitLab.md b/tests/testthat/_snaps/02-get_commits-GitLab.md index 13418dda..8b2810da 100644 --- a/tests/testthat/_snaps/02-get_commits-GitLab.md +++ b/tests/testthat/_snaps/02-get_commits-GitLab.md @@ -4,7 +4,7 @@ gl_commits_table <- gitlab_testhost_priv$get_commits_from_orgs(since = "2023-03-01", until = "2023-04-01", verbose = TRUE, progress = FALSE) Message - i [Host:GitLab][Engine:REST][Scope:mbtests] Pulling commits... + i [Host:GitLab][Engine:REST][Scope:test_group] Pulling commits... # get_commits_from_repos works diff --git a/tests/testthat/_snaps/set_host.md b/tests/testthat/_snaps/set_host.md index 9dc12052..60ec7dc7 100644 --- a/tests/testthat/_snaps/set_host.md +++ b/tests/testthat/_snaps/set_host.md @@ -4,7 +4,7 @@ set_github_host(gitstats = test_gitstats, token = Sys.getenv("GITHUB_PAT"), orgs = c("openpharma", "r-world-devs")) Message - i Checking organizations... + i Checking owners... v Set connection to GitHub. --- @@ -13,7 +13,7 @@ test_gitstats %>% set_gitlab_host(token = Sys.getenv("GITLAB_PAT_PUBLIC"), orgs = c("mbtests")) Message - i Checking organizations... + i Checking owners... v Set connection to GitLab. # When empty token for GitHub, GitStats pulls default token @@ -23,7 +23,7 @@ "r-world-devs")) Message i Using PAT from GITHUB_PAT envar. - i Checking organizations... + i Checking owners... v Set connection to GitHub. # When empty token for GitLab, GitStats pulls default token @@ -34,7 +34,7 @@ }) Message i Using PAT from GITLAB_PAT envar. - i Checking organizations... + i Checking owners... v Set connection to GitLab. 
# Set GitHub host with particular repos vector instead of orgs @@ -74,9 +74,9 @@ test_gitstats %>% set_github_host(token = Sys.getenv("GITHUB_PAT"), orgs = "pharmaverse") %>% set_github_host(token = Sys.getenv("GITHUB_PAT"), orgs = "openpharma") Message - i Checking organizations... + i Checking owners... v Set connection to GitHub. - i Checking organizations... + i Checking owners... v Set connection to GitHub. Condition Error: @@ -88,14 +88,13 @@ test_gitstats <- create_gitstats() %>% set_github_host(token = Sys.getenv( "GITHUB_PAT"), orgs = c("openparma")) Message - i Checking organizations... + i Checking owners... Condition Error in `purrr::map()`: i In index: 1. Caused by error: - x Organization you provided does not exist or its name was passed in a wrong way: https://api.github.com/orgs/openparma - ! Please type your organization name as you see it in web URL. - i E.g. do not use spaces. Organization names as you see on the page may differ from their web 'address'. + x Org/user you provided does not exist or its name was passed in a wrong way: openparma + ! Please type your org/user name the way you see it in web URL. --- @@ -103,14 +102,13 @@ test_gitstats <- create_gitstats() %>% set_gitlab_host(token = Sys.getenv( "GITLAB_PAT_PUBLIC"), orgs = c("openparma", "mbtests")) Message - i Checking organizations... + i Checking owners... Condition Error in `purrr::map()`: i In index: 1. Caused by error: - x Organization you provided does not exist or its name was passed in a wrong way: https://gitlab.com/api/v4/groups/openparma - ! Please type your organization name as you see it in web URL. - i E.g. do not use spaces. Organization names as you see on the page may differ from their web 'address'. + x Org/user you provided does not exist or its name was passed in a wrong way: openparma + ! Please type your org/user name the way you see it in web URL. 
--- @@ -118,14 +116,13 @@ test_gitstats <- create_gitstats() %>% set_github_host(token = Sys.getenv( "GITHUB_PAT"), orgs = c("openpharma", "r_world_devs")) Message - i Checking organizations... + i Checking owners... Condition Error in `purrr::map()`: i In index: 2. Caused by error: - x Organization you provided does not exist or its name was passed in a wrong way: https://api.github.com/orgs/r_world_devs - ! Please type your organization name as you see it in web URL. - i E.g. do not use spaces. Organization names as you see on the page may differ from their web 'address'. + x Org/user you provided does not exist or its name was passed in a wrong way: r_world_devs + ! Please type your org/user name the way you see it in web URL. # When wrong orgs and repos are passed they are excluded but host is created @@ -135,8 +132,8 @@ verbose = TRUE, .show_error = FALSE) Message i Using PAT from GITHUB_PAT envar. - i Checking organizations... - ! Organization you provided does not exist: https://api.github.com/orgs/r_world_devs + i Checking owners... + ! Org/user you provided does not exist: r_world_devs i Checking repositories... ! Repository you provided does not exist: https://api.github.com/repos/r-world-devs/GitMetrics v Set connection to GitHub. 
diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index 69811c67..af2b05a3 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -34,6 +34,6 @@ github_testhost <- create_github_testhost(orgs = "test_org") github_testhost_priv <- create_github_testhost(orgs = "test_org", mode = "private") -gitlab_testhost <- create_gitlab_testhost(orgs = "mbtests") +gitlab_testhost <- create_gitlab_testhost(orgs = "test_group") -gitlab_testhost_priv <- create_gitlab_testhost(orgs = "mbtests", mode = "private") +gitlab_testhost_priv <- create_gitlab_testhost(orgs = "test_group", mode = "private") diff --git a/tests/testthat/test-01-get_repos-GitHub.R b/tests/testthat/test-01-get_repos-GitHub.R index ee7798f6..d302c97a 100644 --- a/tests/testthat/test-01-get_repos-GitHub.R +++ b/tests/testthat/test-01-get_repos-GitHub.R @@ -568,7 +568,7 @@ test_that("get_repos_from_repos works", { attr(test_org, "type") <- "organization" mockery::stub( github_testhost_priv$get_repos_from_repos, - "private$set_owner_type", + "graphql_engine$set_owner_type", test_org ) mockery::stub( diff --git a/tests/testthat/test-01-get_repos-GitLab.R b/tests/testthat/test-01-get_repos-GitLab.R index 695eb38d..bf0ec6fb 100644 --- a/tests/testthat/test-01-get_repos-GitLab.R +++ b/tests/testthat/test-01-get_repos-GitLab.R @@ -234,7 +234,8 @@ test_that("`get_repos_by_code()` works", { test_that("`prepare_repos_table()` prepares repos table", { gl_repos_table <- test_graphql_gitlab$prepare_repos_table( - repos_list = test_mocker$use("gl_repos_from_org") + repos_list = test_mocker$use("gl_repos_from_org"), + org = "test_group" ) expect_repos_table( gl_repos_table diff --git a/tests/testthat/test-02-get_commits-GitHub.R b/tests/testthat/test-02-get_commits-GitHub.R index 38f0c5e1..7904aa0e 100644 --- a/tests/testthat/test-02-get_commits-GitHub.R +++ b/tests/testthat/test-02-get_commits-GitHub.R @@ -151,7 +151,7 @@ test_that("get_commits_from_repos for GitHub works", { attr(test_org, "type") <- 
"organization" mockery::stub( github_testhost_priv$get_commits_from_repos, - "private$set_owner_type", + "graphql_engine$set_owner_type", test_org ) gh_commits_from_repos <- github_testhost_priv$get_commits_from_repos( diff --git a/tests/testthat/test-02-get_commits-GitLab.R b/tests/testthat/test-02-get_commits-GitLab.R index 8ee44a3f..2c916745 100644 --- a/tests/testthat/test-02-get_commits-GitLab.R +++ b/tests/testthat/test-02-get_commits-GitLab.R @@ -166,7 +166,7 @@ test_that("get_commits_from_repos works", { attr(test_org, "type") <- "organization" mockery::stub( gitlab_testhost_priv$get_repos_from_repos, - "private$set_owner_type", + "graphql_engine$set_owner_type", test_org ) mockery::stub( diff --git a/tests/testthat/test-GitHost-helpers.R b/tests/testthat/test-GitHost-helpers.R index fbadb260..e523814c 100644 --- a/tests/testthat/test-GitHost-helpers.R +++ b/tests/testthat/test-GitHost-helpers.R @@ -1,21 +1,21 @@ test_that("set_owner_types sets attributes to owners list", { mockery::stub( - github_testhost_priv$set_owner_type, - "graphql_engine$gql_response", + test_graphql_github$set_owner_type, + "self$gql_response", test_fixtures$github_user_login ) - owner <- github_testhost_priv$set_owner_type( + owner <- test_graphql_github$set_owner_type( owners = c("test_user") ) expect_equal(attr(owner[[1]], "type"), "user") expect_equal(owner[[1]], "test_user", ignore_attr = TRUE) mockery::stub( - github_testhost_priv$set_owner_type, - "graphql_engine$gql_response", + test_graphql_github$set_owner_type, + "self$gql_response", test_fixtures$github_org_login ) - owner <- github_testhost_priv$set_owner_type( + owner <- test_graphql_github$set_owner_type( owners = c("test_org") ) expect_equal(attr(owner[[1]], "type"), "organization") diff --git a/tests/testthat/test-get_files_content-GitHub.R b/tests/testthat/test-get_files_content-GitHub.R index 54c896ce..98132f28 100644 --- a/tests/testthat/test-get_files_content-GitHub.R +++ 
b/tests/testthat/test-get_files_content-GitHub.R @@ -120,7 +120,7 @@ test_that("get_files_content_from_repos for GitHub works", { attr(test_org, "type") <- "organization" mockery::stub( github_testhost_priv$get_files_content_from_repos, - "private$set_owner_type", + "graphql_engine$set_owner_type", test_org ) mockery::stub( diff --git a/tests/testthat/test-get_files_structure-GitLab.R b/tests/testthat/test-get_files_structure-GitLab.R index 64f08825..0671bc71 100644 --- a/tests/testthat/test-get_files_structure-GitLab.R +++ b/tests/testthat/test-get_files_structure-GitLab.R @@ -182,7 +182,7 @@ test_that("get_files_structure_from_orgs pulls files structure for repositories ) expect_equal( names(gl_files_structure_from_orgs), - c("mbtests") + c("test_group") ) purrr::walk(gl_files_structure_from_orgs[[1]], function(repo_files) { expect_true(all(grepl("\\.md", repo_files))) @@ -198,7 +198,7 @@ test_that("get_path_from_files_structure gets file path from files structure", { test_graphql_gitlab <- environment(test_graphql_gitlab$initialize)$private file_path <- test_graphql_gitlab$get_path_from_files_structure( host_files_structure = test_mocker$use("gl_files_structure_from_orgs"), - org = "mbtests" # this will need fixing and repo parameter must come back + org = "test_group" # this will need fixing and repo parameter must come back ) expect_equal(typeof(file_path), "character") expect_true(length(file_path) > 0) @@ -218,7 +218,7 @@ test_that("get_files_structure pulls files structure for repositories in orgs", ) expect_equal( names(gl_files_structure_from_orgs), - c("mbtests") + c("test_group") ) purrr::walk(gl_files_structure_from_orgs[[1]], function(repo_files) { expect_true(all(grepl("\\.md", repo_files))) diff --git a/tests/testthat/test-get_release-GitHub.R b/tests/testthat/test-get_release-GitHub.R index 19b8f49a..cc61b67f 100644 --- a/tests/testthat/test-get_release-GitHub.R +++ b/tests/testthat/test-get_release-GitHub.R @@ -65,7 +65,7 @@ 
test_that("`get_release_logs_from_repos()` works", { attr(test_org, "type") <- "organization" mockery::stub( github_testhost_priv$get_release_logs_from_repos, - "private$set_owner_type", + "graphql_engine$set_owner_type", test_org ) github_testhost_priv$searching_scope <- "repo" diff --git a/tests/testthat/test-get_urls_repos-GitHub.R b/tests/testthat/test-get_urls_repos-GitHub.R index e474b445..baa41cd8 100644 --- a/tests/testthat/test-get_urls_repos-GitHub.R +++ b/tests/testthat/test-get_urls_repos-GitHub.R @@ -77,7 +77,7 @@ test_that("get_repos_urls_from_repos prepares web repo_urls vector", { attr(test_org, "type") <- "organization" mockery::stub( github_testhost_priv$get_repos_urls_from_repos, - "private$set_owner_type", + "graphql_engine$set_owner_type", test_org ) mockery::stub( diff --git a/tests/testthat/test-get_urls_repos-GitLab.R b/tests/testthat/test-get_urls_repos-GitLab.R index 91fb72dc..c49f574d 100644 --- a/tests/testthat/test-get_urls_repos-GitLab.R +++ b/tests/testthat/test-get_urls_repos-GitLab.R @@ -4,9 +4,11 @@ test_that("get_repos_urls() works for org", { "private$paginate_results", test_fixtures$gitlab_repositories_rest_response ) + test_org <- "test_org" + attr(test_org, "type") <- "organization" gl_api_repos_urls <- test_rest_gitlab$get_repos_urls( type = "api", - org = "test_org", + org = test_org, repos = NULL ) expect_length( @@ -14,9 +16,11 @@ test_that("get_repos_urls() works for org", { 3 ) test_mocker$cache(gl_api_repos_urls) + test_user <- "test_user" + attr(test_user, "type") <- "user" gl_web_repos_urls <- test_rest_gitlab$get_repos_urls( type = "web", - org = "test_org", + org = test_user, repos = NULL ) expect_length( @@ -32,9 +36,11 @@ test_that("get_repos_urls() works for individual repos", { "private$paginate_results", test_fixtures$gitlab_repositories_rest_response ) + test_group <- "test_group" + attr(test_group, "type") <- "organization" gl_api_repos_urls <- test_rest_gitlab$get_repos_urls( type = "api", - org = 
"test_org", + org = test_group, repos = c("testRepo1", "testRepo2") ) expect_length( @@ -42,9 +48,11 @@ test_that("get_repos_urls() works for individual repos", { 2 ) test_mocker$cache(gl_api_repos_urls) + test_user <- "test_user" + attr(test_user, "type") <- "user" gl_web_repos_urls <- test_rest_gitlab$get_repos_urls( type = "web", - org = "mbtests", + org = test_user, repos = c("testRepo1", "testRepo2") ) expect_length( From 419cc04451dbfe0bfc8d4e820f824c780b7a208f Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Tue, 7 Jan 2025 12:49:15 +0000 Subject: [PATCH 58/99] Bump version. --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index e0781a5a..53fb5a5c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: GitStats Title: Standardized Git Repository Data -Version: 2.1.2.9006 +Version: 2.1.2.9007 Authors@R: c( person(given = "Maciej", family = "Banas", email = "banasmaciek@gmail.com", role = c("aut", "cre")), person(given = "Kamil", family = "Koziej", email = "koziej.k@gmail.com", role = "aut"), From 622f4566228150d2802f19b7756b2480658917c7 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Tue, 7 Jan 2025 12:49:39 +0000 Subject: [PATCH 59/99] Show path instead of name in table. 
--- R/EngineGraphQLGitLab.R | 4 ++-- R/GQLQueryGitLab.R | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/EngineGraphQLGitLab.R b/R/EngineGraphQLGitLab.R index 349b7b9f..aea6906f 100644 --- a/R/EngineGraphQLGitLab.R +++ b/R/EngineGraphQLGitLab.R @@ -302,7 +302,7 @@ EngineGraphQLGitLab <- R6::R6Class( "repo_name" = response_data$data$project$name, "repo_id" = response_data$data$project$id, "organization" = org, - "file_path" = file$name, + "file_path" = file$path, "file_content" = file$rawBlob, "file_size" = as.integer(file$size), "repo_url" = response_data$data$project$webUrl @@ -318,7 +318,7 @@ EngineGraphQLGitLab <- R6::R6Class( "repo_name" = project$name, "repo_id" = project$id, "organization" = org, - "file_path" = file$name, + "file_path" = file$path, "file_content" = file$rawBlob, "file_size" = as.integer(file$size), "repo_url" = project$webUrl diff --git a/R/GQLQueryGitLab.R b/R/GQLQueryGitLab.R index fa3def62..3ae4cf06 100644 --- a/R/GQLQueryGitLab.R +++ b/R/GQLQueryGitLab.R @@ -119,7 +119,7 @@ GQLQueryGitLab <- R6::R6Class("GQLQueryGitLab", repository { blobs(paths: $file_paths) { nodes { - name + path rawBlob size } @@ -143,7 +143,7 @@ GQLQueryGitLab <- R6::R6Class("GQLQueryGitLab", repository { blobs(paths: $file_paths) { nodes { - name + path rawBlob size } From a9de4e908f10c9aae7cf7f3fb7ff60c35e49042d Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Tue, 7 Jan 2025 13:43:03 +0000 Subject: [PATCH 60/99] Merge two functions into one get_files(). 
--- NAMESPACE | 3 +- R/GitHost.R | 25 ++- R/GitHostGitLab.R | 11 +- R/GitStats.R | 165 +++++-------------- R/get_files.R | 61 +++++++ R/gitstats_functions.R | 112 ------------- man/{get_files_structure.Rd => get_files.Rd} | 39 +++-- man/get_files_content.Rd | 72 -------- vignettes/get_files.Rmd | 14 +- 9 files changed, 138 insertions(+), 364 deletions(-) create mode 100644 R/get_files.R rename man/{get_files_structure.Rd => get_files.Rd} (60%) delete mode 100644 man/get_files_content.Rd diff --git a/NAMESPACE b/NAMESPACE index 2d1a9552..33baeba0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,8 +5,7 @@ export(create_gitstats) export(get_R_package_usage) export(get_commits) export(get_commits_stats) -export(get_files_content) -export(get_files_structure) +export(get_files) export(get_release_logs) export(get_repos) export(get_repos_urls) diff --git a/R/GitHost.R b/R/GitHost.R index ae76e91a..82230067 100644 --- a/R/GitHost.R +++ b/R/GitHost.R @@ -164,10 +164,10 @@ GitHost <- R6::R6Class( #' Retrieve content of given text files from all repositories for a host in #' a table format. 
get_files_content = function(file_path, - host_files_structure = NULL, + files_structure = NULL, verbose = TRUE, progress = TRUE) { - if (is.null(host_files_structure)) { + if (is.null(files_structure)) { if (!private$scan_all) { files_content_from_orgs <- private$get_files_content_from_orgs( file_path = file_path, @@ -191,9 +191,9 @@ GitHost <- R6::R6Class( ) } } - if (!is.null(host_files_structure)) { + if (!is.null(files_structure)) { files_table <- private$get_files_content_from_files_structure( - host_files_structure = host_files_structure, + files_structure = files_structure, verbose = verbose, progress = progress ) @@ -1173,17 +1173,12 @@ GitHost <- R6::R6Class( } }, - get_files_content_from_files_structure = function(host_files_structure, + get_files_content_from_files_structure = function(files_structure, verbose = TRUE, progress = TRUE) { graphql_engine <- private$engines$graphql - if (verbose) { - cli::cli_alert_info( - cli::col_green("I will make use of files structure stored in GitStats.") - ) - } result <- private$get_orgs_and_repos_from_files_structure( - host_files_structure = host_files_structure + files_structure = files_structure ) orgs <- result$orgs repos <- result$repos @@ -1201,7 +1196,7 @@ GitHost <- R6::R6Class( org = org, type = type, repos = repos, - host_files_structure = host_files_structure, + host_files_structure = files_structure, verbose = verbose, progress = progress ) |> @@ -1214,10 +1209,10 @@ GitHost <- R6::R6Class( return(files_table) }, - get_orgs_and_repos_from_files_structure = function(host_files_structure) { + get_orgs_and_repos_from_files_structure = function(files_structure) { result <- list( - "orgs" = names(host_files_structure), - "repos" = purrr::map(host_files_structure, ~names(.)) %>% unlist() %>% unname() + "orgs" = names(files_structure), + "repos" = purrr::map(files_structure, ~names(.)) %>% unlist() %>% unname() ) return(result) }, diff --git a/R/GitHostGitLab.R b/R/GitHostGitLab.R index 0727293e..432ccd3f 
100644 --- a/R/GitHostGitLab.R +++ b/R/GitHostGitLab.R @@ -303,17 +303,12 @@ GitHostGitLab <- R6::R6Class("GitHostGitLab", } }, - get_files_content_from_files_structure = function(host_files_structure, + get_files_content_from_files_structure = function(files_structure, verbose = TRUE, progress = TRUE) { graphql_engine <- private$engines$graphql - if (verbose) { - cli::cli_alert_info( - cli::col_green("I will make use of files structure stored in GitStats.") - ) - } result <- private$get_orgs_and_repos_from_files_structure( - host_files_structure = host_files_structure + files_structure = files_structure ) orgs <- result$orgs repos <- result$repos @@ -331,7 +326,7 @@ GitHostGitLab <- R6::R6Class("GitHostGitLab", org = org, type = type, repos = repos, - host_files_structure = host_files_structure, + host_files_structure = files_structure, verbose = verbose, progress = progress ) |> diff --git a/R/GitStats.R b/R/GitStats.R index bc8c91bb..0df59163 100644 --- a/R/GitStats.R +++ b/R/GitStats.R @@ -203,14 +203,18 @@ GitStats <- R6::R6Class( return(users) }, - get_files_content = function(file_path = NULL, - use_files_structure = TRUE, - cache = TRUE, - verbose = TRUE, - progress = verbose) { + get_files = function(pattern, + depth, + file_path = NULL, + cache = TRUE, + verbose = TRUE, + progress = verbose) { private$check_for_host() - args_list <- list("file_path" = file_path, - "use_files_structure" = use_files_structure) + args_list <- list( + "pattern" = pattern, + "depth" = depth, + "file_path" = file_path + ) trigger <- private$trigger_pulling( cache = cache, storage = "files", @@ -218,12 +222,13 @@ GitStats <- R6::R6Class( verbose = verbose ) if (trigger) { - files <- private$get_files_content_from_hosts( + files <- private$get_files_from_hosts( + pattern = pattern, + depth = depth, file_path = file_path, - use_files_structure = use_files_structure, verbose = verbose, progress = progress - ) %>% + ) |> private$set_object_class( class = "files_data", attr_list = 
args_list @@ -238,44 +243,6 @@ GitStats <- R6::R6Class( return(files) }, - get_files_structure = function(pattern, - depth, - cache = TRUE, - verbose = TRUE, - progress = verbose) { - private$check_for_host() - args_list <- list("pattern" = pattern, - "depth" = depth) - trigger <- private$trigger_pulling( - cache = cache, - storage = "files_structure", - args_list = args_list, - verbose = verbose - ) - if (trigger) { - files_structure <- private$get_files_structure_from_hosts( - pattern = pattern, - depth = depth, - verbose = verbose, - progress = progress - ) - if (!is.null(files_structure)) { - files_structure <- private$set_object_class( - object = files_structure, - class = "files_structure", - attr_list = args_list - ) - private$save_to_storage(files_structure) - } - } else { - files_structure <- private$get_from_storage( - table = "files_structure", - verbose = verbose - ) - } - return(files_structure) - }, - get_release_logs = function(since, until = Sys.Date(), cache = TRUE, @@ -713,89 +680,35 @@ GitStats <- R6::R6Class( }, # Pull content of a text file in a table form - get_files_content_from_hosts = function(file_path, - use_files_structure, - verbose, - progress) { + get_files_from_hosts = function(pattern, + depth, + file_path, + verbose, + progress) { purrr::map(private$hosts, function(host) { - if (is.null(file_path) && use_files_structure) { - host_files_structure <- private$get_host_files_structure( - host = host, - verbose = FALSE - ) - } else { - host_files_structure <- NULL - } - if (is.null(file_path) && is.null(host_files_structure)) { - host_name <- host$.__enclos_env__$private$host_name - if (verbose) { - cli::cli_alert_warning( - cli::col_yellow("I will skip pulling data for {host_name}: files structure is empty.") - ) + if (is.null(file_path)) { + files_structure <- host$get_files_structure( + pattern = pattern, + depth = depth, + verbose = verbose, + progress = progress + ) |> + purrr::discard(~ length(.) 
== 0) + if (length(files_structure) == 0) { + files_structure <- NULL } - NULL } else { - host$get_files_content( - file_path = file_path, - host_files_structure = host_files_structure, - verbose = verbose, - progress = progress - ) + files_structure <- NULL } - }) %>% - purrr::list_rbind() %>% - dplyr::as_tibble() - }, - - get_host_files_structure = function(host, verbose) { - files_structure <- private$get_from_storage( - table = "files_structure", - verbose = verbose - ) - if (is.null(files_structure)) { - cli::cli_abort(c( - "x" = "No files_structure object found in GitStats.", - "i" = "Run `get_files_structure()` function first, then `get_files_content()`." - ), - call = NULL - ) - } - host_name <- host$.__enclos_env__$private$web_url - return(files_structure[[gsub("https://", "", host_name)]]) - }, - - get_files_structure_from_hosts = function(pattern, depth, verbose, progress) { - files_structure_from_hosts <- purrr::map(private$hosts, function(host) { - host$get_files_structure( - pattern = pattern, - depth = depth, - verbose = verbose, + host$get_files_content( + file_path = file_path, + files_structure = files_structure, + verbose = verbose, progress = progress ) - }) - names(files_structure_from_hosts) <- private$get_host_urls() - files_structure_from_hosts <- files_structure_from_hosts %>% - purrr::discard(~ length(.) == 0) - if (length(files_structure_from_hosts) == 0) { - files_structure_from_hosts <- NULL - if (verbose) { - cli::cli_alert_warning( - cli::col_yellow( - "No files structure found for matching pattern {pattern} in {depth} level of dirs." - ) - ) - cli::cli_alert_warning( - cli::col_yellow( - "Files structure will not be saved in GitStats." 
- ) - ) - } - } - return(files_structure_from_hosts) - }, - - get_host_urls = function() { - purrr::map_vec(private$hosts, ~ gsub("https://", "", .$.__enclos_env__$private$web_url)) + }) |> + purrr::list_rbind() |> + dplyr::as_tibble() }, # Pull release logs tables from hosts and bind them into one @@ -1075,7 +988,6 @@ GitStats <- R6::R6Class( storage_attr <- switch(storage_name, "repos_urls" = "type", "files" = "file_path", - "files_structure" = "pattern", "commits" = "date_range", "release_logs" = "date_range", "users" = "logins", @@ -1084,7 +996,6 @@ GitStats <- R6::R6Class( attr_name <- switch(storage_attr, "type" = "type", "file_path" = "files", - "pattern" = "files matching pattern", "date_range" = "date range", "packages" = "packages", "logins" = "logins") diff --git a/R/get_files.R b/R/get_files.R new file mode 100644 index 00000000..cb65c508 --- /dev/null +++ b/R/get_files.R @@ -0,0 +1,61 @@ +#' @title Get files +#' @name get_files +#' @description Pulls text files and their content. +#' @param gitstats A `GitStats` object. +#' @param pattern A regular expression. If defined, it pulls file structure for +#' a repository matching this pattern. Can be defined if `file_path` stays +#' `NULL`. +#' @param depth An optional integer. Defines level of directories to retrieve +#' files from. E.g. if set to `0`, it will pull files only from root, if `1L`, +#' will take data from `root` directory and directories visible in `root` +#' directory. If left with no argument, will pull files from all directories. +#' @param file_path Optional. A standardized path to file(s) in repositories. +#' May be a character vector if multiple files are to be pulled. Can be +#' defined if `pattern` stays `NULL`. +#' @param cache A logical, if set to `TRUE` GitStats will retrieve the last +#' result from its storage. +#' @param verbose A logical, `TRUE` by default. If `FALSE` messages and printing +#' output is switched off. 
+#' @param progress A logical, by default set to `verbose` value. If `FALSE` no +#' `cli` progress bar will be displayed. +#' @examples +#' \dontrun{ +#' rmd_files <- create_gitstats() |> +#' set_github_host( +#' token = Sys.getenv("GITHUB_PAT"), +#' orgs = c("r-world-devs") +#' ) %>% +#' set_gitlab_host( +#' token = Sys.getenv("GITLAB_PAT_PUBLIC"), +#' orgs = "mbtests" +#' ) |> +#' get_files( +#' pattern = "\\.Rmd", +#' depth = 2L +#' ) +#' +#' } +#' @return A data.frame. +#' @export +get_files <- function(gitstats, + pattern = NULL, + depth = Inf, + file_path = NULL, + cache = TRUE, + verbose = is_verbose(gitstats), + progress = verbose) { + if (!is.null(pattern) && !is.null(file_path)) { + cli::cli_abort( + "Please choose either `pattern` or `file_path`.", + call = NULL + ) + } + gitstats$get_files( + pattern = pattern, + depth = depth, + file_path = file_path, + cache = cache, + verbose = verbose, + progress = progress + ) +} diff --git a/R/gitstats_functions.R b/R/gitstats_functions.R index 0397c072..0af1c0a5 100644 --- a/R/gitstats_functions.R +++ b/R/gitstats_functions.R @@ -210,118 +210,6 @@ get_users <- function(gitstats, ) } -#' @title Get content of files -#' @name get_files_content -#' @description Pulls content of text files. -#' @param gitstats A GitStats object. -#' @param file_path Optional. A standardized path to file(s) in repositories. -#' May be a character vector if multiple files are to be pulled. If set to -#' `NULL` and `use_files_structure` parameter is set to `TRUE`, `GitStats` -#' will try to pull data from `files_structure` (see below). -#' @param use_files_structure Logical. If `TRUE` and `file_path` is set to -#' `NULL`, will iterate over `files_structure` pulled by -#' `get_files_structure()` function and kept in storage. If there is no -#' `files_structure` in storage, an error will be returned. If `file_path` is -#' defined, it will override `use_files_structure` parameter. 
-#' @param cache A logical, if set to `TRUE` GitStats will retrieve the last -#' result from its storage. -#' @param verbose A logical, `TRUE` by default. If `FALSE` messages and printing -#' output is switched off. -#' @param progress A logical, by default set to `verbose` value. If `FALSE` no -#' `cli` progress bar will be displayed. -#' @examples -#' \dontrun{ -#' my_gitstats <- create_gitstats() %>% -#' set_github_host( -#' token = Sys.getenv("GITHUB_PAT"), -#' orgs = c("r-world-devs") -#' ) %>% -#' set_gitlab_host( -#' token = Sys.getenv("GITLAB_PAT_PUBLIC"), -#' orgs = "mbtests" -#' ) -#' get_files_content( -#' gitstats = my_gitstats, -#' file_path = c("LICENSE", "DESCRIPTION") -#' ) -#' -#' # example with files structure -#' files_structure <- get_files_structure( -#' gitstats = my_gitstats, -#' pattern = "\\.Rmd", -#' depth = 2L -#' ) -#' # get_files_content() will make use of pulled earlier files structure -#' files_content <- get_files_content( -#' gitstats = my_gitstats -#' ) -#' } -#' @return A data.frame. -#' @export -get_files_content <- function(gitstats, - file_path = NULL, - use_files_structure = TRUE, - cache = TRUE, - verbose = is_verbose(gitstats), - progress = verbose) { - gitstats$get_files_content( - file_path = file_path, - use_files_structure = use_files_structure, - cache = cache, - verbose = verbose, - progress = progress - ) -} - -#' @title Get structure of files -#' @name get_files_structure -#' @description Pulls file structure for a given repository. -#' @param gitstats A GitStats object. -#' @param pattern An optional regular expression. If defined, it pulls file -#' structure for a repository matching this pattern. -#' @param depth An optional integer. Defines level of directories to retrieve -#' files from. E.g. if set to `0`, it will pull files only from root, if `1`, -#' will take data from `root` directory and directories visible in `root` -#' directory. If left with no argument, will pull files from all directories. 
-#' @param cache A logical, if set to `TRUE` GitStats will retrieve the last -#' result from its storage. -#' @param verbose A logical, `TRUE` by default. If `FALSE` messages and printing -#' output is switched off. -#' @param progress A logical, by default set to `verbose` value. If `FALSE` no -#' `cli` progress bar will be displayed. -#' @examples -#' \dontrun{ -#' my_gitstats <- create_gitstats() %>% -#' set_github_host( -#' token = Sys.getenv("GITHUB_PAT"), -#' orgs = c("r-world-devs") -#' ) %>% -#' set_gitlab_host( -#' token = Sys.getenv("GITLAB_PAT_PUBLIC"), -#' orgs = "mbtests" -#' ) -#' get_files_structure( -#' gitstats = my_gitstats, -#' pattern = "\\.md" -#' ) -#' } -#' @return A list of vectors. -#' @export -get_files_structure <- function(gitstats, - pattern = NULL, - depth = Inf, - cache = TRUE, - verbose = is_verbose(gitstats), - progress = verbose) { - gitstats$get_files_structure( - pattern = pattern, - depth = depth, - cache = cache, - verbose = verbose, - progress = progress - ) -} - #' @title Get data on package usage across repositories #' @name get_R_package_usage #' @description Wrapper over searching repositories by code blobs related to diff --git a/man/get_files_structure.Rd b/man/get_files.Rd similarity index 60% rename from man/get_files_structure.Rd rename to man/get_files.Rd index db33b9a4..d6439ab0 100644 --- a/man/get_files_structure.Rd +++ b/man/get_files.Rd @@ -1,29 +1,35 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gitstats_functions.R -\name{get_files_structure} -\alias{get_files_structure} -\title{Get structure of files} +% Please edit documentation in R/get_files.R +\name{get_files} +\alias{get_files} +\title{Get files} \usage{ -get_files_structure( +get_files( gitstats, pattern = NULL, depth = Inf, + file_path = NULL, cache = TRUE, verbose = is_verbose(gitstats), progress = verbose ) } \arguments{ -\item{gitstats}{A GitStats object.} +\item{gitstats}{A \code{GitStats} object.} 
-\item{pattern}{An optional regular expression. If defined, it pulls file -structure for a repository matching this pattern.} +\item{pattern}{A regular expression. If defined, it pulls file structure for +a repository matching this pattern. Can be defined if \code{file_path} stays +\code{NULL}.} \item{depth}{An optional integer. Defines level of directories to retrieve -files from. E.g. if set to \code{0}, it will pull files only from root, if \code{1}, +files from. E.g. if set to \code{0}, it will pull files only from root, if \code{1L}, will take data from \code{root} directory and directories visible in \code{root} directory. If left with no argument, will pull files from all directories.} +\item{file_path}{Optional. A standardized path to file(s) in repositories. +May be a character vector if multiple files are to be pulled. Can be +defined if \code{pattern} stays \code{NULL}.} + \item{cache}{A logical, if set to \code{TRUE} GitStats will retrieve the last result from its storage.} @@ -34,14 +40,14 @@ output is switched off.} \code{cli} progress bar will be displayed.} } \value{ -A list of vectors. +A data.frame. } \description{ -Pulls file structure for a given repository. +Pulls text files and their content. } \examples{ \dontrun{ - my_gitstats <- create_gitstats() \%>\% + rmd_files <- create_gitstats() |> set_github_host( token = Sys.getenv("GITHUB_PAT"), orgs = c("r-world-devs") @@ -49,10 +55,11 @@ Pulls file structure for a given repository. 
set_gitlab_host( token = Sys.getenv("GITLAB_PAT_PUBLIC"), orgs = "mbtests" - ) - get_files_structure( - gitstats = my_gitstats, - pattern = "\\\\.md" + ) |> + get_files( + pattern = "\\\\.Rmd", + depth = 2L ) + } } diff --git a/man/get_files_content.Rd b/man/get_files_content.Rd deleted file mode 100644 index f2acc20e..00000000 --- a/man/get_files_content.Rd +++ /dev/null @@ -1,72 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gitstats_functions.R -\name{get_files_content} -\alias{get_files_content} -\title{Get content of files} -\usage{ -get_files_content( - gitstats, - file_path = NULL, - use_files_structure = TRUE, - cache = TRUE, - verbose = is_verbose(gitstats), - progress = verbose -) -} -\arguments{ -\item{gitstats}{A GitStats object.} - -\item{file_path}{Optional. A standardized path to file(s) in repositories. -May be a character vector if multiple files are to be pulled. If set to -\code{NULL} and \code{use_files_structure} parameter is set to \code{TRUE}, \code{GitStats} -will try to pull data from \code{files_structure} (see below).} - -\item{use_files_structure}{Logical. If \code{TRUE} and \code{file_path} is set to -\code{NULL}, will iterate over \code{files_structure} pulled by -\code{get_files_structure()} function and kept in storage. If there is no -\code{files_structure} in storage, an error will be returned. If \code{file_path} is -defined, it will override \code{use_files_structure} parameter.} - -\item{cache}{A logical, if set to \code{TRUE} GitStats will retrieve the last -result from its storage.} - -\item{verbose}{A logical, \code{TRUE} by default. If \code{FALSE} messages and printing -output is switched off.} - -\item{progress}{A logical, by default set to \code{verbose} value. If \code{FALSE} no -\code{cli} progress bar will be displayed.} -} -\value{ -A data.frame. -} -\description{ -Pulls content of text files. 
-} -\examples{ -\dontrun{ - my_gitstats <- create_gitstats() \%>\% - set_github_host( - token = Sys.getenv("GITHUB_PAT"), - orgs = c("r-world-devs") - ) \%>\% - set_gitlab_host( - token = Sys.getenv("GITLAB_PAT_PUBLIC"), - orgs = "mbtests" - ) - get_files_content( - gitstats = my_gitstats, - file_path = c("LICENSE", "DESCRIPTION") - ) - - # example with files structure - files_structure <- get_files_structure( - gitstats = my_gitstats, - pattern = "\\\\.Rmd", - depth = 2L - ) - # get_files_content() will make use of pulled earlier files structure - files_content <- get_files_content( - gitstats = my_gitstats - ) -} -} diff --git a/vignettes/get_files.Rmd b/vignettes/get_files.Rmd index 60da1ccf..1aed18bc 100644 --- a/vignettes/get_files.Rmd +++ b/vignettes/get_files.Rmd @@ -34,10 +34,10 @@ git_stats <- create_gitstats() %>% ) ``` -With `GitStats` you can get the content of all text files in repo that are of your interest. First you need to get the files structure. You can pull specific types of files, by setting `pattern` with regular expression and `depth` with integer, which defines level of directories to look for the files. +With `GitStats` you can get the content of all text files in repo that are of your interest. You can pull specific types of files, by setting `pattern` with regular expression and `depth` with integer, which defines level of directories to look for the files. ```{r} -files_structure <- get_files_structure( +files_structure <- get_files( gitstats = git_stats, pattern = "\\.md", depth = 1L, @@ -45,13 +45,3 @@ files_structure <- get_files_structure( ) dplyr::glimpse(files_structure) ``` - -Once you pull the files structure, `GitStats` will store it. If you run then `get_files_content()` function, by default it will make use of this structure (unless you define `file_path`, which will override saved files structure). 
- -```{r} -files_content <- get_files_content( - gitstats = git_stats, - progress = FALSE -) -dplyr::glimpse(files_content) -``` From 1dcb0ced58453ed28780441d3c66792a1c48df56 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Tue, 7 Jan 2025 13:52:23 +0000 Subject: [PATCH 61/99] Update NEWS and example workflow. --- NEWS.md | 1 + inst/get_files_workflow.R | 12 +++++------- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/NEWS.md b/NEWS.md index 950aa687..a963fc31 100644 --- a/NEWS.md +++ b/NEWS.md @@ -8,6 +8,7 @@ - changing name of the `time_interval` parameter to `time_aggregation`, - adding `yearly` aggregation to `time_aggregation` parameter, - changing basic input from `GitStats` to `commits_data` object which allows to build workflow in one pipeline (`create_gitstats() |> set_*_host() |> get_commits() |> get_commits_stats()`). +- Merged two functions `get_files_content()` and `get_files_structure()` into one `get_files()` ([#564](https://github.com/r-world-devs/GitStats/issues/564)). - Add `.show_error` parameter to the `set_*_host()` functins to control if error should pop up when wrong input is passed ([#547](https://github.com/r-world-devs/GitStats/issues/547)). 
## Fixes: diff --git a/inst/get_files_workflow.R b/inst/get_files_workflow.R index 6aa09d4d..3f0a3efe 100644 --- a/inst/get_files_workflow.R +++ b/inst/get_files_workflow.R @@ -8,7 +8,7 @@ test_gitstats <- create_gitstats() |> orgs = c("mbtests", "mbtestapps") ) -get_files_content( +get_files( gitstats = test_gitstats, file_path = c("LICENSE", "DESCRIPTION") ) @@ -24,7 +24,7 @@ github_stats <- create_gitstats() |> github_stats -get_files_content( +get_files( gitstats = github_stats, file_path = "DESCRIPTION" ) @@ -34,20 +34,18 @@ datafaker_stats <- create_gitstats() |> repos = "openpharma/DataFakeR" ) -get_files_content( +get_files( gitstats = datafaker_stats, file_path = "DESCRIPTION" ) -md_files_structure <- get_files_structure( +md_files <- get_files( gitstats = test_gitstats, pattern = "\\.md|.R", depth = 2L ) -get_files_content(test_gitstats) - -md_files_structure <- get_files_structure( +get_files( gitstats = test_gitstats, pattern = "DESCRIPTION|\\.md|\\.qmd|\\.Rmd", depth = 2L, From dfbf1e5c5f317ffc4788defdfa8deccb5fc63bca Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Tue, 7 Jan 2025 15:45:25 +0000 Subject: [PATCH 62/99] Fix, adjust and add tests. 
--- R/GitHost.R | 8 +-- ...ub.md => 03-get_files_structure-GitHub.md} | 8 --- ...ab.md => 03-get_files_structure-GitLab.md} | 8 --- tests/testthat/_snaps/04-get_files.md | 4 ++ ...tHub.md => 04-get_files_content-GitHub.md} | 6 ++ ...tLab.md => 04-get_files_content-GitLab.md} | 10 ++- .../_snaps/get_files_structure-GitStats.md | 9 --- tests/testthat/helper-expect-responses.R | 10 +-- tests/testthat/helper-fixtures.R | 8 +-- ...R => test-03-get_files_structure-GitHub.R} | 42 +++++++----- ...R => test-03-get_files_structure-GitLab.R} | 17 ----- tests/testthat/test-04-get_files.R | 66 +++++++++++++++++++ ...b.R => test-04-get_files_content-GitHub.R} | 41 +++++------- ...b.R => test-04-get_files_content-GitLab.R} | 18 ++++- .../test-get_files_content-GitStats.R | 21 ------ .../test-get_files_structure-GitStats.R | 47 ------------- tests/testthat/test-get_storage.R | 8 +-- 17 files changed, 159 insertions(+), 172 deletions(-) rename tests/testthat/_snaps/{get_files_structure-GitHub.md => 03-get_files_structure-GitHub.md} (77%) rename tests/testthat/_snaps/{get_files_structure-GitLab.md => 03-get_files_structure-GitLab.md} (72%) create mode 100644 tests/testthat/_snaps/04-get_files.md rename tests/testthat/_snaps/{get_files_content-GitHub.md => 04-get_files_content-GitHub.md} (70%) rename tests/testthat/_snaps/{get_files_content-GitLab.md => 04-get_files_content-GitLab.md} (75%) delete mode 100644 tests/testthat/_snaps/get_files_structure-GitStats.md rename tests/testthat/{test-get_files_structure-GitHub.R => test-03-get_files_structure-GitHub.R} (92%) rename tests/testthat/{test-get_files_structure-GitLab.R => test-03-get_files_structure-GitLab.R} (93%) create mode 100644 tests/testthat/test-04-get_files.R rename tests/testthat/{test-get_files_content-GitHub.R => test-04-get_files_content-GitHub.R} (89%) rename tests/testthat/{test-get_files_content-GitLab.R => test-04-get_files_content-GitLab.R} (91%) delete mode 100644 tests/testthat/test-get_files_content-GitStats.R 
delete mode 100644 tests/testthat/test-get_files_structure-GitStats.R diff --git a/R/GitHost.R b/R/GitHost.R index 82230067..58e54e74 100644 --- a/R/GitHost.R +++ b/R/GitHost.R @@ -1322,11 +1322,9 @@ GitHost <- R6::R6Class( ) } files_table <- rest_engine$get_files( - file_paths = file_path, - verbose = verbose, - progress = progress - ) %>% - rest_engine$prepare_files_table() %>% + files = file_path + ) |> + rest_engine$prepare_files_table() |> private$add_repo_api_url() return(files_table) }, diff --git a/tests/testthat/_snaps/get_files_structure-GitHub.md b/tests/testthat/_snaps/03-get_files_structure-GitHub.md similarity index 77% rename from tests/testthat/_snaps/get_files_structure-GitHub.md rename to tests/testthat/_snaps/03-get_files_structure-GitHub.md index e00cecfe..d8896691 100644 --- a/tests/testthat/_snaps/get_files_structure-GitHub.md +++ b/tests/testthat/_snaps/03-get_files_structure-GitHub.md @@ -20,11 +20,3 @@ x This feature is not applicable to scan whole Git Host (time consuming). i Set `orgs` or `repos` arguments in `set_*_host()` if you wish to run this function. -# get_files_content makes use of files_structure - - Code - files_content <- github_testhost_priv$get_files_content_from_files_structure( - host_files_structure = test_mocker$use("gh_files_structure_from_orgs")) - Message - i I will make use of files structure stored in GitStats. - diff --git a/tests/testthat/_snaps/get_files_structure-GitLab.md b/tests/testthat/_snaps/03-get_files_structure-GitLab.md similarity index 72% rename from tests/testthat/_snaps/get_files_structure-GitLab.md rename to tests/testthat/_snaps/03-get_files_structure-GitLab.md index 6f4c26ad..7469230b 100644 --- a/tests/testthat/_snaps/get_files_structure-GitLab.md +++ b/tests/testthat/_snaps/03-get_files_structure-GitLab.md @@ -5,11 +5,3 @@ Output [1] "\n query GetFilesTree ($fullPath: ID!, $file_path: String!) 
{\n project(fullPath: $fullPath) {\n repository {\n tree(path: $file_path) {\n trees (first: 100) {\n pageInfo{\n endCursor\n hasNextPage\n }\n nodes {\n name\n }\n }\n blobs (first: 100) {\n pageInfo{\n endCursor\n hasNextPage\n }\n nodes {\n name\n }\n }\n }\n }\n }\n }\n " -# get_files_content makes use of files_structure - - Code - files_content <- gitlab_testhost_priv$get_files_content_from_files_structure( - host_files_structure = test_mocker$use("gl_files_structure_from_orgs")) - Message - i I will make use of files structure stored in GitStats. - diff --git a/tests/testthat/_snaps/04-get_files.md b/tests/testthat/_snaps/04-get_files.md new file mode 100644 index 00000000..ab594ce2 --- /dev/null +++ b/tests/testthat/_snaps/04-get_files.md @@ -0,0 +1,4 @@ +# error shows when file_path and pattern are defined at the same time + + Please choose either `pattern` or `file_path`. + diff --git a/tests/testthat/_snaps/get_files_content-GitHub.md b/tests/testthat/_snaps/04-get_files_content-GitHub.md similarity index 70% rename from tests/testthat/_snaps/get_files_content-GitHub.md rename to tests/testthat/_snaps/04-get_files_content-GitHub.md index 6646a97c..33240b5c 100644 --- a/tests/testthat/_snaps/get_files_content-GitHub.md +++ b/tests/testthat/_snaps/04-get_files_content-GitHub.md @@ -5,3 +5,9 @@ Output [1] "query GetFileBlobFromRepo($org: String!, $repo: String!, $expression: String!) {\n repository(owner: $org, name: $repo) {\n repo_id: id\n repo_name: name\n repo_url: url\n file: object(expression: $expression) {\n ... 
on Blob {\n text\n byteSize\n }\n }\n }\n }" +# get_files_content makes use of files_structure + + Code + files_content <- github_testhost_priv$get_files_content_from_files_structure( + files_structure = test_mocker$use("gh_files_structure_from_orgs")) + diff --git a/tests/testthat/_snaps/get_files_content-GitLab.md b/tests/testthat/_snaps/04-get_files_content-GitLab.md similarity index 75% rename from tests/testthat/_snaps/get_files_content-GitLab.md rename to tests/testthat/_snaps/04-get_files_content-GitLab.md index f7b8d5dd..71dea5ce 100644 --- a/tests/testthat/_snaps/get_files_content-GitLab.md +++ b/tests/testthat/_snaps/04-get_files_content-GitLab.md @@ -3,12 +3,18 @@ Code gl_files_query Output - [1] "query GetFilesByOrg($org: ID!, $file_paths: [String!]!) {\n group(fullPath: $org) {\n projects(first: 100) {\n count\n pageInfo {\n hasNextPage\n endCursor\n }\n edges {\n node {\n name\n id\n webUrl\n repository {\n blobs(paths: $file_paths) {\n nodes {\n name\n rawBlob\n size\n }\n }\n }\n }\n }\n }\n }\n }" + [1] "query GetFilesByOrg($org: ID!, $file_paths: [String!]!) {\n group(fullPath: $org) {\n projects(first: 100) {\n count\n pageInfo {\n hasNextPage\n endCursor\n }\n edges {\n node {\n name\n id\n webUrl\n repository {\n blobs(paths: $file_paths) {\n nodes {\n path\n rawBlob\n size\n }\n }\n }\n }\n }\n }\n }\n }" --- Code gl_file_blobs_from_repo_query Output - [1] "\n query GetFilesByRepo($fullPath: ID!, $file_paths: [String!]!) {\n project(fullPath: $fullPath) {\n name\n id\n webUrl\n repository {\n blobs(paths: $file_paths) {\n nodes {\n name\n rawBlob\n size\n }\n }\n }\n }\n }\n " + [1] "\n query GetFilesByRepo($fullPath: ID!, $file_paths: [String!]!) 
{\n project(fullPath: $fullPath) {\n name\n id\n webUrl\n repository {\n blobs(paths: $file_paths) {\n nodes {\n path\n rawBlob\n size\n }\n }\n }\n }\n }\n " + +# get_files_content makes use of files_structure + + Code + files_content <- gitlab_testhost_priv$get_files_content_from_files_structure( + files_structure = test_mocker$use("gl_files_structure_from_orgs")) diff --git a/tests/testthat/_snaps/get_files_structure-GitStats.md b/tests/testthat/_snaps/get_files_structure-GitStats.md deleted file mode 100644 index a3cf4497..00000000 --- a/tests/testthat/_snaps/get_files_structure-GitStats.md +++ /dev/null @@ -1,9 +0,0 @@ -# if returned files_structure is empty, do not store it and give proper message - - Code - files_structure <- test_gitstats_priv$get_files_structure_from_hosts(pattern = "\\.png", - depth = 1L, verbose = TRUE) - Message - ! No files structure found for matching pattern \.png in 1 level of dirs. - ! Files structure will not be saved in GitStats. - diff --git a/tests/testthat/helper-expect-responses.R b/tests/testthat/helper-expect-responses.R index acffaad3..8792b8d5 100644 --- a/tests/testthat/helper-expect-responses.R +++ b/tests/testthat/helper-expect-responses.R @@ -144,7 +144,7 @@ expect_gitlab_files_blob_response <- function(object) { purrr::walk(object$data$project$repository$blobs$nodes, function(node) { expect_equal( names(node), - c("name", "rawBlob", "size") + c("path", "rawBlob", "size") ) }) } @@ -194,13 +194,13 @@ expect_gitlab_files_from_org_response <- function(object) { expect_list_contains( project, c( - "name", "id", "repository" + "path", "id", "repository" ) ) expect_list_contains( project$repository$blobs$nodes[[1]], c( - "name", "rawBlob", "size" + "path", "rawBlob", "size" ) ) }) @@ -219,11 +219,11 @@ expect_gitlab_files_from_org_by_repos_response <- function(response, expected_fi purrr::walk(repo$data$project$repository$blobs$nodes, function(file) { expect_equal( names(file), - c("name", "rawBlob", "size") + c("path", 
"rawBlob", "size") ) }) }) - files_vec <- purrr::map(response, ~ purrr::map_vec(.$data$project$repository$blobs$nodes, ~ .$name)) %>% + files_vec <- purrr::map(response, ~ purrr::map_vec(.$data$project$repository$blobs$nodes, ~ .$path)) %>% unlist() %>% unique() expect_true( diff --git a/tests/testthat/helper-fixtures.R b/tests/testthat/helper-fixtures.R index a464927c..e38e7d8b 100644 --- a/tests/testthat/helper-fixtures.R +++ b/tests/testthat/helper-fixtures.R @@ -358,7 +358,7 @@ test_fixtures$gitlab_file_org_response <- list( "blobs" = list( "nodes" = list( list( - "name" = "meta_data.yaml", + "path" = "meta_data.yaml", "rawBlob" = "Some interesting text", "size" = 4 ) @@ -376,7 +376,7 @@ test_fixtures$gitlab_file_org_response <- list( "blobs" = list( "nodes" = list( list( - "name" = "meta_data.yaml", + "path" = "meta_data.yaml", "rawBlob" = "Some interesting text", "size" = 5 ) @@ -401,12 +401,12 @@ test_fixtures$gitlab_file_repo_response <- list( "blobs" = list( "nodes" = list( list( - "name" = "README.md", + "path" = "README.md", "rawBlob" = "# graphql_tests\n\nThis project is for testing GraphQL capabilities.\n", "size" = "67" ), list( - "name" = "project_metadata.yaml", + "path" = "project_metadata.yaml", "rawBlob" = "Name: GraphQL Tests", "size" = "19" ) diff --git a/tests/testthat/test-get_files_structure-GitHub.R b/tests/testthat/test-03-get_files_structure-GitHub.R similarity index 92% rename from tests/testthat/test-get_files_structure-GitHub.R rename to tests/testthat/test-03-get_files_structure-GitHub.R index cc6fc82c..79e63505 100644 --- a/tests/testthat/test-get_files_structure-GitHub.R +++ b/tests/testthat/test-03-get_files_structure-GitHub.R @@ -114,6 +114,29 @@ test_that("only files with certain pattern are retrieved", { test_mocker$cache(md_files_structure) }) +test_that("get_repos_data pulls data on repos and branches", { + mockery::stub( + test_graphql_github_priv$get_repos_data, + "self$get_repos_from_org", + 
test_mocker$use("gh_repos_from_org") + ) + gh_repos_data <- test_graphql_github_priv$get_repos_data( + org = "r-world-devs", + repos = NULL + ) + expect_equal( + names(gh_repos_data), + c("repositories", "def_branches") + ) + expect_true( + length(gh_repos_data$repositories) > 0 + ) + expect_true( + length(gh_repos_data$def_branches) > 0 + ) + test_mocker$cache(gh_repos_data) +}) + test_that("GitHub GraphQL Engine pulls files structure from repositories", { mockery::stub( test_graphql_github$get_files_structure_from_org, @@ -182,7 +205,7 @@ test_that("get_files_structure_from_orgs", { test_that("get_orgs_and_repos_from_files_structure", { result <- github_testhost_priv$get_orgs_and_repos_from_files_structure( - host_files_structure = test_mocker$use("gh_files_structure_from_orgs") + files_structure = test_mocker$use("gh_files_structure_from_orgs") ) expect_equal( names(result), @@ -258,20 +281,3 @@ test_that("get_files_structure aborts when scope to scan whole host", { ) ) }) - -test_that("get_files_content makes use of files_structure", { - mockery::stub( - github_testhost_priv$get_files_content_from_files_structure, - "private$add_repo_api_url", - test_mocker$use("gh_files_table") - ) - expect_snapshot( - files_content <- github_testhost_priv$get_files_content_from_files_structure( - host_files_structure = test_mocker$use("gh_files_structure_from_orgs") - ) - ) - expect_files_table( - files_content, - with_cols = "api_url" - ) -}) diff --git a/tests/testthat/test-get_files_structure-GitLab.R b/tests/testthat/test-03-get_files_structure-GitLab.R similarity index 93% rename from tests/testthat/test-get_files_structure-GitLab.R rename to tests/testthat/test-03-get_files_structure-GitLab.R index 0671bc71..e84712cf 100644 --- a/tests/testthat/test-get_files_structure-GitLab.R +++ b/tests/testthat/test-03-get_files_structure-GitLab.R @@ -225,20 +225,3 @@ test_that("get_files_structure pulls files structure for repositories in orgs", }) 
test_mocker$cache(gl_files_structure_from_orgs) }) - -test_that("get_files_content makes use of files_structure", { - mockery::stub( - gitlab_testhost_priv$get_files_content_from_files_structure, - "private$add_repo_api_url", - test_mocker$use("gl_files_table") - ) - expect_snapshot( - files_content <- gitlab_testhost_priv$get_files_content_from_files_structure( - host_files_structure = test_mocker$use("gl_files_structure_from_orgs") - ) - ) - expect_files_table( - files_content, - with_cols = "api_url" - ) -}) diff --git a/tests/testthat/test-04-get_files.R b/tests/testthat/test-04-get_files.R new file mode 100644 index 00000000..231fa64e --- /dev/null +++ b/tests/testthat/test-04-get_files.R @@ -0,0 +1,66 @@ +test_that("get_files_from_hosts works properly", { + mockery::stub( + test_gitstats_priv$get_files_from_hosts, + "host$get_files_content", + purrr::list_rbind( + list( + test_mocker$use("gh_files_table"), + test_mocker$use("gl_files_table") + ) + ) + ) + files_table <- test_gitstats_priv$get_files_from_hosts( + pattern = NULL, + depth = Inf, + file_path = "meta_data.yaml", + verbose = FALSE, + progress = FALSE + ) + expect_files_table( + files_table, + with_cols = "api_url" + ) + test_mocker$cache(files_table) +}) + +test_that("get_files works properly", { + mockery::stub( + test_gitstats$get_files, + "private$get_files_from_hosts", + test_mocker$use("files_table") + ) + files_table <- test_gitstats$get_files( + pattern = NULL, + depth = Inf, + file_path = "meta_data.yaml", + verbose = FALSE + ) + expect_files_table( + files_table, + with_cols = "api_url" + ) + test_mocker$cache(files_table) + mockery::stub( + get_files, + "git_stats$get_files", + test_mocker$use("files_table") + ) + get_files(test_gitstats, + file_path = "meta_data.yaml", + verbose = FALSE, + progress = FALSE) + expect_files_table( + files_table, + with_cols = "api_url" + ) +}) + +test_that("error shows when file_path and pattern are defined at the same time", { + expect_snapshot_error( + 
get_files(test_gitstats, + pattern = "\\.md", + file_path = "meta_data.yaml", + verbose = FALSE, + progress = FALSE) + ) +}) diff --git a/tests/testthat/test-get_files_content-GitHub.R b/tests/testthat/test-04-get_files_content-GitHub.R similarity index 89% rename from tests/testthat/test-get_files_content-GitHub.R rename to tests/testthat/test-04-get_files_content-GitHub.R index 98132f28..c8a0fe9c 100644 --- a/tests/testthat/test-get_files_content-GitHub.R +++ b/tests/testthat/test-04-get_files_content-GitHub.R @@ -7,29 +7,6 @@ test_that("file queries for GitHub are built properly", { test_mocker$cache(gh_file_blobs_from_repo_query) }) -test_that("get_repos_data pulls data on repos and branches", { - mockery::stub( - test_graphql_github_priv$get_repos_data, - "self$get_repos_from_org", - test_mocker$use("gh_repos_from_org") - ) - gh_repos_data <- test_graphql_github_priv$get_repos_data( - org = "r-world-devs", - repos = NULL - ) - expect_equal( - names(gh_repos_data), - c("repositories", "def_branches") - ) - expect_true( - length(gh_repos_data$repositories) > 0 - ) - expect_true( - length(gh_repos_data$def_branches) > 0 - ) - test_mocker$cache(gh_repos_data) -}) - test_that("GitHub GraphQL Engine pulls file response", { mockery::stub( test_graphql_github_priv$get_file_response, @@ -115,6 +92,23 @@ test_that("get_files_content_from_orgs for GitHub works", { test_mocker$cache(gh_files_from_orgs) }) +test_that("get_files_content makes use of files_structure", { + mockery::stub( + github_testhost_priv$get_files_content_from_files_structure, + "private$add_repo_api_url", + test_mocker$use("gh_files_from_orgs") + ) + expect_snapshot( + files_content <- github_testhost_priv$get_files_content_from_files_structure( + files_structure = test_mocker$use("gh_files_structure_from_orgs") + ) + ) + expect_files_table( + files_content, + with_cols = "api_url" + ) +}) + test_that("get_files_content_from_repos for GitHub works", { test_org <- "test_org" attr(test_org, "type") <- 
"organization" @@ -141,6 +135,7 @@ test_that("get_files_content_from_repos for GitHub works", { }) test_that("`get_files_content()` pulls files in the table format", { + github_testhost <- create_github_testhost(orgs = "test_org") mockery::stub( github_testhost$get_files_content, "private$get_files_content_from_orgs", diff --git a/tests/testthat/test-get_files_content-GitLab.R b/tests/testthat/test-04-get_files_content-GitLab.R similarity index 91% rename from tests/testthat/test-get_files_content-GitLab.R rename to tests/testthat/test-04-get_files_content-GitLab.R index acacb91e..d46b9227 100644 --- a/tests/testthat/test-get_files_content-GitLab.R +++ b/tests/testthat/test-04-get_files_content-GitLab.R @@ -27,7 +27,6 @@ test_that("get_file_blobs_response() works", { test_mocker$cache(gl_file_blobs_response) }) - test_that("get_repos_data pulls data on repositories", { mockery::stub( test_graphql_gitlab_priv$get_repos_data, @@ -179,3 +178,20 @@ test_that("get_files_content_from_orgs for GitLab works", { ) test_mocker$cache(gl_files_table) }) + +test_that("get_files_content makes use of files_structure", { + mockery::stub( + gitlab_testhost_priv$get_files_content_from_files_structure, + "private$add_repo_api_url", + test_mocker$use("gl_files_table") + ) + expect_snapshot( + files_content <- gitlab_testhost_priv$get_files_content_from_files_structure( + files_structure = test_mocker$use("gl_files_structure_from_orgs") + ) + ) + expect_files_table( + files_content, + with_cols = "api_url" + ) +}) diff --git a/tests/testthat/test-get_files_content-GitStats.R b/tests/testthat/test-get_files_content-GitStats.R deleted file mode 100644 index 8e80af00..00000000 --- a/tests/testthat/test-get_files_content-GitStats.R +++ /dev/null @@ -1,21 +0,0 @@ -test_that("get_files_content works properly", { - mockery::stub( - test_gitstats$get_files_content, - "private$get_files_content_from_hosts", - purrr::list_rbind( - list( - test_mocker$use("gh_files_table"), - 
test_mocker$use("gl_files_table") - ) - ) - ) - files_table <- test_gitstats$get_files_content( - file_path = "meta_data.yaml", - verbose = FALSE - ) - expect_files_table( - files_table, - with_cols = "api_url" - ) - test_mocker$cache(files_table) -}) diff --git a/tests/testthat/test-get_files_structure-GitStats.R b/tests/testthat/test-get_files_structure-GitStats.R deleted file mode 100644 index 50d6c46c..00000000 --- a/tests/testthat/test-get_files_structure-GitStats.R +++ /dev/null @@ -1,47 +0,0 @@ -test_that("get_files_structure_from_hosts works as expected", { - mockery::stub( - test_gitstats_priv$get_files_structure_from_hosts, - "host$get_files_structure", - test_mocker$use("gh_files_structure_from_orgs") - ) - files_structure_from_hosts <- test_gitstats_priv$get_files_structure_from_hosts( - pattern = "\\.md|\\.qmd\\.Rmd", - depth = 1L, - verbose = FALSE - ) - expect_equal(names(files_structure_from_hosts), - c("github.com", "gitlab.com")) - expect_equal(names(files_structure_from_hosts[[1]]), c("test_org")) - files_structure_from_hosts[[2]] <- test_mocker$use("gl_files_structure_from_orgs") - test_mocker$cache(files_structure_from_hosts) -}) - -test_that("if returned files_structure is empty, do not store it and give proper message", { - mockery::stub( - test_gitstats_priv$get_files_structure_from_hosts, - "host$get_files_structure", - list() - ) - expect_snapshot( - files_structure <- test_gitstats_priv$get_files_structure_from_hosts( - pattern = "\\.png", - depth = 1L, - verbose = TRUE - ) - ) -}) - -test_that("get_files_structure works as expected", { - mockery::stub( - test_gitstats$get_files_structure, - "private$get_files_structure_from_hosts", - test_mocker$use("files_structure_from_hosts") - ) - files_structure <- test_gitstats$get_files_structure( - pattern = "\\.md", - depth = 2L, - verbose = FALSE - ) - expect_s3_class(files_structure, "files_structure") - test_mocker$cache(files_structure) -}) diff --git a/tests/testthat/test-get_storage.R 
b/tests/testthat/test-get_storage.R index d6aa50b2..d434fb80 100644 --- a/tests/testthat/test-get_storage.R +++ b/tests/testthat/test-get_storage.R @@ -37,14 +37,14 @@ test_that("get_storage retrieves one table", { test_that("get_storage retrieves one table", { gitstats_storage <- get_storage( gitstats = test_gitstats, - storage = "files_structure" + storage = "files" ) - expect_type( + expect_s3_class( gitstats_storage, - "list" + "tbl" ) expect_s3_class( gitstats_storage, - "files_structure" + "files_data" ) }) From e1d3cfbb7076206629461b2fc90811366e247e6b Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Tue, 7 Jan 2025 15:47:45 +0000 Subject: [PATCH 63/99] spelling --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index a963fc31..203d6819 100644 --- a/NEWS.md +++ b/NEWS.md @@ -9,7 +9,7 @@ - adding `yearly` aggregation to `time_aggregation` parameter, - changing basic input from `GitStats` to `commits_data` object which allows to build workflow in one pipeline (`create_gitstats() |> set_*_host() |> get_commits() |> get_commits_stats()`). - Merged two functions `get_files_content()` and `get_files_structure()` into one `get_files()` ([#564](https://github.com/r-world-devs/GitStats/issues/564)). -- Add `.show_error` parameter to the `set_*_host()` functins to control if error should pop up when wrong input is passed ([#547](https://github.com/r-world-devs/GitStats/issues/547)). +- Add `.show_error` parameter to the `set_*_host()` functions to control if error should pop up when wrong input is passed ([#547](https://github.com/r-world-devs/GitStats/issues/547)). ## Fixes: From 1e88c1cb5d1d337d0901f946f2ca60a59fb24391 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Tue, 7 Jan 2025 16:09:15 +0000 Subject: [PATCH 64/99] Fix printing GitStats files storage when pattern is used. 
--- R/GitStats.R | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/R/GitStats.R b/R/GitStats.R index 0df59163..d2930bb8 100644 --- a/R/GitStats.R +++ b/R/GitStats.R @@ -228,12 +228,29 @@ GitStats <- R6::R6Class( file_path = file_path, verbose = verbose, progress = progress - ) |> - private$set_object_class( + ) + if (nrow(files) > 0) { + if (!is.null(pattern)) { + attr_list <- list( + "file_path" = pattern + ) + } + if (!is.null(file_path)) { + attr_list <- list( + "file_path" = file_path + ) + } + files <- private$set_object_class( + object = files, class = "files_data", - attr_list = args_list + attr_list = attr_list ) - private$save_to_storage(files) + private$save_to_storage(files) + } else { + if (verbose) { + cli::cli_alert_warning("No files found.") + } + } } else { files <- private$get_from_storage( table = "files", From ccab351322caf99dc9d17e567059d9e111203340 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Wed, 8 Jan 2025 08:02:38 +0000 Subject: [PATCH 65/99] Fix tests. --- tests/testthat/test-04-get_files.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-04-get_files.R b/tests/testthat/test-04-get_files.R index 231fa64e..2a735716 100644 --- a/tests/testthat/test-04-get_files.R +++ b/tests/testthat/test-04-get_files.R @@ -42,7 +42,7 @@ test_that("get_files works properly", { test_mocker$cache(files_table) mockery::stub( get_files, - "git_stats$get_files", + "gitstats$get_files", test_mocker$use("files_table") ) get_files(test_gitstats, From 958a6035b2ce237d4e1bcddb292b5576e09ba458 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Wed, 8 Jan 2025 08:25:11 +0000 Subject: [PATCH 66/99] Rename test file, to properly queue tests in all scenarios. Earlier on Macos it worked, on Windows and Ubuntu failed. 
--- tests/testthat/{test-04-get_files.R => test-05-get_files.R} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/testthat/{test-04-get_files.R => test-05-get_files.R} (100%) diff --git a/tests/testthat/test-04-get_files.R b/tests/testthat/test-05-get_files.R similarity index 100% rename from tests/testthat/test-04-get_files.R rename to tests/testthat/test-05-get_files.R From fdc94e1456280fe48be3ba4db04e95f30a69c8da Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Wed, 8 Jan 2025 08:51:31 +0000 Subject: [PATCH 67/99] Fix setting attributes. Previous approach resulted in not-cacheing as attributest always triggered change even if they did not change. --- R/GitStats.R | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/R/GitStats.R b/R/GitStats.R index d2930bb8..9bd78f4b 100644 --- a/R/GitStats.R +++ b/R/GitStats.R @@ -211,9 +211,8 @@ GitStats <- R6::R6Class( progress = verbose) { private$check_for_host() args_list <- list( - "pattern" = pattern, - "depth" = depth, - "file_path" = file_path + "file_pattern" = paste0(file_path, pattern), + "depth" = depth ) trigger <- private$trigger_pulling( cache = cache, @@ -230,20 +229,10 @@ GitStats <- R6::R6Class( progress = progress ) if (nrow(files) > 0) { - if (!is.null(pattern)) { - attr_list <- list( - "file_path" = pattern - ) - } - if (!is.null(file_path)) { - attr_list <- list( - "file_path" = file_path - ) - } files <- private$set_object_class( object = files, class = "files_data", - attr_list = attr_list + attr_list = args_list ) private$save_to_storage(files) } else { @@ -1004,7 +993,7 @@ GitStats <- R6::R6Class( if (storage_name != "repositories") { storage_attr <- switch(storage_name, "repos_urls" = "type", - "files" = "file_path", + "files" = "file_pattern", "commits" = "date_range", "release_logs" = "date_range", "users" = "logins", @@ -1012,7 +1001,7 @@ GitStats <- R6::R6Class( attr_data <- attr(storage_data, storage_attr) attr_name <- switch(storage_attr, 
"type" = "type", - "file_path" = "files", + "file_pattern" = "file pattern", "date_range" = "date range", "packages" = "packages", "logins" = "logins") From 646fdbde9231ee653e94ee75183ff9219b069b0b Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Wed, 8 Jan 2025 09:03:25 +0000 Subject: [PATCH 68/99] Add test. --- .../{04-get_files.md => 05-get_files.md} | 0 tests/testthat/test-05-get_files.R | 30 +++++++++++++++++++ 2 files changed, 30 insertions(+) rename tests/testthat/_snaps/{04-get_files.md => 05-get_files.md} (100%) diff --git a/tests/testthat/_snaps/04-get_files.md b/tests/testthat/_snaps/05-get_files.md similarity index 100% rename from tests/testthat/_snaps/04-get_files.md rename to tests/testthat/_snaps/05-get_files.md diff --git a/tests/testthat/test-05-get_files.R b/tests/testthat/test-05-get_files.R index 2a735716..b6590d52 100644 --- a/tests/testthat/test-05-get_files.R +++ b/tests/testthat/test-05-get_files.R @@ -23,6 +23,36 @@ test_that("get_files_from_hosts works properly", { test_mocker$cache(files_table) }) +test_that("get_files_from_hosts works properly", { + mockery::stub( + test_gitstats_priv$get_files_from_hosts, + "host$get_files_structure", + test_mocker$use("gh_files_structure_from_orgs") + ) + mockery::stub( + test_gitstats_priv$get_files_from_hosts, + "host$get_files_content", + purrr::list_rbind( + list( + test_mocker$use("gh_files_table"), + test_mocker$use("gl_files_table") + ) + ) + ) + files_table <- test_gitstats_priv$get_files_from_hosts( + pattern = "\\.md", + depth = Inf, + file_path = NULL, + verbose = FALSE, + progress = FALSE + ) + expect_files_table( + files_table, + with_cols = "api_url" + ) + test_mocker$cache(files_table) +}) + test_that("get_files works properly", { mockery::stub( test_gitstats$get_files, From fe35f88847a403bd84c3fbb8f13e7bd678311302 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Wed, 8 Jan 2025 09:56:28 +0000 Subject: [PATCH 69/99] Fix. 
--- DESCRIPTION | 2 +- R/EngineGraphQLGitLab.R | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 53fb5a5c..6f6f47fa 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: GitStats Title: Standardized Git Repository Data -Version: 2.1.2.9007 +Version: 2.1.2.9008 Authors@R: c( person(given = "Maciej", family = "Banas", email = "banasmaciek@gmail.com", role = c("aut", "cre")), person(given = "Kamil", family = "Koziej", email = "koziej.k@gmail.com", role = "aut"), diff --git a/R/EngineGraphQLGitLab.R b/R/EngineGraphQLGitLab.R index aea6906f..e806439e 100644 --- a/R/EngineGraphQLGitLab.R +++ b/R/EngineGraphQLGitLab.R @@ -31,6 +31,9 @@ EngineGraphQLGitLab <- R6::R6Class( if (!all(purrr::map_lgl(response$data, is.null))) { type <- purrr::discard(response$data, is.null) |> names() + if (type == "group") { + type <- "organization" + } attr(owner, "type") <- type } else { attr(owner, "type") <- "not found" From 522e86081ad9d7c6c877d83bc964fca2a83febfd Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Wed, 8 Jan 2025 09:56:38 +0000 Subject: [PATCH 70/99] Update example workflow. --- inst/get_files_workflow.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/get_files_workflow.R b/inst/get_files_workflow.R index 3f0a3efe..b5877cb9 100644 --- a/inst/get_files_workflow.R +++ b/inst/get_files_workflow.R @@ -41,7 +41,7 @@ get_files( md_files <- get_files( gitstats = test_gitstats, - pattern = "\\.md|.R", + pattern = "\\.md|\\.Rmd", depth = 2L ) From 5d06e335c524443a5ade5ad854816b6b08e377a1 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Wed, 8 Jan 2025 14:10:30 +0000 Subject: [PATCH 71/99] Bump version. 
--- DESCRIPTION | 2 +- NEWS.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6f6f47fa..e4e35d81 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: GitStats Title: Standardized Git Repository Data -Version: 2.1.2.9008 +Version: 2.2.0 Authors@R: c( person(given = "Maciej", family = "Banas", email = "banasmaciek@gmail.com", role = c("aut", "cre")), person(given = "Kamil", family = "Koziej", email = "koziej.k@gmail.com", role = "aut"), diff --git a/NEWS.md b/NEWS.md index 203d6819..fd9d7ac4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# GitStats (development version) +# GitStats 2.2.0 ## Features: From aca1b77c2b5c086593d4d06a23389e748922453f Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Wed, 8 Jan 2025 15:07:25 +0000 Subject: [PATCH 72/99] Update Readme with an example. --- README.Rmd | 30 ++++++++++++++++++++++++---- README.md | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 80 insertions(+), 8 deletions(-) diff --git a/README.Rmd b/README.Rmd index 776f072b..41dca11f 100644 --- a/README.Rmd +++ b/README.Rmd @@ -7,6 +7,7 @@ output: github_document ```{r, include = FALSE} knitr::opts_chunk$set( message = FALSE, + warning = FALSE, collapse = TRUE, comment = "#>", fig.path = "man/figures/README-" @@ -27,7 +28,6 @@ With GitStats you can pull git data in a uniform way (table format) from GitHub * commits, * users, * release logs, -* repository files structure, * text files content, * R package usage. @@ -45,9 +45,31 @@ Or development version: devtools::install_github("r-world-devs/GitStats") ``` -## GitStats workflow - -On how to use GitStats, refer to the [documentation](https://r-world-devs.github.io/GitStats/index.html). 
+## Start + +```{r} +library(GitStats) + +commits <- create_gitstats() |> + set_gitlab_host( + repos = "mbtests/gitstatstesting" + ) |> + set_github_host( + orgs = "r-world-devs", + repos = "openpharma/DataFakeR" + ) |> + get_commits( + since = "2022-01-01" + ) + +commits + +commits |> + get_commits_stats( + time_aggregation = "month", + group_var = author + ) +``` ## Acknowledgement diff --git a/README.md b/README.md index 0d3003f3..8a8f4c15 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,6 @@ GitHub and GitLab. For the time-being you can get data on: - commits, - users, - release logs, -- repository files structure, - text files content, - R package usage. @@ -36,10 +35,61 @@ Or development version: devtools::install_github("r-world-devs/GitStats") ``` -## GitStats workflow +## Start -On how to use GitStats, refer to the -[documentation](https://r-world-devs.github.io/GitStats/index.html). +``` r +library(GitStats) + +commits <- create_gitstats() |> + set_gitlab_host( + repos = "mbtests/gitstatstesting" + ) |> + set_github_host( + orgs = "r-world-devs", + repos = "openpharma/DataFakeR" + ) |> + get_commits( + since = "2022-01-01" + ) + +commits +#> # A tibble: 2,169 × 11 +#> id committed_date author author_login author_name additions deletions +#> +#> 1 7f48… 2024-09-10 11:12:59 Macie… maciekbanas Maciej Ban… 0 0 +#> 2 9c66… 2024-09-10 10:35:37 Macie… maciekbanas Maciej Ban… 0 0 +#> 3 fca2… 2024-09-10 10:31:24 Macie… maciekbanas Maciej Ban… 0 0 +#> 4 e8f2… 2023-03-30 14:15:33 Macie… maciekbanas Maciej Ban… 1 0 +#> 5 7e87… 2023-02-10 09:48:55 Macie… maciekbanas Maciej Ban… 1 1 +#> 6 62c4… 2023-02-10 09:17:24 Macie… maciekbanas Maciej Ban… 2 87 +#> 7 55cf… 2023-02-10 09:07:54 Macie… maciekbanas Maciej Ban… 92 0 +#> 8 C_kw… 2023-05-08 09:43:31 Kryst… krystian8207 Krystian I… 18 0 +#> 9 C_kw… 2023-04-28 12:30:40 Kamil… Kamil Kozi… 18 0 +#> 10 C_kw… 2023-03-01 15:05:10 Kryst… krystian8207 Krystian I… 296 153 +#> # ℹ 2,159 more rows +#> # ℹ 4 more variables: 
repository , organization , repo_url , +#> # api_url + +commits |> + get_commits_stats( + time_aggregation = "month", + group_var = author + ) +#> # A tibble: 224 × 4 +#> stats_date githost author stats +#> +#> 1 2022-01-01 00:00:00 github Admin_mschuemi 1 +#> 2 2022-01-01 00:00:00 github Gowtham Rao 5 +#> 3 2022-01-01 00:00:00 github Krystian Igras 1 +#> 4 2022-01-01 00:00:00 github Martijn Schuemie 1 +#> 5 2022-02-01 00:00:00 github Hadley Wickham 3 +#> 6 2022-02-01 00:00:00 github Martijn Schuemie 2 +#> 7 2022-02-01 00:00:00 github Maximilian Girlich 13 +#> 8 2022-02-01 00:00:00 github Reijo Sund 1 +#> 9 2022-02-01 00:00:00 github eitsupi 1 +#> 10 2022-03-01 00:00:00 github Maximilian Girlich 14 +#> # ℹ 214 more rows +``` ## Acknowledgement From 72cada13120dcf0c38bc2ec6473a21b6ee1ed0a4 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Wed, 8 Jan 2025 15:11:26 +0000 Subject: [PATCH 73/99] Rename `.show_error` to `.error`. --- NEWS.md | 2 +- R/GitStats.R | 8 ++++---- R/set_host.R | 10 +++++----- inst/set_hosts.R | 4 ++-- man/set_github_host.Rd | 4 ++-- man/set_gitlab_host.Rd | 4 ++-- tests/testthat/_snaps/set_host.md | 2 +- tests/testthat/test-set_host.R | 2 +- 8 files changed, 18 insertions(+), 18 deletions(-) diff --git a/NEWS.md b/NEWS.md index fd9d7ac4..3bbcfb6f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -9,7 +9,7 @@ - adding `yearly` aggregation to `time_aggregation` parameter, - changing basic input from `GitStats` to `commits_data` object which allows to build workflow in one pipeline (`create_gitstats() |> set_*_host() |> get_commits() |> get_commits_stats()`). - Merged two functions `get_files_content()` and `get_files_structure()` into one `get_files()` ([#564](https://github.com/r-world-devs/GitStats/issues/564)). -- Add `.show_error` parameter to the `set_*_host()` functions to control if error should pop up when wrong input is passed ([#547](https://github.com/r-world-devs/GitStats/issues/547)). 
+- Add `.error` parameter to the `set_*_host()` functions to control if error should pop up when wrong input is passed ([#547](https://github.com/r-world-devs/GitStats/issues/547)). ## Fixes: diff --git a/R/GitStats.R b/R/GitStats.R index 9bd78f4b..f427a155 100644 --- a/R/GitStats.R +++ b/R/GitStats.R @@ -9,7 +9,7 @@ GitStats <- R6::R6Class( orgs = NULL, repos = NULL, verbose = TRUE, - .show_error = TRUE) { + .error = TRUE) { new_host <- NULL new_host <- GitHostGitHub$new( orgs = orgs, @@ -17,7 +17,7 @@ GitStats <- R6::R6Class( token = token, host = host, verbose = verbose, - .error = .show_error + .error = .error ) private$add_new_host(new_host) }, @@ -27,7 +27,7 @@ GitStats <- R6::R6Class( orgs = NULL, repos = NULL, verbose = TRUE, - .show_error = TRUE) { + .error = TRUE) { new_host <- NULL new_host <- GitHostGitLab$new( orgs = orgs, @@ -35,7 +35,7 @@ GitStats <- R6::R6Class( token = token, host = host, verbose = verbose, - .error = .show_error + .error = .error ) private$add_new_host(new_host) }, diff --git a/R/set_host.R b/R/set_host.R index 9bab0d3f..63c07d2b 100644 --- a/R/set_host.R +++ b/R/set_host.R @@ -11,7 +11,7 @@ #' pass it, `orgs` parameter should stay `NULL`. #' @param verbose A logical, `TRUE` by default. If `FALSE` messages and printing #' output is switched off. -#' @param .show_error A logical to control if passing wrong input +#' @param .error A logical to control if passing wrong input #' (`repositories` and `organizations`) should end with an error or not. 
#' @details If you do not define `orgs` and `repos`, `GitStats` will be set to #' scan whole Git platform (such as enterprise version of GitHub or GitLab), @@ -33,14 +33,14 @@ set_github_host <- function(gitstats, orgs = NULL, repos = NULL, verbose = is_verbose(gitstats), - .show_error = TRUE) { + .error = TRUE) { gitstats$set_github_host( host = host, token = token, orgs = orgs, repos = repos, verbose = verbose, - .show_error = .show_error + .error = .error ) return(invisible(gitstats)) @@ -70,14 +70,14 @@ set_gitlab_host <- function(gitstats, orgs = NULL, repos = NULL, verbose = is_verbose(gitstats), - .show_error = TRUE) { + .error = TRUE) { gitstats$set_gitlab_host( host = host, token = token, orgs = orgs, repos = repos, verbose = verbose, - .show_error = .show_error + .error = .error ) return(invisible(gitstats)) diff --git a/inst/set_hosts.R b/inst/set_hosts.R index 80819bc0..f25078f6 100644 --- a/inst/set_hosts.R +++ b/inst/set_hosts.R @@ -5,13 +5,13 @@ git_stats <- create_gitstats() |> orgs = c("r-world-devs", "openparma"), repos = c("openpharma/DataFakR", "r-world-devs/GitAI"), token = Sys.getenv("GITHUB_PAT"), - .show_error = FALSE + .error = FALSE ) |> set_gitlab_host( orgs = c("mbtests", "makbest"), repos = c("makbest/something", "mbtests/gitstatstesting", "krystianigras/gitlab-test"), token = Sys.getenv("GITLAB_PAT_PUBLIC"), - .show_error = FALSE + .error = FALSE ) git_stats diff --git a/man/set_github_host.Rd b/man/set_github_host.Rd index 6d10db94..79e7d49b 100644 --- a/man/set_github_host.Rd +++ b/man/set_github_host.Rd @@ -11,7 +11,7 @@ set_github_host( orgs = NULL, repos = NULL, verbose = is_verbose(gitstats), - .show_error = TRUE + .error = TRUE ) } \arguments{ @@ -32,7 +32,7 @@ pass it, \code{orgs} parameter should stay \code{NULL}.} \item{verbose}{A logical, \code{TRUE} by default. 
If \code{FALSE} messages and printing output is switched off.} -\item{.show_error}{A logical to control if passing wrong input +\item{.error}{A logical to control if passing wrong input (\code{repositories} and \code{organizations}) should end with an error or not.} } \value{ diff --git a/man/set_gitlab_host.Rd b/man/set_gitlab_host.Rd index c9df9756..17f5d304 100644 --- a/man/set_gitlab_host.Rd +++ b/man/set_gitlab_host.Rd @@ -11,7 +11,7 @@ set_gitlab_host( orgs = NULL, repos = NULL, verbose = is_verbose(gitstats), - .show_error = TRUE + .error = TRUE ) } \arguments{ @@ -32,7 +32,7 @@ pass it, \code{orgs} parameter should stay \code{NULL}.} \item{verbose}{A logical, \code{TRUE} by default. If \code{FALSE} messages and printing output is switched off.} -\item{.show_error}{A logical to control if passing wrong input +\item{.error}{A logical to control if passing wrong input (\code{repositories} and \code{organizations}) should end with an error or not.} } \value{ diff --git a/tests/testthat/_snaps/set_host.md b/tests/testthat/_snaps/set_host.md index 60ec7dc7..69ddcd36 100644 --- a/tests/testthat/_snaps/set_host.md +++ b/tests/testthat/_snaps/set_host.md @@ -129,7 +129,7 @@ Code test_gitstats <- create_gitstats() %>% set_github_host(orgs = c("openpharma", "r_world_devs"), repos = c("r-world-devs/GitStats", "r-world-devs/GitMetrics"), - verbose = TRUE, .show_error = FALSE) + verbose = TRUE, .error = FALSE) Message i Using PAT from GITHUB_PAT envar. i Checking owners... 
diff --git a/tests/testthat/test-set_host.R b/tests/testthat/test-set_host.R index 17b91c99..76122725 100644 --- a/tests/testthat/test-set_host.R +++ b/tests/testthat/test-set_host.R @@ -165,7 +165,7 @@ test_that("When wrong orgs and repos are passed they are excluded but host is cr orgs = c("openpharma", "r_world_devs"), repos = c("r-world-devs/GitStats", "r-world-devs/GitMetrics"), verbose = TRUE, - .show_error = FALSE + .error = FALSE ) ) }) From 281842700d5259c53b473bdccd0be470e75db5d8 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Wed, 8 Jan 2025 16:20:46 +0000 Subject: [PATCH 74/99] Small update. --- README.Rmd | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.Rmd b/README.Rmd index 41dca11f..3ee3adc0 100644 --- a/README.Rmd +++ b/README.Rmd @@ -39,7 +39,7 @@ From CRAN: install.packages("GitStats") ``` -Or development version: +From GitHub: ```r devtools::install_github("r-world-devs/GitStats") diff --git a/README.md b/README.md index 8a8f4c15..c274ed77 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ From CRAN: install.packages("GitStats") ``` -Or development version: +From GitHub: ``` r devtools::install_github("r-world-devs/GitStats") From 87dfca28b6e545ccb0c66ca0ca86f66dce39ce14 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Wed, 8 Jan 2025 16:22:02 +0000 Subject: [PATCH 75/99] Organize Reference section. --- _pkgdown.yml | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/_pkgdown.yml b/_pkgdown.yml index 987071a6..e666134c 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -9,9 +9,23 @@ reference: - starts_with("set_") - matches("verbose") - matches("show_orgs") -- title: Get data + - matches("get_storage") +- title: Get git data + desc: > + Functions pulling git data in a tibble format. + contents: + - get_repos + - get_repos_urls + - get_commits + - get_release_logs + - get_files + - get_users +- title: Get statistics + desc: > + Functions summarizing git data. 
- contents: - - starts_with("get_") + - get_commits_stats + - get_R_package_usage articles: - title: Articles navbar: ~ From 5e864466e53461da07699e580abbbeced86ec79b Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Wed, 8 Jan 2025 16:30:37 +0000 Subject: [PATCH 76/99] Add progress bar. --- R/GitHost.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/GitHost.R b/R/GitHost.R index 58e54e74..b89c3637 100644 --- a/R/GitHost.R +++ b/R/GitHost.R @@ -496,7 +496,7 @@ GitHost <- R6::R6Class( } } return(org) - }) |> + }, .progress = verbose) |> purrr::keep(~ length(.) > 0) if (length(orgs) == 0) { return(NULL) From 23b2a172cf155668cf80479fb5bd8b52e39daae4 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Thu, 9 Jan 2025 09:15:38 +0000 Subject: [PATCH 77/99] Rename S3 output classes. --- R/GitStats.R | 14 +++++++------- R/get_commits_stats.R | 6 +++--- tests/testthat/_snaps/get_commits_stats.md | 2 +- tests/testthat/test-02-get_commits-GitStats.R | 2 +- tests/testthat/test-get_commits_stats.R | 4 ++-- tests/testthat/test-get_storage.R | 4 ++-- tests/testthat/test-get_usage_R_package.R | 2 +- 7 files changed, 17 insertions(+), 17 deletions(-) diff --git a/R/GitStats.R b/R/GitStats.R index f427a155..8c56dbf1 100644 --- a/R/GitStats.R +++ b/R/GitStats.R @@ -72,7 +72,7 @@ GitStats <- R6::R6Class( progress = progress ) %>% private$set_object_class( - class = "repos_table", + class = "gitstats_repos", attr_list = args_list ) private$save_to_storage( @@ -122,7 +122,7 @@ GitStats <- R6::R6Class( if (!is.null(repos_urls)) { repos_urls <- private$set_object_class( object = repos_urls, - class = "repos_urls", + class = "gitstats_repos_urls", attr_list = args_list ) private$save_to_storage( @@ -163,7 +163,7 @@ GitStats <- R6::R6Class( progress = progress ) %>% private$set_object_class( - class = "commits_data", + class = "gitstats_commits", attr_list = args_list ) private$save_to_storage( @@ -190,7 +190,7 @@ GitStats <- R6::R6Class( if (trigger) { users <- 
private$get_users_from_hosts(logins) %>% private$set_object_class( - class = "users_data", + class = "gitstats_users", attr_list = args_list ) private$save_to_storage(users) @@ -231,7 +231,7 @@ GitStats <- R6::R6Class( if (nrow(files) > 0) { files <- private$set_object_class( object = files, - class = "files_data", + class = "gitstats_files", attr_list = args_list ) private$save_to_storage(files) @@ -270,7 +270,7 @@ GitStats <- R6::R6Class( progress = progress ) %>% private$set_object_class( - class = "release_logs", + class = "gitstats_releases", attr_list = args_list ) private$save_to_storage(release_logs) @@ -313,7 +313,7 @@ GitStats <- R6::R6Class( (split_output && any(purrr::map_lgl(R_package_usage, ~ nrow(.) > 0)))) { R_package_usage <- private$set_object_class( object = R_package_usage, - class = "R_package_usage", + class = "gitstats_package_usage", attr_list = args_list ) private$save_to_storage(R_package_usage) diff --git a/R/get_commits_stats.R b/R/get_commits_stats.R index c73a0d08..9e4b7eef 100644 --- a/R/get_commits_stats.R +++ b/R/get_commits_stats.R @@ -28,9 +28,9 @@ get_commits_stats <- function(commits, time_aggregation = c("year", "month", "week", "day"), group_var) { - if (!inherits(commits, "commits_data")) { + if (!inherits(commits, "gitstats_commits")) { cli::cli_abort(c( - "x" = "`commits` must be a `commits_data` object.", + "x" = "`commits` must be a `gitstats_commits` object.", "i" = "Pull first your commits with `get_commits()` function." 
)) } @@ -62,7 +62,7 @@ get_commits_stats <- function(commits, set_commits_stats_class <- function(object, time_aggregation) { stopifnot(inherits(object, "grouped_df")) object <- dplyr::ungroup(object) - class(object) <- append(class(object), "commits_stats") + class(object) <- append(class(object), "gitstats_commits_stats") attr(object, "time_aggregation") <- time_aggregation object } diff --git a/tests/testthat/_snaps/get_commits_stats.md b/tests/testthat/_snaps/get_commits_stats.md index a457c06b..08184c3a 100644 --- a/tests/testthat/_snaps/get_commits_stats.md +++ b/tests/testthat/_snaps/get_commits_stats.md @@ -1,5 +1,5 @@ # get_commits_stats returns error when commits is not commits_data object - x `commits` must be a `commits_data` object. + x `commits` must be a `gitstats_commits` object. i Pull first your commits with `get_commits()` function. diff --git a/tests/testthat/test-02-get_commits-GitStats.R b/tests/testthat/test-02-get_commits-GitStats.R index 63474844..1a183e87 100644 --- a/tests/testthat/test-02-get_commits-GitStats.R +++ b/tests/testthat/test-02-get_commits-GitStats.R @@ -56,7 +56,7 @@ test_that("get_commits() works", { ) expect_s3_class( commits_data, - "commits_data" + "gitstats_commits" ) test_mocker$cache(commits_data) }) diff --git a/tests/testthat/test-get_commits_stats.R b/tests/testthat/test-get_commits_stats.R index 46cf7f45..b5ef8cfa 100644 --- a/tests/testthat/test-get_commits_stats.R +++ b/tests/testthat/test-get_commits_stats.R @@ -4,7 +4,7 @@ test_that("get_commits_stats method works", { time_aggregation = "month", group_var = organization ) - expect_s3_class(commits_stats, "commits_stats") + expect_s3_class(commits_stats, "gitstats_commits_stats") expect_equal( colnames(commits_stats), c("stats_date", "githost", "organization", "stats") @@ -18,7 +18,7 @@ test_that("get_commits_stats method works", { ) expect_equal(commits_stats_yearly$stats_date, as.POSIXct(c(rep("2023-01-01", 2), "2024-01-01"), tz = 'UTC')) - 
expect_s3_class(commits_stats_yearly, "commits_stats") + expect_s3_class(commits_stats_yearly, "gitstats_commits_stats") expect_equal( colnames(commits_stats_yearly), c("stats_date", "githost", "stats") diff --git a/tests/testthat/test-get_storage.R b/tests/testthat/test-get_storage.R index d434fb80..d419ef39 100644 --- a/tests/testthat/test-get_storage.R +++ b/tests/testthat/test-get_storage.R @@ -27,7 +27,7 @@ test_that("get_storage retrieves one table", { ) expect_s3_class( gitstats_storage, - "commits_data" + "gitstats_commits" ) expect_commits_table( gitstats_storage @@ -45,6 +45,6 @@ test_that("get_storage retrieves one table", { ) expect_s3_class( gitstats_storage, - "files_data" + "gitstats_files" ) }) diff --git a/tests/testthat/test-get_usage_R_package.R b/tests/testthat/test-get_usage_R_package.R index 050cd2de..3c341680 100644 --- a/tests/testthat/test-get_usage_R_package.R +++ b/tests/testthat/test-get_usage_R_package.R @@ -117,6 +117,6 @@ test_that("get_R_package_usage works", { expect_package_usage_table(R_package_usage_table) expect_s3_class( R_package_usage_table, - "R_package_usage" + "gitstats_package_usage" ) }) From 9babe7ccc8d6a7cf710a40a81abde26df58acd71 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Thu, 9 Jan 2025 11:05:59 +0000 Subject: [PATCH 78/99] Add more examples to README, remove article on get_files. 
--- README.Rmd | 50 +++++++++++++++- README.md | 126 +++++++++++++++++++++++++++++++++++++--- _pkgdown.yml | 2 - vignettes/get_files.Rmd | 47 --------------- 4 files changed, 166 insertions(+), 59 deletions(-) delete mode 100644 vignettes/get_files.Rmd diff --git a/README.Rmd b/README.Rmd index 3ee3adc0..3da5747d 100644 --- a/README.Rmd +++ b/README.Rmd @@ -45,19 +45,27 @@ From GitHub: devtools::install_github("r-world-devs/GitStats") ``` -## Start +## Examples: + +Setup your `GitStats`: ```{r} library(GitStats) -commits <- create_gitstats() |> +git_stats <- create_gitstats() |> set_gitlab_host( repos = "mbtests/gitstatstesting" ) |> set_github_host( orgs = "r-world-devs", repos = "openpharma/DataFakeR" - ) |> + ) +``` + +Get commits: + +```{r} +commits <- git_stats |> get_commits( since = "2022-01-01" ) @@ -71,6 +79,42 @@ commits |> ) ``` +Get repositories with specific code: + +```{r} +git_stats |> + get_repos( + with_code = "shiny", + add_contributors = FALSE + ) +``` + +Get files: + +```{r} +git_stats |> + get_files( + pattern = "\\.md", + depth = 2L + ) +``` + +Get package usage: + +```{r} +git_stats |> + get_R_package_usage( + packages = c("shiny", "purrr"), + split_output = TRUE + ) +``` + +Print `GitStats` to see what it stores: + +```{r} +git_stats +``` + ## Acknowledgement Special thanks to [James Black](https://github.com/epijim), [Karolina Marcinkowska](https://github.com/marcinkowskak), [Kamil Koziej](https://github.com/Cotau), [Matt Secrest](https://github.com/mattsecrest), [Krystian Igras](https://github.com/krystian8207), [Kamil Wais](https://github.com/kalimu), [Adam Forys](https://github.com/galachad) - for the support in the package development. 
diff --git a/README.md b/README.md index c274ed77..13ea41e6 100644 --- a/README.md +++ b/README.md @@ -35,25 +35,33 @@ From GitHub: devtools::install_github("r-world-devs/GitStats") ``` -## Start +## Examples: + +Setup your `GitStats`: ``` r library(GitStats) -commits <- create_gitstats() |> +git_stats <- create_gitstats() |> set_gitlab_host( repos = "mbtests/gitstatstesting" ) |> set_github_host( orgs = "r-world-devs", repos = "openpharma/DataFakeR" - ) |> + ) +``` + +Get commits: + +``` r +commits <- git_stats |> get_commits( since = "2022-01-01" ) commits -#> # A tibble: 2,169 × 11 +#> # A tibble: 2,178 × 11 #> id committed_date author author_login author_name additions deletions #> #> 1 7f48… 2024-09-10 11:12:59 Macie… maciekbanas Maciej Ban… 0 0 @@ -66,7 +74,7 @@ commits #> 8 C_kw… 2023-05-08 09:43:31 Kryst… krystian8207 Krystian I… 18 0 #> 9 C_kw… 2023-04-28 12:30:40 Kamil… Kamil Kozi… 18 0 #> 10 C_kw… 2023-03-01 15:05:10 Kryst… krystian8207 Krystian I… 296 153 -#> # ℹ 2,159 more rows +#> # ℹ 2,168 more rows #> # ℹ 4 more variables: repository , organization , repo_url , #> # api_url @@ -75,7 +83,7 @@ commits |> time_aggregation = "month", group_var = author ) -#> # A tibble: 224 × 4 +#> # A tibble: 228 × 4 #> stats_date githost author stats #> #> 1 2022-01-01 00:00:00 github Admin_mschuemi 1 @@ -88,7 +96,111 @@ commits |> #> 8 2022-02-01 00:00:00 github Reijo Sund 1 #> 9 2022-02-01 00:00:00 github eitsupi 1 #> 10 2022-03-01 00:00:00 github Maximilian Girlich 14 -#> # ℹ 214 more rows +#> # ℹ 218 more rows +``` + +Get repositories: + +``` r +git_stats |> + get_repos( + with_code = "shiny", + add_contributors = FALSE + ) +#> # A tibble: 6 × 16 +#> repo_id repo_name organization fullname platform repo_url api_url +#> +#> 1 627452680 hypothesis r-world-devs r-world-d… github https:/… https:… +#> 2 604718884 shinyTimelines r-world-devs r-world-d… github https:/… https:… +#> 3 495151911 shinyCohortBuilder r-world-devs r-world-d… github https:/… https:… +#> 4 
495144469 cohortBuilder r-world-devs r-world-d… github https:/… https:… +#> 5 884789327 GitAI r-world-devs r-world-d… github https:/… https:… +#> 6 586903986 GitStats r-world-devs r-world-d… github https:/… https:… +#> # ℹ 9 more variables: created_at , last_activity_at , +#> # last_activity , default_branch , stars , forks , +#> # languages , issues_open , issues_closed +``` + +Get files: + +``` r +git_stats |> + get_files( + pattern = "\\.md", + depth = 2L + ) +#> # A tibble: 51 × 8 +#> repo_name repo_id organization file_path file_content file_size repo_url +#> +#> 1 GitStats Test… gid://… mbtests README.md "# GitStats… 122 https:/… +#> 2 shinyGizmo R_kgDO… r-world-devs NEWS.md "# shinyGiz… 2186 https:/… +#> 3 shinyGizmo R_kgDO… r-world-devs README.md "\n# shinyG… 2337 https:/… +#> 4 shinyGizmo R_kgDO… r-world-devs cran-com… "## Test en… 1700 https:/… +#> 5 cohortBuilder R_kgDO… r-world-devs NEWS.md "# cohortBu… 917 https:/… +#> 6 cohortBuilder R_kgDO… r-world-devs README.md "\n# cohort… 15828 https:/… +#> 7 shinyCohortBu… R_kgDO… r-world-devs NEWS.md "# shinyCoh… 2018 https:/… +#> 8 shinyCohortBu… R_kgDO… r-world-devs README.md "\n# shinyC… 3355 https:/… +#> 9 cohortBuilder… R_kgDO… r-world-devs README.md "\n# cohort… 3472 https:/… +#> 10 GitStats R_kgDO… r-world-devs LICENSE.… "# MIT Lice… 1075 https:/… +#> # ℹ 41 more rows +#> # ℹ 1 more variable: api_url +``` + +Get package usage: + +``` r +git_stats |> + get_R_package_usage( + packages = c("shiny", "purrr"), + split_output = TRUE + ) +#> $shiny +#> # A tibble: 5 × 11 +#> package package_usage repo_id repo_fullname repo_name default_branch +#> +#> 1 shiny import 495144469 r-world-devs/cohor… cohortBu… dev +#> 2 shiny import, library 495151911 r-world-devs/shiny… shinyCoh… dev +#> 3 shiny import, library 604718884 r-world-devs/shiny… shinyTim… master +#> 4 shiny import, library 884789327 r-world-devs/GitAI GitAI main +#> 5 shiny import, library 627452680 r-world-devs/hypot… hypothes… master +#> # ℹ 5 more 
variables: created_at , organization , repo_url , +#> # api_url , platform +#> +#> $purrr +#> # A tibble: 6 × 11 +#> package package_usage repo_id repo_fullname repo_name default_branch +#> +#> 1 purrr import 495144469 r-world-devs/cohortB… cohortBu… dev +#> 2 purrr import 495151911 r-world-devs/shinyCo… shinyCoh… dev +#> 3 purrr import 586903986 r-world-devs/GitStats GitStats master +#> 4 purrr import 884789327 r-world-devs/GitAI GitAI main +#> 5 purrr import 627452680 r-world-devs/hypothe… hypothes… master +#> 6 purrr import 402384343 openpharma/DataFakeR DataFakeR master +#> # ℹ 5 more variables: created_at , organization , repo_url , +#> # api_url , platform +#> +#> attr(,"class") +#> [1] "R_package_usage" "list" +#> attr(,"packages") +#> [1] "shiny" "purrr" +#> attr(,"only_loading") +#> [1] FALSE +``` + +Print `GitStats` to see what it stores: + +``` r +git_stats +#> A GitStats object for 2 hosts: +#> Hosts: https://gitlab.com/api/v4, https://api.github.com +#> Scanning scope: +#> Organizations: [1] r-world-devs +#> Repositories: [2] mbtests/gitstatstesting, openpharma/DataFakeR +#> Storage: +#> Repositories: 6 +#> Commits: 2178 [date range: 2022-01-01 - 2025-01-10] +#> Files: 51 [file pattern: \.md] +#> R_package_usage: 2 [packages: shiny, purrr] ``` ## Acknowledgement diff --git a/_pkgdown.yml b/_pkgdown.yml index e666134c..1848e5b5 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -33,5 +33,3 @@ articles: - set_hosts - get_and_store_data - get_repos_with_code - - get_files - diff --git a/vignettes/get_files.Rmd b/vignettes/get_files.Rmd deleted file mode 100644 index 1aed18bc..00000000 --- a/vignettes/get_files.Rmd +++ /dev/null @@ -1,47 +0,0 @@ ---- -title: "Get files content" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{Get files content} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>", - fig.width = 7, - fig.height = 4 -) 
-``` - -Set connections to hosts. - -> Example workflow makes use of public GitHub and GitLab, but it is plausible, that you will use your internal git platforms, where you need to define `host` parameter. See `vignette("set_hosts")` article on that. - -```{r} -library(GitStats) - -git_stats <- create_gitstats() %>% - set_github_host( - orgs = c("r-world-devs", "openpharma"), - token = Sys.getenv("GITHUB_PAT") - ) %>% - set_gitlab_host( - orgs = c("mbtests"), - token = Sys.getenv("GITLAB_PAT_PUBLIC") - ) -``` - -With `GitStats` you can get the content of all text files in repo that are of your interest. You can pull specific types of files, by setting `pattern` with regular expression and `depth` with integer, which defines level of directories to look for the files. - -```{r} -files_structure <- get_files( - gitstats = git_stats, - pattern = "\\.md", - depth = 1L, - progress = FALSE -) -dplyr::glimpse(files_structure) -``` From c214f4a570e73483e6dc8af500fe950450bd5762 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Thu, 9 Jan 2025 11:08:02 +0000 Subject: [PATCH 79/99] Update NEWS. --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 3bbcfb6f..84e7dc85 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # GitStats 2.2.0 +This release brings some substantial improvements with making it possible to scan whole organizations and particular repositories for one host at the same time, boosting function to prepare commits statistics and simplifying workflow for getting files. + ## Features: - From now on it is possible to pass `orgs` and `repos` in `set_*_host()` functions ([#400](https://github.com/r-world-devs/GitStats/issues/400)). From 105a345609553b71d3792b9ed74eb5ff305f9f97 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Thu, 9 Jan 2025 14:57:41 +0000 Subject: [PATCH 80/99] Prettify messages when pulling data, add example with setting many repos. 
--- R/EngineRestGitHub.R | 6 +-- R/EngineRestGitLab.R | 6 +-- R/GitHost.R | 10 ++-- R/GitStats.R | 8 +-- R/message_handler.R | 16 ++++++ inst/set_many_repos.R | 51 +++++++++++++++++++ tests/testthat/_snaps/01-get_repos-GitHub.md | 2 +- .../testthat/_snaps/get_urls_repos-GitHub.md | 2 +- 8 files changed, 77 insertions(+), 24 deletions(-) create mode 100644 inst/set_many_repos.R diff --git a/R/EngineRestGitHub.R b/R/EngineRestGitHub.R index db6d95dd..d9540ef5 100644 --- a/R/EngineRestGitHub.R +++ b/R/EngineRestGitHub.R @@ -195,11 +195,7 @@ EngineRestGitHub <- R6::R6Class( error = function(e) { NA }) - }, .progress = if (progress) { - "[GitHost:GitHub] Pulling contributors..." - } else { - FALSE - }) + }, .progress = progress) } return(repos_table) } diff --git a/R/EngineRestGitLab.R b/R/EngineRestGitLab.R index fcccc21c..46571dc5 100644 --- a/R/EngineRestGitLab.R +++ b/R/EngineRestGitLab.R @@ -246,11 +246,7 @@ EngineRestGitLab <- R6::R6Class( NA }) return(contributors_vec) - }, .progress = if (progress) { - "[GitHost:GitLab] Pulling contributors..." 
- } else { - FALSE - }) + }, .progress = progress) } return(repos_table) }, diff --git a/R/GitHost.R b/R/GitHost.R index b89c3637..54f1e3cb 100644 --- a/R/GitHost.R +++ b/R/GitHost.R @@ -693,7 +693,7 @@ GitHost <- R6::R6Class( show_message( host = private$host_name, engine = "graphql", - scope = org, + scope = set_repo_scope(org, private), information = "Pulling repositories" ) } @@ -813,7 +813,7 @@ GitHost <- R6::R6Class( show_message( host = private$host_name, engine = "rest", - scope = org, + scope = paste0(org, "/", private$orgs_repos[[org]], collapse = ", "), information = "Pulling repositories (URLs)" ) } @@ -1150,7 +1150,7 @@ GitHost <- R6::R6Class( show_message( host = private$host_name, engine = "graphql", - scope = org, + scope = paste0(org, "/", private$orgs_repos[[org]], collapse = ", "), information = glue::glue("Pulling files content: [{paste0(file_path, collapse = ', ')}]") ) } @@ -1280,7 +1280,7 @@ GitHost <- R6::R6Class( show_message( host = private$host_name, engine = "graphql", - scope = org, + scope = paste0(org, "/", private$orgs_repos[[org]], collapse = ", "), information = user_info ) } @@ -1383,7 +1383,7 @@ GitHost <- R6::R6Class( show_message( host = private$host_name, engine = "graphql", - scope = paste0(org, "/", private$orgs_repos[[org]]), + scope = paste0(org, "/", private$orgs_repos[[org]], collapse = ", "), information = "Pulling release logs" ) } diff --git a/R/GitStats.R b/R/GitStats.R index 8c56dbf1..e3ca54bb 100644 --- a/R/GitStats.R +++ b/R/GitStats.R @@ -915,13 +915,7 @@ GitStats <- R6::R6Class( item_to_print <- purrr::map_vec(item_to_print, function(element) { URLdecode(element) }) - if (length(item_to_print) < 10) { - list_items <- paste0(item_to_print, collapse = ", ") - } else { - item_to_print_cut <- item_to_print[1:10] - list_items <- paste0(item_to_print_cut, collapse = ", ") %>% - paste0("... 
and ", length(item_to_print) - 10, " more") - } + list_items <- cut_item_to_print(item_to_print) item_to_print <- paste0("[", cli::col_green(length(item_to_print)), "] ", list_items) } cat(paste0( diff --git a/R/message_handler.R b/R/message_handler.R index 7d578126..87094eff 100644 --- a/R/message_handler.R +++ b/R/message_handler.R @@ -13,6 +13,7 @@ show_message <- function(host, } else if (engine == "both") { paste0(msg_rest, "&", msg_graphql) } + information <- cli::col_br_blue(information) message <- if (is.null(scope)) { glue::glue("[Host:{host}][Engine:{engine_msg}] {information}...") } else { @@ -20,3 +21,18 @@ show_message <- function(host, } cli::cli_alert_info(message) } + +set_repo_scope <- function(org, private) { + cut_item_to_print(paste0(org, "/", private$orgs_repos[[org]])) +} + +cut_item_to_print <- function(item_to_print) { + if (length(item_to_print) < 10) { + list_items <- paste0(item_to_print, collapse = ", ") + } else { + item_to_print_cut <- item_to_print[1:10] + list_items <- paste0(item_to_print_cut, collapse = ", ") %>% + paste0("... 
and ", length(item_to_print) - 10, " more") + } + return(list_items) +} diff --git a/inst/set_many_repos.R b/inst/set_many_repos.R new file mode 100644 index 00000000..5330da93 --- /dev/null +++ b/inst/set_many_repos.R @@ -0,0 +1,51 @@ +pharma_stats <- create_gitstats() |> + set_github_host( + orgs = "pharmaverse" + ) + +pharma_repos <- pharma_stats |> + get_repos( + add_contributors = FALSE + ) + +pharma_stats <- create_gitstats() |> + set_github_host( + repos = pharma_repos$fullname + ) + +pharma_stats + +pharma_stats |> + get_release_logs( + since = "2020-01-01", + cache = FALSE + ) + +pharma_stats |> + get_commits( + since = "2020-01-01", + cache = FALSE + ) + +pharma_stats |> + get_repos( + cache = FALSE + ) + +# very slow, better to run it when whole orgs are set +pharma_stats |> + get_repos( + with_code = "shiny", + cache = FALSE + ) + +pharma_stats |> + get_repos_urls( + cache = FALSE + ) + +pharma_stats |> + get_files( + pattern = "\\.md", + depth = 1L + ) diff --git a/tests/testthat/_snaps/01-get_repos-GitHub.md b/tests/testthat/_snaps/01-get_repos-GitHub.md index 043a672d..fc543d7c 100644 --- a/tests/testthat/_snaps/01-get_repos-GitHub.md +++ b/tests/testthat/_snaps/01-get_repos-GitHub.md @@ -58,7 +58,7 @@ gh_repos_individual <- github_testhost_priv$get_repos_from_repos(verbose = TRUE, progress = FALSE) Message - i [Host:GitHub][Engine:GraphQl][Scope:test_org] Pulling repositories... + i [Host:GitHub][Engine:GraphQl][Scope:test_org/TestRepo] Pulling repositories... 
# `get_all_repos()` is set to scan whole git host diff --git a/tests/testthat/_snaps/get_urls_repos-GitHub.md b/tests/testthat/_snaps/get_urls_repos-GitHub.md index 34365484..7fd4dff0 100644 --- a/tests/testthat/_snaps/get_urls_repos-GitHub.md +++ b/tests/testthat/_snaps/get_urls_repos-GitHub.md @@ -12,7 +12,7 @@ gh_repos_urls <- github_testhost_priv$get_repos_urls_from_repos(type = "web", verbose = TRUE, progress = FALSE) Message - i [Host:GitHub][Engine:REST][Scope:test_org] Pulling repositories (URLs)... + i [Host:GitHub][Engine:REST][Scope:test_org/TestRepo] Pulling repositories (URLs)... # get_all_repos_urls prepares web repo_urls vector From 0b9f5843a672f187e587985425c7a8e18f5e7411 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Thu, 9 Jan 2025 15:32:23 +0000 Subject: [PATCH 81/99] Style. --- R/EngineGraphQLGitHub.R | 78 ++++++++++++++++++++++------------------- 1 file changed, 42 insertions(+), 36 deletions(-) diff --git a/R/EngineGraphQLGitHub.R b/R/EngineGraphQLGitHub.R index 5a1f536a..60cbdcef 100644 --- a/R/EngineGraphQLGitHub.R +++ b/R/EngineGraphQLGitHub.R @@ -9,9 +9,11 @@ EngineGraphQLGitHub <- R6::R6Class( initialize = function(gql_api_url, token, scan_all = FALSE) { - super$initialize(gql_api_url = gql_api_url, - token = token, - scan_all = scan_all) + super$initialize( + gql_api_url = gql_api_url, + token = token, + scan_all = scan_all + ) self$gql_query <- GQLQueryGitHub$new() }, @@ -53,7 +55,7 @@ EngineGraphQLGitHub <- R6::R6Class( response$errors[[1]]$message ) } - orgs_list <- purrr::map(response$data$search$edges, ~stringr::str_match(.$node$url, "[^\\/]*$")) + orgs_list <- purrr::map(response$data$search$edges, ~ stringr::str_match(.$node$url, "[^\\/]*$")) full_orgs_list <- append(full_orgs_list, orgs_list) has_next_page <- response$data$search$pageInfo$hasNextPage end_cursor <- response$data$search$pageInfo$endCursor @@ -98,7 +100,7 @@ EngineGraphQLGitHub <- R6::R6Class( prepare_repos_table = function(repos_list, org) { if 
(length(repos_list) > 0) { repos_table <- purrr::map(repos_list, function(repo) { - repo$default_branch <- if (!is.null(repo$default_branch)) { + repo[["default_branch"]] <- if (!is.null(repo$default_branch)) { repo$default_branch$name } else { "" @@ -107,13 +109,13 @@ EngineGraphQLGitHub <- R6::R6Class( if (length(last_activity_at) == 0) { last_activity_at <- gts_to_posixt(repo$created_at) } - repo$languages <- purrr::map_chr(repo$languages$nodes, ~ .$name) %>% + repo[["languages"]] <- purrr::map_chr(repo$languages$nodes, ~ .$name) |> paste0(collapse = ", ") - repo$created_at <- gts_to_posixt(repo$created_at) - repo$issues_open <- repo$issues_open$totalCount - repo$issues_closed <- repo$issues_closed$totalCount - repo$last_activity_at <- last_activity_at - repo$organization <- repo$organization$login + repo[["created_at"]] <- gts_to_posixt(repo$created_at) + repo[["issues_open"]] <- repo$issues_open$totalCount + repo[["issues_closed"]] <- repo$issues_closed$totalCount + repo[["last_activity_at"]] <- last_activity_at + repo[["organization"]] <- repo$organization$login repo <- data.frame(repo) %>% dplyr::relocate( default_branch, @@ -200,7 +202,7 @@ EngineGraphQLGitHub <- R6::R6Class( repos, file_paths = NULL, host_files_structure = NULL, - verbose = TRUE, + verbose = TRUE, progress = TRUE) { repo_data <- private$get_repos_data( org = org, @@ -265,11 +267,11 @@ EngineGraphQLGitHub <- R6::R6Class( def_branches <- repo_data[["def_branches"]] files_structure <- purrr::map2(repositories, def_branches, function(repo, def_branch) { private$get_files_structure_from_repo( - org = org, - repo = repo, + org = org, + repo = repo, def_branch = def_branch, - pattern = pattern, - depth = depth + pattern = pattern, + depth = depth ) }, .progress = progress) names(files_structure) <- repositories @@ -293,7 +295,8 @@ EngineGraphQLGitHub <- R6::R6Class( user_data[["web_url"]] <- user_data$web_url %||% "" user_table <- tibble::as_tibble(user_data) %>% dplyr::relocate(c(commits, 
issues, pull_requests, reviews), - .after = starred_repos) + .after = starred_repos + ) } else { user_table <- NULL } @@ -431,27 +434,30 @@ EngineGraphQLGitHub <- R6::R6Class( commits_by_org_query <- self$gql_query$commits_from_repo( commits_cursor = commits_cursor ) - response <- tryCatch({ - self$gql_response( - gql_query = commits_by_org_query, - vars = list( - "org" = org, - "repo" = repo, - "since" = date_to_gts(since), - "until" = date_to_gts(until) + response <- tryCatch( + { + self$gql_response( + gql_query = commits_by_org_query, + vars = list( + "org" = org, + "repo" = repo, + "since" = date_to_gts(since), + "until" = date_to_gts(until) + ) ) - ) - }, error = function(e) { - self$gql_response( - gql_query = commits_by_org_query, - vars = list( - "org" = org, - "repo" = repo, - "since" = date_to_gts(since), - "until" = date_to_gts(until) + }, + error = function(e) { + self$gql_response( + gql_query = commits_by_org_query, + vars = list( + "org" = org, + "repo" = repo, + "since" = date_to_gts(since), + "until" = date_to_gts(until) + ) ) - ) - }) + } + ) return(response) }, From 85d9fdda7862cf67cc3948cf8e025013dc3d60f9 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Thu, 9 Jan 2025 15:35:17 +0000 Subject: [PATCH 82/99] Style. 
--- R/EngineGraphQLGitLab.R | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/R/EngineGraphQLGitLab.R b/R/EngineGraphQLGitLab.R index e806439e..a6ba5a76 100644 --- a/R/EngineGraphQLGitLab.R +++ b/R/EngineGraphQLGitLab.R @@ -24,7 +24,7 @@ EngineGraphQLGitLab <- R6::R6Class( response <- self$gql_response( gql_query = user_or_org_query, vars = list( - "username" = owner, + "username" = owner, "grouppath" = owner ) ) @@ -352,10 +352,10 @@ EngineGraphQLGitLab <- R6::R6Class( repositories <- repo_data[["repositories"]] files_structure <- purrr::map(repositories, function(repo) { private$get_files_structure_from_repo( - org = org, - repo = repo, + org = org, + repo = repo, pattern = pattern, - depth = depth + depth = depth ) }, .progress = progress) names(files_structure) <- repositories @@ -367,18 +367,20 @@ EngineGraphQLGitLab <- R6::R6Class( prepare_user_table = function(user_response) { if (!is.null(user_response$data$user)) { user_data <- user_response$data$user - user_data$name <- user_data$name %||% "" - user_data$starred_repos <- user_data$starred_repos$count - user_data$pull_requests <- user_data$pull_requests$count - user_data$reviews <- user_data$reviews$count - user_data$email <- user_data$email %||% "" - user_data$location <- user_data$location %||% "" - user_data$web_url <- user_data$web_url %||% "" - user_table <- tibble::as_tibble(user_data) %>% + user_data[["name"]] <- user_data$name %||% "" + user_data[["starred_repos"]] <- user_data$starred_repos$count + user_data[["pull_requests"]] <- user_data$pull_requests$count + user_data[["reviews"]] <- user_data$reviews$count + user_data[["email"]] <- user_data$email %||% "" + user_data[["location"]] <- user_data$location %||% "" + user_data[["web_url"]] <- user_data$web_url %||% "" + user_table <- tibble::as_tibble(user_data) |> dplyr::mutate(commits = NA, - issues = NA) %>% - dplyr::relocate(c(commits, issues), - .after = starred_repos) + issues = NA) |> + 
dplyr::relocate( + c(commits, issues), + .after = starred_repos + ) } else { user_table <- NULL } From 81d7235e2bee54efe0557ce477bbb6318ca3fe1a Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Fri, 10 Jan 2025 16:00:56 +0000 Subject: [PATCH 83/99] Fix tests for CRAN. Previously, snapshot tests (which do not run on CRAN) were mixed with "hard" tests that passed objects on to subsequent tests. On CRAN the chain broke, so NULL objects were passed on and the tests failed. --- R/test_helpers.R | 8 +++- tests/testthat/_snaps/get_release-GitLab.md | 8 ++++ tests/testthat/test-01-get_repos-GitHub.R | 18 +++++++- tests/testthat/test-get_release-GitLab.R | 46 ++++++++++++++++++++- 4 files changed, 76 insertions(+), 4 deletions(-) diff --git a/R/test_helpers.R b/R/test_helpers.R index 18afac35..5e827f18 100644 --- a/R/test_helpers.R +++ b/R/test_helpers.R @@ -33,7 +33,7 @@ GitHostGitHubTest <- R6::R6Class( private$set_api_url(host) private$set_web_url(host) private$set_endpoints() - private$check_if_public(host) + private$is_public <- FALSE private$token <- token private$set_graphql_url() private$set_orgs_and_repos_mocked(orgs, repos) @@ -43,7 +43,11 @@ GitHostGitHubTest <- R6::R6Class( ), private = list( set_orgs_and_repos_mocked = function(orgs, repos) { - private$orgs <- orgs + if (is.null(orgs) && is.null(repos)) { + private$scan_all <- TRUE + } else { + private$orgs <- orgs + } if (!is.null(repos)) { private$repos <- repos orgs_repos <- private$extract_repos_and_orgs(repos) diff --git a/tests/testthat/_snaps/get_release-GitLab.md b/tests/testthat/_snaps/get_release-GitLab.md index b456d7ca..804295ae 100644 --- a/tests/testthat/_snaps/get_release-GitLab.md +++ b/tests/testthat/_snaps/get_release-GitLab.md @@ -5,6 +5,14 @@ Output [1] "query GetReleasesFromRepo($project_path: ID!) 
{\n project(fullPath: $project_path) {\n name\n webUrl\n \t\t\t\t\t\treleases {\n nodes{\n name\n tagName\n releasedAt\n links {\n selfUrl\n }\n description\n }\n }\n }\n }" +# `get_release_logs_from_orgs()` prints proper message + + Code + releases_from_orgs <- gitlab_testhost_priv$get_release_logs_from_orgs(since = "2023-05-01", + until = "2023-09-30", verbose = TRUE, progress = FALSE) + Message + i [Host:GitLab][Engine:GraphQl][Scope:test_group] Pulling release logs... + # `get_release_logs_from_repos()` works Code diff --git a/tests/testthat/test-01-get_repos-GitHub.R b/tests/testthat/test-01-get_repos-GitHub.R index d302c97a..52d5a93f 100644 --- a/tests/testthat/test-01-get_repos-GitHub.R +++ b/tests/testthat/test-01-get_repos-GitHub.R @@ -478,6 +478,20 @@ test_that("`get_repos_with_code_from_host()` pulls raw response", { verbose = TRUE ) ) +}) + +test_that("`get_repos_with_code_from_host()` pulls raw response", { + mockery::stub( + github_testhost_priv$get_repos_with_code_from_host, + "rest_engine$get_repos_by_code", + test_mocker$use("gh_repos_by_code_raw") + ) + repos_with_code_from_host_raw <- github_testhost_priv$get_repos_with_code_from_host( + code = "shiny", + in_files = c("DESCRIPTION", "NAMESPACE"), + output = "raw", + verbose = FALSE + ) expect_type(repos_with_code_from_host_raw, "list") expect_gt(length(repos_with_code_from_host_raw), 0) test_mocker$cache(repos_with_code_from_host_raw) @@ -520,12 +534,14 @@ test_that("get_repos_with_code() works", { test_that("get_repos_with_code() scans whole host", { + github_testhost_priv <- create_github_testhost( + mode = "private" + ) mockery::stub( github_testhost_priv$get_repos_with_code, "private$get_repos_with_code_from_host", test_mocker$use("repos_with_code_from_host_raw") ) - github_testhost_priv$scan_all <- TRUE github_repos_with_code_raw <- github_testhost_priv$get_repos_with_code( code = "test-code", output = "raw", diff --git a/tests/testthat/test-get_release-GitLab.R 
b/tests/testthat/test-get_release-GitLab.R index 05d8eb4f..bbd36659 100644 --- a/tests/testthat/test-get_release-GitLab.R +++ b/tests/testthat/test-get_release-GitLab.R @@ -72,13 +72,38 @@ test_that("`get_release_logs_from_orgs()` works", { test_mocker$cache(releases_from_orgs) }) +test_that("`get_release_logs_from_orgs()` prints proper message", { + mockery::stub( + gitlab_testhost_priv$get_release_logs_from_orgs, + "graphql_engine$prepare_releases_table", + test_mocker$use("releases_table") + ) + mockery::stub( + gitlab_testhost_priv$get_release_logs_from_orgs, + "private$get_repos_names", + test_mocker$use("repos_names") + ) + gitlab_testhost_priv$searching_scope <- "org" + expect_snapshot( + releases_from_orgs <- gitlab_testhost_priv$get_release_logs_from_orgs( + since = "2023-05-01", + until = "2023-09-30", + verbose = TRUE, + progress = FALSE + ) + ) +}) + test_that("`get_release_logs_from_repos()` works", { + gitlab_testhost_priv <- create_gitlab_testhost( + repos = "test_org/TestRepo", + mode = "private" + ) mockery::stub( gitlab_testhost_priv$get_release_logs_from_repos, "graphql_engine$prepare_releases_table", test_mocker$use("releases_table") ) - gitlab_testhost_priv$searching_scope <- "repo" gitlab_testhost_priv$orgs_repos <- list("test_org" = "TestRepo") expect_snapshot( releases_from_repos <- gitlab_testhost_priv$get_release_logs_from_repos( @@ -88,6 +113,25 @@ test_that("`get_release_logs_from_repos()` works", { progress = FALSE ) ) +}) + +test_that("`get_release_logs_from_repos()` works", { + gitlab_testhost_priv <- create_gitlab_testhost( + repos = "test_org/TestRepo", + mode = "private" + ) + mockery::stub( + gitlab_testhost_priv$get_release_logs_from_repos, + "graphql_engine$prepare_releases_table", + test_mocker$use("releases_table") + ) + gitlab_testhost_priv$orgs_repos <- list("test_org" = "TestRepo") + releases_from_repos <- gitlab_testhost_priv$get_release_logs_from_repos( + since = "2023-05-01", + until = "2023-09-30", + verbose = FALSE, + 
progress = FALSE + ) expect_releases_table(releases_from_repos) test_mocker$cache(releases_from_repos) }) From ee5fd2641026a3d3beaab72443c5f90f18e338c1 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Fri, 10 Jan 2025 16:01:55 +0000 Subject: [PATCH 84/99] Bump version. --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index e4e35d81..286ab854 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: GitStats Title: Standardized Git Repository Data -Version: 2.2.0 +Version: 2.2.0.9000 Authors@R: c( person(given = "Maciej", family = "Banas", email = "banasmaciek@gmail.com", role = c("aut", "cre")), person(given = "Kamil", family = "Koziej", email = "koziej.k@gmail.com", role = "aut"), From e38999437a38efc5a7fb19c6701aa2d603aee002 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Mon, 13 Jan 2025 09:15:08 +0000 Subject: [PATCH 85/99] Bump version. --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 286ab854..e4e35d81 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: GitStats Title: Standardized Git Repository Data -Version: 2.2.0.9000 +Version: 2.2.0 Authors@R: c( person(given = "Maciej", family = "Banas", email = "banasmaciek@gmail.com", role = c("aut", "cre")), person(given = "Kamil", family = "Koziej", email = "koziej.k@gmail.com", role = "aut"), From 0ee9350544f26e1dd60892fae38e166de60d8492 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Tue, 14 Jan 2025 08:33:22 +0000 Subject: [PATCH 86/99] Reorganize files. 
--- R/{get_commits_stats.R => get_commits.R} | 46 +++++++ R/get_repos.R | 121 ++++++++++++++++ R/gitstats_functions.R | 168 ----------------------- man/get_commits.Rd | 2 +- man/get_commits_stats.Rd | 2 +- man/get_repos.Rd | 2 +- man/get_repos_urls.Rd | 2 +- 7 files changed, 171 insertions(+), 172 deletions(-) rename R/{get_commits_stats.R => get_commits.R} (60%) create mode 100644 R/get_repos.R diff --git a/R/get_commits_stats.R b/R/get_commits.R similarity index 60% rename from R/get_commits_stats.R rename to R/get_commits.R index 9e4b7eef..f7031ebb 100644 --- a/R/get_commits_stats.R +++ b/R/get_commits.R @@ -1,3 +1,49 @@ +#' @title Get data on commits +#' @name get_commits +#' @description List all commits from all repositories for an organization or a +#' vector of repositories. +#' @param gitstats A GitStats object. +#' @param since A starting date. +#' @param until An end date. +#' @param cache A logical, if set to `TRUE` GitStats will retrieve the last +#' result from its storage. +#' @param verbose A logical, `TRUE` by default. If `FALSE` messages and printing +#' output is switched off. +#' @param progress A logical, by default set to `verbose` value. If `FALSE` no +#' `cli` progress bar will be displayed. +#' @return A data.frame. 
+#' @examples +#' \dontrun{ +#' my_gitstats <- create_gitstats() %>% +#' set_github_host( +#' token = Sys.getenv("GITHUB_PAT"), +#' repos = c("openpharma/DataFakeR", "openpharma/visR") +#' ) %>% +#' set_gitlab_host( +#' token = Sys.getenv("GITLAB_PAT_PUBLIC"), +#' orgs = "mbtests" +#' ) +#' get_commits(my_gitstats, since = "2018-01-01") +#' } +#' @export +get_commits <- function(gitstats, + since = NULL, + until = Sys.Date() + lubridate::days(1), + cache = TRUE, + verbose = is_verbose(gitstats), + progress = verbose) { + if (is.null(since)) { + cli::cli_abort(cli::col_red("You need to pass date to `since` parameter."), call = NULL) + } + gitstats$get_commits( + since = since, + until = until, + cache = cache, + verbose = verbose, + progress = progress + ) +} + #' @title Get commits statistics #' @name get_commits_stats #' @description Prepare statistics from the pulled commits data. diff --git a/R/get_repos.R b/R/get_repos.R new file mode 100644 index 00000000..d6b4d19f --- /dev/null +++ b/R/get_repos.R @@ -0,0 +1,121 @@ +#' @title Get data on repositories +#' @name get_repos +#' @description Pulls data on all repositories for an organization, individual +#' user or those with a given text in code blobs (`with_code` parameter) or a +#' file (`with_files` parameter) and parse it into table format. +#' @param gitstats A GitStats object. +#' @param add_contributors A logical parameter to decide whether to add +#' information about repositories' contributors to the repositories output +#' (table). If set to `FALSE` it makes function run faster as, in the case of +#' `org` search mode, it reaches only `GraphQL` endpoint with a query on +#' repositories, and in the case of `code` search mode it reaches only +#' `repositories REST API` endpoint. However, the pitfall is that the result +#' does not convey information on contributors. 
\cr\cr When set to `TRUE` (by +#' default), `GitStats` iterates additionally over pulled repositories and +#' reaches to the `contributors APIs`, which makes it slower, but gives +#' additional information. +#' @param with_code A character vector, if defined, GitStats will pull +#' repositories with specified code phrases in code blobs. +#' @param in_files A character vector of file names. Works when `with_code` is +#' set - then it searches code blobs only in files passed to `in_files` +#' parameter. +#' @param with_files A character vector, if defined, GitStats will pull +#' repositories with specified files. +#' @param cache A logical, if set to `TRUE` GitStats will retrieve the last +#' result from its storage. +#' @param verbose A logical, `TRUE` by default. If `FALSE` messages and printing +#' output is switched off. +#' @param progress A logical, by default set to `verbose` value. If `FALSE` no +#' `cli` progress bar will be displayed. +#' @return A data.frame. +#' @examples +#' \dontrun{ +#' my_gitstats <- create_gitstats() %>% +#' set_github_host( +#' token = Sys.getenv("GITHUB_PAT"), +#' orgs = c("r-world-devs", "openpharma") +#' ) %>% +#' set_gitlab_host( +#' token = Sys.getenv("GITLAB_PAT_PUBLIC"), +#' orgs = "mbtests" +#' ) +#' get_repos(my_gitstats) +#' get_repos(my_gitstats, add_contributors = FALSE) +#' get_repos(my_gitstats, with_code = "Shiny", in_files = "renv.lock") +#' get_repos(my_gitstats, with_files = "DESCRIPTION") +#' } +#' @export +get_repos <- function(gitstats, + add_contributors = TRUE, + with_code = NULL, + in_files = NULL, + with_files = NULL, + cache = TRUE, + verbose = is_verbose(gitstats), + progress = verbose) { + gitstats$get_repos( + add_contributors = add_contributors, + with_code = with_code, + in_files = in_files, + with_files = with_files, + cache = cache, + verbose = verbose, + progress = progress + ) +} + +#' @title Get repository URLS +#' @name get_repos_urls +#' @description Pulls a vector of repositories URLs (web or 
API): either all for +#' an organization or those with a given text in code blobs (`with_code` +#' parameter) or a file (`with_files` parameter). +#' @param gitstats A GitStats object. +#' @param type A character, choose if `api` or `web` (`html`) URLs should be +#' returned. +#' @param with_code A character vector, if defined, GitStats will pull +#' repositories with specified code phrases in code blobs. +#' @param in_files A character vector of file names. Works when `with_code` is +#' set - then it searches code blobs only in files passed to `in_files` +#' parameter. +#' @param with_files A character vector, if defined, GitStats will pull +#' repositories with specified files. +#' @param cache A logical, if set to `TRUE` GitStats will retrieve the last +#' result from its storage. +#' @param verbose A logical, `TRUE` by default. If `FALSE` messages and printing +#' output is switched off. +#' @param progress A logical, by default set to `verbose` value. If `FALSE` no +#' `cli` progress bar will be displayed. +#' @return A character vector. 
+#' @examples +#' \dontrun{ +#' my_gitstats <- create_gitstats() %>% +#' set_github_host( +#' token = Sys.getenv("GITHUB_PAT"), +#' orgs = c("r-world-devs", "openpharma") +#' ) %>% +#' set_gitlab_host( +#' token = Sys.getenv("GITLAB_PAT_PUBLIC"), +#' orgs = "mbtests" +#' ) +#' get_repos_urls(my_gitstats, type = "api") +#' get_repos_urls(my_gitstats, with_files = c("DESCRIPTION", "LICENSE")) +#' } +#' @export +get_repos_urls <- function(gitstats, + type = "web", + with_code = NULL, + in_files = NULL, + with_files = NULL, + cache = TRUE, + verbose = is_verbose(gitstats), + progress = verbose) { + gitstats$get_repos_urls( + type = type, + with_code = with_code, + in_files = in_files, + with_files = with_files, + cache = cache, + verbose = verbose, + progress = progress + ) +} diff --git a/R/gitstats_functions.R b/R/gitstats_functions.R index 0af1c0a5..27a01d55 100644 --- a/R/gitstats_functions.R +++ b/R/gitstats_functions.R @@ -8,174 +8,6 @@ create_gitstats <- function() { GitStats$new() } -#' @title Get data on repositories -#' @name get_repos -#' @description Pulls data on all repositories for an organization, individual -#' user or those with a given text in code blobs (`with_code` parameter) or a -#' file (`with_files` parameter) and parse it into table format. -#' @param gitstats A GitStats object. -#' @param add_contributors A logical parameter to decide whether to add -#' information about repositories' contributors to the repositories output -#' (table). If set to `FALSE` it makes function run faster as, in the case of -#' `org` search mode, it reaches only `GraphQL` endpoint with a query on -#' repositories, and in the case of `code` search mode it reaches only -#' `repositories REST API` endpoint. However, the pitfall is that the result -#' does not convey information on contributors. 
\cr\cr When set to `TRUE` (by -#' default), `GitStats` iterates additionally over pulled repositories and -#' reaches to the `contributors APIs`, which makes it slower, but gives -#' additional information. -#' @param with_code A character vector, if defined, GitStats will pull -#' repositories with specified code phrases in code blobs. -#' @param in_files A character vector of file names. Works when `with_code` is -#' set - then it searches code blobs only in files passed to `in_files` -#' parameter. -#' @param with_files A character vector, if defined, GitStats will pull -#' repositories with specified files. -#' @param cache A logical, if set to `TRUE` GitStats will retrieve the last -#' result from its storage. -#' @param verbose A logical, `TRUE` by default. If `FALSE` messages and printing -#' output is switched off. -#' @param progress A logical, by default set to `verbose` value. If `FALSE` no -#' `cli` progress bar will be displayed. -#' @return A data.frame. -#' @examples -#' \dontrun{ -#' my_gitstats <- create_gitstats() %>% -#' set_github_host( -#' token = Sys.getenv("GITHUB_PAT"), -#' orgs = c("r-world-devs", "openpharma") -#' ) %>% -#' set_gitlab_host( -#' token = Sys.getenv("GITLAB_PAT_PUBLIC"), -#' orgs = "mbtests" -#' ) -#' get_repos(my_gitstats) -#' get_repos(my_gitstats, add_contributors = FALSE) -#' get_repos(my_gitstats, with_code = "Shiny", in_files = "renv.lock") -#' get_repos(my_gitstats, with_files = "DESCRIPTION") -#' } -#' @export -get_repos <- function(gitstats, - add_contributors = TRUE, - with_code = NULL, - in_files = NULL, - with_files = NULL, - cache = TRUE, - verbose = is_verbose(gitstats), - progress = verbose) { - gitstats$get_repos( - add_contributors = add_contributors, - with_code = with_code, - in_files = in_files, - with_files = with_files, - cache = cache, - verbose = verbose, - progress = progress - ) -} - -#' @title Get repository URLS -#' @name get_repos_urls -#' @description Pulls a vector of repositories URLs (web or 
API): either all for -#' an organization or those with a given text in code blobs (`with_code` -#' parameter) or a file (`with_files` parameter). -#' @param gitstats A GitStats object. -#' @param type A character, choose if `api` or `web` (`html`) URLs should be -#' returned. -#' @param with_code A character vector, if defined, GitStats will pull -#' repositories with specified code phrases in code blobs. -#' @param in_files A character vector of file names. Works when `with_code` is -#' set - then it searches code blobs only in files passed to `in_files` -#' parameter. -#' @param with_files A character vector, if defined, GitStats will pull -#' repositories with specified files. -#' @param cache A logical, if set to `TRUE` GitStats will retrieve the last -#' result from its storage. -#' @param verbose A logical, `TRUE` by default. If `FALSE` messages and printing -#' output is switched off. -#' @param progress A logical, by default set to `verbose` value. If `FALSE` no -#' `cli` progress bar will be displayed. -#' @return A character vector. 
-#' @examples -#' \dontrun{ -#' my_gitstats <- create_gitstats() %>% -#' set_github_host( -#' token = Sys.getenv("GITHUB_PAT"), -#' orgs = c("r-world-devs", "openpharma") -#' ) %>% -#' set_gitlab_host( -#' token = Sys.getenv("GITLAB_PAT_PUBLIC"), -#' orgs = "mbtests" -#' ) -#' get_repos_urls(my_gitstats, type = "api") -#' get_repos_urls(my_gitstats, with_files = c("DESCRIPTION", "LICENSE")) -#' } -#' @export -get_repos_urls <- function(gitstats, - type = "web", - with_code = NULL, - in_files = NULL, - with_files = NULL, - cache = TRUE, - verbose = is_verbose(gitstats), - progress = verbose) { - gitstats$get_repos_urls( - type = type, - with_code = with_code, - in_files = in_files, - with_files = with_files, - cache = cache, - verbose = verbose, - progress = progress - ) -} - -#' @title Get data on commits -#' @name get_commits -#' @description List all commits from all repositories for an organization or a -#' vector of repositories. -#' @param gitstats A GitStats object. -#' @param since A starting date. -#' @param until An end date. -#' @param cache A logical, if set to `TRUE` GitStats will retrieve the last -#' result from its storage. -#' @param verbose A logical, `TRUE` by default. If `FALSE` messages and printing -#' output is switched off. -#' @param progress A logical, by default set to `verbose` value. If `FALSE` no -#' `cli` progress bar will be displayed. -#' @return A data.frame. 
-#' @examples -#' \dontrun{ -#' my_gitstats <- create_gitstats() %>% -#' set_github_host( -#' token = Sys.getenv("GITHUB_PAT"), -#' repos = c("openpharma/DataFakeR", "openpharma/visR") -#' ) %>% -#' set_gitlab_host( -#' token = Sys.getenv("GITLAB_PAT_PUBLIC"), -#' orgs = "mbtests" -#' ) -#' get_commits(my_gitstats, since = "2018-01-01") -#' } -#' @export -get_commits <- function(gitstats, - since = NULL, - until = Sys.Date() + lubridate::days(1), - cache = TRUE, - verbose = is_verbose(gitstats), - progress = verbose) { - if (is.null(since)) { - cli::cli_abort(cli::col_red("You need to pass date to `since` parameter."), call = NULL) - } - gitstats$get_commits( - since = since, - until = until, - cache = cache, - verbose = verbose, - progress = progress - ) -} - #' @title Get users data #' @name get_users #' @param gitstats A GitStats object. diff --git a/man/get_commits.Rd b/man/get_commits.Rd index 6358cb43..aa9155d7 100644 --- a/man/get_commits.Rd +++ b/man/get_commits.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gitstats_functions.R +% Please edit documentation in R/get_commits.R \name{get_commits} \alias{get_commits} \title{Get data on commits} diff --git a/man/get_commits_stats.Rd b/man/get_commits_stats.Rd index b2a64d0d..497c9597 100644 --- a/man/get_commits_stats.Rd +++ b/man/get_commits_stats.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_commits_stats.R +% Please edit documentation in R/get_commits.R \name{get_commits_stats} \alias{get_commits_stats} \title{Get commits statistics} diff --git a/man/get_repos.Rd b/man/get_repos.Rd index 97f36a6c..5f6d9811 100644 --- a/man/get_repos.Rd +++ b/man/get_repos.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gitstats_functions.R +% Please edit documentation in R/get_repos.R \name{get_repos} \alias{get_repos} \title{Get data on repositories} diff --git 
a/man/get_repos_urls.Rd b/man/get_repos_urls.Rd index 2b5b21bc..f136b207 100644 --- a/man/get_repos_urls.Rd +++ b/man/get_repos_urls.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gitstats_functions.R +% Please edit documentation in R/get_repos.R \name{get_repos_urls} \alias{get_repos_urls} \title{Get repository URLS} From d87fe980a91a1acd1ca1112e4c7b94f1fe8d920c Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Tue, 14 Jan 2025 08:33:33 +0000 Subject: [PATCH 87/99] Bump to dev version. --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index e4e35d81..90e867e8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: GitStats Title: Standardized Git Repository Data -Version: 2.2.0 +Version: 2.1.0.9010 Authors@R: c( person(given = "Maciej", family = "Banas", email = "banasmaciek@gmail.com", role = c("aut", "cre")), person(given = "Kamil", family = "Koziej", email = "koziej.k@gmail.com", role = "aut"), From 2758464b515b6a7c5414f3db84503e0bf50cb26d Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Tue, 14 Jan 2025 08:34:21 +0000 Subject: [PATCH 88/99] Add more info on function parameters. --- R/get_files.R | 35 ++++++++++++++++++++++------------- man/get_files.Rd | 35 ++++++++++++++++++++++------------- 2 files changed, 44 insertions(+), 26 deletions(-) diff --git a/R/get_files.R b/R/get_files.R index cb65c508..79f0bf82 100644 --- a/R/get_files.R +++ b/R/get_files.R @@ -2,16 +2,18 @@ #' @name get_files #' @description Pulls text files and their content. #' @param gitstats A `GitStats` object. -#' @param pattern A regular expression. If defined, it pulls file structure for -#' a repository matching this pattern. Can be defined if `file_path` stays +#' @param pattern A regular expression. If defined, it pulls content of all +#' files in a repository matching this pattern reaching to the level of +#' directories defined by `depth` parameter. 
Works only if `file_path` stays #' `NULL`. -#' @param depth An optional integer. Defines level of directories to retrieve -#' files from. E.g. if set to `0`, it will pull files only from root, if `1L`, -#' will take data from `root` directory and directories visible in `root` -#' directory. If left with no argument, will pull files from all directories. -#' @param file_path Optional. A standardized path to file(s) in repositories. -#' May be a character vector if multiple files are to be pulled. Can be -#' defined if `pattern` stays `NULL`. +#' @param depth Defines level of directories to retrieve files from. E.g. if set +#' to `0`, it will pull files only from root, if `1L`, will take data from +#' `root` directory and directories visible in `root` directory. If left with +#' no argument, will pull files from all directories. +#' @param file_path A specific path to file(s) in repositories. If defined, the +#' function pulls content of this specific `file_path`. May be a character +#' vector if multiple files are to be pulled. Can be defined only if `pattern` +#' stays `NULL`. #' @param cache A logical, if set to `TRUE` GitStats will retrieve the last #' result from its storage. #' @param verbose A logical, `TRUE` by default. If `FALSE` messages and printing @@ -20,20 +22,27 @@ #' `cli` progress bar will be displayed. #' @examples #' \dontrun{ -#' rmd_files <- create_gitstats() |> +#' git_stats <- create_gitstats() |> #' set_github_host( #' token = Sys.getenv("GITHUB_PAT"), #' orgs = c("r-world-devs") -#' ) %>% +#' ) |> #' set_gitlab_host( #' token = Sys.getenv("GITLAB_PAT_PUBLIC"), #' orgs = "mbtests" -#' ) |> -#' get_files( +#' ) +#' +#' rmd_files <- get_files( +#' gitstats = git_stats, #' pattern = "\\.Rmd", #' depth = 2L #' ) #' +#' app_files <- get_files( +#' gitstats = git_stats, +#' file_path = "R/app.R" +#' ) +#' #' } #' @return A data.frame. 
#' @export diff --git a/man/get_files.Rd b/man/get_files.Rd index d6439ab0..ca883b3d 100644 --- a/man/get_files.Rd +++ b/man/get_files.Rd @@ -17,18 +17,20 @@ get_files( \arguments{ \item{gitstats}{A \code{GitStats} object.} -\item{pattern}{A regular expression. If defined, it pulls file structure for -a repository matching this pattern. Can be defined if \code{file_path} stays +\item{pattern}{A regular expression. If defined, it pulls content of all +files in a repository matching this pattern reaching to the level of +directories defined by \code{depth} parameter. Works only if \code{file_path} stays \code{NULL}.} -\item{depth}{An optional integer. Defines level of directories to retrieve -files from. E.g. if set to \code{0}, it will pull files only from root, if \code{1L}, -will take data from \code{root} directory and directories visible in \code{root} -directory. If left with no argument, will pull files from all directories.} +\item{depth}{Defines level of directories to retrieve files from. E.g. if set +to \code{0}, it will pull files only from root, if \code{1L}, will take data from +\code{root} directory and directories visible in \code{root} directory. If left with +no argument, will pull files from all directories.} -\item{file_path}{Optional. A standardized path to file(s) in repositories. -May be a character vector if multiple files are to be pulled. Can be -defined if \code{pattern} stays \code{NULL}.} +\item{file_path}{A specific path to file(s) in repositories. If defined, the +function pulls content of this specific \code{file_path}. May be a character +vector if multiple files are to be pulled. Can be defined only if \code{pattern} +stays \code{NULL}.} \item{cache}{A logical, if set to \code{TRUE} GitStats will retrieve the last result from its storage.} @@ -47,19 +49,26 @@ Pulls text files and their content. 
} \examples{ \dontrun{ - rmd_files <- create_gitstats() |> + git_stats <- create_gitstats() |> set_github_host( token = Sys.getenv("GITHUB_PAT"), orgs = c("r-world-devs") - ) \%>\% + ) |> set_gitlab_host( token = Sys.getenv("GITLAB_PAT_PUBLIC"), orgs = "mbtests" - ) |> - get_files( + ) + + rmd_files <- get_files( + gitstats = git_stats, pattern = "\\\\.Rmd", depth = 2L ) + app_files <- get_files( + gitstats = git_stats, + file_path = "R/app.R" + ) + } } From d7a0e878c61f9932afdce4a5b3f1dcbdeb0fd07a Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Tue, 14 Jan 2025 08:51:32 +0000 Subject: [PATCH 89/99] Enhance docs. --- R/get_files.R | 8 +++++--- man/get_files.Rd | 8 +++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/R/get_files.R b/R/get_files.R index 79f0bf82..94fbf45c 100644 --- a/R/get_files.R +++ b/R/get_files.R @@ -7,14 +7,16 @@ #' directories defined by `depth` parameter. Works only if `file_path` stays #' `NULL`. #' @param depth Defines level of directories to retrieve files from. E.g. if set -#' to `0`, it will pull files only from root, if `1L`, will take data from +#' to `0`, it will pull files only from `root`, if `1L`, will take data from #' `root` directory and directories visible in `root` directory. If left with #' no argument, will pull files from all directories. #' @param file_path A specific path to file(s) in repositories. If defined, the #' function pulls content of this specific `file_path`. May be a character #' vector if multiple files are to be pulled. Can be defined only if `pattern` -#' stays `NULL`. -#' @param cache A logical, if set to `TRUE` GitStats will retrieve the last +#' stays `NULL`. It is more efficient to use `file_path` if we know exact file +#' from the repository we want to get, e.g. `DESCRIPTION` from `root` +#' directory or `R/app.R`. +#' @param cache A logical, if set to `TRUE` `GitStats` will retrieve the last #' result from its storage. #' @param verbose A logical, `TRUE` by default. 
If `FALSE` messages and printing #' output is switched off. diff --git a/man/get_files.Rd b/man/get_files.Rd index ca883b3d..a1648844 100644 --- a/man/get_files.Rd +++ b/man/get_files.Rd @@ -23,16 +23,18 @@ directories defined by \code{depth} parameter. Works only if \code{file_path} st \code{NULL}.} \item{depth}{Defines level of directories to retrieve files from. E.g. if set -to \code{0}, it will pull files only from root, if \code{1L}, will take data from +to \code{0}, it will pull files only from \code{root}, if \code{1L}, will take data from \code{root} directory and directories visible in \code{root} directory. If left with no argument, will pull files from all directories.} \item{file_path}{A specific path to file(s) in repositories. If defined, the function pulls content of this specific \code{file_path}. May be a character vector if multiple files are to be pulled. Can be defined only if \code{pattern} -stays \code{NULL}.} +stays \code{NULL}. It is more efficient to use \code{file_path} if we know exact file +from the repository we want to get, e.g. \code{DESCRIPTION} from \code{root} +directory or \code{R/app.R}.} -\item{cache}{A logical, if set to \code{TRUE} GitStats will retrieve the last +\item{cache}{A logical, if set to \code{TRUE} \code{GitStats} will retrieve the last result from its storage.} \item{verbose}{A logical, \code{TRUE} by default. If \code{FALSE} messages and printing From 3d1aeed624d07ffdf885ae77522255ac9bc9b5e1 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Tue, 14 Jan 2025 08:53:38 +0000 Subject: [PATCH 90/99] Fix caching when user uses same arguments for `file_path` and `pattern` in following function calls.
--- R/GitStats.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/GitStats.R b/R/GitStats.R index e3ca54bb..42fa3c27 100644 --- a/R/GitStats.R +++ b/R/GitStats.R @@ -211,7 +211,7 @@ GitStats <- R6::R6Class( progress = verbose) { private$check_for_host() args_list <- list( - "file_pattern" = paste0(file_path, pattern), + "file_pattern" = paste(file_path, pattern), "depth" = depth ) trigger <- private$trigger_pulling( @@ -1007,7 +1007,7 @@ GitStats <- R6::R6Class( } attr_data <- attr_data %>% paste0(collapse = separator) } - return(cli::col_grey(glue::glue("[{attr_name}: {attr_data}]"))) + return(cli::col_grey(glue::glue("[{attr_name}: {trimws(attr_data)}]"))) } else { return("") } From c9415367e92e8a3bcf66d491832d0c56aa0bb625 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Tue, 14 Jan 2025 09:02:57 +0000 Subject: [PATCH 91/99] Fix workflow. --- inst/cache_workflow.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/inst/cache_workflow.R b/inst/cache_workflow.R index e3d3a7b7..ec1a9b93 100644 --- a/inst/cache_workflow.R +++ b/inst/cache_workflow.R @@ -1,4 +1,4 @@ -test_gitstats <- create_gitstats() %>% +test_gitstats <- create_gitstats() |> set_github_host( orgs = "openpharma" ) @@ -74,13 +74,13 @@ release_logs <- get_release_logs(test_gitstats, since = "2024-01-01") release_logs <- get_release_logs(test_gitstats, since = "2024-01-01") # no cache -package_usage <- get_R_package_usage(test_gitstats, package_name = "shiny") +package_usage <- get_R_package_usage(test_gitstats, packages = "shiny") # cache -package_usage <- get_R_package_usage(test_gitstats, package_name = "shiny") +package_usage <- get_R_package_usage(test_gitstats, packages = "shiny") # no cache -package_usage <- get_R_package_usage(test_gitstats, package_name = "shiny", cache = FALSE) +package_usage <- get_R_package_usage(test_gitstats, packages = "shiny", cache = FALSE) # no cache -package_usage <- get_R_package_usage(test_gitstats, package_name = 
"shiny", only_loading = TRUE) +package_usage <- get_R_package_usage(test_gitstats, packages = "shiny", only_loading = TRUE) # cache -package_usage <- get_R_package_usage(test_gitstats, package_name = "shiny", only_loading = TRUE) +package_usage <- get_R_package_usage(test_gitstats, packages = "shiny", only_loading = TRUE) From 0445f71ff237b151181637825e8da8da3e357dc8 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Tue, 14 Jan 2025 09:06:41 +0000 Subject: [PATCH 92/99] Fix examples. --- inst/get_commits_workflow.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/get_commits_workflow.R b/inst/get_commits_workflow.R index 8e66b709..7e62be0f 100644 --- a/inst/get_commits_workflow.R +++ b/inst/get_commits_workflow.R @@ -19,7 +19,7 @@ release_logs <- get_release_logs( release_logs # Check printing in storage -test_gitstats +git_stats get_commits( gitstats = git_stats, @@ -27,7 +27,7 @@ get_commits( ) # Check printing in storage -test_gitstats +git_stats commits_stats <- create_gitstats() %>% set_github_host( From 11ad4dfa8ba8b3bf8dd7439ebff0c0385fa0fa26 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Tue, 14 Jan 2025 09:33:01 +0000 Subject: [PATCH 93/99] Cover code with tests. 
--- tests/testthat/test-get_urls_repos-GitStats.R | 30 +++++++++++++++++-- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-get_urls_repos-GitStats.R b/tests/testthat/test-get_urls_repos-GitStats.R index 87266b6f..23c5c3fa 100644 --- a/tests/testthat/test-get_urls_repos-GitStats.R +++ b/tests/testthat/test-get_urls_repos-GitStats.R @@ -82,17 +82,41 @@ test_that("get_repos_urls gets vector of repository URLS", { "private$get_repos_urls_from_hosts", test_mocker$use("repos_urls_from_hosts_with_code_in_files") ) - repo_urls <- test_gitstats$get_repos_urls( + repos_urls <- test_gitstats$get_repos_urls( with_code = "shiny", in_files = "DESCRIPTION", verbose = FALSE ) expect_type( - repo_urls, + repos_urls, "character" ) expect_gt( - length(repo_urls), + length(repos_urls), + 1 + ) + test_mocker$cache(repos_urls) +}) + +test_that("get_repos_urls gets vector of repository URLS", { + test_gitstats <- create_test_gitstats(hosts = 2) + mockery::stub( + get_repos_urls, + "gitstats$get_repos_urls", + test_mocker$use("repos_urls") + ) + repos_urls <- get_repos_urls( + gitstats = test_gitstats, + with_code = "shiny", + in_files = "DESCRIPTION", + verbose = FALSE + ) + expect_type( + repos_urls, + "character" + ) + expect_gt( + length(repos_urls), 1 ) }) From f9d915cd6c9d23d57569f67e61dec06e520deafe Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Tue, 14 Jan 2025 09:57:32 +0000 Subject: [PATCH 94/99] Typo. 
--- inst/example_workflow.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/example_workflow.R b/inst/example_workflow.R index 2b17f83f..62979c41 100644 --- a/inst/example_workflow.R +++ b/inst/example_workflow.R @@ -72,6 +72,6 @@ get_release_logs( get_release_logs( gitstats = git_stats, - sinces = "2024-06-01", + since = "2024-06-01", verbose = TRUE ) From 828aa5c37725ac31abe867e7e0c6d296447fba6c Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Tue, 14 Jan 2025 12:04:47 +0000 Subject: [PATCH 95/99] Add more docs on function usage and forbid passing vector as pattern argument. --- R/get_files.R | 41 ++++++++++++++++++++++----- man/get_files.Rd | 36 ++++++++++++++++++----- tests/testthat/_snaps/05-get_files.md | 4 +++ tests/testthat/test-05-get_files.R | 9 ++++++ 4 files changed, 76 insertions(+), 14 deletions(-) diff --git a/R/get_files.R b/R/get_files.R index 94fbf45c..df6a3e70 100644 --- a/R/get_files.R +++ b/R/get_files.R @@ -10,18 +10,39 @@ #' to `0`, it will pull files only from `root`, if `1L`, will take data from #' `root` directory and directories visible in `root` directory. If left with #' no argument, will pull files from all directories. -#' @param file_path A specific path to file(s) in repositories. If defined, the -#' function pulls content of this specific `file_path`. May be a character -#' vector if multiple files are to be pulled. Can be defined only if `pattern` -#' stays `NULL`. It is more efficient to use `file_path` if we know exact file -#' from the repository we want to get, e.g. `DESCRIPTION` from `root` -#' directory or `R/app.R`. +#' @param file_path A specific path to file(s) in repositories. May be a +#' character vector if multiple files are to be pulled. If defined, the +#' function pulls content of this specific `file_path`. Can be defined only if +#' `pattern` stays `NULL`. #' @param cache A logical, if set to `TRUE` `GitStats` will retrieve the last #' result from its storage. 
#' @param verbose A logical, `TRUE` by default. If `FALSE` messages and printing #' output is switched off. #' @param progress A logical, by default set to `verbose` value. If `FALSE` no #' `cli` progress bar will be displayed. +#' @details `get_files()` may be used in two ways: either with `pattern` (with +#' optional `depth`) or `file_path` argument defined. +#' +#' In the first scenario `GitStats` will pull first a files structure +#' responding to the passed `pattern` and `depth` arguments and afterwards +#' files content for all of these files. In the second scenario `GitStats` +#' will pull only the content of files for the given `file_path` of the +#' repository. +#' +#' If user wants to pull a particular file or files, a `file_path` approach +#' seems to more reasonable, a it is a faster way (it omits pulling the whole +#' file structure from the repo). +#' +#' On the other hand, if user wants to pull specific type of files (e.g. `.md` +#' or `.Rmd` files), without knowing their path, it is recommended to use a +#' `pattern` approach, which will trigger `GitStats` to find all the files in +#' the repository on the given level of directories (`pattern` argument) and +#' afterwards pull their content. +#' +#' The latter approach is slower than the former but may be more useful +#' depending on users' goals. Both approaches return data in the same format: +#' `tibble` with data on `files`, namely their `path` and their `content`. 
+#' #' @examples #' \dontrun{ #' git_stats <- create_gitstats() |> @@ -42,7 +63,7 @@ #' #' app_files <- get_files( #' gitstats = git_stats, -#' file_path = "R/app.R" +#' file_path = c("R/app.R", "R/ui.R", "R/server.R") #' ) #' #' } @@ -61,6 +82,12 @@ get_files <- function(gitstats, call = NULL ) } + if (!is.null(pattern) && length(pattern) > 1) { + cli::cli_abort( + "Please define regex in one string.", + call = NULL + ) + } gitstats$get_files( pattern = pattern, depth = depth, diff --git a/man/get_files.Rd b/man/get_files.Rd index a1648844..af441c5d 100644 --- a/man/get_files.Rd +++ b/man/get_files.Rd @@ -27,12 +27,10 @@ to \code{0}, it will pull files only from \code{root}, if \code{1L}, will take d \code{root} directory and directories visible in \code{root} directory. If left with no argument, will pull files from all directories.} -\item{file_path}{A specific path to file(s) in repositories. If defined, the -function pulls content of this specific \code{file_path}. May be a character -vector if multiple files are to be pulled. Can be defined only if \code{pattern} -stays \code{NULL}. It is more efficient to use \code{file_path} if we know exact file -from the repository we want to get, e.g. \code{DESCRIPTION} from \code{root} -directory or \code{R/app.R}.} +\item{file_path}{A specific path to file(s) in repositories. May be a +character vector if multiple files are to be pulled. If defined, the +function pulls content of this specific \code{file_path}. Can be defined only if +\code{pattern} stays \code{NULL}.} \item{cache}{A logical, if set to \code{TRUE} \code{GitStats} will retrieve the last result from its storage.} @@ -49,6 +47,30 @@ A data.frame. \description{ Pulls text files and their content. } +\details{ +\code{get_files()} may be used in two ways: either with \code{pattern} (with +optional \code{depth}) or \code{file_path} argument defined. 
+ +In the first scenario \code{GitStats} will pull first a files structure +responding to the passed \code{pattern} and \code{depth} arguments and afterwards +files content for all of these files. In the second scenario \code{GitStats} +will pull only the content of files for the given \code{file_path} of the +repository. + +If user wants to pull a particular file or files, a \code{file_path} approach +seems to more reasonable, a it is a faster way (it omits pulling the whole +file structure from the repo). + +On the other hand, if user wants to pull specific type of files (e.g. \code{.md} +or \code{.Rmd} files), without knowing their path, it is recommended to use a +\code{pattern} approach, which will trigger \code{GitStats} to find all the files in +the repository on the given level of directories (\code{pattern} argument) and +afterwards pull their content. + +The latter approach is slower than the former but may be more useful +depending on users' goals. Both approaches return data in the same format: +\code{tibble} with data on \code{files}, namely their \code{path} and their \code{content}. +} \examples{ \dontrun{ git_stats <- create_gitstats() |> @@ -69,7 +91,7 @@ Pulls text files and their content. app_files <- get_files( gitstats = git_stats, - file_path = "R/app.R" + file_path = c("R/app.R", "R/ui.R", "R/server.R") ) } diff --git a/tests/testthat/_snaps/05-get_files.md b/tests/testthat/_snaps/05-get_files.md index ab594ce2..89eadac4 100644 --- a/tests/testthat/_snaps/05-get_files.md +++ b/tests/testthat/_snaps/05-get_files.md @@ -2,3 +2,7 @@ Please choose either `pattern` or `file_path`. +# error shows when pattern is defined as vector + + Please define regex in one string. 
+ diff --git a/tests/testthat/test-05-get_files.R b/tests/testthat/test-05-get_files.R index b6590d52..e4237fb4 100644 --- a/tests/testthat/test-05-get_files.R +++ b/tests/testthat/test-05-get_files.R @@ -94,3 +94,12 @@ test_that("error shows when file_path and pattern are defined at the same time", progress = FALSE) ) }) + +test_that("error shows when pattern is defined as vector", { + expect_snapshot_error( + get_files(test_gitstats, + pattern = c("\\.md", "meta_data.yaml"), + verbose = FALSE, + progress = FALSE) + ) +}) From 8e390d0a0430d0fc1dac4b94d2c54cbd69552fab Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Tue, 14 Jan 2025 12:30:21 +0000 Subject: [PATCH 96/99] Return possibility to pass a vector to pattern argument, fix pulling and showing file pattern when it is a vector, add more docs. --- R/EngineGraphQL.R | 2 +- R/GitHost.R | 8 ++++++-- R/get_files.R | 20 +++++++++----------- man/get_files.Rd | 14 +++++++++----- tests/testthat/_snaps/05-get_files.md | 4 ---- tests/testthat/test-05-get_files.R | 9 --------- 6 files changed, 25 insertions(+), 32 deletions(-) diff --git a/R/EngineGraphQL.R b/R/EngineGraphQL.R index 99d7f7aa..382cbbb2 100644 --- a/R/EngineGraphQL.R +++ b/R/EngineGraphQL.R @@ -71,7 +71,7 @@ EngineGraphQL <- R6::R6Class( }, filter_files_by_pattern = function(files_structure, pattern) { - files_structure[grepl(pattern, files_structure)] + files_structure[grepl(paste0(pattern, collapse = "|"), files_structure)] }, get_path_from_files_structure = function(host_files_structure, diff --git a/R/GitHost.R b/R/GitHost.R index 54f1e3cb..4242406a 100644 --- a/R/GitHost.R +++ b/R/GitHost.R @@ -1226,7 +1226,9 @@ GitHost <- R6::R6Class( files_structure_list <- purrr::map(private$orgs, function(org) { if (verbose) { user_info <- if (!is.null(pattern)) { - glue::glue("Pulling files structure...[files matching pattern: '{pattern}']") + glue::glue( + "Pulling files structure...[files matching pattern: '{paste0(pattern, collapse = '|')}']" + ) } else { 
glue::glue("Pulling files structure...") } @@ -1273,7 +1275,9 @@ GitHost <- R6::R6Class( files_structure_list <- purrr::map(orgs, function(org) { if (verbose) { user_info <- if (!is.null(pattern)) { - glue::glue("Pulling files structure...[files matching pattern: '{pattern}']") + glue::glue( + "Pulling files structure...[files matching pattern: '{paste0(pattern, collapse = '|')}']" + ) } else { glue::glue("Pulling files structure...") } diff --git a/R/get_files.R b/R/get_files.R index df6a3e70..67b0362f 100644 --- a/R/get_files.R +++ b/R/get_files.R @@ -33,11 +33,15 @@ #' seems to more reasonable, a it is a faster way (it omits pulling the whole #' file structure from the repo). #' -#' On the other hand, if user wants to pull specific type of files (e.g. `.md` -#' or `.Rmd` files), without knowing their path, it is recommended to use a -#' `pattern` approach, which will trigger `GitStats` to find all the files in -#' the repository on the given level of directories (`pattern` argument) and -#' afterwards pull their content. +#' For example, if user wants to pull content of `README.md` and/or `NEWS.md` +#' files placed in the `root` directories of the repositories, he should take +#' the `file_path` approach as he already knows precisely paths of the files. +#' +#' On the other hand, if user wants to pull specific type of files (e.g. all +#' `.md` or `.Rmd` files in the repository), without knowing their path, it is +#' recommended to use a `pattern` approach, which will trigger `GitStats` to +#' find all the files in the repository on the given level of directories +#' (`pattern` argument) and afterwards pull their content. #' #' The latter approach is slower than the former but may be more useful #' depending on users' goals. 
Both approaches return data in the same format: @@ -82,12 +86,6 @@ get_files <- function(gitstats, call = NULL ) } - if (!is.null(pattern) && length(pattern) > 1) { - cli::cli_abort( - "Please define regex in one string.", - call = NULL - ) - } gitstats$get_files( pattern = pattern, depth = depth, diff --git a/man/get_files.Rd b/man/get_files.Rd index af441c5d..1fbc165a 100644 --- a/man/get_files.Rd +++ b/man/get_files.Rd @@ -61,11 +61,15 @@ If user wants to pull a particular file or files, a \code{file_path} approach seems to more reasonable, a it is a faster way (it omits pulling the whole file structure from the repo). -On the other hand, if user wants to pull specific type of files (e.g. \code{.md} -or \code{.Rmd} files), without knowing their path, it is recommended to use a -\code{pattern} approach, which will trigger \code{GitStats} to find all the files in -the repository on the given level of directories (\code{pattern} argument) and -afterwards pull their content. +For example, if user wants to pull content of \code{README.md} and/or \code{NEWS.md} +files placed in the \code{root} directories of the repositories, he should take +the \code{file_path} approach as he already knows precisely paths of the files. + +On the other hand, if user wants to pull specific type of files (e.g. all +\code{.md} or \code{.Rmd} files in the repository), without knowing their path, it is +recommended to use a \code{pattern} approach, which will trigger \code{GitStats} to +find all the files in the repository on the given level of directories +(\code{pattern} argument) and afterwards pull their content. The latter approach is slower than the former but may be more useful depending on users' goals. 
Both approaches return data in the same format: diff --git a/tests/testthat/_snaps/05-get_files.md b/tests/testthat/_snaps/05-get_files.md index 89eadac4..ab594ce2 100644 --- a/tests/testthat/_snaps/05-get_files.md +++ b/tests/testthat/_snaps/05-get_files.md @@ -2,7 +2,3 @@ Please choose either `pattern` or `file_path`. -# error shows when pattern is defined as vector - - Please define regex in one string. - diff --git a/tests/testthat/test-05-get_files.R b/tests/testthat/test-05-get_files.R index e4237fb4..b6590d52 100644 --- a/tests/testthat/test-05-get_files.R +++ b/tests/testthat/test-05-get_files.R @@ -94,12 +94,3 @@ test_that("error shows when file_path and pattern are defined at the same time", progress = FALSE) ) }) - -test_that("error shows when pattern is defined as vector", { - expect_snapshot_error( - get_files(test_gitstats, - pattern = c("\\.md", "meta_data.yaml"), - verbose = FALSE, - progress = FALSE) - ) -}) From d1cc481a7af9e902c7dcedcc383a70ea64b3b08f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Bana=C5=9B?= <74212933+maciekbanas@users.noreply.github.com> Date: Tue, 14 Jan 2025 13:52:28 +0100 Subject: [PATCH 97/99] Apply suggestions from code review Co-authored-by: Karolina Marcinkowska <32685910+marcinkowskak@users.noreply.github.com> --- R/get_files.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/get_files.R b/R/get_files.R index 67b0362f..ac7f7dfc 100644 --- a/R/get_files.R +++ b/R/get_files.R @@ -23,15 +23,15 @@ #' @details `get_files()` may be used in two ways: either with `pattern` (with #' optional `depth`) or `file_path` argument defined. #' -#' In the first scenario `GitStats` will pull first a files structure +#' In the first scenario, `GitStats` will pull first a files structure #' responding to the passed `pattern` and `depth` arguments and afterwards -#' files content for all of these files. 
In the second scenario `GitStats` -#' will pull only the content of files for the given `file_path` of the +#' files content for all of these files. In the second scenario, `GitStats` +#' will pull only the content of files for the specific `file_path` of the #' repository. #' #' If user wants to pull a particular file or files, a `file_path` approach -#' seems to more reasonable, a it is a faster way (it omits pulling the whole -#' file structure from the repo). +#' seems more reasonable, as it is a faster since it omits pulling the whole +#' file structure from the repo. #' #' For example, if user wants to pull content of `README.md` and/or `NEWS.md` #' files placed in the `root` directories of the repositories, he should take From 9537be8d1037703467348e54c8a5e9a6749a7d83 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Tue, 14 Jan 2025 12:53:53 +0000 Subject: [PATCH 98/99] Last typo and roxygenize. --- R/get_files.R | 4 ++-- man/get_files.Rd | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/R/get_files.R b/R/get_files.R index ac7f7dfc..17405c81 100644 --- a/R/get_files.R +++ b/R/get_files.R @@ -30,8 +30,8 @@ #' repository. #' #' If user wants to pull a particular file or files, a `file_path` approach -#' seems more reasonable, as it is a faster since it omits pulling the whole -#' file structure from the repo. +#' seems more reasonable, as it is a faster way since it omits pulling the +#' whole file structure from the repo. #' #' For example, if user wants to pull content of `README.md` and/or `NEWS.md` #' files placed in the `root` directories of the repositories, he should take diff --git a/man/get_files.Rd b/man/get_files.Rd index 1fbc165a..56fc8162 100644 --- a/man/get_files.Rd +++ b/man/get_files.Rd @@ -51,15 +51,15 @@ Pulls text files and their content. \code{get_files()} may be used in two ways: either with \code{pattern} (with optional \code{depth}) or \code{file_path} argument defined. 
-In the first scenario \code{GitStats} will pull first a files structure +In the first scenario, \code{GitStats} will pull first a files structure responding to the passed \code{pattern} and \code{depth} arguments and afterwards -files content for all of these files. In the second scenario \code{GitStats} -will pull only the content of files for the given \code{file_path} of the +files content for all of these files. In the second scenario, \code{GitStats} +will pull only the content of files for the specific \code{file_path} of the repository. If user wants to pull a particular file or files, a \code{file_path} approach -seems to more reasonable, a it is a faster way (it omits pulling the whole -file structure from the repo). +seems more reasonable, as it is a faster way since it omits pulling the +whole file structure from the repo. For example, if user wants to pull content of \code{README.md} and/or \code{NEWS.md} files placed in the \code{root} directories of the repositories, he should take From d06f139c1959979994807375350c19e93ba55bf9 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Tue, 14 Jan 2025 13:12:50 +0000 Subject: [PATCH 99/99] Bump version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 90e867e8..e4e35d81 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: GitStats Title: Standardized Git Repository Data -Version: 2.1.0.9010 +Version: 2.2.0 Authors@R: c( person(given = "Maciej", family = "Banas", email = "banasmaciek@gmail.com", role = c("aut", "cre")), person(given = "Kamil", family = "Koziej", email = "koziej.k@gmail.com", role = "aut"),