Skip to content

Commit

Permalink
Move parsing methods for commits, files, users and release logs.
Browse files Browse the repository at this point in the history
  • Loading branch information
maciekbanas committed Oct 15, 2024
1 parent 405ee3c commit 2c143d8
Show file tree
Hide file tree
Showing 15 changed files with 337 additions and 333 deletions.
127 changes: 127 additions & 0 deletions R/EngineGraphQLGitHub.R
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,45 @@ EngineGraphQLGitHub <- R6::R6Class(
return(repos_list_with_commits)
},

# Flatten the per-repository commit lists into a single commits table.
# `repos_list_with_commits` is a named list (names become the `repository`
# column); each element is a list of commits carrying a `node`.
# `org` is written to the `organization` column.
# The bound table is returned as is when it has no rows.
prepare_commits_table = function(repos_list_with_commits,
                                 org) {
  # Turn one commit into a flat record: the nested author is split into
  # `author`, `author_login` and `author_name`.
  flatten_commit <- function(commit) {
    author_data <- commit$node$author
    commit$node$author <- author_data$name
    commit$node$author_login <- author_data$user$login %||% NA
    commit$node$author_name <- author_data$user$name %||% NA
    commit$node$committed_date <- gts_to_posixt(commit$node$committed_date)
    commit$node
  }
  per_repo_tables <- purrr::imap(
    repos_list_with_commits,
    function(repo, repo_name) {
      repo_commits <- purrr::map_dfr(repo, flatten_commit)
      repo_commits$repository <- repo_name
      repo_commits
    }
  )
  # Drop single-column tables (presumably repositories without commits,
  # where only `repository` was added).
  non_empty_tables <- purrr::discard(per_repo_tables, ~ length(.) == 1)
  commits_table <- purrr::list_rbind(non_empty_tables)
  if (nrow(commits_table) == 0) {
    return(commits_table)
  }
  commits_table <- dplyr::mutate(
    commits_table,
    organization = org,
    api_url = self$gql_api_url
  )
  dplyr::relocate(
    commits_table,
    any_of(c("author_login", "author_name")),
    .after = author
  )
},

# Pull all given files from all repositories of an organization.
get_files_from_org = function(org,
type,
Expand Down Expand Up @@ -157,6 +196,30 @@ EngineGraphQLGitHub <- R6::R6Class(
return(org_files_list)
},

# Build the files table from a files response.
# `files_response` is a list of repositories, each a list of file entries;
# the name (or position) of an entry becomes `file_path`.
# `org` fills the `organization` column; `file_path` is not used in the body.
# Returns NULL when `files_response` is NULL.
prepare_files_table = function(files_response, org, file_path) {
  if (is.null(files_response)) {
    return(NULL)
  }
  # One row per file entry.
  file_to_row <- function(file_data, file_name) {
    data.frame(
      "repo_name" = file_data$repo_name,
      "repo_id" = file_data$repo_id,
      "organization" = org,
      "file_path" = file_name,
      "file_content" = file_data$file$text %||% NA,
      "file_size" = file_data$file$byteSize,
      "repo_url" = file_data$repo_url
    )
  }
  repository_tables <- purrr::map(files_response, function(repository) {
    purrr::list_rbind(purrr::imap(repository, file_to_row))
  })
  purrr::list_rbind(repository_tables)
},

# Pull all files from all repositories of an organization.
get_files_structure_from_org = function(org,
type,
Expand Down Expand Up @@ -186,6 +249,29 @@ EngineGraphQLGitHub <- R6::R6Class(
return(files_structure)
},

# Build the user table from a user response.
# Missing name, email, location and web_url become empty strings; the
# nested counters are lifted to top-level columns.
# Returns NULL when the response has no `data$user`.
prepare_user_table = function(user_response) {
  user <- user_response$data$user
  if (is.null(user)) {
    return(NULL)
  }
  user[["name"]] <- user$name %||% ""
  user[["starred_repos"]] <- user$starred_repos$totalCount
  # Copy the contribution counters out before dropping the nested element.
  activity <- user$contributions
  user[["commits"]] <- activity$totalCommitContributions
  user[["issues"]] <- activity$totalIssueContributions
  user[["pull_requests"]] <- activity$totalPullRequestContributions
  user[["reviews"]] <- activity$totalPullRequestReviewContributions
  user[["contributions"]] <- NULL
  user[["email"]] <- user$email %||% ""
  user[["location"]] <- user$location %||% ""
  user[["web_url"]] <- user$web_url %||% ""
  dplyr::relocate(
    tibble::as_tibble(user),
    c(commits, issues, pull_requests, reviews),
    .after = starred_repos
  )
},

# Pull release logs from organization
get_release_logs_from_org = function(repos_names, org) {
release_responses <- purrr::map(repos_names, function(repository) {
Expand All @@ -201,6 +287,47 @@ EngineGraphQLGitHub <- R6::R6Class(
}) %>%
purrr::discard(~ length(.$data$repository$releases$nodes) == 0)
return(release_responses)
},

# Prepare releases table.
# `releases_response` is a list of per-repository responses; release nodes
# are read from `data$repository$releases$nodes`.
# `org` is not used in the body.
# `date_until` is the upper bound on `published_at` (always applied);
# `date_from` is the lower bound and is skipped when NULL.
# Returns NULL when `releases_response` is NULL or an empty list.
prepare_releases_table = function(releases_response, org, date_from, date_until) {
  # length() covers both NULL and an empty list. An empty list would bind
  # to a table without a `published_at` column and break the filters below.
  if (length(releases_response) == 0) {
    return(NULL)
  }
  releases_table <-
    purrr::map(releases_response, function(release) {
      release_table <- purrr::map(
        release$data$repository$releases$nodes,
        function(node) {
          data.frame(
            release_name = node$name,
            release_tag = node$tagName,
            published_at = gts_to_posixt(node$publishedAt),
            release_url = node$url,
            release_log = node$description
          )
        }
      ) %>%
        purrr::list_rbind() %>%
        dplyr::mutate(
          repo_name = release$data$repository$name,
          repo_url = release$data$repository$url
        ) %>%
        dplyr::relocate(
          repo_name, repo_url,
          .before = release_name
        )
      return(release_table)
    }) %>%
    purrr::list_rbind() %>%
    dplyr::filter(
      published_at <= as.POSIXct(date_until)
    )
  if (!is.null(date_from)) {
    releases_table <- releases_table %>%
      dplyr::filter(
        published_at >= as.POSIXct(date_from)
      )
  }
  return(releases_table)
}
),
private = list(
Expand Down
110 changes: 110 additions & 0 deletions R/EngineGraphQLGitLab.R
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,49 @@ EngineGraphQLGitLab <- R6::R6Class(
}, .progress = progress)
return(org_files_list)
},

# Build the files table from a files response.
# `org` fills the `organization` column; `file_path` is not used in the body.
# Returns NULL when `files_response` is NULL.
prepare_files_table = function(files_response, org, file_path) {
  if (is.null(files_response)) {
    return(NULL)
  }
  # Responses prepared by iteration wrap each project in `data$project`;
  # otherwise the elements are already the projects.
  projects <- if (private$response_prepared_by_iteration(files_response)) {
    purrr::map(files_response, function(response_data) {
      response_data$data$project
    })
  } else {
    files_response
  }
  project_tables <- purrr::map(projects, function(project) {
    # One row per blob node of the project.
    blob_rows <- purrr::map(project$repository$blobs$nodes, function(file) {
      data.frame(
        "repo_name" = project$name,
        "repo_id" = project$id,
        "organization" = org,
        "file_path" = file$name,
        "file_content" = file$rawBlob,
        "file_size" = as.integer(file$size),
        "repo_url" = project$webUrl
      )
    })
    purrr::list_rbind(blob_rows)
  })
  purrr::list_rbind(project_tables)
},

get_files_structure_from_org = function(org,
type,
repos,
Expand All @@ -296,6 +339,28 @@ EngineGraphQLGitLab <- R6::R6Class(
return(files_structure)
},

# Build the user table from a user response.
# Missing name, email, location and web_url become empty strings; nested
# `count` values replace their parent elements. `commits` and `issues` are
# added as NA columns.
# Returns NULL when the response has no `data$user`.
prepare_user_table = function(user_response) {
  user <- user_response$data$user
  if (is.null(user)) {
    return(NULL)
  }
  user[["name"]] <- user$name %||% ""
  user[["starred_repos"]] <- user$starred_repos$count
  user[["pull_requests"]] <- user$pull_requests$count
  user[["reviews"]] <- user$reviews$count
  user[["email"]] <- user$email %||% ""
  user[["location"]] <- user$location %||% ""
  user[["web_url"]] <- user$web_url %||% ""
  user_table <- tibble::as_tibble(user)
  user_table <- dplyr::mutate(user_table, commits = NA, issues = NA)
  dplyr::relocate(user_table, c(commits, issues), .after = starred_repos)
},

# Pull all releases from all repositories of an organization.
get_release_logs_from_org = function(repos_names, org) {
release_responses <- purrr::map(repos_names, function(repository) {
Expand All @@ -310,6 +375,47 @@ EngineGraphQLGitLab <- R6::R6Class(
}) %>%
purrr::discard(~ length(.$data$project$releases$nodes) == 0)
return(release_responses)
},

# Build the releases table from per-repository release responses.
# Release nodes are read from `data$project$releases$nodes`.
# `org` is not used in the body.
# `date_until` is the upper bound on `published_at` (always applied);
# `date_from` is the lower bound and is skipped when NULL.
# Returns NULL when `releases_response` is empty.
prepare_releases_table = function(releases_response, org, date_from, date_until) {
  if (length(releases_response) == 0) {
    return(NULL)
  }
  # One row per release node.
  node_to_row <- function(node) {
    data.frame(
      release_name = node$name,
      release_tag = node$tagName,
      published_at = gts_to_posixt(node$releasedAt),
      release_url = node$links$selfUrl,
      release_log = node$description
    )
  }
  # All releases of one repository, prefixed with its name and URL.
  repository_releases <- function(release) {
    project <- release$data$project
    rows <- purrr::list_rbind(purrr::map(project$releases$nodes, node_to_row))
    rows <- dplyr::mutate(
      rows,
      repo_name = project$name,
      repo_url = project$webUrl
    )
    dplyr::relocate(rows, repo_name, repo_url, .before = release_name)
  }
  releases_table <- purrr::list_rbind(
    purrr::map(releases_response, repository_releases)
  )
  releases_table <- dplyr::filter(
    releases_table,
    published_at <= as.POSIXct(date_until)
  )
  if (!is.null(date_from)) {
    releases_table <- dplyr::filter(
      releases_table,
      published_at >= as.POSIXct(date_from)
    )
  }
  releases_table
}
),
private = list(
Expand Down Expand Up @@ -455,6 +561,10 @@ EngineGraphQLGitLab <- R6::R6Class(
"files" = files
)
return(result)
},

# TRUE unless every element of `files_response` carries the fields
# "name", "id", "webUrl" and "repository" at its top level.
response_prepared_by_iteration = function(files_response) {
  project_fields <- c("name", "id", "webUrl", "repository")
  has_project_fields <- purrr::map_lgl(files_response, function(element) {
    all(project_fields %in% names(element))
  })
  !all(has_project_fields)
}
)
)
62 changes: 62 additions & 0 deletions R/EngineRestGitLab.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,29 @@ EngineRestGitLab <- R6::R6Class(
return(files_list)
},

# Prepare files table from REST API.
# `files_list` is a list of file entries; each must provide repo_fullname,
# repo_name, repo_id, file_path, content, size and repo_url.
# The organization is derived from `repo_fullname` by dropping its last
# "/"-separated segment. Duplicate rows are removed.
# Returns NULL when `files_list` is NULL.
prepare_files_table = function(files_list) {
  files_table <- NULL
  if (!is.null(files_list)) {
    files_table <- purrr::map(files_list, function(file_data) {
      org_repo <- stringr::str_split_1(file_data$repo_fullname, "/")
      # Keep every segment but the last; the organization may itself
      # contain "/". Negative indexing is used because 1:(n - 1) counts
      # backwards (1:0) for a single segment and would return the
      # repository name as the organization.
      org <- paste0(org_repo[-length(org_repo)], collapse = "/")
      data.frame(
        "repo_name" = file_data$repo_name,
        "repo_id" = as.character(file_data$repo_id),
        "organization" = org,
        "file_path" = file_data$file_path,
        "file_content" = file_data$content,
        "file_size" = file_data$size,
        "repo_url" = file_data$repo_url
      )
    }) %>%
      purrr::list_rbind() %>%
      unique()
  }
  return(files_table)
},

# Wrapper for iteration over GitLab search API response
# @details For the time being there is no possibility to search GitLab with
# filtering by language. For more information look here:
Expand Down Expand Up @@ -87,6 +110,45 @@ EngineRestGitLab <- R6::R6Class(
return(repos_list)
},

# Reduce every commit to the fields used downstream.
# `repos_list_with_commits` is a list of repositories, each a list of
# commits. The repository name is cut out of the commit `web_url`:
# everything up to and including "<org>/" and the trailing
# "/-/commit/<id>" part are removed.
# Returns the same nested list shape with one trimmed list per commit.
tailor_commits_info = function(repos_list_with_commits,
                               org) {
  trim_commit <- function(commit) {
    url_after_org <- gsub(paste0("(.*)", org, "/"), "", commit$web_url)
    repository_name <- gsub(
      pattern = paste0("/-/commit/", commit$id),
      replacement = "",
      x = url_after_org
    )
    list(
      "id" = commit$id,
      "committed_date" = gts_to_posixt(commit$committed_date),
      "author" = commit$author_name,
      "additions" = commit$stats$additions,
      "deletions" = commit$stats$deletions,
      "repository" = repository_name,
      "organization" = org
    )
  }
  purrr::map(repos_list_with_commits, function(repo) {
    purrr::map(repo, trim_commit)
  })
},

# Bind a nested list of commits (repositories -> commits) into one table.
# Each commit is converted with data.frame(); `api_url` is added only when
# the bound table has at least one column.
prepare_commits_table = function(commits_list) {
  repository_tables <- purrr::map(commits_list, function(repo_commits) {
    purrr::list_rbind(purrr::map(repo_commits, ~ data.frame(.)))
  })
  commits_dt <- purrr::list_rbind(repository_tables)
  if (length(commits_dt) == 0) {
    return(commits_dt)
  }
  dplyr::mutate(commits_dt, api_url = self$rest_api_url)
},

# Pull all repositories URLs from organization
get_repos_urls = function(type, org) {
repos_urls <- self$response(
Expand Down
Loading

0 comments on commit 2c143d8

Please sign in to comment.