From 95ab021b5ad0c9ed54a27efb3bae8c44b70688ee Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Thu, 17 Oct 2024 07:40:16 +0000 Subject: [PATCH 1/5] Optimize get_R_package_usage: make possible passing vector of package names to function, change name of the main parameter to `packages` and make the output more concise. The consequence of second feature was introduction of new parameter `output` instead of `raw_output` in private methods handling repositories responses. --- DESCRIPTION | 2 +- NEWS.md | 8 + R/EngineRest.R | 29 ++- R/EngineRestGitHub.R | 57 +++-- R/EngineRestGitLab.R | 64 +++-- R/GitHost.R | 197 ++++++++------- R/GitStats.R | 134 ++++++---- R/gitstats_functions.R | 6 +- inst/package_usage_workflow.R | 13 + man/get_R_package_usage.Rd | 4 +- tests/testthat/_snaps/01-get_repos-GitHub.md | 17 +- tests/testthat/_snaps/get_usage_R_package.md | 4 +- tests/testthat/helper-expect-tables.R | 13 +- tests/testthat/test-01-get_repos-GitHub.R | 251 +++++++++++++++++-- tests/testthat/test-01-get_repos-GitStats.R | 42 ++-- tests/testthat/test-get_usage_R_package.R | 63 ++++- vignettes/get_repos_with_code.Rmd | 4 +- 17 files changed, 632 insertions(+), 276 deletions(-) create mode 100644 inst/package_usage_workflow.R diff --git a/DESCRIPTION b/DESCRIPTION index d2bd5b57..99e7d83e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: GitStats Title: Get Statistics from GitHub and GitLab -Version: 2.1.0.9004 +Version: 2.1.0.9005 Authors@R: c( person(given = "Maciej", family = "Banas", email = "banasmaciek@gmail.com", role = c("aut", "cre")), person(given = "Kamil", family = "Koziej", email = "koziej.k@gmail.com", role = "aut"), diff --git a/NEWS.md b/NEWS.md index 2b101654..911cb248 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,14 @@ # GitStats (development version) +## Features: + +- Optimized `get_R_package_usage()` function: + - you can now pass a vector of packages names (new `packages` parameter replacing old `package_name`) 
([#494](https://github.com/r-world-devs/GitStats/issues/494)), + - on the other hand, the output of the function has been limited to the most necessary data only (all repository stats were removed), thus making the process of obtaining package usage faster ([#474](https://github.com/r-world-devs/GitStats/issues/474)). - Added possibility to get repositories for individual users with `get_repos()` ([#492](https://github.com/r-world-devs/GitStats/issues/492)). Earlier this was only possible for GitHub organizations and GitLab groups. + +## Fixes: + - Fixed getting large search responses for GitHub ([#491](https://github.com/r-world-devs/GitStats/issues/491)). - Fixed checking token scopes ([#501](https://github.com/r-world-devs/GitStats/issues/501)). If token scopes are insufficient error is returned and `GitHost` is not passed to `GitStats`. This also applies to situation when `GitStats` looks for default tokens (not defined by user). Earlier, if tests for token failed, an empty token was passed and `GitStats` was created, which was misleading for the user. - User can now optionally pass public GitHub host name (`github.com` or `https://github.com`) to `set_github_host()` ([#475](https://github.com/r-world-devs/GitStats/issues/475)). 
diff --git a/R/EngineRest.R b/R/EngineRest.R index 6b8ad1fc..c6bea57d 100644 --- a/R/EngineRest.R +++ b/R/EngineRest.R @@ -39,7 +39,7 @@ EngineRest <- R6::R6Class("EngineRest", }, # Prepare table for repositories content - prepare_repos_table = function(repos_list, verbose = TRUE) { + prepare_repos_table = function(repos_list, output = "table_full", verbose = TRUE) { repos_dt <- purrr::map(repos_list, function(repo) { repo <- purrr::map(repo, function(attr) { attr <- attr %||% "" @@ -51,15 +51,24 @@ EngineRest <- R6::R6Class("EngineRest", cli::cli_alert_info("Preparing repositories table...") } if (length(repos_dt) > 0) { - repos_dt <- dplyr::mutate( - repos_dt, - repo_id = as.character(repo_id), - created_at = as.POSIXct(created_at), - last_activity_at = as.POSIXct(last_activity_at), - forks = as.integer(forks), - issues_open = as.integer(issues_open), - issues_closed = as.integer(issues_closed) - ) + if (output == "table_full") { + repos_dt <- dplyr::mutate( + repos_dt, + repo_id = as.character(repo_id), + created_at = as.POSIXct(created_at), + last_activity_at = as.POSIXct(last_activity_at), + forks = as.integer(forks), + issues_open = as.integer(issues_open), + issues_closed = as.integer(issues_closed) + ) + } + if (output == "table_min") { + repos_dt <- dplyr::mutate( + repos_dt, + repo_id = as.character(repo_id), + created_at = as.POSIXct(created_at) + ) + } } return(repos_dt) } diff --git a/R/EngineRestGitHub.R b/R/EngineRestGitHub.R index 7ddd1784..5b8a69b1 100644 --- a/R/EngineRestGitHub.R +++ b/R/EngineRestGitHub.R @@ -29,7 +29,7 @@ EngineRestGitHub <- R6::R6Class( org = NULL, filename = NULL, in_path = FALSE, - raw_output = FALSE, + output = "table_full", verbose = TRUE, progress = TRUE) { user_query <- if (!is.null(org)) { @@ -53,12 +53,12 @@ EngineRestGitHub <- R6::R6Class( search_endpoint = search_endpoint, total_n = total_n ) - if (!raw_output) { + if (output == "table_full" || output == "table_min") { search_output <- private$map_search_into_repos( 
search_response = search_result, progress = progress ) - } else { + } else if (output == "raw") { search_output <- search_result } } else { @@ -68,26 +68,39 @@ EngineRestGitHub <- R6::R6Class( }, # Retrieve only important info from repositories response - tailor_repos_response = function(repos_response) { + tailor_repos_response = function(repos_response, output = "table_full") { repos_list <- purrr::map(repos_response, function(repo) { - list( - "repo_id" = repo$id, - "repo_name" = repo$name, - "default_branch" = repo$default_branch, - "stars" = repo$stargazers_count, - "forks" = repo$forks_count, - "created_at" = gts_to_posixt(repo$created_at), - "last_activity_at" = if (!is.null(repo$pushed_at)) { - gts_to_posixt(repo$pushed_at) - } else { - gts_to_posixt(repo$created_at) - }, - "languages" = repo$language, - "issues_open" = repo$issues_open, - "issues_closed" = repo$issues_closed, - "organization" = repo$owner$login, - "repo_url" = repo$html_url - ) + if (output == "table_full") { + repo_data <- list( + "repo_id" = repo$id, + "repo_name" = repo$name, + "default_branch" = repo$default_branch, + "stars" = repo$stargazers_count, + "forks" = repo$forks_count, + "created_at" = gts_to_posixt(repo$created_at), + "last_activity_at" = if (!is.null(repo$pushed_at)) { + gts_to_posixt(repo$pushed_at) + } else { + gts_to_posixt(repo$created_at) + }, + "languages" = repo$language, + "issues_open" = repo$issues_open, + "issues_closed" = repo$issues_closed, + "organization" = repo$owner$login, + "repo_url" = repo$html_url + ) + } + if (output == "table_min") { + repo_data <- list( + "repo_id" = repo$id, + "repo_name" = repo$name, + "default_branch" = repo$default_branch, + "created_at" = gts_to_posixt(repo$created_at), + "organization" = repo$owner$login, + "repo_url" = repo$html_url + ) + } + return(repo_data) }) return(repos_list) }, diff --git a/R/EngineRestGitLab.R b/R/EngineRestGitLab.R index 6d023a0f..a11048ce 100644 --- a/R/EngineRestGitLab.R +++ b/R/EngineRestGitLab.R 
@@ -62,11 +62,11 @@ EngineRestGitLab <- R6::R6Class( # filtering by language. For more information look here: # https://gitlab.com/gitlab-org/gitlab/-/issues/340333 get_repos_by_code = function(code, - org = NULL, + org = NULL, filename = NULL, - in_path = FALSE, - raw_output = FALSE, - verbose = TRUE, + in_path = FALSE, + output = "table_full", + verbose = TRUE, progress = TRUE) { search_response <- private$search_for_code( code = code, @@ -75,37 +75,53 @@ EngineRestGitLab <- R6::R6Class( org = org, verbose = verbose ) - if (raw_output) { + if (output == "raw") { search_output <- search_response - } else { + } else if (output == "table_full" || output == "table_min") { search_output <- search_response %>% private$map_search_into_repos( progress = progress - ) %>% - private$get_repos_languages( - progress = progress ) + if (output == "table_full") { + search_output <- search_output %>% + private$get_repos_languages( + progress = progress + ) + } } return(search_output) }, # Retrieve only important info from repositories response - tailor_repos_response = function(repos_response) { + tailor_repos_response = function(repos_response, output = "table_full") { repos_list <- purrr::map(repos_response, function(project) { - list( - "repo_id" = project$id, - "repo_name" = project$name, - "default_branch" = project$default_branch, - "stars" = project$star_count, - "forks" = project$fork_count, - "created_at" = project$created_at, - "last_activity_at" = project$last_activity_at, - "languages" = paste0(project$languages, collapse = ", "), - "issues_open" = project$issues_open, - "issues_closed" = project$issues_closed, - "organization" = project$namespace$path, - "repo_url" = project$web_url - ) + if (output == "table_full") { + repo_data <- list( + "repo_id" = project$id, + "repo_name" = project$name, + "default_branch" = project$default_branch, + "stars" = project$star_count, + "forks" = project$fork_count, + "created_at" = project$created_at, + "last_activity_at" = 
project$last_activity_at, + "languages" = paste0(project$languages, collapse = ", "), + "issues_open" = project$issues_open, + "issues_closed" = project$issues_closed, + "organization" = project$namespace$path, + "repo_url" = project$web_url + ) + } + if (output == "table_min") { + repo_data <- list( + "repo_id" = project$id, + "repo_name" = project$name, + "default_branch" = project$default_branch, + "created_at" = project$created_at, + "organization" = project$namespace$path, + "repo_url" = project$web_url + ) + } + return(repo_data) }) return(repos_list) }, diff --git a/R/GitHost.R b/R/GitHost.R index 53d84766..4717d5d2 100644 --- a/R/GitHost.R +++ b/R/GitHost.R @@ -45,6 +45,7 @@ GitHost <- R6::R6Class( with_code = NULL, in_files = NULL, with_file = NULL, + output = "table_full", verbose = TRUE, progress = TRUE) { if (is.null(with_code) && is.null(with_file)) { @@ -57,6 +58,7 @@ GitHost <- R6::R6Class( repos_table <- private$get_repos_with_code( code = with_code, in_files = in_files, + output = output, verbose = verbose, progress = progress ) @@ -64,17 +66,21 @@ GitHost <- R6::R6Class( repos_table <- private$get_repos_with_code( code = with_file, in_path = TRUE, + output = output, verbose = verbose, progress = progress ) } - repos_table <- private$add_repo_api_url(repos_table) - if (add_contributors) { - repos_table <- private$get_repos_contributors( - repos_table = repos_table, - verbose = verbose, - progress = progress - ) + if (output == "table_full" || output == "table_min") { + repos_table <- private$add_repo_api_url(repos_table) %>% + private$add_platform() + if (add_contributors) { + repos_table <- private$get_repos_contributors( + repos_table = repos_table, + verbose = verbose, + progress = progress + ) + } } return(repos_table) }, @@ -88,10 +94,10 @@ GitHost <- R6::R6Class( progress = TRUE) { if (!is.null(with_code)) { repo_urls <- private$get_repos_with_code( - code = with_code, - in_files = in_files, - raw_output = TRUE, - verbose = verbose + code = 
with_code, + in_files = in_files, + output = "raw", + verbose = verbose ) %>% private$get_repo_url_from_response( type = type, @@ -99,10 +105,10 @@ GitHost <- R6::R6Class( ) } else if (!is.null(with_file)) { repo_urls <- private$get_repos_with_code( - code = with_file, - in_path = TRUE, - raw_output = TRUE, - verbose = verbose + code = with_file, + in_path = TRUE, + output = "raw", + verbose = verbose ) %>% private$get_repo_url_from_response( type = type, @@ -641,27 +647,27 @@ GitHost <- R6::R6Class( get_repos_with_code = function(code, in_files = NULL, in_path = FALSE, - raw_output = FALSE, + output = "table_full", verbose = TRUE, progress = TRUE) { if (private$scan_all) { repos_table <- private$get_repos_with_code_from_host( - code = code, - in_files = in_files, - in_path = in_path, - raw_output = raw_output, - verbose = verbose, - progress = progress + code = code, + in_files = in_files, + in_path = in_path, + output = output, + verbose = verbose, + progress = progress ) } if (!private$scan_all) { repos_table <- private$get_repos_with_code_from_orgs( - code = code, - in_files = in_files, - in_path = in_path, - raw_output = raw_output, - verbose = verbose, - progress = progress + code = code, + in_files = in_files, + in_path = in_path, + output = output, + verbose = verbose, + progress = progress ) } return(repos_table) @@ -702,11 +708,11 @@ GitHost <- R6::R6Class( # Pull repositories with code from whole Git Host get_repos_with_code_from_host = function(code, - in_files = NULL, - in_path = FALSE, - raw_output = FALSE, - verbose = TRUE, - progress = TRUE) { + in_files = NULL, + in_path = FALSE, + output = "table_full", + verbose = TRUE, + progress = TRUE) { if (verbose) { show_message( host = private$host_name, @@ -715,23 +721,29 @@ GitHost <- R6::R6Class( ) } repos_response <- private$get_repos_response_with_code( - code = code, - in_files = in_files, - in_path = in_path, - raw_output = raw_output, - verbose = verbose, - progress = progress + code = code, + 
in_files = in_files, + in_path = in_path, + output = output, + verbose = verbose, + progress = progress ) - if (!raw_output) { + if (output != "raw") { rest_engine <- private$engines$rest repos_table <- repos_response %>% - rest_engine$tailor_repos_response() %>% + rest_engine$tailor_repos_response( + output = output + ) %>% rest_engine$prepare_repos_table( + output = output, verbose = verbose - ) %>% - rest_engine$get_repos_issues( - progress = progress ) + if (output == "table_full") { + repos_table <- repos_table %>% + rest_engine$get_repos_issues( + progress = progress + ) + } return(repos_table) } else { return(repos_response) @@ -740,11 +752,11 @@ GitHost <- R6::R6Class( # Pull repositories with code from given organizations get_repos_with_code_from_orgs = function(code, - in_files = NULL, - in_path = FALSE, - raw_output = FALSE, - verbose = TRUE, - progress = TRUE) { + in_files = NULL, + in_path = FALSE, + output = "table_full", + verbose = TRUE, + progress = TRUE) { repos_list <- purrr::map(private$orgs, function(org) { if (verbose) { show_message( @@ -755,31 +767,52 @@ GitHost <- R6::R6Class( information = "Pulling repositories" ) } - repos_response <- private$get_repos_response_with_code( - org = org, - code = code, - in_files = in_files, - in_path = in_path, - raw_output = raw_output, - verbose = verbose, - progress = progress - ) - if (!raw_output) { + rest_engine <- private$engines$rest + if (is.null(in_files)) { + repos_response <- rest_engine$get_repos_by_code( + org = org, + code = code, + in_path = in_path, + output = output, + verbose = verbose, + progress = progress + ) + } else { + repos_response <- purrr::map(in_files, function(filename) { + rest_engine$get_repos_by_code( + org = org, + code = code, + filename = filename, + in_path = in_path, + output = output, + verbose = verbose, + progress = progress + ) + }) %>% + purrr::list_flatten() + } + if (output != "raw") { rest_engine <- private$engines$rest repos_table <- repos_response %>% - 
rest_engine$tailor_repos_response() %>% + rest_engine$tailor_repos_response( + output = output + ) %>% rest_engine$prepare_repos_table( + output = output, verbose = verbose - ) %>% - rest_engine$get_repos_issues( - progress = progress ) + if (output == "table_full") { + repos_table <- repos_table %>% + rest_engine$get_repos_issues( + progress = progress + ) + } return(repos_table) } else { return(repos_response) } }, .progress = progress) - if (!raw_output) { + if (output != "raw") { repos_output <- purrr::list_rbind(repos_list) } else { repos_output <- purrr::list_flatten(repos_list) @@ -787,39 +820,13 @@ GitHost <- R6::R6Class( return(repos_output) }, - # Wrapper in case in_files is fed. - get_repos_response_with_code = function(org = NULL, - code, - in_files, - in_path, - raw_output, - verbose, - progress) { - rest_engine <- private$engines$rest - if (is.null(in_files)) { - repos_response <- rest_engine$get_repos_by_code( - org = org, - code = code, - in_path = in_path, - raw_output = raw_output, - verbose = verbose, - progress = progress + add_platform = function(repos_table) { + if (nrow(repos_table) > 0) { + dplyr::mutate( + repos_table, + platform = retrieve_platform(api_url) ) - } else { - repos_response <- purrr::map(in_files, function(filename) { - rest_engine$get_repos_by_code( - org = org, - code = code, - filename = filename, - in_path = in_path, - raw_output = raw_output, - verbose = verbose, - progress = progress - ) - }) %>% - purrr::list_flatten() } - return(repos_response) }, #' Add information on repository contributors. diff --git a/R/GitStats.R b/R/GitStats.R index 27ff5759..565e265d 100644 --- a/R/GitStats.R +++ b/R/GitStats.R @@ -449,23 +449,23 @@ GitStats <- R6::R6Class( }, #' @description Wrapper over pulling repositories by code. - #' @param package_name A character, name of the package. + #' @param packages A character vector, names of R packages to look for. 
#' @param only_loading A boolean, if `TRUE` function will check only if #' package is loaded in repositories, not used as dependencies. #' @param cache A logical, if set to `TRUE` GitStats will retrieve the last #' result from its storage. #' @param verbose A logical, `TRUE` by default. If `FALSE` messages and #' printing output is switched off. - get_R_package_usage = function(package_name, + get_R_package_usage = function(packages, only_loading = FALSE, cache = TRUE, verbose = TRUE) { private$check_for_host() - if (is.null(package_name)) { - cli::cli_abort("You need to define `package_name`.", call = NULL) + if (is.null(packages)) { + cli::cli_abort("You need to define at least one `package_name`.", call = NULL) } args_list <- list( - "package_name" = package_name, + "packages" = packages, "only_loading" = only_loading ) trigger <- private$trigger_pulling( @@ -476,7 +476,7 @@ GitStats <- R6::R6Class( ) if (trigger) { R_package_usage <- private$get_R_package_usage_from_hosts( - package_name = package_name, + packages = packages, only_loading = only_loading, verbose = verbose ) @@ -487,14 +487,6 @@ GitStats <- R6::R6Class( attr_list = args_list ) private$save_to_storage(R_package_usage) - } else { - if (verbose) { - cli::cli_alert_warning( - cli::col_yellow( - "No usage of R packages found." 
- ) - ) - } } } else { R_package_usage <- private$get_from_storage( @@ -680,6 +672,7 @@ GitStats <- R6::R6Class( with_code, in_files = NULL, with_files, + output = "table_full", verbose = TRUE, progress = TRUE) { repos_table <- purrr::map(private$hosts, function(host) { @@ -689,6 +682,7 @@ GitStats <- R6::R6Class( add_contributors = add_contributors, with_code = with_code, in_files = in_files, + output = output, verbose = verbose, progress = progress ) @@ -697,6 +691,7 @@ GitStats <- R6::R6Class( host = host, add_contributors = add_contributors, with_files = with_files, + output = output, verbose = verbose, progress = progress ) @@ -709,8 +704,12 @@ GitStats <- R6::R6Class( } }) %>% purrr::list_rbind() %>% - private$add_stats_to_repos() %>% dplyr::as_tibble() + if (output == "table_full") { + repos_table <- repos_table %>% + private$add_stats_to_repos() %>% + dplyr::as_tibble() + } return(repos_table) }, @@ -719,6 +718,7 @@ GitStats <- R6::R6Class( add_contributors, with_code, in_files, + output, verbose, progress) { purrr::map(with_code, function(with_code) { @@ -726,6 +726,7 @@ GitStats <- R6::R6Class( add_contributors = add_contributors, with_code = with_code, in_files = in_files, + output = output, verbose = verbose, progress = progress ) @@ -737,12 +738,14 @@ GitStats <- R6::R6Class( get_repos_from_host_with_files = function(host, add_contributors, with_files, + output, verbose, progress) { purrr::map(with_files, function(with_file) { host$get_repos( add_contributors = add_contributors, with_file = with_file, + output = output, verbose = verbose, progress = progress ) @@ -951,36 +954,60 @@ GitStats <- R6::R6Class( }, # Pull information on package usage in a table form - get_R_package_usage_from_hosts = function(package_name, + get_R_package_usage_from_hosts = function(packages, only_loading, - verbose) { - if (!only_loading) { - repos_with_package_as_dependency <- private$get_R_package_as_dependency( + verbose = TRUE) { + packages_usage_tables <- 
purrr::map(packages, function(package_name) { + if (!only_loading) { + repos_with_package_as_dependency <- private$get_R_package_as_dependency( + package_name = package_name, + verbose = verbose + ) + } else { + repos_with_package_as_dependency <- NULL + } + repos_using_package <- private$get_R_package_loading( package_name = package_name, verbose = verbose ) - } else { - repos_with_package_as_dependency <- NULL - } - repos_using_package <- private$get_R_package_loading( - package_name = package_name, - verbose = verbose - ) - package_usage_table <- purrr::list_rbind( - list( - repos_with_package_as_dependency, - repos_using_package + package_usage_table <- purrr::list_rbind( + list( + repos_with_package_as_dependency, + repos_using_package + ) ) - ) - if (nrow(package_usage_table) > 0) { - duplicated_repos <- package_usage_table$api_url[duplicated(package_usage_table$api_url)] - package_usage_table <- package_usage_table[!duplicated(package_usage_table$api_url), ] - package_usage_table <- package_usage_table %>% - dplyr::mutate( - package_usage = ifelse(api_url %in% duplicated_repos, "import, library", package_usage) + if (nrow(package_usage_table) > 0) { + duplicated_repos <- package_usage_table$api_url[duplicated(package_usage_table$api_url)] + package_usage_table <- package_usage_table[!duplicated(package_usage_table$api_url), ] + package_usage_table <- package_usage_table %>% + dplyr::mutate( + package_usage = ifelse(api_url %in% duplicated_repos, "import, library", package_usage) + ) + package_usage_table <- dplyr::mutate( + package_usage_table, + package = package_name, + repo_fullname = paste0(organization, "/", repo_name) + ) %>% + dplyr::relocate( + package, package_usage, + .before = repo_id + ) %>% + dplyr::relocate( + repo_fullname, + .after = repo_id + ) + } + return(package_usage_table) + }) %>% + purrr::list_rbind() + if (nrow(packages_usage_tables) == 0 && verbose) { + cli::cli_alert_warning( + cli::col_yellow( + "No usage of R packages found." 
) + ) } - return(package_usage_table) + return(packages_usage_tables) }, # Search repositories with `library(package_name)` in code blobs. @@ -994,14 +1021,13 @@ GitStats <- R6::R6Class( ) repos_using_package <- purrr::map(package_usage_phrases, ~ { repos_using_package <- private$get_repos_from_hosts( - with_code = ., - verbose = FALSE, - progress = FALSE + with_code = ., + output = "table_min", + verbose = FALSE, + progress = FALSE ) - if (!is.null(repos_using_package)) { + if (nrow(repos_using_package) > 0) { repos_using_package$package_usage <- "library" - repos_using_package <- repos_using_package %>% - dplyr::select(repo_name, organization, fullname, platform, repo_url, api_url, package_usage) } return(repos_using_package) }) %>% @@ -1017,16 +1043,15 @@ GitStats <- R6::R6Class( cli::cli_alert_info("Checking where [{package_name}] is used as a dependency...") } repos_with_package <- private$get_repos_from_hosts( - with_code = package_name, - in_files = c("DESCRIPTION", "NAMESPACE"), - verbose = FALSE, - progress = FALSE + with_code = package_name, + in_files = c("DESCRIPTION", "NAMESPACE"), + output = "table_min", + verbose = FALSE, + progress = FALSE ) - if (!is.null(repos_with_package)) { + if (nrow(repos_with_package) > 0) { repos_with_package <- repos_with_package[!duplicated(repos_with_package$api_url), ] repos_with_package$package_usage <- "import" - repos_with_package <- repos_with_package %>% - dplyr::select(repo_name, organization, fullname, platform, repo_url, api_url, package_usage) } return(repos_with_package) }, @@ -1043,8 +1068,7 @@ GitStats <- R6::R6Class( Sys.time(), last_activity_at, units = "days" - ) %>% round(2), - platform = retrieve_platform(api_url) + ) %>% round(2) ) %>% dplyr::relocate( organization, fullname, platform, repo_url, api_url, created_at, @@ -1207,13 +1231,13 @@ GitStats <- R6::R6Class( "commits" = "dates_range", "release_logs" = "dates_range", "users" = "logins", - "R_package_usage" = "package_name") + "R_package_usage" = 
"packages") attr_data <- attr(storage_table, storage_attr) attr_name <- switch(storage_attr, "file_path" = "files", "pattern" = "files matching pattern", "dates_range" = "date range", - "package_name" = "package", + "packages" = "packages", "logins" = "logins") if (length(attr_data) > 1) { separator <- if (storage_attr == "dates_range") { diff --git a/R/gitstats_functions.R b/R/gitstats_functions.R index a4cbd1c2..91a8f3a2 100644 --- a/R/gitstats_functions.R +++ b/R/gitstats_functions.R @@ -440,7 +440,7 @@ get_files_structure <- function(gitstats_object, #' loading package (`library(package)` and `require(package)` in all files) or #' using it as a dependency (`package` in `DESCRIPTION` and `NAMESPACE` files). #' @param gitstats_object A GitStats object. -#' @param package_name A character, name of the package. +#' @param packages A character vector, names of R packages to look for. #' @param only_loading A boolean, if `TRUE` function will check only if package #' is loaded in repositories, not used as dependencies. 
#' @param cache A logical, if set to `TRUE` GitStats will retrieve the last @@ -460,12 +460,12 @@ get_files_structure <- function(gitstats_object, #' } #' @export get_R_package_usage <- function(gitstats_object, - package_name, + packages, only_loading = FALSE, cache = TRUE, verbose = is_verbose(gitstats_object)) { gitstats_object$get_R_package_usage( - package_name = package_name, + packages = packages, only_loading = only_loading, cache = cache, verbose = verbose diff --git a/inst/package_usage_workflow.R b/inst/package_usage_workflow.R new file mode 100644 index 00000000..26b5288f --- /dev/null +++ b/inst/package_usage_workflow.R @@ -0,0 +1,13 @@ +devtools::load_all(".") + +test_gitstats <- create_gitstats() %>% + set_github_host( + orgs = "openpharma" + ) + +get_R_package_usage(test_gitstats, packages = "no_such_package") + +get_R_package_usage( + test_gitstats, + packages = c("purrr", "shiny") +) diff --git a/man/get_R_package_usage.Rd b/man/get_R_package_usage.Rd index 759fac09..3fce37da 100644 --- a/man/get_R_package_usage.Rd +++ b/man/get_R_package_usage.Rd @@ -6,7 +6,7 @@ \usage{ get_R_package_usage( gitstats_object, - package_name, + packages, only_loading = FALSE, cache = TRUE, verbose = is_verbose(gitstats_object) @@ -15,7 +15,7 @@ get_R_package_usage( \arguments{ \item{gitstats_object}{A GitStats object.} -\item{package_name}{A character, name of the package.} +\item{packages}{A character vector, names of R packages to look for.} \item{only_loading}{A boolean, if \code{TRUE} function will check only if package is loaded in repositories, not used as dependencies.} diff --git a/tests/testthat/_snaps/01-get_repos-GitHub.md b/tests/testthat/_snaps/01-get_repos-GitHub.md index 72bd89bc..9ca42bbd 100644 --- a/tests/testthat/_snaps/01-get_repos-GitHub.md +++ b/tests/testthat/_snaps/01-get_repos-GitHub.md @@ -13,6 +13,14 @@ Message i Preparing repositories table... 
+# `prepare_repos_table()` prepares minimum version of repos table + + Code + gh_repos_by_code_table_min <- test_rest_github$prepare_repos_table(repos_list = test_mocker$ + use("gh_repos_by_code_tailored_min"), output = "table_min") + Message + i Preparing repositories table... + # `get_all_repos()` works as expected Code @@ -20,9 +28,12 @@ Message i [Host:GitHub][Engine:GraphQl][Scope:test-org] Pulling repositories... -# `get_repos_contributors()` adds contributors to repos table +# `get_repos_contributors()` works on GitHost level Code - gh_repos_by_code_table <- test_rest_github$get_repos_contributors(repos_table = test_mocker$ - use("gh_repos_by_code_table"), progress = FALSE) + gh_repos_with_contributors <- github_testhost_priv$get_repos_contributors( + repos_table = test_mocker$use("gh_repos_table_with_platform"), verbose = TRUE, + progress = FALSE) + Message + i [Host:GitHub][Engine:REST] Pulling contributors... diff --git a/tests/testthat/_snaps/get_usage_R_package.md b/tests/testthat/_snaps/get_usage_R_package.md index 69eef51f..bf264385 100644 --- a/tests/testthat/_snaps/get_usage_R_package.md +++ b/tests/testthat/_snaps/get_usage_R_package.md @@ -1,8 +1,8 @@ # when get_R_package_usage_from_hosts output is empty return warning Code - test_gitstats$get_R_package_usage(package_name = "shiny", only_loading = FALSE, - verbose = TRUE) + test_gitstats$get_R_package_usage_from_hosts(packages = "non-existing-package", + only_loading = FALSE, verbose = TRUE) Message ! No usage of R packages found. 
Output diff --git a/tests/testthat/helper-expect-tables.R b/tests/testthat/helper-expect-tables.R index 65fa3c79..74b555b9 100644 --- a/tests/testthat/helper-expect-tables.R +++ b/tests/testthat/helper-expect-tables.R @@ -4,13 +4,18 @@ repo_gitstats_colnames <- c( "languages", "issues_open", "issues_closed" ) -repo_host_colnames <- c('repo_id', 'repo_name', 'default_branch', 'stars', 'forks', - 'created_at', 'last_activity_at', 'languages', 'issues_open', - 'issues_closed', 'organization', 'repo_url') +repo_host_colnames <- c("repo_id", "repo_name", "default_branch", "stars", "forks", + "created_at", "last_activity_at", "languages", "issues_open", + "issues_closed", "organization", "repo_url") + +repo_min_colnames <- c("repo_id", "repo_name", "default_branch", + "created_at", "organization", "repo_url") expect_package_usage_table <- function(object, with_cols = NULL) { expect_s3_class(object, "data.frame") - expect_named(object, c('repo_name', 'organization', 'fullname', 'platform', 'repo_url', 'api_url', 'package_usage')) + expect_named(object, c("package", "package_usage", "repo_id", "repo_fullname", + "repo_name", "default_branch", "created_at", "organization", + "repo_url", "api_url", "platform")) expect_gt(nrow(object), 0) } diff --git a/tests/testthat/test-01-get_repos-GitHub.R b/tests/testthat/test-01-get_repos-GitHub.R index c4bc6da6..4e1856af 100644 --- a/tests/testthat/test-01-get_repos-GitHub.R +++ b/tests/testthat/test-01-get_repos-GitHub.R @@ -154,7 +154,7 @@ test_that("`get_repos_by_code()` returns repos output for code search in files", test_mocker$cache(gh_repos_by_code) }) -test_that("`get_repos_by_code()` for GitHub prepares a raw (raw_output = TRUE) search response", { +test_that("`get_repos_by_code()` for GitHub prepares a raw search response", { mockery::stub( test_rest_github$get_repos_by_code, "self$response", @@ -171,10 +171,10 @@ test_that("`get_repos_by_code()` for GitHub prepares a raw (raw_output = TRUE) s 
test_mocker$use("gh_mapped_repos") ) gh_repos_by_code_raw <- test_rest_github$get_repos_by_code( - code = "test_code", - org = "test_org", - raw_output = TRUE, - verbose = FALSE + code = "test_code", + org = "test_org", + output = "raw", + verbose = FALSE ) expect_gh_search_response(gh_repos_by_code_raw$items) test_mocker$cache(gh_repos_by_code_raw) @@ -202,6 +202,29 @@ test_that("GitHub tailors precisely `repos_list`", { test_mocker$cache(gh_repos_by_code_tailored) }) +test_that("GitHub tailors `repos_list` to minimal version of table", { + gh_repos_by_code <- test_mocker$use("gh_repos_by_code") + gh_repos_by_code_tailored_min <- + test_rest_github$tailor_repos_response( + repos_response = gh_repos_by_code, + output = "table_min" + ) + gh_repos_by_code_tailored_min %>% + expect_type("list") %>% + expect_length(length(gh_repos_by_code)) + expect_list_contains_only( + gh_repos_by_code_tailored_min[[1]], + c( + "repo_id", "repo_name", "created_at", "organization" + ) + ) + expect_lt( + length(gh_repos_by_code_tailored_min[[1]]), + length(gh_repos_by_code[[1]]) + ) + test_mocker$cache(gh_repos_by_code_tailored_min) +}) + test_that("`prepare_repos_table()` prepares repos table", { expect_snapshot( gh_repos_by_code_table <- test_rest_github$prepare_repos_table( @@ -211,10 +234,23 @@ test_that("`prepare_repos_table()` prepares repos table", { expect_repos_table( gh_repos_by_code_table ) - gh_repos_by_code_table <- github_testhost_priv$add_repo_api_url(gh_repos_by_code_table) test_mocker$cache(gh_repos_by_code_table) }) +test_that("`prepare_repos_table()` prepares minimum version of repos table", { + expect_snapshot( + gh_repos_by_code_table_min <- test_rest_github$prepare_repos_table( + repos_list = test_mocker$use("gh_repos_by_code_tailored_min"), + output = "table_min" + ) + ) + expect_repos_table( + gh_repos_by_code_table_min, + repo_cols = repo_min_colnames + ) + test_mocker$cache(gh_repos_by_code_table_min) +}) + test_that("`get_repos_issues()` adds issues to repos 
table", { mockery::stub( test_rest_github$get_repos_issues, @@ -242,7 +278,7 @@ test_that("`get_repos_issues()` adds issues to repos table", { test_that("`get_repos_with_code_from_orgs()` works", { mockery::stub( github_testhost_priv$get_repos_with_code_from_orgs, - "private$get_repos_response_with_code", + "rest_engine$get_repos_by_code", test_mocker$use("gh_repos_by_code") ) mockery::stub( @@ -250,11 +286,84 @@ test_that("`get_repos_with_code_from_orgs()` works", { "rest_engine$get_repos_issues", test_mocker$use("gh_repos_by_code_table") ) - repos_with_code <- github_testhost_priv$get_repos_with_code_from_orgs( + repos_with_code_from_orgs_full <- github_testhost_priv$get_repos_with_code_from_orgs( + code = "shiny", + output = "table_full", + verbose = FALSE + ) + expect_repos_table(repos_with_code_from_orgs_full) + test_mocker$cache(repos_with_code_from_orgs_full) +}) + +test_that("`get_repos_with_code_from_orgs()` pulls minimum version of table", { + mockery::stub( + github_testhost_priv$get_repos_with_code_from_orgs, + "rest_engine$get_repos_by_code", + test_mocker$use("gh_repos_by_code") + ) + mockery::stub( + github_testhost_priv$get_repos_with_code_from_orgs, + "rest_engine$prepare_repos_table", + test_mocker$use("gh_repos_by_code_table_min") + ) + repos_with_code_from_orgs_min <- github_testhost_priv$get_repos_with_code_from_orgs( code = "shiny", + output = "table_min", verbose = FALSE ) - expect_repos_table(repos_with_code, with_cols = "api_url") + expect_repos_table(repos_with_code_from_orgs_min, + repo_cols = repo_min_colnames) + test_mocker$cache(repos_with_code_from_orgs_min) +}) + +test_that("`get_repos_with_code_from_orgs()` pulls raw response", { + mockery::stub( + github_testhost_priv$get_repos_with_code_from_orgs, + "rest_engine$get_repos_by_code", + test_mocker$use("gh_repos_by_code_raw") + ) + repos_with_code_from_orgs_raw <- github_testhost_priv$get_repos_with_code_from_orgs( + code = "shiny", + output = "raw", + verbose = FALSE + ) + 
expect_type(repos_with_code_from_orgs_raw, "list") + expect_gt(length(repos_with_code_from_orgs_raw), 0) +}) + +test_that("get_repos_with_code() works", { + mockery::stub( + github_testhost_priv$get_repos_with_code, + "private$get_repos_with_code_from_orgs", + test_mocker$use("repos_with_code_from_orgs_full") + ) + github_repos_with_code <- github_testhost_priv$get_repos_with_code( + code = "test-code", + verbose = FALSE, + progress = FALSE + ) + expect_repos_table( + github_repos_with_code + ) +}) + +test_that("get_repos_with_code() works", { + mockery::stub( + github_testhost_priv$get_repos_with_code, + "private$get_repos_with_code_from_orgs", + test_mocker$use("repos_with_code_from_orgs_min") + ) + github_repos_with_code_min <- github_testhost_priv$get_repos_with_code( + code = "test-code", + output = "table_min", + verbose = FALSE, + progress = FALSE + ) + expect_repos_table( + github_repos_with_code_min, + repo_cols = repo_min_colnames + ) + test_mocker$cache(github_repos_with_code_min) }) test_that("GitHub prepares repos table from repositories response", { @@ -267,29 +376,40 @@ test_that("GitHub prepares repos table from repositories response", { test_mocker$cache(gh_repos_table) }) -test_that("GitHost adds `repo_api_url` column to GitHub repos table", { - repos_table <- test_mocker$use("gh_repos_table") - gh_repos_table_with_api_url <- github_testhost_priv$add_repo_api_url(repos_table) - expect_true(all(grepl("api.github.com", gh_repos_table_with_api_url$api_url))) - test_mocker$cache(gh_repos_table_with_api_url) -}) - test_that("`get_all_repos()` works as expected", { mockery::stub( github_testhost_priv$get_all_repos, "graphql_engine$prepare_repos_table", - test_mocker$use("gh_repos_table_with_api_url") + test_mocker$use("gh_repos_table") ) expect_snapshot( gh_repos_table <- github_testhost_priv$get_all_repos() ) expect_repos_table( - gh_repos_table, - with_cols = "api_url" + gh_repos_table ) test_mocker$cache(gh_repos_table) }) +test_that("GitHost adds 
`repo_api_url` column to GitHub repos table", { + repos_table <- test_mocker$use("gh_repos_table") + gh_repos_table_with_api_url <- github_testhost_priv$add_repo_api_url(repos_table) + expect_true(all(grepl("api.github.com", gh_repos_table_with_api_url$api_url))) + test_mocker$cache(gh_repos_table_with_api_url) +}) + +test_that("add_platform adds data on Git platform to repos table", { + gh_repos_table_with_platform <- github_testhost_priv$add_platform( + repos_table = test_mocker$use("gh_repos_table_with_api_url") + ) + expect_repos_table( + gh_repos_table_with_platform, + with_cols = c("api_url", "platform") + ) + test_mocker$cache(gh_repos_table_with_platform) +}) + + test_that("get_contributors_from_repo", { mockery::stub( test_rest_github_priv$get_contributors_from_repo, @@ -311,19 +431,100 @@ test_that("`get_repos_contributors()` adds contributors to repos table", { "private$get_contributors_from_repo", test_mocker$use("github_contributors") ) + gh_repos_with_contributors <- test_rest_github$get_repos_contributors( + repos_table = test_mocker$use("gh_repos_table_with_platform"), + progress = FALSE + ) + expect_repos_table( + gh_repos_with_contributors, + with_cols = c("api_url", "platform", "contributors") + ) + expect_gt( + length(gh_repos_with_contributors$contributors), + 0 + ) + test_mocker$cache(gh_repos_with_contributors) +}) + +test_that("`get_repos_contributors()` works on GitHost level", { + mockery::stub( + github_testhost_priv$get_repos_contributors, + "rest_engine$get_repos_contributors", + test_mocker$use("gh_repos_with_contributors") + ) expect_snapshot( - gh_repos_by_code_table <- test_rest_github$get_repos_contributors( - repos_table = test_mocker$use("gh_repos_by_code_table"), + gh_repos_with_contributors <- github_testhost_priv$get_repos_contributors( + repos_table = test_mocker$use("gh_repos_table_with_platform"), + verbose = TRUE, progress = FALSE ) ) expect_repos_table( - gh_repos_by_code_table, - with_cols = c("api_url", "contributors") 
+ gh_repos_with_contributors, + with_cols = c("api_url", "platform", "contributors") ) expect_gt( - length(gh_repos_by_code_table$contributors), + length(gh_repos_with_contributors$contributors), 0 ) - test_mocker$cache(gh_repos_by_code_table) + test_mocker$cache(gh_repos_with_contributors) +}) + +test_that("`get_repos()` works as expected", { + mockery::stub( + github_testhost$get_repos, + "private$get_all_repos", + test_mocker$use("gh_repos_table") + ) + gh_repos_table <- github_testhost$get_repos( + add_contributors = FALSE, + verbose = FALSE + ) + expect_repos_table( + gh_repos_table, + with_cols = c("api_url", "platform") + ) + test_mocker$cache(gh_repos_table) +}) + +test_that("`get_repos()` works as expected", { + mockery::stub( + github_testhost$get_repos, + "private$get_all_repos", + test_mocker$use("gh_repos_table") + ) + mockery::stub( + github_testhost$get_repos, + "private$get_repos_contributors", + test_mocker$use("gh_repos_with_contributors") + ) + gh_repos_table_full <- github_testhost$get_repos( + add_contributors = TRUE, + verbose = FALSE + ) + expect_repos_table( + gh_repos_table_full, + with_cols = c("api_url", "platform", "contributors") + ) + test_mocker$cache(gh_repos_table_full) +}) + +test_that("`get_repos()` pulls table in minimalist version", { + mockery::stub( + github_testhost$get_repos, + "private$get_repos_with_code", + test_mocker$use("github_repos_with_code_min") + ) + gh_repos_table_min <- github_testhost$get_repos( + add_contributors = FALSE, + with_code = "test_code", + output = "table_min", + verbose = FALSE + ) + expect_repos_table( + gh_repos_table_min, + repo_cols = repo_min_colnames, + with_cols = c("api_url", "platform") + ) + test_mocker$cache(gh_repos_table_min) }) diff --git a/tests/testthat/test-01-get_repos-GitStats.R b/tests/testthat/test-01-get_repos-GitStats.R index 10fab878..20d4f34c 100644 --- a/tests/testthat/test-01-get_repos-GitStats.R +++ b/tests/testthat/test-01-get_repos-GitStats.R @@ -3,11 +3,12 @@ 
test_that("get_repos_from_hosts works", { test_gitstats_priv$get_repos_from_hosts, "host$get_repos", purrr::list_rbind(list( - test_mocker$use("gh_repos_table_with_api_url"), + test_mocker$use("gh_repos_table_full"), test_mocker$use("gl_repos_table_with_api_url") )) ) - repos_table <- test_gitstats_priv$get_repos_from_hosts( + repos_from_hosts <- test_gitstats_priv$get_repos_from_hosts( + add_contributors = TRUE, with_code = NULL, in_files = NULL, with_files = NULL, @@ -15,38 +16,39 @@ test_that("get_repos_from_hosts works", { progress = FALSE ) expect_repos_table( - repos_table, - repo_cols = repo_gitstats_colnames + repos_from_hosts, + repo_cols = repo_gitstats_colnames, + with_cols = c("contributors", "contributors_n") ) + test_mocker$cache(repos_from_hosts) }) -test_that("get_repos_from_hosts with_code works", { +test_that("get_repos_from_hosts pulls table in minimalist version", { mockery::stub( test_gitstats_priv$get_repos_from_hosts, - "private$get_repos_from_host_with_code", - purrr::list_rbind( - list(test_mocker$use("gh_repos_by_code_table"), - test_mocker$use("gl_repos_by_code_table")) - ) + "host$get_repos", + test_mocker$use("gh_repos_table_min") ) - repos_table <- test_gitstats_priv$get_repos_from_hosts( - with_code = "shiny", - in_files = "DESCRIPTION", + repos_from_hosts_min <- test_gitstats_priv$get_repos_from_hosts( + add_contributors = TRUE, + with_code = NULL, + in_files = NULL, with_files = NULL, - verbose = FALSE, - progress = FALSE + output = "table_min", + verbose = FALSE, + progress = FALSE ) expect_repos_table( - repos_table, - repo_cols = repo_gitstats_colnames, - with_cols = c("contributors", "contributors_n") + repos_from_hosts_min, + repo_cols = repo_min_colnames, + with_cols = c("api_url", "platform") ) - test_mocker$cache(repos_table) + test_mocker$cache(repos_from_hosts_min) }) test_that("set_object_class for repos_table works correctly", { repos_table <- test_gitstats_priv$set_object_class( - object = 
test_mocker$use("repos_table"), + object = test_mocker$use("repos_from_hosts"), class = "repos_table", attr_list = list( "with_code" = NULL, diff --git a/tests/testthat/test-get_usage_R_package.R b/tests/testthat/test-get_usage_R_package.R index 025c923f..71b4e74d 100644 --- a/tests/testthat/test-get_usage_R_package.R +++ b/tests/testthat/test-get_usage_R_package.R @@ -2,7 +2,7 @@ test_that("get_R_package_as_dependency work correctly", { mockery::stub( test_gitstats_priv$get_R_package_as_dependency, "private$get_repos_from_hosts", - test_mocker$use("repos_table") + test_mocker$use("repos_from_hosts_min") ) R_package_as_dependency <- test_gitstats_priv$get_R_package_as_dependency( package_name = "shiny", @@ -19,6 +19,27 @@ test_that("get_R_package_as_dependency work correctly", { test_mocker$cache(R_package_as_dependency) }) +test_that("get_R_package_loading work correctly", { + mockery::stub( + test_gitstats_priv$get_R_package_loading, + "private$get_repos_from_hosts", + test_mocker$use("repos_from_hosts_min") + ) + R_package_loading <- test_gitstats_priv$get_R_package_loading( + package_name = "purrr", + verbose = FALSE + ) + expect_s3_class( + R_package_loading, + "data.frame" + ) + expect_gt( + nrow(R_package_loading), + 0 + ) + test_mocker$cache(R_package_loading) +}) + test_that("get_R_package_usage_from_hosts works as expected", { test_gitstats <- create_test_gitstats(hosts = 2, priv_mode = TRUE) mockery::stub( @@ -29,25 +50,51 @@ test_that("get_R_package_usage_from_hosts works as expected", { mockery::stub( test_gitstats$get_R_package_usage_from_hosts, "private$get_R_package_loading", - test_mocker$use("R_package_as_dependency") + test_mocker$use("R_package_loading") ) R_package_usage_table <- test_gitstats$get_R_package_usage_from_hosts( - package_name = "shiny", only_loading = FALSE, verbose = FALSE + packages = c("shiny", "purrr"), + only_loading = FALSE, + verbose = FALSE ) expect_package_usage_table(R_package_usage_table) 
test_mocker$cache(R_package_usage_table) }) test_that("when get_R_package_usage_from_hosts output is empty return warning", { - test_gitstats <- create_test_gitstats(hosts = 2) + test_gitstats <- create_test_gitstats(hosts = 2, priv_mode = TRUE) mockery::stub( - test_gitstats$get_R_package_usage, - "private$get_R_package_usage_from_hosts", + test_gitstats$get_R_package_usage_from_hosts, + "private$get_R_package_loading", + data.frame() + ) + mockery::stub( + test_gitstats$get_R_package_usage_from_hosts, + "private$get_R_package_as_dependency", data.frame() ) expect_snapshot( - test_gitstats$get_R_package_usage( - package_name = "shiny", only_loading = FALSE, verbose = TRUE + test_gitstats$get_R_package_usage_from_hosts( + packages = "non-existing-package", + only_loading = FALSE, + verbose = TRUE ) ) }) + +test_that("get_R_package_usage works", { + mockery::stub( + test_gitstats$get_R_package_usage, + "private$get_R_package_usage_from_hosts", + test_mocker$use("R_package_usage_table") + ) + R_package_usage_table <- test_gitstats$get_R_package_usage( + packages = c("shiny", "purrr"), + verbose = FALSE + ) + expect_package_usage_table(R_package_usage_table) + expect_s3_class( + R_package_usage_table, + "R_package_usage" + ) +}) diff --git a/vignettes/get_repos_with_code.Rmd b/vignettes/get_repos_with_code.Rmd index 65c28e0b..163223bf 100644 --- a/vignettes/get_repos_with_code.Rmd +++ b/vignettes/get_repos_with_code.Rmd @@ -56,11 +56,11 @@ repos_urls <- get_repos_urls( ## Package usage -`GitStats` allows you to search for repositories which make use of certain R packages. This function scans repositories if they import packages (look for package name in `DESCRIPTION` or `NAMESPACE`) or if the load package with `library()` or `request()`. +`GitStats` allows you to search for repositories which make use of certain R packages. 
This function scans repositories if they import packages (look for package name in `DESCRIPTION` or `NAMESPACE`) or if they load a package with `library()` or `require()`. ```{r, eval = FALSE} package_usage <- get_R_package_usage( gitstats_object = github_stats, - package_name = "shiny" + packages = c("shiny", "purrr") ) ``` From 8484f15b39092688619843ce1703401ed0add778 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Thu, 17 Oct 2024 08:28:23 +0000 Subject: [PATCH 2/5] Add split_output parameter, update example in docs. --- NEWS.md | 1 + R/GitStats.R | 39 +++++++++++++++++------ R/gitstats_functions.R | 19 ++++++++--- inst/package_usage_workflow.R | 6 ++++ man/get_R_package_usage.Rd | 19 ++++++++--- tests/testthat/test-get_usage_R_package.R | 22 +++++++++++++ vignettes/get_repos_with_code.Rmd | 3 +- 7 files changed, 90 insertions(+), 19 deletions(-) diff --git a/NEWS.md b/NEWS.md index 911cb248..e2f3fc3e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,6 +5,7 @@ - Optimized `get_R_package_usage()` function: - you can now pass a vector of packages names (new `packages` parameter replacing old `package_name`) ([#494](https://github.com/r-world-devs/GitStats/issues/494)), - on the other hand, output of the function has been limited to contain only most necessary data (removing all repository stats), making thus process of obtaining package usage faster ([#474](https://github.com/r-world-devs/GitStats/issues/474)). + - new `split_output` parameter has been added - when set to `TRUE` a list with tibbles (every element of the list for every package) instead of one tibble is returned. - Added possibility to get repositories for individual users with `get_repos()` ([#492](https://github.com/r-world-devs/GitStats/issues/492)). Earlier this was only possible for GitHub organizations and GitLab groups. 
## Fixes: diff --git a/R/GitStats.R b/R/GitStats.R index 565e265d..ca14f603 100644 --- a/R/GitStats.R +++ b/R/GitStats.R @@ -452,12 +452,17 @@ GitStats <- R6::R6Class( #' @param packages A character vector, names of R packages to look for. #' @param only_loading A boolean, if `TRUE` function will check only if #' package is loaded in repositories, not used as dependencies. + #' @param split_output Optional, a boolean. If `TRUE` will return a list of + #' tables, where every element of the list stands for the package passed to + #' `packages` parameter. If `FALSE`, will return only one table with name of + #' the package stored in first column. #' @param cache A logical, if set to `TRUE` GitStats will retrieve the last #' result from its storage. #' @param verbose A logical, `TRUE` by default. If `FALSE` messages and #' printing output is switched off. get_R_package_usage = function(packages, only_loading = FALSE, + split_output = FALSE, cache = TRUE, verbose = TRUE) { private$check_for_host() @@ -478,9 +483,11 @@ GitStats <- R6::R6Class( R_package_usage <- private$get_R_package_usage_from_hosts( packages = packages, only_loading = only_loading, + split_output = split_output, verbose = verbose ) - if (nrow(R_package_usage) > 0) { + if ((!split_output && nrow(R_package_usage) > 0) || + (split_output && any(purrr::map_lgl(R_package_usage, ~ nrow(.) 
> 0)))) { R_package_usage <- private$set_object_class( object = R_package_usage, class = "R_package_usage", @@ -956,8 +963,9 @@ GitStats <- R6::R6Class( # Pull information on package usage in a table form get_R_package_usage_from_hosts = function(packages, only_loading, + split_output = FALSE, verbose = TRUE) { - packages_usage_tables <- purrr::map(packages, function(package_name) { + packages_usage_list <- purrr::map(packages, function(package_name) { if (!only_loading) { repos_with_package_as_dependency <- private$get_R_package_as_dependency( package_name = package_name, @@ -998,16 +1006,27 @@ GitStats <- R6::R6Class( ) } return(package_usage_table) - }) %>% - purrr::list_rbind() - if (nrow(packages_usage_tables) == 0 && verbose) { - cli::cli_alert_warning( - cli::col_yellow( - "No usage of R packages found." + }) + if (split_output) { + packages_usage_result <- purrr::set_names(packages_usage_list, packages) + if (all(purrr::map_lgl(packages_usage_result, ~ nrow(.) == 0)) && verbose) { + cli::cli_alert_warning( + cli::col_yellow( + "No usage of R packages found." + ) ) - ) + } + } else { + packages_usage_result <- purrr::list_rbind(packages_usage_list) + if (nrow(packages_usage_result) == 0 && verbose) { + cli::cli_alert_warning( + cli::col_yellow( + "No usage of R packages found." + ) + ) + } } - return(packages_usage_tables) + return(packages_usage_result) }, # Search repositories with `library(package_name)` in code blobs. diff --git a/R/gitstats_functions.R b/R/gitstats_functions.R index 91a8f3a2..f693041b 100644 --- a/R/gitstats_functions.R +++ b/R/gitstats_functions.R @@ -438,15 +438,20 @@ get_files_structure <- function(gitstats_object, #' @name get_R_package_usage #' @description Wrapper over searching repositories by code blobs related to #' loading package (`library(package)` and `require(package)` in all files) or -#' using it as a dependency (`package` in `DESCRIPTION` and `NAMESPACE` files). 
+#' using it as a dependency (`package` in `DESCRIPTION` and `NAMESPACE` +#' files). #' @param gitstats_object A GitStats object. #' @param packages A character vector, names of R packages to look for. #' @param only_loading A boolean, if `TRUE` function will check only if package #' is loaded in repositories, not used as dependencies. +#' @param split_output Optional, a boolean. If `TRUE` will return a list of +#' tables, where every element of the list stands for the package passed to +#' `packages` parameter. If `FALSE`, will return only one table with name of +#' the package stored in first column. #' @param cache A logical, if set to `TRUE` GitStats will retrieve the last #' result from its storage. -#' @param verbose A logical, `TRUE` by default. If `FALSE` messages and -#' printing output is switched off. +#' @param verbose A logical, `TRUE` by default. If `FALSE` messages and printing +#' output is switched off. #' @return A data.frame. #' @examples #' \dontrun{ @@ -456,17 +461,23 @@ get_files_structure <- function(gitstats_object, #' orgs = c("r-world-devs", "openpharma") #' ) #' -#' get_R_package_usage(my_gitstats, "Shiny") +#' get_R_package_usage( +#' gitstats_object = my_gitstats, +#' packages = c("purrr", "shiny"), +#' split_output = TRUE +#' ) #' } #' @export get_R_package_usage <- function(gitstats_object, packages, only_loading = FALSE, + split_output = FALSE, cache = TRUE, verbose = is_verbose(gitstats_object)) { gitstats_object$get_R_package_usage( packages = packages, only_loading = only_loading, + split_output = split_output, cache = cache, verbose = verbose ) diff --git a/inst/package_usage_workflow.R b/inst/package_usage_workflow.R index 26b5288f..d7cf27e8 100644 --- a/inst/package_usage_workflow.R +++ b/inst/package_usage_workflow.R @@ -11,3 +11,9 @@ get_R_package_usage( test_gitstats, packages = c("purrr", "shiny") ) + +get_R_package_usage( + test_gitstats, + packages = c("dplyr", "shiny"), + split_output = TRUE +) diff --git 
a/man/get_R_package_usage.Rd b/man/get_R_package_usage.Rd index 3fce37da..e19087b1 100644 --- a/man/get_R_package_usage.Rd +++ b/man/get_R_package_usage.Rd @@ -8,6 +8,7 @@ get_R_package_usage( gitstats_object, packages, only_loading = FALSE, + split_output = FALSE, cache = TRUE, verbose = is_verbose(gitstats_object) ) @@ -20,11 +21,16 @@ get_R_package_usage( \item{only_loading}{A boolean, if \code{TRUE} function will check only if package is loaded in repositories, not used as dependencies.} +\item{split_output}{Optional, a boolean. If \code{TRUE} will return a list of +tables, where every element of the list stands for the package passed to +\code{packages} parameter. If \code{FALSE}, will return only one table with name of +the package stored in first column.} + \item{cache}{A logical, if set to \code{TRUE} GitStats will retrieve the last result from its storage.} -\item{verbose}{A logical, \code{TRUE} by default. If \code{FALSE} messages and -printing output is switched off.} +\item{verbose}{A logical, \code{TRUE} by default. If \code{FALSE} messages and printing +output is switched off.} } \value{ A data.frame. @@ -32,7 +38,8 @@ A data.frame. \description{ Wrapper over searching repositories by code blobs related to loading package (\code{library(package)} and \code{require(package)} in all files) or -using it as a dependency (\code{package} in \code{DESCRIPTION} and \code{NAMESPACE} files). +using it as a dependency (\code{package} in \code{DESCRIPTION} and \code{NAMESPACE} +files). 
} \examples{ \dontrun{ @@ -42,6 +49,10 @@ using it as a dependency (\code{package} in \code{DESCRIPTION} and \code{NAMESPA orgs = c("r-world-devs", "openpharma") ) - get_R_package_usage(my_gitstats, "Shiny") + get_R_package_usage( + gitstats_object = my_gitstats, + packages = c("purrr", "shiny"), + split_output = TRUE + ) } } diff --git a/tests/testthat/test-get_usage_R_package.R b/tests/testthat/test-get_usage_R_package.R index 71b4e74d..050cd2de 100644 --- a/tests/testthat/test-get_usage_R_package.R +++ b/tests/testthat/test-get_usage_R_package.R @@ -61,6 +61,28 @@ test_that("get_R_package_usage_from_hosts works as expected", { test_mocker$cache(R_package_usage_table) }) +test_that("get_R_package_usage_from_hosts with split_output works", { + test_gitstats <- create_test_gitstats(hosts = 2, priv_mode = TRUE) + mockery::stub( + test_gitstats$get_R_package_usage_from_hosts, + "private$get_R_package_as_dependency", + test_mocker$use("R_package_as_dependency") + ) + mockery::stub( + test_gitstats$get_R_package_usage_from_hosts, + "private$get_R_package_loading", + test_mocker$use("R_package_loading") + ) + R_package_usage_list <- test_gitstats$get_R_package_usage_from_hosts( + packages = c("shiny", "purrr"), + only_loading = FALSE, + split_output = TRUE, + verbose = FALSE + ) + expect_equal(names(R_package_usage_list), c("shiny", "purrr")) + purrr::walk(R_package_usage_list, expect_package_usage_table) +}) + test_that("when get_R_package_usage_from_hosts output is empty return warning", { test_gitstats <- create_test_gitstats(hosts = 2, priv_mode = TRUE) mockery::stub( diff --git a/vignettes/get_repos_with_code.Rmd b/vignettes/get_repos_with_code.Rmd index 163223bf..71c58a26 100644 --- a/vignettes/get_repos_with_code.Rmd +++ b/vignettes/get_repos_with_code.Rmd @@ -61,6 +61,7 @@ repos_urls <- get_repos_urls( ```{r, eval = FALSE} package_usage <- get_R_package_usage( gitstats_object = github_stats, - packages = c("shiny", "purrr") + packages = c("shiny", "purrr"), + 
split_output = TRUE ) ``` From b257b8bcc38f374099cdbb2d9a67ecf6440e7335 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Thu, 17 Oct 2024 09:02:52 +0000 Subject: [PATCH 3/5] Adjust and test getting repos for whole hosts. --- R/GitHost.R | 32 ++++++++++++++++------- tests/testthat/test-01-get_repos-GitHub.R | 17 ++++++++++++ 2 files changed, 39 insertions(+), 10 deletions(-) diff --git a/R/GitHost.R b/R/GitHost.R index 4717d5d2..9d7015e4 100644 --- a/R/GitHost.R +++ b/R/GitHost.R @@ -720,16 +720,29 @@ GitHost <- R6::R6Class( information = "Pulling repositories" ) } - repos_response <- private$get_repos_response_with_code( - code = code, - in_files = in_files, - in_path = in_path, - output = output, - verbose = verbose, - progress = progress - ) + rest_engine <- private$engines$rest + if (is.null(in_files)) { + repos_response <- rest_engine$get_repos_by_code( + code = code, + in_path = in_path, + output = output, + verbose = verbose, + progress = progress + ) + } else { + repos_response <- purrr::map(in_files, function(filename) { + rest_engine$get_repos_by_code( + code = code, + filename = filename, + in_path = in_path, + output = output, + verbose = verbose, + progress = progress + ) + }) %>% + purrr::list_flatten() + } if (output != "raw") { - rest_engine <- private$engines$rest repos_table <- repos_response %>% rest_engine$tailor_repos_response( output = output @@ -792,7 +805,6 @@ GitHost <- R6::R6Class( purrr::list_flatten() } if (output != "raw") { - rest_engine <- private$engines$rest repos_table <- repos_response %>% rest_engine$tailor_repos_response( output = output diff --git a/tests/testthat/test-01-get_repos-GitHub.R b/tests/testthat/test-01-get_repos-GitHub.R index 4e1856af..6111ee36 100644 --- a/tests/testthat/test-01-get_repos-GitHub.R +++ b/tests/testthat/test-01-get_repos-GitHub.R @@ -324,6 +324,7 @@ test_that("`get_repos_with_code_from_orgs()` pulls raw response", { ) repos_with_code_from_orgs_raw <- 
github_testhost_priv$get_repos_with_code_from_orgs( code = "shiny", + in_files = c("DESCRIPTION", "NAMESPACE"), output = "raw", verbose = FALSE ) @@ -331,6 +332,22 @@ test_that("`get_repos_with_code_from_orgs()` pulls raw response", { expect_gt(length(repos_with_code_from_orgs_raw), 0) }) +test_that("`get_repos_with_code_from_host()` pulls raw response", { + mockery::stub( + github_testhost_priv$get_repos_with_code_from_host, + "rest_engine$get_repos_by_code", + test_mocker$use("gh_repos_by_code_raw") + ) + repos_with_code_from_host_raw <- github_testhost_priv$get_repos_with_code_from_host( + code = "shiny", + in_files = c("DESCRIPTION", "NAMESPACE"), + output = "raw", + verbose = FALSE + ) + expect_type(repos_with_code_from_host_raw, "list") + expect_gt(length(repos_with_code_from_host_raw), 0) +}) + test_that("get_repos_with_code() works", { mockery::stub( github_testhost_priv$get_repos_with_code, From 1dc7a055972f269267fc4723aea5e343dc876b97 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Thu, 17 Oct 2024 10:19:36 +0000 Subject: [PATCH 4/5] Fix spellcheck. --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index e2f3fc3e..96b57bae 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,7 +5,7 @@ - Optimized `get_R_package_usage()` function: - you can now pass a vector of packages names (new `packages` parameter replacing old `package_name`) ([#494](https://github.com/r-world-devs/GitStats/issues/494)), - on the other hand, output of the function has been limited to contain only most necessary data (removing all repository stats), making thus process of obtaining package usage faster ([#474](https://github.com/r-world-devs/GitStats/issues/474)). - - new `split_output` parameter has been added - when set to `TRUE` a list with tibbles (every element of the list for every package) instead of one tibble is returned. 
+ - new `split_output` parameter has been added - when set to `TRUE` a `list` with `tibbles` (every element of the `list` for every package) instead of one `tibble` is returned. - Added possibility to get repositories for individual users with `get_repos()` ([#492](https://github.com/r-world-devs/GitStats/issues/492)). Earlier this was only possible for GitHub organizations and GitLab groups. ## Fixes: From 498eb4e0a46f7514da6b011b0a73649456fb9c46 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Thu, 17 Oct 2024 10:22:51 +0000 Subject: [PATCH 5/5] Update docs. --- R/gitstats_functions.R | 3 ++- man/get_R_package_usage.Rd | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/R/gitstats_functions.R b/R/gitstats_functions.R index f693041b..54c16d07 100644 --- a/R/gitstats_functions.R +++ b/R/gitstats_functions.R @@ -452,7 +452,8 @@ get_files_structure <- function(gitstats_object, #' result from its storage. #' @param verbose A logical, `TRUE` by default. If `FALSE` messages and printing #' output is switched off. -#' @return A data.frame. +#' @return A `tibble` or `list` of `tibbles` depending on `split_output` +#' parameter. #' @examples #' \dontrun{ #' my_gitstats <- create_gitstats() %>% diff --git a/man/get_R_package_usage.Rd b/man/get_R_package_usage.Rd index e19087b1..9e56c0e5 100644 --- a/man/get_R_package_usage.Rd +++ b/man/get_R_package_usage.Rd @@ -33,7 +33,8 @@ result from its storage.} output is switched off.} } \value{ -A data.frame. +A \code{tibble} or \code{list} of \code{tibbles} depending on \code{split_output} +parameter. } \description{ Wrapper over searching repositories by code blobs related to