-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add
gutenberg_get_all_mirrors()
(#68)
* Separate mirror functions * Add `gutenberg_get_all_mirrors`
- Loading branch information
1 parent
4443a05
commit 2a73b8e
Showing
10 changed files
with
188 additions
and
79 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
#' Get the recommended mirror for Gutenberg files | ||
#' | ||
#' Get the recommended mirror for Gutenberg files and set the global | ||
#' `gutenberg_mirror` options. | ||
#' | ||
#' @param verbose Whether to show messages about the Project Gutenberg mirror | ||
#' that was chosen | ||
#' | ||
#' @return A character vector with the url for the chosen mirror. | ||
#' | ||
#' @examplesIf interactive() | ||
#' | ||
#' gutenberg_get_mirror() | ||
#' | ||
#' @export | ||
gutenberg_get_mirror <- function(verbose = TRUE) { | ||
mirror <- getOption("gutenberg_mirror") | ||
if (!is.null(mirror)) { | ||
return(mirror) | ||
} | ||
|
||
# figure out the mirror for this location from wget | ||
harvest_url <- "https://www.gutenberg.org/robot/harvest" | ||
maybe_message( | ||
verbose, | ||
"Determining mirror for Project Gutenberg from {harvest_url}." | ||
) | ||
wget_url <- glue::glue("{harvest_url}?filetypes[]=txt") | ||
lines <- read_url(wget_url) | ||
a <- stringr::str_subset(lines, stringr::fixed("<a href="))[1] | ||
mirror_full_url <- stringr::str_match(a, "href=\"(.*?)\"")[2] | ||
|
||
# parse and leave out the path | ||
parsed <- urltools::url_parse(mirror_full_url) | ||
if (parsed$domain == "www.gutenberg.lib.md.us") { | ||
# Broken mirror. PG has been contacted. For now, replace: | ||
parsed$domain <- "aleph.gutenberg.org" # nocov | ||
} | ||
|
||
mirror <- unclass(glue::glue_data(parsed, "{scheme}://{domain}")) | ||
maybe_message(verbose, "Using mirror {mirror}.") | ||
|
||
# set option for next time | ||
options(gutenberg_mirror = mirror) | ||
return(mirror) | ||
} | ||
|
||
|
||
#' Get all mirror data from Project Gutenberg | ||
#' | ||
#' Get all the mirror data from \url{https://www.gutenberg.org/MIRRORS.ALL} | ||
#' | ||
#' @return A tbl_df of Project Gutenberg mirrors and related data | ||
#' \describe{ | ||
#' | ||
#' \item{continent}{Continent where the mirror is located} | ||
#' | ||
#' \item{nation}{Nation where the mirror is located} | ||
#' | ||
#' \item{location}{Location of the mirror} | ||
#' | ||
#' \item{provider}{Provider of the mirror} | ||
#' | ||
#' \item{url}{URL of the mirror} | ||
#' | ||
#' \item{note}{Special notes} | ||
#' } | ||
#' @examplesIf interactive() | ||
#' | ||
#' gutenberg_get_all_mirrors() | ||
#' | ||
#' @export | ||
gutenberg_get_all_mirrors <- function() { | ||
mirrors_url <- "https://www.gutenberg.org/MIRRORS.ALL" | ||
mirrors_md <- read_url(mirrors_url) | ||
tmp <- tempfile(fileext = ".md") | ||
writeLines(mirrors_md, tmp) | ||
mirrors <- suppressWarnings( | ||
readr::read_delim( | ||
tmp, | ||
delim = "|", | ||
trim_ws = TRUE | ||
) |> | ||
dplyr::slice(2:(dplyr::n() - 1)) | ||
) | ||
|
||
return(mirrors) | ||
} |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
continent | nation | location | provider | url | note | ||
---------------+---------------+---------------------+----------------------------------------------+------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------- | ||
Europe | Great Britain | Kent | UK Mirror Service | http://www.mirrorservice.org/sites/ftp.ibiblio.org/pub/docs/books/gutenberg/ | | ||
Europe | Great Britain | Kent | UK Mirror Service | ftp://ftp.mirrorservice.org/sites/ftp.ibiblio.org/pub/docs/books/gutenberg/ | | ||
Europe | Great Britain | Kent | UK Mirror Service | rsync://rsync.mirrorservice.org/gutenberg/ | | ||
Europe | Portugal | Braga | Universidade do Minho | http://eremita.di.uminho.pt/gutenberg/ | | ||
Europe | Portugal | Braga | Universidade do Minho | ftp://eremita.di.uminho.pt/pub/gutenberg/ | | ||
North America | Canada | Waterloo | University of Waterloo Computer Science Club | http://mirror.csclub.uwaterloo.ca/gutenberg/ | | ||
North America | United States | Buffalo, NY | Jake Nabasny | https://gutenberg.nabasny.com/ | | ||
North America | United States | Chapel Hill | iBiblio | https://www.gutenberg.org/dirs/ | Main Project Gutenberg Collection Site | ||
North America | United States | Chapel Hill | iBiblio | ftp://ftp.ibiblio.org/pub/docs/books/gutenberg/ | Main Project Gutenberg FTP Site. | ||
North America | United States | Pikeville, Kentucky | SandyRiver.NET | https://mirror2.sandyriver.net/pub/gutenberg | High speed mirror on a 10Gb network connection. Also available by http, and by rsync to rsync://mirror2.sandyriver.net/pub/gutenberg | ||
North America | United States | Salt Lake City | Xmission ISP - FTP | ftp://mirrors.xmission.com/gutenberg/ | | ||
North America | United States | Salt Lake City | Xmission ISP - HTTP | http://mirrors.xmission.com/gutenberg/ | | ||
North America | United States | San Diego | Project Gutenberg | ftp://gutenberg.pglaf.org | High-speed mirror. Includes cache/generated files (epub, mobi, etc.). | ||
North America | United States | San Diego | Project Gutenberg | https://aleph.gutenberg.org/ | High-speed mirror. Includes cache/generated files (epub, mobi, etc.). Also available via rsync and ftp. | ||
North America | United States | San Diego | Project Gutenberg | https://gutenberg.pglaf.org/ | High-speed mirror. Includes cache/generated files (epub, mobi, etc.). | ||
North America | United States | San Diego | Project Gutenberg | gopher://gopher.pglaf.org/ | Gopher server. | ||
North America | United States | San Diego | Project Gutenberg | rsync://gutenberg.pglaf.org/gutenberg | High-speed mirror. Includes cache/generated files (epub, mobi, etc.). | ||
(17 rows) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
test_that("gutenberg_get_mirror works with no option set", { | ||
local_dl_and_read() | ||
withr::local_options(gutenberg_mirror = NULL) | ||
expect_message( | ||
expect_identical( | ||
gutenberg_get_mirror(), | ||
"http://aleph.gutenberg.org" | ||
), | ||
"Determining mirror" | ||
) | ||
expect_no_message( | ||
expect_identical( | ||
gutenberg_get_mirror(), | ||
"http://aleph.gutenberg.org" | ||
) | ||
) | ||
}) | ||
|
||
test_that("gutenberg_get_mirror respects verbose", { | ||
local_dl_and_read() | ||
withr::local_options(gutenberg_mirror = NULL) | ||
expect_no_message(gutenberg_get_mirror(verbose = FALSE)) | ||
}) | ||
|
||
test_that("gutenberg_get_mirror uses existing option", { | ||
local_dl_and_read() | ||
withr::local_options(gutenberg_mirror = "mirror") | ||
expect_identical( | ||
gutenberg_get_mirror(), "mirror" | ||
) | ||
}) | ||
|
||
test_that("gutenberg_get_all_mirrors works", { | ||
local_dl_and_read() | ||
mirrors <- gutenberg_get_all_mirrors() | ||
expect_true(inherits(mirrors, "data.frame")) | ||
expect_true(inherits(mirrors, "tbl_df")) | ||
expect_equal(ncol(mirrors), 6) | ||
expect_true(nrow(mirrors) > 10) | ||
}) |