From 07d3976910c65143015f87ecef3100b2903a9304 Mon Sep 17 00:00:00 2001 From: Jordan Bradford <36420801+jrdnbradford@users.noreply.github.com> Date: Sat, 31 Aug 2024 17:27:16 -0400 Subject: [PATCH] Add `gutenberg_get_all_mirrors` --- NAMESPACE | 1 + NEWS.md | 2 ++ R/gutenberg_mirrors.R | 42 +++++++++++++++++++++++ man/gutenberg_get_all_mirrors.Rd | 34 ++++++++++++++++++ tests/testthat/fixtures/MIRRORS-ALL | 21 ++++++++++++ tests/testthat/fixtures/create_fixtures.R | 1 + tests/testthat/test-gutenberg_mirrors.R | 9 +++++ 7 files changed, 110 insertions(+) create mode 100644 man/gutenberg_get_all_mirrors.Rd create mode 100644 tests/testthat/fixtures/MIRRORS-ALL diff --git a/NAMESPACE b/NAMESPACE index 1184f37..e33b5a7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,7 @@ # Generated by roxygen2: do not edit by hand export(gutenberg_download) +export(gutenberg_get_all_mirrors) export(gutenberg_get_mirror) export(gutenberg_strip) export(gutenberg_works) diff --git a/NEWS.md b/NEWS.md index 3a8479f..541e859 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # gutenbergr (development version) +* `gutenberg_get_all_mirrors()` has been added to retrieve mirror data (@jrdnbradford, #58) + # gutenbergr 0.2.4 * Update data scraping process to use R end-to-end (@jonthegeek, #36). 
diff --git a/R/gutenberg_mirrors.R b/R/gutenberg_mirrors.R
index d86a7ec..ad085be 100644
--- a/R/gutenberg_mirrors.R
+++ b/R/gutenberg_mirrors.R
@@ -44,3 +44,65 @@ gutenberg_get_mirror <- function(verbose = TRUE) {
   options(gutenberg_mirror = mirror)
   return(mirror)
 }
+
+
+#' Get all mirror data from Project Gutenberg
+#'
+#' Get all the mirror data from \url{https://www.gutenberg.org/MIRRORS.ALL}
+#'
+#' @return A tbl_df of Project Gutenberg mirrors and related data
+#' \describe{
+#'
+#'   \item{continent}{Continent where the mirror is located}
+#'
+#'   \item{nation}{Nation where the mirror is located}
+#'
+#'   \item{location}{Location of the mirror}
+#'
+#'   \item{provider}{Provider of the mirror}
+#'
+#'   \item{url}{URL of the mirror}
+#'
+#'   \item{note}{Special notes}
+#' }
+#' @examplesIf interactive()
+#'
+#' gutenberg_get_all_mirrors()
+#'
+#' @export
+gutenberg_get_all_mirrors <- function() {
+  mirrors_url <- "https://www.gutenberg.org/MIRRORS.ALL"
+
+  # `read_url()` is defined elsewhere in the package. Split on newlines so
+  # the filtering below sees one element per line even if it returns the
+  # whole file as a single string (assumption; confirm against `read_url()`).
+  mirror_lines <- unlist(strsplit(read_url(mirrors_url), "\r?\n"))
+  mirror_lines <- trimws(mirror_lines)
+
+  # The file is a text table: header, a "----+----" rule, one row per
+  # mirror, then a "(N rows)" footer. Drop the non-data lines by pattern
+  # rather than by position, so a missing footer or an extra blank line
+  # cannot silently remove a real mirror.
+  is_noise <- !nzchar(mirror_lines) |
+    grepl("^-+(\\+-+)*$", mirror_lines) |
+    grepl("^\\([0-9]+ rows?\\)$", mirror_lines)
+  mirror_lines <- mirror_lines[!is_noise]
+
+  # Remove the temporary file on exit, including when parsing fails.
+  # NOTE(review): this relies on readr reading eagerly (`lazy = FALSE`);
+  # confirm that holds for the minimum readr version the package supports.
+  tmp <- tempfile(fileext = ".md")
+  on.exit(unlink(tmp), add = TRUE)
+  writeLines(mirror_lines, tmp)
+
+  # Every field is free text, so read all columns as character: the types
+  # stay stable and no column-specification message is printed. With the
+  # non-data lines already gone, warnings are no longer suppressed, so
+  # genuine parsing problems are reported.
+  readr::read_delim(
+    tmp,
+    delim = "|",
+    trim_ws = TRUE,
+    col_types = readr::cols(.default = readr::col_character())
+  )
+}
diff --git a/man/gutenberg_get_all_mirrors.Rd b/man/gutenberg_get_all_mirrors.Rd
new file mode 100644
index 0000000..87c9e3f
--- /dev/null
+++ b/man/gutenberg_get_all_mirrors.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/gutenberg_mirrors.R
+\name{gutenberg_get_all_mirrors}
+\alias{gutenberg_get_all_mirrors}
+\title{Get all mirror data from Project Gutenberg}
+\usage{
+gutenberg_get_all_mirrors()
+}
+\value{
+A tbl_df of Project Gutenberg mirrors and related data
+\describe{
+
+\item{continent}{Continent where the mirror is located}
+
+\item{nation}{Nation where the mirror is located}
+
+\item{location}{Location of the mirror}
+
+\item{provider}{Provider of the mirror}
+
+\item{url}{URL of the mirror}
+
+\item{note}{Special notes} +} +} +\description{ +Get all the mirror data from \url{https://www.gutenberg.org/MIRRORS.ALL} +} +\examples{ +\dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} + +gutenberg_get_all_mirrors() +\dontshow{\}) # examplesIf} +} diff --git a/tests/testthat/fixtures/MIRRORS-ALL b/tests/testthat/fixtures/MIRRORS-ALL new file mode 100644 index 0000000..69c73bb --- /dev/null +++ b/tests/testthat/fixtures/MIRRORS-ALL @@ -0,0 +1,21 @@ + continent | nation | location | provider | url | note +---------------+---------------+---------------------+----------------------------------------------+------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------- + Europe | Great Britain | Kent | UK Mirror Service | http://www.mirrorservice.org/sites/ftp.ibiblio.org/pub/docs/books/gutenberg/ | + Europe | Great Britain | Kent | UK Mirror Service | ftp://ftp.mirrorservice.org/sites/ftp.ibiblio.org/pub/docs/books/gutenberg/ | + Europe | Great Britain | Kent | UK Mirror Service | rsync://rsync.mirrorservice.org/gutenberg/ | + Europe | Portugal | Braga | Universidade do Minho | http://eremita.di.uminho.pt/gutenberg/ | + Europe | Portugal | Braga | Universidade do Minho | ftp://eremita.di.uminho.pt/pub/gutenberg/ | + North America | Canada | Waterloo | University of Waterloo Computer Science Club | http://mirror.csclub.uwaterloo.ca/gutenberg/ | + North America | United States | Buffalo, NY | Jake Nabasny | https://gutenberg.nabasny.com/ | + North America | United States | Chapel Hill | iBiblio | https://www.gutenberg.org/dirs/ | Main Project Gutenberg Collection Site + North America | United States | Chapel Hill | iBiblio | ftp://ftp.ibiblio.org/pub/docs/books/gutenberg/ | Main Project Gutenberg FTP Site. 
+ North America | United States | Pikeville, Kentucky | SandyRiver.NET | https://mirror2.sandyriver.net/pub/gutenberg | High speed mirror on a 10Gb network connection. Also available by http, and by rsync to rsync://mirror2.sandyriver.net/pub/gutenberg + North America | United States | Salt Lake City | Xmission ISP - FTP | ftp://mirrors.xmission.com/gutenberg/ | + North America | United States | Salt Lake City | Xmission ISP - HTTP | http://mirrors.xmission.com/gutenberg/ | + North America | United States | San Diego | Project Gutenberg | ftp://gutenberg.pglaf.org | High-speed mirror. Includes cache/generated files (epub, mobi, etc.). + North America | United States | San Diego | Project Gutenberg | https://aleph.gutenberg.org/ | High-speed mirror. Includes cache/generated files (epub, mobi, etc.). Also available via rsync and ftp. + North America | United States | San Diego | Project Gutenberg | https://gutenberg.pglaf.org/ | High-speed mirror. Includes cache/generated files (epub, mobi, etc.). + North America | United States | San Diego | Project Gutenberg | gopher://gopher.pglaf.org/ | Gopher server. + North America | United States | San Diego | Project Gutenberg | rsync://gutenberg.pglaf.org/gutenberg | High-speed mirror. Includes cache/generated files (epub, mobi, etc.). 
+(17 rows)
+
diff --git a/tests/testthat/fixtures/create_fixtures.R b/tests/testthat/fixtures/create_fixtures.R
index 25f19c0..318a673 100644
--- a/tests/testthat/fixtures/create_fixtures.R
+++ b/tests/testthat/fixtures/create_fixtures.R
@@ -16,3 +16,4 @@ dl_fixture("https://www.gutenberg.org/cache/epub/68283/pg68283.txt")
 dl_fixture("https://www.gutenberg.org/robot/harvest?filetypes[]=txt")
 dl_fixture("http://aleph.gutenberg.org/1/0/105/105-0.zip")
 dl_fixture("http://aleph.gutenberg.org/1/0/109/109.zip")
+dl_fixture("https://www.gutenberg.org/MIRRORS.ALL")
diff --git a/tests/testthat/test-gutenberg_mirrors.R b/tests/testthat/test-gutenberg_mirrors.R
index 24e0c88..5af7b14 100644
--- a/tests/testthat/test-gutenberg_mirrors.R
+++ b/tests/testthat/test-gutenberg_mirrors.R
@@ -29,3 +29,14 @@ test_that("gutenberg_get_mirror uses existing option", {
     gutenberg_get_mirror(), "mirror"
   )
 })
+
+test_that("gutenberg_get_all_mirrors works", {
+  # local_dl_and_read() is a test helper defined elsewhere; it presumably
+  # serves the files under fixtures/ instead of hitting the network.
+  local_dl_and_read()
+  mirrors <- gutenberg_get_all_mirrors()
+  expect_s3_class(mirrors, "data.frame")
+  expect_s3_class(mirrors, "tbl_df")
+  expect_equal(ncol(mirrors), 6)
+  expect_gt(nrow(mirrors), 10)
+})