diff --git a/.Rbuildignore b/.Rbuildignore index 3407b1b..21cb578 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -2,16 +2,9 @@ ^\.Rproj\.user$ ^data-raw$ ^README\.Rmd$ -^README-.*\.png$ -^\.travis\.yml$ ^cran-comments\.md$ -^appveyor\.yml$ -^doc$ -^Meta$ -^CRAN-RELEASE$ ^\.github$ -^CODE_OF_CONDUCT\.md$ -^CRAN-SUBMISSION$ -^revdep$ -^README\.md$ -^NEWS\.md$ +^codecov\.yml$ +^docs$ +^_pkgdown\.yml$ +^pkgdown$ diff --git a/CODE_OF_CONDUCT.md b/.github/CODE_OF_CONDUCT.md similarity index 100% rename from CODE_OF_CONDUCT.md rename to .github/CODE_OF_CONDUCT.md diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index a3ac618..21ccc7e 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -2,12 +2,14 @@ # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: - branches: [main, master] + branches: [main] pull_request: - branches: [main, master] + branches: [main] name: R-CMD-check +permissions: read-all + jobs: R-CMD-check: runs-on: ${{ matrix.config.os }} @@ -18,18 +20,18 @@ jobs: fail-fast: false matrix: config: + - {os: ubuntu-latest, r: 'release'} - {os: macos-latest, r: 'release'} - {os: windows-latest, r: 'release'} - - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - - {os: ubuntu-latest, r: 'release'} - - {os: ubuntu-latest, r: 'oldrel-1'} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'oldrel-1'} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} R_KEEP_PKG_SOURCE: yes steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-pandoc@v2 @@ -47,3 +49,4 @@ jobs: - uses: r-lib/actions/check-r-package@v2 with: upload-snapshots: true + build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' diff --git a/.github/workflows/check-standard.yaml b/.github/workflows/check-standard.yaml deleted file mode 100644 index a3ac618..0000000 --- 
a/.github/workflows/check-standard.yaml +++ /dev/null @@ -1,49 +0,0 @@ -# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples -# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help -on: - push: - branches: [main, master] - pull_request: - branches: [main, master] - -name: R-CMD-check - -jobs: - R-CMD-check: - runs-on: ${{ matrix.config.os }} - - name: ${{ matrix.config.os }} (${{ matrix.config.r }}) - - strategy: - fail-fast: false - matrix: - config: - - {os: macos-latest, r: 'release'} - - {os: windows-latest, r: 'release'} - - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - - {os: ubuntu-latest, r: 'release'} - - {os: ubuntu-latest, r: 'oldrel-1'} - - env: - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - R_KEEP_PKG_SOURCE: yes - - steps: - - uses: actions/checkout@v3 - - - uses: r-lib/actions/setup-pandoc@v2 - - - uses: r-lib/actions/setup-r@v2 - with: - r-version: ${{ matrix.config.r }} - http-user-agent: ${{ matrix.config.http-user-agent }} - use-public-rspm: true - - - uses: r-lib/actions/setup-r-dependencies@v2 - with: - extra-packages: any::rcmdcheck - needs: check - - - uses: r-lib/actions/check-r-package@v2 - with: - upload-snapshots: true diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml new file mode 100644 index 0000000..4bbce75 --- /dev/null +++ b/.github/workflows/pkgdown.yaml @@ -0,0 +1,50 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + release: + types: [published] + workflow_dispatch: + +name: pkgdown.yaml + +permissions: read-all + +jobs: + pkgdown: + runs-on: ubuntu-latest + # Only restrict concurrency for non-PR jobs + concurrency: + group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + permissions: + contents: write + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::pkgdown, local::. + needs: website + + - name: Build site + run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) + shell: Rscript {0} + + - name: Deploy to GitHub pages 🚀 + if: github.event_name != 'pull_request' + uses: JamesIves/github-pages-deploy-action@v4.5.0 + with: + clean: false + branch: gh-pages + folder: docs diff --git a/.github/workflows/pr-commands.yaml b/.github/workflows/pr-commands.yaml new file mode 100644 index 0000000..2edd93f --- /dev/null +++ b/.github/workflows/pr-commands.yaml @@ -0,0 +1,85 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help +on: + issue_comment: + types: [created] + +name: pr-commands.yaml + +permissions: read-all + +jobs: + document: + if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/document') }} + name: document + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + permissions: + contents: write + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/pr-fetch@v2 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::roxygen2 + needs: pr-document + + - name: Document + run: roxygen2::roxygenise() + shell: Rscript {0} + + - name: commit + run: | + git config --local user.name "$GITHUB_ACTOR" + git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" + git add man/\* NAMESPACE + git commit -m 'Document' + + - uses: r-lib/actions/pr-push@v2 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + + style: + if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/style') }} + name: style + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + permissions: + contents: write + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/pr-fetch@v2 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + + - uses: r-lib/actions/setup-r@v2 + + - name: Install dependencies + run: install.packages("styler") + shell: Rscript {0} + + - name: Style + run: styler::style_pkg() + shell: Rscript {0} + + - name: commit + run: | + git config --local user.name "$GITHUB_ACTOR" + git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" + git add \*.R + 
git commit -m 'Style' + + - uses: r-lib/actions/pr-push@v2 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml new file mode 100644 index 0000000..9882260 --- /dev/null +++ b/.github/workflows/test-coverage.yaml @@ -0,0 +1,61 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: test-coverage.yaml + +permissions: read-all + +jobs: + test-coverage: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::covr, any::xml2 + needs: coverage + + - name: Test coverage + run: | + cov <- covr::package_coverage( + quiet = FALSE, + clean = FALSE, + install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") + ) + covr::to_cobertura(cov) + shell: Rscript {0} + + - uses: codecov/codecov-action@v4 + with: + fail_ci_if_error: ${{ github.event_name != 'pull_request' && true || false }} + file: ./cobertura.xml + plugin: noop + disable_search: true + token: ${{ secrets.CODECOV_TOKEN }} + + - name: Show testthat output + if: always() + run: | + ## -------------------------------------------------------------------- + find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true + shell: bash + + - name: Upload test results + if: failure() + uses: actions/upload-artifact@v4 + with: + name: coverage-test-failures + path: ${{ runner.temp }}/package diff --git a/.gitignore b/.gitignore index 8c990fa..63e0f3e 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ inst/doc doc Meta +docs diff --git a/.travis.yml b/.travis.yml 
deleted file mode 100644 index a116dae..0000000 --- a/.travis.yml +++ /dev/null @@ -1,9 +0,0 @@ -# R for travis: see documentation at https://docs.travis-ci.com/user/languages/r - -language: R -sudo: false -cache: packages -r_packages: - - covr -after_success: - - Rscript -e 'covr::codecov()' diff --git a/DESCRIPTION b/DESCRIPTION index 2e656a8..9577a7b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -13,7 +13,8 @@ Description: Download and process public domain works in the Project retrieved. License: GPL-2 URL: https://docs.ropensci.org/gutenbergr/, - https://github.com/ropensci/gutenbergr + https://github.com/ropensci/gutenbergr, + http://ropensci.github.io/gutenbergr/ BugReports: https://github.com/ropensci/gutenbergr/issues Depends: R (>= 2.10) diff --git a/README.Rmd b/README.Rmd index 4bc068e..8bfcfa6 100644 --- a/README.Rmd +++ b/README.Rmd @@ -4,31 +4,24 @@ output: github_document - ```{r, echo = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", - fig.path = "README-", + fig.path = "man/figures/README-", message = FALSE, warning = FALSE ) ``` -gutenbergr: R package to search and download public domain texts from Project Gutenberg ----------------- - -**Authors:** [David Robinson](http://varianceexplained.org/)
-**License:** [GPL-2](https://opensource.org/license/gpl-2-0/) +# gutenbergr -[![Build Status](https://app.travis-ci.com/ropensci/gutenbergr.svg?branch=master)](https://app.travis-ci.com/ropensci/gutenbergr) -[![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/gutenbergr)]( https://CRAN.R-project.org/package=gutenbergr) -[![Build status](https://ci.appveyor.com/api/projects/status/lqb7hngtj5epsmd1?svg=true)](https://ci.appveyor.com/project/ropensci/gutenbergr-dujv9) -[![Coverage Status](https://img.shields.io/codecov/c/github/ropensci/gutenbergr/master.svg)](https://app.codecov.io/github/ropensci/gutenbergr?branch=master) +[![CRAN status](https://www.r-pkg.org/badges/version/gutenbergr)](https://CRAN.R-project.org/package=gutenbergr) [![rOpenSci peer-review](https://badges.ropensci.org/41_status.svg)](https://github.com/ropensci/software-review/issues/41) [![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) [![R-CMD-check](https://github.com/ropensci/gutenbergr/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/ropensci/gutenbergr/actions/workflows/R-CMD-check.yaml) +[![Codecov test coverage](https://codecov.io/gh/ropensci/gutenbergr/graph/badge.svg)](https://app.codecov.io/gh/ropensci/gutenbergr) Download and process public domain works from the [Project Gutenberg](https://www.gutenberg.org/) collection. 
Includes @@ -39,21 +32,26 @@ Download and process public domain works from the [Project Gutenberg](https://ww * `gutenberg_authors` contains information about each author, such as aliases and birth/death year * `gutenberg_subjects` contains pairings of works with Library of Congress subjects and topics -### Installation +## Installation -Install the package with: +::: .pkgdown-release +Install the released version of gutenbergr from [CRAN](https://cran.r-project.org/): -```{r eval = FALSE} +```{r, eval = FALSE} install.packages("gutenbergr") ``` +::: -Or install the development version using [devtools](https://github.com/r-lib/devtools) with: +::: .pkgdown-devel +Install the development version of gutenbergr from [GitHub](https://github.com/): -```{r eval = FALSE} -devtools::install_github("ropensci/gutenbergr") +```{r, eval = FALSE} +# install.packages("pak") +pak::pak("ropensci/gutenbergr") ``` +::: -### Examples +## Examples The `gutenberg_works()` function retrieves, by default, a table of metadata for all unique English-language Project Gutenberg works that have text associated with them. (The `gutenberg_metadata` dataset has all Gutenberg works, unfiltered). @@ -102,20 +100,20 @@ aristotle_books <- gutenberg_works(author == "Aristotle") %>% aristotle_books ``` -### FAQ +## FAQ -#### What do I do with the text once I have it? +### What do I do with the text once I have it? * The [Natural Language Processing CRAN View](https://CRAN.R-project.org/view=NaturalLanguageProcessing) suggests many R packages related to text mining, especially around the [tm package](https://cran.r-project.org/package=tm). * The [tidytext](https://github.com/juliasilge/tidytext) package is useful for tokenization and analysis, especially since gutenbergr downloads books as a data frame already. 
* You could match the `wikipedia` column in `gutenberg_authors` to Wikipedia content with the [WikipediR](https://cran.r-project.org/package=WikipediR) package or to pageview statistics with the [wikipediatrend](https://cran.r-project.org/package=wikipediatrend) package. * If you're considering an analysis based on author name, you may find the [humaniformat](https://cran.r-project.org/package=humaniformat) (for extraction of first names) and [gender](https://cran.r-project.org/package=gender) (prediction of gender from first names) packages useful. (Note that humaniformat has a `format_reverse` function for reversing "Last, First" names). -#### How were the metadata R files generated? +### How were the metadata R files generated? See the [data-raw](https://github.com/ropensci/gutenbergr/tree/master/data-raw) directory for the scripts that generate these datasets. As of now, these were generated from [the Project Gutenberg catalog](https://www.gutenberg.org/ebooks/offline_catalogs.html) on **`r format(attr(gutenberg_metadata, "date_updated"), '%d %B %Y')`**. -#### Do you respect the rules regarding robot access to Project Gutenberg? +### Do you respect the rules regarding robot access to Project Gutenberg? Yes! The package respects [these rules](https://www.gutenberg.org/policy/robot_access.html) and complies to the best of our ability. Namely: @@ -125,7 +123,7 @@ Yes! The package respects [these rules](https://www.gutenberg.org/policy/robot_a Still, this package is *not* the right way to download the entire Project Gutenberg corpus (or all from a particular language). For that, follow [their recommendation](https://www.gutenberg.org/policy/robot_access.html) to use wget or set up a mirror. This package is recommended for downloading a single work, or works for a particular author or topic. 
-### Code of Conduct +## Code of Conduct Please note that the gutenbergr project is released with a [Contributor Code of Conduct](https://contributor-covenant.org/version/2/1/CODE_OF_CONDUCT.html). By contributing to this project, you agree to abide by its terms. diff --git a/README.md b/README.md index 9399769..f9682f2 100644 --- a/README.md +++ b/README.md @@ -1,26 +1,20 @@ -## gutenbergr: R package to search and download public domain texts from Project Gutenberg - -**Authors:** [David Robinson](http://varianceexplained.org/)
-**License:** [GPL-2](https://opensource.org/license/gpl-2-0/) +# gutenbergr -[![Build -Status](https://app.travis-ci.com/ropensci/gutenbergr.svg?branch=master)](https://app.travis-ci.com/ropensci/gutenbergr) -[![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/gutenbergr)](https://CRAN.R-project.org/package=gutenbergr) -[![Build -status](https://ci.appveyor.com/api/projects/status/lqb7hngtj5epsmd1?svg=true)](https://ci.appveyor.com/project/ropensci/gutenbergr-dujv9) -[![Coverage -Status](https://img.shields.io/codecov/c/github/ropensci/gutenbergr/master.svg)](https://app.codecov.io/github/ropensci/gutenbergr?branch=master) +[![CRAN +status](https://www.r-pkg.org/badges/version/gutenbergr)](https://CRAN.R-project.org/package=gutenbergr) [![rOpenSci peer-review](https://badges.ropensci.org/41_status.svg)](https://github.com/ropensci/software-review/issues/41) [![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) [![R-CMD-check](https://github.com/ropensci/gutenbergr/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/ropensci/gutenbergr/actions/workflows/R-CMD-check.yaml) +[![Codecov test +coverage](https://codecov.io/gh/ropensci/gutenbergr/graph/badge.svg)](https://app.codecov.io/gh/ropensci/gutenbergr) Download and process public domain works from the [Project @@ -38,22 +32,32 @@ Gutenberg](https://www.gutenberg.org/) collection. Includes - `gutenberg_subjects` contains pairings of works with Library of Congress subjects and topics -### Installation +## Installation + +
-Install the package with: +Install the released version of gutenbergr from +[CRAN](https://cran.r-project.org/): ``` r install.packages("gutenbergr") ``` -Or install the development version using -[devtools](https://github.com/r-lib/devtools) with: +
+ +
+ +Install the development version of gutenbergr from +[GitHub](https://github.com/): ``` r -devtools::install_github("ropensci/gutenbergr") +# install.packages("pak") +pak::pak("ropensci/gutenbergr") ``` -### Examples +
+ +## Examples The `gutenberg_works()` function retrieves, by default, a table of metadata for all unique English-language Project Gutenberg works that @@ -151,7 +155,7 @@ aristotle_books <- gutenberg_works(author == "Aristotle") %>% gutenberg_download(meta_fields = "title") aristotle_books -#> # A tibble: 17,147 × 3 +#> # A tibble: 43,801 × 3 #> gutenberg_id text #> #> 1 1974 "THE POETICS OF ARISTOTLE" @@ -176,12 +180,12 @@ aristotle_books #> 8 The Poetics of Aristotle #> 9 The Poetics of Aristotle #> 10 The Poetics of Aristotle -#> # ℹ 17,137 more rows +#> # ℹ 43,791 more rows ``` -### FAQ +## FAQ -#### What do I do with the text once I have it? +### What do I do with the text once I have it? - The [Natural Language Processing CRAN View](https://CRAN.R-project.org/view=NaturalLanguageProcessing) @@ -203,16 +207,16 @@ aristotle_books gender from first names) packages useful. (Note that humaniformat has a `format_reverse` function for reversing “Last, First” names). -#### How were the metadata R files generated? +### How were the metadata R files generated? See the [data-raw](https://github.com/ropensci/gutenbergr/tree/master/data-raw) directory for the scripts that generate these datasets. As of now, these were generated from [the Project Gutenberg -catalog](https://www.gutenberg.org/ebooks/offline_catalogs.html) on **19 -December 2022**. +catalog](https://www.gutenberg.org/ebooks/offline_catalogs.html) on **29 +November 2023**. -#### Do you respect the rules regarding robot access to Project Gutenberg? +### Do you respect the rules regarding robot access to Project Gutenberg? Yes! The package respects [these rules](https://www.gutenberg.org/policy/robot_access.html) and complies @@ -236,7 +240,7 @@ recommendation](https://www.gutenberg.org/policy/robot_access.html) to use wget or set up a mirror. This package is recommended for downloading a single work, or works for a particular author or topic. 
-### Code of Conduct +## Code of Conduct Please note that the gutenbergr project is released with a [Contributor Code of diff --git a/_pkgdown.yml b/_pkgdown.yml new file mode 100644 index 0000000..184d90c --- /dev/null +++ b/_pkgdown.yml @@ -0,0 +1,3 @@ +url: http://ropensci.github.io/gutenbergr/ +template: + bootstrap: 5 diff --git a/appveyor.yml b/appveyor.yml deleted file mode 100644 index e32d316..0000000 --- a/appveyor.yml +++ /dev/null @@ -1,42 +0,0 @@ -# DO NOT CHANGE the "init" and "install" sections below - -# Download script file from GitHub -init: - ps: | - $ErrorActionPreference = "Stop" - Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "..\appveyor-tool.ps1" - Import-Module '..\appveyor-tool.ps1' - -install: - ps: Bootstrap - -# Adapt as necessary starting from here - -build_script: - - travis-tool.sh install_deps - -test_script: - - travis-tool.sh run_tests - -on_failure: - - 7z a failure.zip *.Rcheck\* - - appveyor PushArtifact failure.zip - -artifacts: - - path: '*.Rcheck\**\*.log' - name: Logs - - - path: '*.Rcheck\**\*.out' - name: Logs - - - path: '*.Rcheck\**\*.fail' - name: Logs - - - path: '*.Rcheck\**\*.Rout' - name: Logs - - - path: '\*_*.tar.gz' - name: Bits - - - path: '\*_*.zip' - name: Bits diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 0000000..04c5585 --- /dev/null +++ b/codecov.yml @@ -0,0 +1,14 @@ +comment: false + +coverage: + status: + project: + default: + target: auto + threshold: 1% + informational: true + patch: + default: + target: auto + threshold: 1% + informational: true