diff --git a/.Rbuildignore b/.Rbuildignore
index 3407b1b..21cb578 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -2,16 +2,9 @@
^\.Rproj\.user$
^data-raw$
^README\.Rmd$
-^README-.*\.png$
-^\.travis\.yml$
^cran-comments\.md$
-^appveyor\.yml$
-^doc$
-^Meta$
-^CRAN-RELEASE$
^\.github$
-^CODE_OF_CONDUCT\.md$
-^CRAN-SUBMISSION$
-^revdep$
-^README\.md$
-^NEWS\.md$
+^codecov\.yml$
+^docs$
+^_pkgdown\.yml$
+^pkgdown$
diff --git a/CODE_OF_CONDUCT.md b/.github/CODE_OF_CONDUCT.md
similarity index 100%
rename from CODE_OF_CONDUCT.md
rename to .github/CODE_OF_CONDUCT.md
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
index a3ac618..21ccc7e 100644
--- a/.github/workflows/R-CMD-check.yaml
+++ b/.github/workflows/R-CMD-check.yaml
@@ -2,12 +2,14 @@
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
push:
- branches: [main, master]
+ branches: [main]
pull_request:
- branches: [main, master]
+ branches: [main]
name: R-CMD-check
+permissions: read-all
+
jobs:
R-CMD-check:
runs-on: ${{ matrix.config.os }}
@@ -18,18 +20,18 @@ jobs:
fail-fast: false
matrix:
config:
+ - {os: ubuntu-latest, r: 'release'}
- {os: macos-latest, r: 'release'}
- {os: windows-latest, r: 'release'}
- - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
- - {os: ubuntu-latest, r: 'release'}
- - {os: ubuntu-latest, r: 'oldrel-1'}
+ - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
+ - {os: ubuntu-latest, r: 'oldrel-1'}
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
R_KEEP_PKG_SOURCE: yes
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- uses: r-lib/actions/setup-pandoc@v2
@@ -47,3 +49,4 @@ jobs:
- uses: r-lib/actions/check-r-package@v2
with:
upload-snapshots: true
+ build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")'
diff --git a/.github/workflows/check-standard.yaml b/.github/workflows/check-standard.yaml
deleted file mode 100644
index a3ac618..0000000
--- a/.github/workflows/check-standard.yaml
+++ /dev/null
@@ -1,49 +0,0 @@
-# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
-# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
-on:
- push:
- branches: [main, master]
- pull_request:
- branches: [main, master]
-
-name: R-CMD-check
-
-jobs:
- R-CMD-check:
- runs-on: ${{ matrix.config.os }}
-
- name: ${{ matrix.config.os }} (${{ matrix.config.r }})
-
- strategy:
- fail-fast: false
- matrix:
- config:
- - {os: macos-latest, r: 'release'}
- - {os: windows-latest, r: 'release'}
- - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
- - {os: ubuntu-latest, r: 'release'}
- - {os: ubuntu-latest, r: 'oldrel-1'}
-
- env:
- GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
- R_KEEP_PKG_SOURCE: yes
-
- steps:
- - uses: actions/checkout@v3
-
- - uses: r-lib/actions/setup-pandoc@v2
-
- - uses: r-lib/actions/setup-r@v2
- with:
- r-version: ${{ matrix.config.r }}
- http-user-agent: ${{ matrix.config.http-user-agent }}
- use-public-rspm: true
-
- - uses: r-lib/actions/setup-r-dependencies@v2
- with:
- extra-packages: any::rcmdcheck
- needs: check
-
- - uses: r-lib/actions/check-r-package@v2
- with:
- upload-snapshots: true
diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml
new file mode 100644
index 0000000..4bbce75
--- /dev/null
+++ b/.github/workflows/pkgdown.yaml
@@ -0,0 +1,50 @@
+# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
+on:
+ push:
+ branches: [main, master]
+ pull_request:
+ branches: [main, master]
+ release:
+ types: [published]
+ workflow_dispatch:
+
+name: pkgdown.yaml
+
+permissions: read-all
+
+jobs:
+ pkgdown:
+ runs-on: ubuntu-latest
+ # Only restrict concurrency for non-PR jobs
+ concurrency:
+ group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
+ env:
+ GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+ permissions:
+ contents: write
+ steps:
+ - uses: actions/checkout@v4
+
+ - uses: r-lib/actions/setup-pandoc@v2
+
+ - uses: r-lib/actions/setup-r@v2
+ with:
+ use-public-rspm: true
+
+ - uses: r-lib/actions/setup-r-dependencies@v2
+ with:
+ extra-packages: any::pkgdown, local::.
+ needs: website
+
+ - name: Build site
+ run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
+ shell: Rscript {0}
+
+ - name: Deploy to GitHub pages 🚀
+ if: github.event_name != 'pull_request'
+ uses: JamesIves/github-pages-deploy-action@v4.5.0
+ with:
+ clean: false
+ branch: gh-pages
+ folder: docs
diff --git a/.github/workflows/pr-commands.yaml b/.github/workflows/pr-commands.yaml
new file mode 100644
index 0000000..2edd93f
--- /dev/null
+++ b/.github/workflows/pr-commands.yaml
@@ -0,0 +1,85 @@
+# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
+on:
+ issue_comment:
+ types: [created]
+
+name: pr-commands.yaml
+
+permissions: read-all
+
+jobs:
+ document:
+ if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/document') }}
+ name: document
+ runs-on: ubuntu-latest
+ env:
+ GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+ permissions:
+ contents: write
+ steps:
+ - uses: actions/checkout@v4
+
+ - uses: r-lib/actions/pr-fetch@v2
+ with:
+ repo-token: ${{ secrets.GITHUB_TOKEN }}
+
+ - uses: r-lib/actions/setup-r@v2
+ with:
+ use-public-rspm: true
+
+ - uses: r-lib/actions/setup-r-dependencies@v2
+ with:
+ extra-packages: any::roxygen2
+ needs: pr-document
+
+ - name: Document
+ run: roxygen2::roxygenise()
+ shell: Rscript {0}
+
+ - name: commit
+ run: |
+ git config --local user.name "$GITHUB_ACTOR"
+ git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com"
+ git add man/\* NAMESPACE
+ git commit -m 'Document'
+
+ - uses: r-lib/actions/pr-push@v2
+ with:
+ repo-token: ${{ secrets.GITHUB_TOKEN }}
+
+ style:
+ if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/style') }}
+ name: style
+ runs-on: ubuntu-latest
+ env:
+ GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+ permissions:
+ contents: write
+ steps:
+ - uses: actions/checkout@v4
+
+ - uses: r-lib/actions/pr-fetch@v2
+ with:
+ repo-token: ${{ secrets.GITHUB_TOKEN }}
+
+ - uses: r-lib/actions/setup-r@v2
+
+ - name: Install dependencies
+ run: install.packages("styler")
+ shell: Rscript {0}
+
+ - name: Style
+ run: styler::style_pkg()
+ shell: Rscript {0}
+
+ - name: commit
+ run: |
+ git config --local user.name "$GITHUB_ACTOR"
+ git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com"
+ git add \*.R
+ git commit -m 'Style'
+
+ - uses: r-lib/actions/pr-push@v2
+ with:
+ repo-token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml
new file mode 100644
index 0000000..9882260
--- /dev/null
+++ b/.github/workflows/test-coverage.yaml
@@ -0,0 +1,61 @@
+# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
+on:
+ push:
+ branches: [main, master]
+ pull_request:
+ branches: [main, master]
+
+name: test-coverage.yaml
+
+permissions: read-all
+
+jobs:
+ test-coverage:
+ runs-on: ubuntu-latest
+ env:
+ GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - uses: r-lib/actions/setup-r@v2
+ with:
+ use-public-rspm: true
+
+ - uses: r-lib/actions/setup-r-dependencies@v2
+ with:
+ extra-packages: any::covr, any::xml2
+ needs: coverage
+
+ - name: Test coverage
+ run: |
+ cov <- covr::package_coverage(
+ quiet = FALSE,
+ clean = FALSE,
+ install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package")
+ )
+ covr::to_cobertura(cov)
+ shell: Rscript {0}
+
+ - uses: codecov/codecov-action@v4
+ with:
+ fail_ci_if_error: ${{ github.event_name != 'pull_request' && true || false }}
+ file: ./cobertura.xml
+ plugin: noop
+ disable_search: true
+ token: ${{ secrets.CODECOV_TOKEN }}
+
+ - name: Show testthat output
+ if: always()
+ run: |
+ ## --------------------------------------------------------------------
+ find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true
+ shell: bash
+
+ - name: Upload test results
+ if: failure()
+ uses: actions/upload-artifact@v4
+ with:
+ name: coverage-test-failures
+ path: ${{ runner.temp }}/package
diff --git a/.gitignore b/.gitignore
index 8c990fa..63e0f3e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,4 @@
inst/doc
doc
Meta
+docs
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index a116dae..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,9 +0,0 @@
-# R for travis: see documentation at https://docs.travis-ci.com/user/languages/r
-
-language: R
-sudo: false
-cache: packages
-r_packages:
- - covr
-after_success:
- - Rscript -e 'covr::codecov()'
diff --git a/DESCRIPTION b/DESCRIPTION
index 2e656a8..9577a7b 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -13,7 +13,8 @@ Description: Download and process public domain works in the Project
retrieved.
License: GPL-2
URL: https://docs.ropensci.org/gutenbergr/,
- https://github.com/ropensci/gutenbergr
+ https://github.com/ropensci/gutenbergr,
+ https://ropensci.github.io/gutenbergr/
BugReports: https://github.com/ropensci/gutenbergr/issues
Depends:
R (>= 2.10)
diff --git a/README.Rmd b/README.Rmd
index 4bc068e..8bfcfa6 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -4,31 +4,24 @@ output: github_document
-
```{r, echo = FALSE}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
- fig.path = "README-",
+ fig.path = "man/figures/README-",
message = FALSE,
warning = FALSE
)
```
-gutenbergr: R package to search and download public domain texts from Project Gutenberg
-----------------
-
-**Authors:** [David Robinson](http://varianceexplained.org/)
-**License:** [GPL-2](https://opensource.org/license/gpl-2-0/)
+# gutenbergr
-[![Build Status](https://app.travis-ci.com/ropensci/gutenbergr.svg?branch=master)](https://app.travis-ci.com/ropensci/gutenbergr)
-[![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/gutenbergr)]( https://CRAN.R-project.org/package=gutenbergr)
-[![Build status](https://ci.appveyor.com/api/projects/status/lqb7hngtj5epsmd1?svg=true)](https://ci.appveyor.com/project/ropensci/gutenbergr-dujv9)
-[![Coverage Status](https://img.shields.io/codecov/c/github/ropensci/gutenbergr/master.svg)](https://app.codecov.io/github/ropensci/gutenbergr?branch=master)
+[![CRAN status](https://www.r-pkg.org/badges/version/gutenbergr)](https://CRAN.R-project.org/package=gutenbergr)
[![rOpenSci peer-review](https://badges.ropensci.org/41_status.svg)](https://github.com/ropensci/software-review/issues/41)
[![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active)
[![R-CMD-check](https://github.com/ropensci/gutenbergr/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/ropensci/gutenbergr/actions/workflows/R-CMD-check.yaml)
+[![Codecov test coverage](https://codecov.io/gh/ropensci/gutenbergr/graph/badge.svg)](https://app.codecov.io/gh/ropensci/gutenbergr)
Download and process public domain works from the [Project Gutenberg](https://www.gutenberg.org/) collection. Includes
@@ -39,21 +32,26 @@ Download and process public domain works from the [Project Gutenberg](https://ww
* `gutenberg_authors` contains information about each author, such as aliases and birth/death year
* `gutenberg_subjects` contains pairings of works with Library of Congress subjects and topics
-### Installation
+## Installation
-Install the package with:
+::: .pkgdown-release
+Install the released version of gutenbergr from [CRAN](https://cran.r-project.org/):
-```{r eval = FALSE}
+```{r, eval = FALSE}
install.packages("gutenbergr")
```
+:::
-Or install the development version using [devtools](https://github.com/r-lib/devtools) with:
+::: .pkgdown-devel
+Install the development version of gutenbergr from [GitHub](https://github.com/):
-```{r eval = FALSE}
-devtools::install_github("ropensci/gutenbergr")
+```{r, eval = FALSE}
+# install.packages("pak")
+pak::pak("ropensci/gutenbergr")
```
+:::
-### Examples
+## Examples
The `gutenberg_works()` function retrieves, by default, a table of metadata for all unique English-language Project Gutenberg works that have text associated with them. (The `gutenberg_metadata` dataset has all Gutenberg works, unfiltered).
@@ -102,20 +100,20 @@ aristotle_books <- gutenberg_works(author == "Aristotle") %>%
aristotle_books
```
-### FAQ
+## FAQ
-#### What do I do with the text once I have it?
+### What do I do with the text once I have it?
* The [Natural Language Processing CRAN View](https://CRAN.R-project.org/view=NaturalLanguageProcessing) suggests many R packages related to text mining, especially around the [tm package](https://cran.r-project.org/package=tm).
* The [tidytext](https://github.com/juliasilge/tidytext) package is useful for tokenization and analysis, especially since gutenbergr downloads books as a data frame already.
* You could match the `wikipedia` column in `gutenberg_author` to Wikipedia content with the [WikipediR](https://cran.r-project.org/package=WikipediR) package or to pageview statistics with the [wikipediatrend](https://cran.r-project.org/package=wikipediatrend) package.
* If you're considering an analysis based on author name, you may find the [humaniformat](https://cran.r-project.org/package=humaniformat) (for extraction of first names) and [gender](https://cran.r-project.org/package=gender) (prediction of gender from first names) packages useful. (Note that humaniformat has a `format_reverse` function for reversing "Last, First" names).
-#### How were the metadata R files generated?
+### How were the metadata R files generated?
See the [data-raw](https://github.com/ropensci/gutenbergr/tree/master/data-raw) directory for the scripts that generate these datasets. As of now, these were generated from [the Project Gutenberg catalog](https://www.gutenberg.org/ebooks/offline_catalogs.html) on **`r format(attr(gutenberg_metadata, "date_updated"), '%d %B %Y')`**.
-#### Do you respect the rules regarding robot access to Project Gutenberg?
+### Do you respect the rules regarding robot access to Project Gutenberg?
Yes! The package respects [these rules](https://www.gutenberg.org/policy/robot_access.html) and complies to the best of our ability. Namely:
@@ -125,7 +123,7 @@ Yes! The package respects [these rules](https://www.gutenberg.org/policy/robot_a
Still, this package is *not* the right way to download the entire Project Gutenberg corpus (or all from a particular language). For that, follow [their recommendation](https://www.gutenberg.org/policy/robot_access.html) to use wget or set up a mirror. This package is recommended for downloading a single work, or works for a particular author or topic.
-### Code of Conduct
+## Code of Conduct
Please note that the gutenbergr project is released with a [Contributor Code of Conduct](https://contributor-covenant.org/version/2/1/CODE_OF_CONDUCT.html). By contributing to this project, you agree to abide by its terms.
diff --git a/README.md b/README.md
index 9399769..f9682f2 100644
--- a/README.md
+++ b/README.md
@@ -1,26 +1,20 @@
-## gutenbergr: R package to search and download public domain texts from Project Gutenberg
-
-**Authors:** [David Robinson](http://varianceexplained.org/)
-**License:** [GPL-2](https://opensource.org/license/gpl-2-0/)
+# gutenbergr
-[![Build
-Status](https://app.travis-ci.com/ropensci/gutenbergr.svg?branch=master)](https://app.travis-ci.com/ropensci/gutenbergr)
-[![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/gutenbergr)](https://CRAN.R-project.org/package=gutenbergr)
-[![Build
-status](https://ci.appveyor.com/api/projects/status/lqb7hngtj5epsmd1?svg=true)](https://ci.appveyor.com/project/ropensci/gutenbergr-dujv9)
-[![Coverage
-Status](https://img.shields.io/codecov/c/github/ropensci/gutenbergr/master.svg)](https://app.codecov.io/github/ropensci/gutenbergr?branch=master)
+[![CRAN
+status](https://www.r-pkg.org/badges/version/gutenbergr)](https://CRAN.R-project.org/package=gutenbergr)
[![rOpenSci
peer-review](https://badges.ropensci.org/41_status.svg)](https://github.com/ropensci/software-review/issues/41)
[![Project Status: Active – The project has reached a stable, usable
state and is being actively
developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active)
[![R-CMD-check](https://github.com/ropensci/gutenbergr/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/ropensci/gutenbergr/actions/workflows/R-CMD-check.yaml)
+[![Codecov test
+coverage](https://codecov.io/gh/ropensci/gutenbergr/graph/badge.svg)](https://app.codecov.io/gh/ropensci/gutenbergr)
Download and process public domain works from the [Project
@@ -38,22 +32,32 @@ Gutenberg](https://www.gutenberg.org/) collection. Includes
- `gutenberg_subjects` contains pairings of works with Library of
Congress subjects and topics
-### Installation
+## Installation
+
+