Merge pull request #66 from IQSS/dev-data-download
Reorganize get_file functions and add get_dataframe functions
wibeasley authored Jan 17, 2021
2 parents 96e5ed7 + 1881657 commit d4a6e31
Showing 120 changed files with 12,772 additions and 523 deletions.
3 changes: 3 additions & 0 deletions .Rbuildignore
@@ -16,3 +16,6 @@ man-roxygen/*
^codecov\.yml$
^.*\.Rproj$
^\.Rproj\.user$
^_pkgdown\.yml$
^docs$
^pkgdown$
1 change: 1 addition & 0 deletions .gitignore
@@ -2,3 +2,4 @@ README.html
doc
Meta
.Rproj.user
docs
7 changes: 4 additions & 3 deletions DESCRIPTION
@@ -22,7 +22,7 @@ Authors@R: c(
),
person(
"Shiro", "Kuriwaki",
role = c("aut"),
role = c("aut"),
email = "[email protected]",
comment = c(ORCID = "0000-0002-5687-2647")
),
@@ -36,17 +36,17 @@ Authors@R: c(
"Jan", "Kanis", role = "ctb"
))
Imports:
checkmate,
httr,
jsonlite,
magrittr,
readr,
stats,
utils,
xml2
Suggests:
checkmate,
covr,
foreign,
haven,
knitr,
purrr,
testthat,
@@ -61,3 +61,4 @@ BugReports: https://github.com/iqss/dataverse-client-r/issues
VignetteBuilder: knitr
Encoding: UTF-8
RoxygenNote: 7.1.1
Roxygen: list(markdown = TRUE)
11 changes: 6 additions & 5 deletions NAMESPACE
@@ -27,10 +27,16 @@ export(delete_dataset)
export(delete_dataverse)
export(delete_file)
export(delete_sword_dataset)
export(get_dataframe_by_doi)
export(get_dataframe_by_id)
export(get_dataframe_by_name)
export(get_dataset)
export(get_dataverse)
export(get_facets)
export(get_file)
export(get_file_by_doi)
export(get_file_by_id)
export(get_file_by_name)
export(get_file_metadata)
export(get_user_key)
export(initiate_sword_dataset)
@@ -42,8 +48,3 @@ export(service_document)
export(set_dataverse_metadata)
export(update_dataset)
export(update_dataset_file)
import(httr)
import(xml2)
importFrom(stats,setNames)
importFrom(utils,str)
importFrom(utils,unzip)
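
As a quick orientation to the reorganized exports, here is a minimal sketch of the by-name access pattern. The demo server and DOI are reused from the roxygen examples later in this diff; argument names mirror get_dataframe_by_name() as defined below, and get_file_by_name() is assumed to return the raw file contents while get_dataframe_by_name() parses them into a data frame.

# raw bytes of the file
raw <- get_file_by_name(
  filename = "roster-bulls-1996.tab",
  dataset  = "doi:10.70122/FK2/HXJVJU",
  server   = "demo.dataverse.org"
)

# same file parsed into a data frame (readr::read_tsv is the default reader for ingested files)
df <- get_dataframe_by_name(
  filename = "roster-bulls-1996.tab",
  dataset  = "doi:10.70122/FK2/HXJVJU",
  server   = "demo.dataverse.org"
)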
1 change: 1 addition & 0 deletions NEWS.md
@@ -6,6 +6,7 @@
* Tests use https://demo.dataverse.org/dataverse/dataverse-client-r/. (#40)
* Fixes most get_file errors by removing query argument (#33 @kuriwaki)
* Fix getting multiple files by id in `get_file()` (#47 @adam3smith)
* Temporary files created by `get_file()` are automatically deleted.

# CHANGES TO dataverse 0.2.1

2 changes: 1 addition & 1 deletion R/SWORD.R
@@ -13,7 +13,7 @@
#' list_datasets(d[[2]])
#' }
#' @seealso Managing a Dataverse: \code{\link{publish_dataverse}}; Managing a dataset: \code{\link{dataset_atom}}, \code{\link{list_datasets}}, \code{\link{create_dataset}}, \code{\link{delete_dataset}}, \code{\link{publish_dataset}}; Managing files within a dataset: \code{\link{add_file}}, \code{\link{delete_file}}
#' @importFrom stats setNames
#'
#' @export
service_document <- function(key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), ...) {
u <- paste0(api_url(server, prefix="dvn/api/"), "data-deposit/v1.1/swordv2/service-document")
3 changes: 1 addition & 2 deletions R/SWORD_dataset.R
@@ -4,7 +4,7 @@
#' @param body A list containing one or more metadata fields. Field names must be valid Dublin Core Terms labels (see details, below). The \samp{title}, \samp{description}, and \samp{creator} fields are required.
#' @template envvars
#' @template dots
#' @details This function is used to initiate a dataset in a (SWORD) Dataverse by supplying relevant metadata. The function is part of the SWORD API (see \href{http://www.ietf.org/rfc/rfc5023.txt}{Atom entry specification}), which is used to upload data to a Dataverse server.
#' @details This function is used to initiate a dataset in a (SWORD) Dataverse by supplying relevant metadata. The function is part of the SWORD API (see \href{https://www.ietf.org/rfc/rfc5023.txt}{Atom entry specification}), which is used to upload data to a Dataverse server.
#' Allowed fields are:
#' \dQuote{abstract}, \dQuote{accessRights}, \dQuote{accrualMethod},
#' \dQuote{accrualPeriodicity}, \dQuote{accrualPolicy}, \dQuote{alternative},
@@ -212,7 +212,6 @@ dataset_atom <- function(dataset, key = Sys.getenv("DATAVERSE_KEY"), server = Sy
}

#' @rdname dataset_atom
#' @import xml2
#' @export
dataset_statement <- function(dataset, key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), ...) {
if (inherits(dataset, "dataset_atom")) {
2 changes: 1 addition & 1 deletion R/SWORD_files.R
@@ -121,7 +121,7 @@ add_file <- function(dataset, file, key = Sys.getenv("DATAVERSE_KEY"), server =
#'
#' # delete a file
#' ds <- dataset_statement(dat)
#' delete_file(ds$files[[1]]$id
#' delete_file(ds$files[[1]]$id)
#'
#' # delete a dataset
#' delete_dataset(dat)
4 changes: 2 additions & 2 deletions R/dataverse-package.R
@@ -17,9 +17,9 @@
#' }
#'
#' @references
#' \href{http://guides.dataverse.org/en/latest/api/index.html}{Dataverse API Documentation}
#' \href{https://guides.dataverse.org/en/latest/api/index.html}{Dataverse API Documentation}
#'
#' \href{http://dataverse.org/}{Dataverse Homepage}
#' \href{https://dataverse.org/}{Dataverse Homepage}
#'
#' \href{https://dataverse.harvard.edu/}{Harvard IQSS Dataverse}
#'
149 changes: 149 additions & 0 deletions R/get_dataframe.R
@@ -0,0 +1,149 @@
#' Get file from dataverse and convert it into a dataframe or tibble
#'
#' Use `get_dataframe_by_id` if you know the numeric ID of the file, or
#' `get_dataframe_by_name` if you know the filename and the DOI of the dataset.
#'
#' @rdname get_dataframe
#'
#' @param filename The name of the file of interest, with file extension, for example
#' `"roster-bulls-1996.tab"`.
#' @param .f The function used for reading in the raw dataset. The user
#'   must choose the appropriate function: for example, if the target is a .rds
#'   file, then `.f` should be `readRDS` or `readr::read_rds`.
#' @param original A logical, defaulting to `FALSE`. Whether to read the original
#'   version of the file rather than the ingested, archival version. The archival
#'   versions are tab-delimited `.tab` files, so if `original = FALSE`, `.f` is set
#'   to `readr::read_tsv`. If a function to read the original version is available,
#'   then `original = TRUE` with a specified `.f` is preferable.
#'
#' @inheritDotParams get_file
#'
#' @examples
#'
#' # Retrieve data.frame from dataverse DOI and file name
#' df_from_rds_ingested <-
#'   get_dataframe_by_name(
#'     filename = "roster-bulls-1996.tab",
#'     dataset = "doi:10.70122/FK2/HXJVJU",
#'     server = "demo.dataverse.org"
#'   )
#'
#' # Retrieve the same data.frame from dataverse + file DOI
#' df_from_rds_ingested_by_doi <-
#'   get_dataframe_by_doi(
#'     filedoi = "10.70122/FK2/HXJVJU/SA3Z2V",
#'     server = "demo.dataverse.org"
#'   )
#'
#' # Retrieve ingested file originally a Stata dta
#' df_from_stata_ingested <-
#'   get_dataframe_by_name(
#'     filename = "nlsw88.tab",
#'     dataset = "doi:10.70122/FK2/PPIAXE",
#'     server = "demo.dataverse.org"
#'   )
#'
#'
#' # To use the original file version, or for non-ingested data,
#' # please specify `original = TRUE` and specify a function in .f.
#'
#' # A data.frame is still returned, but it is read in with the user-specified `.f`.
#' if (requireNamespace("readr", quietly = TRUE)) {
#'   df_from_rds_original <-
#'     get_dataframe_by_name(
#'       filename = "nlsw88_rds-export.rds",
#'       dataset = "doi:10.70122/FK2/PPIAXE",
#'       server = "demo.dataverse.org",
#'       original = TRUE,
#'       .f = readr::read_rds
#'     )
#' }
#'
#' if (requireNamespace("haven", quietly = TRUE)) {
#'   df_from_stata_original <-
#'     get_dataframe_by_name(
#'       filename = "nlsw88.tab",
#'       dataset = "doi:10.70122/FK2/PPIAXE",
#'       server = "demo.dataverse.org",
#'       original = TRUE,
#'       .f = haven::read_dta
#'     )
#' }
#' @export
get_dataframe_by_name <- function (
  filename,
  dataset = NULL,
  .f = NULL,
  original = FALSE,
  ...
) {
  # retrieve ID
  fileid <- get_fileid.character(x = dataset, file = filename, ...)

  get_dataframe_by_id(fileid, .f, original = original, ...)
}

#' @rdname get_dataframe
#' @export
get_dataframe_by_id <- function(
  fileid,
  .f = NULL,
  original = FALSE,
  ...
) {

  # if not ingested, then whether to take the original is not relevant.
  ingested <- is_ingested(fileid, ...)

  if (isFALSE(ingested)) {
    original <- NA
  }

  if (is.null(.f) & isTRUE(ingested) & isFALSE(original)) {
    message("Downloading ingested version of data with readr::read_tsv. To download the original version and remove this message, set original = TRUE.\n")
    .f <- readr::read_tsv
  }

  if (is.null(.f) & (isFALSE(ingested) | isTRUE(original))) {
    stop("read-in function was left NULL, but the target file is not ingested or you asked for the original version. Please supply a .f argument.\n")
  }

  # READ raw data
  raw <- get_file(file = fileid, original = original, ...)

  # save to temp and then read it in with supplied function
  if (!is.null(.f)) {
    get_dataframe_internal(raw, filename = "foo", .f = .f)
  }
}
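
A brief usage sketch of the branching above: for an ingested file with no `.f`, the function messages and falls back to `readr::read_tsv`. The file DOI and demo server are reused from the roxygen examples in this file, and passing a DOI as `fileid` relies on `get_file()` accepting DOIs, as noted in `get_dataframe_by_doi()` below.

# Ingested file, no .f supplied: read with readr::read_tsv after a message.
bulls <- get_dataframe_by_id(
  fileid = "doi:10.70122/FK2/HXJVJU/SA3Z2V",
  server = "demo.dataverse.org"
)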

#' @rdname get_dataframe
#' @inheritParams get_file_by_doi
#' @export
get_dataframe_by_doi <- function (
  filedoi,
  .f = NULL,
  original = FALSE,
  ...
) {
  filedoi <- prepend_doi(filedoi)

  # get_file can also take doi now
  get_dataframe_by_id(fileid = filedoi, .f = .f, original = original, ...)
}

#' Write to temp and apply function
#'
#' @keywords internal
get_dataframe_internal <- function (raw, filename, .f) {
  tryCatch(
    {
      tmp <- tempfile(filename)
      writeBin(raw, tmp)
      do.call(.f, list(tmp))
    },
    finally = {
      if (file.exists(tmp)) unlink(tmp)
    }
  )
}
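
A self-contained illustration of the helper above, assuming the package is installed from this commit (the helper is internal, hence the `:::`): the raw bytes are written to a temporary file, read with the supplied function, and the temporary file is removed in the `finally` clause.

raw_csv <- charToRaw("x,y\n1,2\n3,4\n")
dataverse:::get_dataframe_internal(raw_csv, filename = "toy", .f = read.csv)
# returns a two-row data.frame with columns x and y; the temp file is deleted afterwards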
2 changes: 1 addition & 1 deletion R/get_dataset.R
@@ -66,7 +66,7 @@ get_dataset <- function(

#' @rdname get_dataset
#' @param block A character string specifying a metadata block to retrieve. By default this is \dQuote{citation}. Other values may be available, depending on the dataset, such as \dQuote{geospatial} or \dQuote{socialscience}.
#' @importFrom utils str
#'
#' @export
dataset_metadata <- function(
dataset,
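
For reference, the `block` argument documented above can be exercised against the demo dataset used in the get_dataframe examples; the call shape follows the exported dataset_metadata() signature, with the DOI and server values reused from this diff.

meta <- dataset_metadata(
  dataset = "doi:10.70122/FK2/HXJVJU",
  block   = "citation",
  server  = "demo.dataverse.org"
)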