diff --git a/.Rbuildignore b/.Rbuildignore index f45c63b..87bee57 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -16,3 +16,6 @@ man-roxygen/* ^codecov\.yml$ ^.*\.Rproj$ ^\.Rproj\.user$ +^_pkgdown\.yml$ +^docs$ +^pkgdown$ diff --git a/.gitignore b/.gitignore index eedcabb..9326638 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ README.html doc Meta .Rproj.user +docs diff --git a/DESCRIPTION b/DESCRIPTION index 5ac1516..215b7e7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,7 +22,7 @@ Authors@R: c( ), person( "Shiro", "Kuriwaki", - role = c("aut"), + role = c("aut"), email = "shirokuriwaki@gmail.com", comment = c(ORCID = "0000-0002-5687-2647") ), @@ -36,17 +36,17 @@ Authors@R: c( "Jan", "Kanis", role = "ctb" )) Imports: + checkmate, httr, jsonlite, - magrittr, readr, stats, utils, xml2 Suggests: - checkmate, covr, foreign, + haven, knitr, purrr, testthat, @@ -61,3 +61,4 @@ BugReports: https://github.com/iqss/dataverse-client-r/issues VignetteBuilder: knitr Encoding: UTF-8 RoxygenNote: 7.1.1 +Roxygen: list(markdown = TRUE) diff --git a/NAMESPACE b/NAMESPACE index eb14124..a007dab 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -27,10 +27,16 @@ export(delete_dataset) export(delete_dataverse) export(delete_file) export(delete_sword_dataset) +export(get_dataframe_by_doi) +export(get_dataframe_by_id) +export(get_dataframe_by_name) export(get_dataset) export(get_dataverse) export(get_facets) export(get_file) +export(get_file_by_doi) +export(get_file_by_id) +export(get_file_by_name) export(get_file_metadata) export(get_user_key) export(initiate_sword_dataset) @@ -42,8 +48,3 @@ export(service_document) export(set_dataverse_metadata) export(update_dataset) export(update_dataset_file) -import(httr) -import(xml2) -importFrom(stats,setNames) -importFrom(utils,str) -importFrom(utils,unzip) diff --git a/NEWS.md b/NEWS.md index a7589f3..a12718a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,6 +6,7 @@ * Tests use https://demo.dataverse.org/dataverse/dataverse-client-r/. (#40) * Fixes most get_file errors by removing query argument (#33 @kuriwaki) * Fix getting multiple files by id in `get_file()` (#47 @adam3smith) +* Temporary files created by `get_file()` are automatically deleted. # CHANGES TO dataverse 0.2.1 diff --git a/R/SWORD.R b/R/SWORD.R index ed4f3b2..d1427bb 100644 --- a/R/SWORD.R +++ b/R/SWORD.R @@ -13,7 +13,7 @@ #' list_datasets(d[[2]]) #' } #' @seealso Managing a Dataverse: \code{\link{publish_dataverse}}; Managing a dataset: \code{\link{dataset_atom}}, \code{\link{list_datasets}}, \code{\link{create_dataset}}, \code{\link{delete_dataset}}, \code{\link{publish_dataset}}; Managing files within a dataset: \code{\link{add_file}}, \code{\link{delete_file}} -#' @importFrom stats setNames +#' #' @export service_document <- function(key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), ...) { u <- paste0(api_url(server, prefix="dvn/api/"), "data-deposit/v1.1/swordv2/service-document") diff --git a/R/SWORD_dataset.R b/R/SWORD_dataset.R index 450ebd6..18cb410 100644 --- a/R/SWORD_dataset.R +++ b/R/SWORD_dataset.R @@ -4,7 +4,7 @@ #' @param body A list containing one or more metadata fields. Field names must be valid Dublin Core Terms labels (see details, below). The \samp{title}, \samp{description}, and \samp{creator} fields are required. #' @template envvars #' @template dots -#' @details This function is used to initiate a dataset in a (SWORD) Dataverse by supplying relevant metadata. The function is part of the SWORD API (see \href{http://www.ietf.org/rfc/rfc5023.txt}{Atom entry specification}), which is used to upload data to a Dataverse server. +#' @details This function is used to initiate a dataset in a (SWORD) Dataverse by supplying relevant metadata. The function is part of the SWORD API (see \href{https://www.ietf.org/rfc/rfc5023.txt}{Atom entry specification}), which is used to upload data to a Dataverse server. #' Allowed fields are: #' \dQuote{abstract}, \dQuote{accessRights}, \dQuote{accrualMethod}, #' \dQuote{accrualPeriodicity}, \dQuote{accrualPolicy}, \dQuote{alternative}, @@ -212,7 +212,6 @@ dataset_atom <- function(dataset, key = Sys.getenv("DATAVERSE_KEY"), server = Sy } #' @rdname dataset_atom -#' @import xml2 #' @export dataset_statement <- function(dataset, key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), ...) { if (inherits(dataset, "dataset_atom")) { diff --git a/R/SWORD_files.R b/R/SWORD_files.R index 90dfea6..2b3d5d7 100644 --- a/R/SWORD_files.R +++ b/R/SWORD_files.R @@ -121,7 +121,7 @@ add_file <- function(dataset, file, key = Sys.getenv("DATAVERSE_KEY"), server = #' #' # delete a file #' ds <- dataset_statement(dat) -#' delete_file(ds$files[[1]]$id +#' delete_file(ds$files[[1]]$id) #' #' # delete a dataset #' delete_dataset(dat) diff --git a/R/dataverse-package.R b/R/dataverse-package.R index e08c497..022708f 100644 --- a/R/dataverse-package.R +++ b/R/dataverse-package.R @@ -17,9 +17,9 @@ #' } #' #' @references -#' \href{http://guides.dataverse.org/en/latest/api/index.html}{Dataverse API Documentation} +#' \href{https://guides.dataverse.org/en/latest/api/index.html}{Dataverse API Documentation} #' -#' \href{http://dataverse.org/}{Dataverse Homepage} +#' \href{https://dataverse.org/}{Dataverse Homepage} #' #' \href{https://dataverse.harvard.edu/}{Harvard IQSS Dataverse} #' diff --git a/R/get_dataframe.R b/R/get_dataframe.R new file mode 100644 index 0000000..09c9101 --- /dev/null +++ b/R/get_dataframe.R @@ -0,0 +1,149 @@ +#' Get file from dataverse and convert it into a dataframe or tibble +#' +#' `get_dataframe_by_id`, if you know the numeric ID of the dataset, or instead +#' `get_dataframe_by_name` if you know the filename and doi. The dataset +#' +#' @rdname get_dataframe +#' +#' @param filename The name of the file of interest, with file extension, for example +#' `"roster-bulls-1996.tab"`. +#' @param .f The function to used for reading in the raw dataset. This user +#' must choose the appropriate function: for example if the target is a .rds +#' file, then `.f` should be `readRDS` or `readr::read_`rds`. +#' @param original A logical, defaulting to TRUE. Whether to read the ingested, +#' archival version of the dataset if one exists. The archival versions are tab-delimited +#' `.tab` files so if `original = FALSE`, `.f` is set to `readr::read_tsv`. +#' If functions to read the original version is available, then `original = TRUE` +#' with a specified `.f` is better. +#' +#' @inheritDotParams get_file +#' +#' @examples +#' +#' # Retrieve data.frame from dataverse DOI and file name +#' df_from_rds_ingested <- +#' get_dataframe_by_name( +#' filename = "roster-bulls-1996.tab", +#' dataset = "doi:10.70122/FK2/HXJVJU", +#' server = "demo.dataverse.org" +#' ) +#' +#' # Retrieve the same data.frame from dataverse + file DOI +#' df_from_rds_ingested_by_doi <- +#' get_dataframe_by_doi( +#' filedoi = "10.70122/FK2/HXJVJU/SA3Z2V", +#' server = "demo.dataverse.org" +#' ) +#' +#' # Retrieve ingested file originally a Stata dta +#' df_from_stata_ingested <- +#' get_dataframe_by_name( +#' filename = "nlsw88.tab", +#' dataset = "doi:10.70122/FK2/PPIAXE", +#' server = "demo.dataverse.org" +#' ) +#' +#' +#' # To use the original file version, or for non-ingested data, +#' # please specify `original = TRUE` and specify a function in .f. +#' +#' # A data.frame is still returned, but the +#' if (requireNamespace("readr", quietly = TRUE)) { +#' df_from_rds_original <- +#' get_dataframe_by_name( +#' filename = "nlsw88_rds-export.rds", +#' dataset = "doi:10.70122/FK2/PPIAXE", +#' server = "demo.dataverse.org", +#' original = TRUE, +#' .f = readr::read_rds +#' ) +#' } +#' +#' if (requireNamespace("haven", quietly = TRUE)) { +#' df_from_stata_original <- +#' get_dataframe_by_name( +#' filename = "nlsw88.tab", +#' dataset = "doi:10.70122/FK2/PPIAXE", +#' server = "demo.dataverse.org", +#' original = TRUE, +#' .f = haven::read_dta +#' ) +#' } +#' @export +get_dataframe_by_name <- function ( + filename, + dataset = NULL, + .f = NULL, + original = FALSE, + ... +) { + # retrieve ID + fileid <- get_fileid.character(x = dataset, file = filename, ...) + + get_dataframe_by_id(fileid, .f, original = original, ...) +} + +#' @rdname get_dataframe +#' @export +get_dataframe_by_id <- function( + fileid, + .f = NULL, + original = FALSE, + ... +) { + + # if not ingested, then whether to take the original is not relevant. + ingested <- is_ingested(fileid, ...) + + if (isFALSE(ingested)) { + original <- NA + } + + if (is.null(.f) & isTRUE(ingested) & isFALSE(original)) { + message("Downloading ingested version of data with readr::read_tsv. To download the original version and remove this message, set original = TRUE.\n") + .f <- readr::read_tsv + } + + if (is.null(.f) & (isFALSE(ingested) | isTRUE(original))) { + stop("read-in function was left NULL, but the target file is not ingested or you asked for the original version. Please supply a .f argument.\n") + } + + # READ raw data + raw <- get_file(file = fileid, original = original, ...) + + # save to temp and then read it in with supplied function + if (!is.null(.f)) { + get_dataframe_internal(raw, filename = "foo", .f = .f) + } +} + +#' @rdname get_dataframe +#' @inheritParams get_file_by_doi +#' @export +get_dataframe_by_doi <- function ( + filedoi, + .f = NULL, + original = FALSE, + ... +) { + filedoi <- prepend_doi(filedoi) + + # get_file can also take doi now + get_dataframe_by_id(fileid = filedoi, .f = .f, original = original, ...) +} + +#' Write to temp and apply function +#' +#' @keywords internal +get_dataframe_internal <- function (raw, filename, .f) { + tryCatch( + { + tmp <- tempfile(filename) + writeBin(raw, tmp) + do.call(.f, list(tmp)) + }, + finally = { + if (file.exists(tmp)) unlink(tmp) + } + ) +} diff --git a/R/get_dataset.R b/R/get_dataset.R index 039d423..09435e7 100644 --- a/R/get_dataset.R +++ b/R/get_dataset.R @@ -66,7 +66,7 @@ get_dataset <- function( #' @rdname get_dataset #' @param block A character string specifying a metadata block to retrieve. By default this is \dQuote{citation}. Other values may be available, depending on the dataset, such as \dQuote{geospatial} or \dQuote{socialscience}. -#' @importFrom utils str +#' #' @export dataset_metadata <- function( dataset, diff --git a/R/get_file.R b/R/get_file.R index 2934a02..7964f42 100644 --- a/R/get_file.R +++ b/R/get_file.R @@ -1,170 +1,177 @@ #' @rdname files -#' @title Download File(s) -#' @description Download Dataverse File(s) +#' +#' @title Download File +#' +#' @description Download Dataverse File(s). `get_file` is a general wrapper, +#' and can take either dataverse objects, file IDs, or a filename and dataverse. +#' `get_file_by_name` is a shorthand for running `get_file` by +#' specifying a file name (`filename`) and dataset (`dataset`). +#' `get_file_by_doi` obtains a file by its file DOI, bypassing the +#' `dataset` argument. +#' +#' Internally, all functions download each file by `get_file_by_id`. `get_file_*` +#' functions return a raw binary file, which cannot be readily analyzed in R. +#' To use the objects as dataframes, see the `get_dataset_*` functions at \link{get_dataset} +#' #' @details This function provides access to data files from a Dataverse entry. -#' @param file An integer specifying a file identifier; or a vector of integers specifying file identifiers; or, if \code{doi} is specified, a character string specifying a file name within the DOI-identified dataset; or an object of class \dQuote{dataverse_file} as returned by \code{\link{dataset_files}}. -#' @template ds -#' @param format A character string specifying a file format. For \code{get_file}: by default, this is \dQuote{original} (the original file format). If \dQuote{RData} or \dQuote{prep} is used, an alternative is returned. If \dQuote{bundle}, a compressed directory containing a bundle of file formats is returned. For \code{get_file_metadata}, this is \dQuote{ddi}. -#' @param vars A character vector specifying one or more variable names, used to extract a subset of the data. +#' +#' @param file An integer specifying a file identifier; or a vector of integers +#' specifying file identifiers; or, if used with the prefix \code{"doi:"}, a +#' character with the file-specific DOI; or, if used without the prefix, a +#' filename accompanied by a dataset DOI in the `dataset` argument, or an object of +#' class \dQuote{dataverse_file} as returned by \code{\link{dataset_files}}. +#' @param dataset @kuriwaki, can you please add a description for this parameter? +#' @param format A character string specifying a file format for download. +#' by default, this is \dQuote{original} (the original file format). If `NULL`, +#' no query is added, so ingested files are returned in their ingested TSV form. +#' For tabular datasets, the option \dQuote{bundle} downloads the bundle +#' of the original and archival versions, as well as the documentation. +#' See for details. +#' @param vars A character vector specifying one or more variable names, used to +#' extract a subset of the data. +#' #' @template envvars #' @template dots -#' @return \code{get_file_metadata} returns a character vector containing a DDI metadata file. \code{get_file} returns a raw vector (or list of raw vectors, if \code{length(file) > 1}). +#' @template ds +#' +#' @return \code{get_file} returns a raw vector (or list of raw vectors, +#' if \code{length(file) > 1}), which can be saved locally with the `writeBin` +#' function. To load datasets into the R environment dataframe, see +#' \link{get_dataframe_by_name}. +#' +#' @seealso To load the objects as datasets \link{get_dataframe_by_name}. +#' #' @examples #' \dontrun{ -#' # download file from: -#' # https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/ARKOTI -#' monogan <- get_dataverse("monogan") -#' monogan_data <- dataverse_contents(monogan) -#' d1 <- get_dataset("doi:10.7910/DVN/ARKOTI") -#' f <- get_file(d1$files$datafile$id[3]) -#' -#' # check file metadata -#' m1 <- get_file_metadata("constructionData.tab", "doi:10.7910/DVN/ARKOTI") -#' m2 <- get_file_metadata(2437257) -#' -#' # retrieve file based on DOI and filename -#' f2 <- get_file("constructionData.tab", "doi:10.7910/DVN/ARKOTI") -#' f2 <- get_file(2692202) -#' -#' # retrieve file based on "dataverse_file" object -#' flist <- dataset_files(2692151) -#' get_file(flist[[2]]) -#' -#' # retrieve all files in a dataset in their original format (returns a list of raw vectors) -#' file_ids <- get_dataset("doi:10.7910/DVN/CXOB4K")[['files']]$id -#' f3 <- get_file(file_ids, format = "original") -#' # read file as data.frame -#' if (require("rio")) { -#' tmp <- tempfile(fileext = ".dta") -#' writeBin(f, tmp) -#' dat <- haven::read_dta(tmp) -#' -#' # check UNF match -#' # if (require("UNF")) { -#' # unf(dat) %unf% d1$files$datafile$UNF[3] -#' # } -#' } +#' +#' # 1. Using filename and dataverse +#' f1 <- get_file_by_name( +#' filename = "nlsw88.tab", +#' dataset = "10.70122/FK2/PPIAXE", +#' server = "demo.dataverse.org" +#' ) +#' +#' # 2. Using file DOI +#' f2 <- get_file_by_doi( +#' filedoi = "10.70122/FK2/PPIAXE/MHDB0O", +#' server = "demo.dataverse.org" +#' ) +#' +#' # 3. Two-steps: Find ID from get_dataset +#' d3 <- get_dataset("doi:10.70122/FK2/PPIAXE", server = "demo.dataverse.org") +#' f3 <- get_file(d3$files$id[1], server = "demo.dataverse.org") +#' +#' # 4. Retrieve multiple raw data in list +#' f4_vec <- get_dataset( +#' "doi:10.70122/FK2/PPIAXE", +#' server = "demo.dataverse.org" +#' )$files$id +#' +#' f4 <- get_file(f4_vec, server = "demo.dataverse.org") +#' length(f4) +#' +#' # Write binary files +#' # (see `get_dataframe_by_name` to load in environment) +#' # The appropriate file extension needs to be assigned by the user. +#' writeBin(f1, "nlsw88.dta") +#' writeBin(f2, "nlsw88.dta") +#' +#' writeBin(f4[[1]], "nlsw88.rds") # originally a rds file +#' writeBin(f4[[2]], "nlsw88.dta") # originally a dta file #' } -#' @importFrom utils unzip +#' #' @export get_file <- function( file, - dataset = NULL, - format = c("original", "RData", "prep", "bundle"), - # thumb = TRUE, - vars = NULL, - key = Sys.getenv("DATAVERSE_KEY"), - server = Sys.getenv("DATAVERSE_SERVER"), + dataset = NULL, + format = c("original", "bundle"), + vars = NULL, + key = Sys.getenv("DATAVERSE_KEY"), + server = Sys.getenv("DATAVERSE_SERVER"), + original = TRUE, ... ) { + format <- match.arg(format) # single file ID if (is.numeric(file)) fileid <- file - # get file ID from 'dataset' - if (!is.numeric(file)) { - if (inherits(file, "dataverse_file")) { - fileid <- get_fileid(file, key = key, server = server) - } else if (is.null(dataset)) { - stop("When 'file' is a character string, dataset must be specified. Or, use a global fileid instead.") - } else { - fileid <- get_fileid(dataset, file, key = key, server = server, ...) - } - } else { - fileid <- file - } + # get file ID from 'dataset'. Streamline in feature relying on get_fileid + if (!is.numeric(file) & inherits(file, "dataverse_file")) + fileid <- get_fileid.dataverse_file(file, key = key, server = server) + if (!is.numeric(file) & !inherits(file, "dataverse_file") & !is.null(dataset)) + fileid <- get_fileid.character(dataset, file, key = key, server = server, ...) - # # request multiple files ----- - # if (length(fileid) > 1) { - # fileid <- paste0(fileid, collapse = ",") - # u <- paste0(api_url(server), "access/datafiles/", fileid) - # r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...) - # httr::stop_for_status(r) - # tempf <- tempfile(fileext = ".zip") - # tempd <- tempfile() - # dir.create(tempd) - # on.exit(unlink(tempf), add = TRUE) - # on.exit(unlink(tempd), add = TRUE) - # writeBin(httr::content(r, as = "raw"), tempf) - # to_extract <- utils::unzip(tempf, list = TRUE) - # out <- lapply(to_extract$Name[to_extract$Name != "MANIFEST.TXT"], function(zipf) { - # utils::unzip(zipfile = tempf, files = zipf, exdir = tempd) - # readBin(file.path(tempd, zipf), "raw", n = 1e8) - # }) - # return(out) - # } + if (!is.numeric(file) & !inherits(file, "dataverse_file") & is.null(dataset)) { + if (grepl(x = file, pattern = "^doi")) { + fileid <- file # doi is allowed + } else { + stop("When 'file' is a character (non-global ID), dataset must be specified.") + } + } - # downloading files sequentially and add the raw vectors to a list + # Main function. Call get_file_by_id out <- vector("list", length(fileid)) - for (i in 1:length(fileid)) { - if (format == "bundle") { - u <- paste0(api_url(server), "access/datafile/bundle/", fileid[i]) - r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...) - } - if (format != "bundle") { - u <- paste0(api_url(server), "access/datafile/", fileid[i]) - query <- list() - if (!is.null(vars)) { - query$vars <- paste0(vars, collapse = ",") - } - if (!is.null(format)) { - query$format <- match.arg(format) - } - # request single file in non-bundle format ---- - # add query if ingesting a tab (detect from original file name) - if (length(query) == 1 & grepl("\\.tab$", file[i])) { - r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), query = query, ...) - } else { - # do not add query if not an ingestion file - r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...) - } - } - httr::stop_for_status(r) - out[[i]] <- httr::content(r, as = "raw") - } - # return the raw vector if there's a single file - if (length(out) == 1) { - return (out[[1]]) + for (i in seq_along(fileid)) { + out[[i]] <- get_file_by_id( + fileid = fileid[i], + dataset = dataset, + format = format, + vars = vars, + key = key, + server = server, + original = original, + ... + ) } - else { - # return a list of raw vectors otherwise - return (out) + + if (length(out) == 1L) { # return the raw vector if there's a single file + return(out[[1]]) + } else { + return(out) # return a list of raw vectors otherwise } } -get_file_name_from_header <- function(x) { - gsub("\"", "", strsplit(httr::headers(x)[["content-type"]], "name=")[[1]][2]) -} #' @rdname files -#' @import xml2 +#' +#' @param filename Filename of the dataset, with file extension as shown in Dataverse +#' (for example, if nlsw88.dta was the original but is displayed as the ingested +#' nlsw88.tab, use the ingested version.) +#' #' @export -get_file_metadata <- function( - file, - dataset = NULL, - format = c("ddi", "preprocessed"), - key = Sys.getenv("DATAVERSE_KEY"), - server = Sys.getenv("DATAVERSE_SERVER"), +get_file_by_name <- function ( + filename, + dataset, + format = c("original", "bundle"), + vars = NULL, + key = Sys.getenv("DATAVERSE_KEY"), + server = Sys.getenv("DATAVERSE_SERVER"), + original = TRUE, ... ) { - # get file ID from doi - if (!is.numeric(file)) { - if (inherits(file, "dataverse_file")) { - file <- get_fileid(file) - } else if (is.null(dataset)) { - stop("When 'file' is a character string, dataset must be specified. Or, use a global fileid instead.") - } else { - file <- get_fileid(dataset, file, key = key, server = server, ...) - } - } - format <- match.arg(format) - u <- paste0(api_url(server), "access/datafile/", file, "/metadata/", format) - r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...) - httr::stop_for_status(r) - out <- httr::content(r, as = "text", encoding = "UTF-8") - return(out) - } + format <- match.arg(format) + + + # retrieve ID + fileid <- get_fileid.character( + x = dataset, + file = filename, + server = server, + ... + ) + + get_file_by_id( + fileid, + format = format, + vars = vars, + key = key, + server = server, + original = original, + ... + ) +} diff --git a/R/get_file_by_id.R b/R/get_file_by_id.R new file mode 100644 index 0000000..0e37c74 --- /dev/null +++ b/R/get_file_by_id.R @@ -0,0 +1,110 @@ +#' @rdname files +#' +#' @param original A logical, defaulting to TRUE. If a ingested (.tab) version is +#' available, download the original version instead of the ingested? If there was +#' no ingested version, is set to NA. Note in `get_dataframe_*`, +#' `original` is set to FALSE by default. Either can be changed. +#' @param fileid A numeric ID internally used for `get_file_by_id` +#' +#' @export +get_file_by_id <- function( + fileid, + dataset = NULL, + format = c("original", "bundle"), + vars = NULL, + original = TRUE, + key = Sys.getenv("DATAVERSE_KEY"), + server = Sys.getenv("DATAVERSE_SERVER"), + ... +) { + format <- match.arg(format) + + if (length(fileid) != 1L) { + stop("The `fileid` parameter must be single element.") + } else if (!(inherits(fileid, "numeric") | inherits(fileid, "integer") | inherits(fileid, "character"))) { + stop("The `fileid` data type must be numeric, integer, or character.") + } + # `dataset` place holder. + checkmate::assert_character(format , any.missing = FALSE, len = 1) + # `vars` place holder. + checkmate::assert_logical( original, any.missing = TRUE , len = 1) + checkmate::assert_character(key , any.missing = FALSE, len = 1) + checkmate::assert_character(server , any.missing = FALSE, len = 1) + + # must be a number OR doi string in the form of "doi:" + use_persistent_id <- !is.numeric(fileid) + if (use_persistent_id) { + if (!grepl(x = fileid, pattern = "^doi:")) + stop("A 'persistent' fileid must be prefixed with 'doi:'. It was `", fileid, "`.") + } else { + if (!checkmate::check_integerish(fileid)) + stop("A 'non-persistent' fileid must be a whole number. It was `", fileid, "`.") + } + + # ping get_file_metadata to see if file is ingested + is_ingested <- is_ingested(fileid, server = server) + + # update archival if not specified + if (isFALSE(is_ingested)) + original <- NA + + # create query ----- + query <- list() + if (!is.null(vars)) + query$vars <- paste0(vars, collapse = ",") + + # format only matters in ingested datasets, + # For non-ingested files (rds/docx), we need to NOT specify a format + # also for bundle, only change url + if (is_ingested & format != "bundle") + query$format <- match.arg(format) + + # if the original is not desired, we need to NOT specify a format + if (is_ingested & (isFALSE(original) || is.na(original) || is.null(original))) + query$format <- NULL + + # part of URL depending on DOI, bundle, or file + if (use_persistent_id) { + u_part <- "access/datafile/:persistentId/?persistentId=" + } else if (format == "bundle") { + u_part <- "access/datafile/bundle/" + } else if (format == "original") { + u_part <- "access/datafile/" + } else { + stop("The `format` value should be 'bundle' or 'original', or a doi needs to be passed to `fileid`.") + } + + # If not bundle, request single file in non-bundle format ---- + u <- paste0(api_url(server), u_part, fileid) + r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), query = query, ...) + + httr::stop_for_status(r) + httr::content(r, as = "raw") + } + +#' @rdname files +#' @param filedoi A DOI for a single file (not the entire dataset), of the form +#' `"10.70122/FK2/PPIAXE/MHDB0O"` or `"doi:10.70122/FK2/PPIAXE/MHDB0O"` +#' +#' @export +get_file_by_doi <- function( + filedoi, + dataset = NULL, + format = c("original", "bundle"), + vars = NULL, + original = TRUE, + key = Sys.getenv("DATAVERSE_KEY"), + server = Sys.getenv("DATAVERSE_SERVER"), + ... +) { + get_file_by_id( + fileid = prepend_doi(filedoi), + dataset = dataset, + format = format, + vars = vars, + key = key, + server = server, + original = original, + ... + ) +} diff --git a/R/get_file_metadata.R b/R/get_file_metadata.R new file mode 100644 index 0000000..c0fed61 --- /dev/null +++ b/R/get_file_metadata.R @@ -0,0 +1,47 @@ +#' Retrieve a ddi metadata file +#' +#' +#' @param format Defaults to \dQuote{ddi} for metadata files +#' @inheritParams get_file +#' @return A character vector containing a DDI +#' metadata file. +#' +#' @export +get_file_metadata <- + function(file, + dataset = NULL, + format = c("ddi", "preprocessed"), + key = Sys.getenv("DATAVERSE_KEY"), + server = Sys.getenv("DATAVERSE_SERVER"), + ...) { + + # get file ID from doi + persistentID <- FALSE + if (!is.numeric(file)) { + if (inherits(file, "dataverse_file")) { + file <- get_fileid(file) + } else if (grepl(x = file, pattern = "^doi:")) { + # if file-specific DOI, then use DOI + persistentID <- TRUE + } else if (is.null(dataset)) { + stop("When 'file' is a character string, dataset must be specified. Or, use a global fileid instead.") + } else { + file <- get_fileid(dataset, file, key = key, server = server, ...) + } + } + + format <- match.arg(format) + + # different URL depending on if you have persistentId + if (persistentID) { + u <- paste0(api_url(server), "access/datafile/:persistentId/metadata/", format, "/?persistentId=", file) + } else { + u <- paste0(api_url(server), "access/datafile/", file, "/metadata/", format) + } + + r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...) + httr::stop_for_status(r) + out <- httr::content(r, as = "text", encoding = "UTF-8") + return(out) + } + diff --git a/R/native_roles.R b/R/native_roles.R index a97381f..1a30631 100644 --- a/R/native_roles.R +++ b/R/native_roles.R @@ -1,7 +1,7 @@ # @rdname roles # @title Roles # @description Get, create, update, and delete a Dataverse role -# @details In Dataverse, roles provide one or more users with permissions. Rather than granting several permissions to each user, you can create a role that carries specific permissions and then freely change the roles to which each user is assigned. See \href{http://guides.dataverse.org/en/latest/user/dataverse-management.html#dataverse-permissions}{the Dataverse User Guide} for more details. +# @details In Dataverse, roles provide one or more users with permissions. Rather than granting several permissions to each user, you can create a role that carries specific permissions and then freely change the roles to which each user is assigned. See \href{https://guides.dataverse.org/en/latest/user/dataverse-management.html#dataverse-permissions}{the Dataverse User Guide} for more details. # # Once created using \code{\link{create_role}}, \code{\link{delete_role}} can delete a role. # @template role diff --git a/R/print.R b/R/print.R index 9ad8137..f514a1f 100644 --- a/R/print.R +++ b/R/print.R @@ -33,7 +33,6 @@ print.dataverse <- function(x, ...) { } # dataverse_dataset class -#' @importFrom utils str #' @export print.dataverse_dataset <- function(x, ...) { cat("Dataset (", x$id, "): ", x$persistentUrl, "\n", sep = "") diff --git a/R/utils.R b/R/utils.R index 9766290..9379f54 100644 --- a/R/utils.R +++ b/R/utils.R @@ -81,6 +81,39 @@ get_fileid.dataverse_file <- function(x, ...) { x[["dataFile"]][["id"]] } + +#' Identify if file is an ingested file +#' +#' @param fileid A numeric fileid or file-specific DOI +#' @template envvars +#' +# @examples +# # https://demo.dataverse.org/file.xhtml?persistentId=doi:10.70122/FK2/X5MUPQ/T0KKUZ +# # nlsw88.tab +# is_ingested(fileid = "doi:10.70122/FK2/X5MUPQ/T0KKUZ", +# server = "demo.dataverse.org") +# +# # nlsw88_rds-export.rds +# is_ingested(fileid = "doi:10.70122/FK2/PPIAXE/SUCFNI", +# server = "demo.dataverse.org") +# +is_ingested <- + function( + fileid, + key = Sys.getenv("DATAVERSE_KEY"), + server = Sys.getenv("DATAVERSE_SERVER") + ) { + ping_metadata <- tryCatch( + { + get_file_metadata(fileid, key = key, server = server) + }, + error = function(e) e + ) + !inherits(ping_metadata, "error") # if error, not ingested +} + + + # other functions prepend_doi <- function(dataset) { if (grepl("^hdl", dataset)) { @@ -100,7 +133,6 @@ prepend_doi <- function(dataset) { dataset } -#' @import httr api_url <- function(server = Sys.getenv("DATAVERSE_SERVER"), prefix = "api/") { if (is.null(server) || server == "") { stop("'server' is missing with no default set in DATAVERSE_SERVER environment variable.") @@ -143,3 +175,4 @@ parse_dataset <- function(out) { out$files <- cbind(out$files, file_df) structure(out, class = "dataverse_dataset") } + diff --git a/README.Rmd b/README.Rmd index ef9d212..b4864d4 100644 --- a/README.Rmd +++ b/README.Rmd @@ -1,23 +1,48 @@ -# R Client for Dataverse 4 Repositories +--- +title: "R Client for Dataverse 4 Repositories" +output: github_document +--- ```{r knitr_options, echo=FALSE, results="hide"} options(width = 120) knitr::opts_chunk$set(results = "hold") +Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu") ``` -[![Dataverse Project logo](http://dataverse.org/files/dataverseorg/files/dataverse_project_logo-hp.png "Dataverse Project")](http://dataverse.org) +[![CRAN Version](https://www.r-pkg.org/badges/version/dataverse)](https://cran.r-project.org/package=dataverse) ![Downloads](https://cranlogs.r-pkg.org/badges/dataverse) [![Travis-CI Build Status](https://travis-ci.org/IQSS/dataverse-client-r.png?branch=master)](https://travis-ci.org/IQSS/dataverse-client-r) [![codecov.io](https://codecov.io/github/IQSS/dataverse-client-r/coverage.svg?branch=master)](https://codecov.io/github/IQSS/dataverse-client-r?branch=master) + +[![Dataverse Project logo](https://dataverse.org/files/dataverseorg/files/dataverse_project_logo-hp.png "Dataverse Project")](https://dataverse.org) + +The **dataverse** package provides access to [Dataverse 4](https://dataverse.org/) APIs, enabling data search, retrieval, and deposit, thus allowing R users to integrate public data sharing into the reproducible research workflow. **dataverse** is the next-generation iteration of [the **dvn** package](https://cran.r-project.org/package=dvn), which works with Dataverse 3 ("Dataverse Network") applications. **dataverse** includes numerous improvements for data search, retrieval, and deposit, including use of the (currently in development) **sword** package for data deposit and the **UNF** package for data fingerprinting. + +### Getting Started -The **dataverse** package provides access to [Dataverse 4](http://dataverse.org/) APIs, enabling data search, retrieval, and deposit, thus allowing R users to integrate public data sharing into the reproducible research workflow. **dataverse** is the next-generation iteration of [the **dvn** package](https://cran.r-project.org/package=dvn), which works with Dataverse 3 ("Dataverse Network") applications. **dataverse** includes numerous improvements for data search, retrieval, and deposit, including use of the (currently in development) **sword** package for data deposit and the **UNF** package for data fingerprinting. +You can find a stable 2017 release on [CRAN](https://cran.r-project.org/package=dataverse), or install the latest development version from GitHub: -Some features of the Dataverse 4 API are public and require no authentication. This means in many cases you can search for and retrieve data without a Dataverse account for that a specific Dataverse installation. But, other features require a Dataverse account for the specific server installation of the Dataverse software, and an API key linked to that account. Instructions for obtaining an account and setting up an API key are available in the [Dataverse User Guide](http://guides.dataverse.org/en/latest/user/account.html). (Note: if your key is compromised, it can be regenerated to preserve security.) Once you have an API key, this should be stored as an environment variable called `DATAVERSE_KEY`. It can be set within R using: +```{r, echo = FALSE, eval = FALSE} +if (!require("remotes")) { + install.packages("remotes") +} +remotes::install_github("iqss/dataverse-client-r") +``` -```R +```{r} +library("dataverse") +``` + +#### Keys + +Some features of the Dataverse 4 API are public and require no authentication. This means in many cases you can search for and retrieve data without a Dataverse account for that a specific Dataverse installation. But, other features require a Dataverse account for the specific server installation of the Dataverse software, and an API key linked to that account. Instructions for obtaining an account and setting up an API key are available in the [Dataverse User Guide](https://guides.dataverse.org/en/latest/user/account.html). (Note: if your key is compromised, it can be regenerated to preserve security.) Once you have an API key, this should be stored as an environment variable called `DATAVERSE_KEY`. It can be set within R using: + +``` r Sys.setenv("DATAVERSE_KEY" = "examplekey12345") ``` -Because [there are many Dataverse installations](http://dataverse.org/), all functions in the R client require specifying what server installation you are interacting with. This can be set by default with an environment variable, `DATAVERSE_SERVER`. This should be the Dataverse server, without the "https" prefix or the "/api" URL path, etc. For example, the Harvard Dataverse can be used by setting: +#### Server + +Because [there are many Dataverse installations](https://dataverse.org/), all functions in the R client require specifying what server installation you are interacting with. This can be set by default with an environment variable, `DATAVERSE_SERVER`. This should be the Dataverse server, without the "https" prefix or the "/api" URL path, etc. For example, the Harvard Dataverse can be used by setting: -```R +``` r Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu") ``` @@ -25,64 +50,133 @@ Note: The package attempts to compensate for any malformed values, though. Currently, the package wraps the data management features of the Dataverse API. Functions for other API features - related to user management and permissions - are not currently exported in the package (but are drafted in the [source code](https://github.com/IQSS/dataverse-client-r)). -### Data Discovery +### Data and Metadata Retrieval -Dataverse supplies a pretty robust search API to discover Dataverses, datasets, and files. The simplest searches simply consist of a query string: +The dataverse package provides multiple interfaces to obtain data into R. Users can supply a file DOI, a dataset DOI combined with a filename, or a dataverse object. They can read in the file as a raw binary or a dataset read in with the appropriate R function. -```{r search1} -library("dataverse") -str(dataverse_search("Gary King"), 1) +#### Reading data as R objects + +Use the `get_dataframe_*()` functions, depending on the input you have. For example, we will read a survey dataset on Dataverse, [nlsw88.dta](https://demo.dataverse.org/file.xhtml?persistentId=doi:10.70122/FK2/PPKHI1/ZYATZZ) (`doi:10.70122/FK2/PPKHI1/ZYATZZ`), originally in Stata dta form. + +With a file DOI, we can use the `get_dataframe_by_doi` function: + +```{r get_dataframe_by_doi} +nlsw <- + get_dataframe_by_doi( + filedoi = "10.70122/FK2/PPIAXE/MHDB0O", + server = "demo.dataverse.org" + ) ``` +which by default reads in the ingested file (not the original dta) by the [`readr::read_tsv`](https://readr.tidyverse.org/reference/read_delim.html) function. -More complicated searches might specify metadata fields: +Alternatively, we can download the same file by specifying the filename and the DOI of the "dataset" (in Dataverse, a collection of files is called a dataset). -```{r search2} -str(dataverse_search(author = "Gary King", title = "Ecological Inference"), 1) +```{r get_dataframe_by_name_tsv, message=FALSE} +nlsw_tsv <- + get_dataframe_by_name( + filename = "nlsw88.tab", + dataset = "10.70122/FK2/PPIAXE", + server = "demo.dataverse.org" + ) ``` -And searches can be restricted to specific types of objects (Dataverse, dataset, or file): +Now, Dataverse often translates rectangular data into an ingested, or "archival" version, which is application-neutral and easily-readable. `read_dataframe_*()` defaults to taking this ingested version rather than using the original, through the argument `original = FALSE`. -```{r search3} -str(dataverse_search(author = "Gary King", type = "dataset"), 1) +This default is safe because you may not have the proprietary software that was originally used. On the other hand, the data may have lost information in the process of the ingestation. + +Instead, to read the same file but its original version, specify `original = TRUE` and set an `.f` argument. In this case, we know that `nlsw88.tab` is a Stata `.dta` dataset, so we will use the `haven::read_dta` function. + +```{r get_dataframe_by_name_original} +nlsw_original <- + get_dataframe_by_name( + filename = "nlsw88.tab", + dataset = "10.70122/FK2/PPIAXE", + .f = haven::read_dta, + original = TRUE, + server = "demo.dataverse.org" + ) ``` -The results are paginated using `per_page` argument. To retrieve subsequent pages, specify `start`. +Note that even though the file prefix is ".tab", we use `haven::read_dta`. +Of course, when the dataset is not ingested (such as a Rds file), users would always need to specify an `.f` argument for the specific file. -### Data and Metadata Retrieval -The easiest way to access data from Dataverse is to use a persistent identifier (typically a DOI). You can retrieve the contents of a Dataverse dataset: +Note the difference between `nls_tsv` and `nls_original`. `nls_original` preserves the data attributes like value labels, whereas `nls_tsv` has dropped this or left this in file metadata. -```{r get_dataset} -get_dataset("doi:10.7910/DVN/ARKOTI") +```{r} +class(nlsw_tsv$race) # tab ingested version only has numeric data ``` -Knowing a file name, you can also access that file (e.g., a Stata dataset) directly in R: +```{r} +attr(nlsw_original$race, "labels") # original dta has value labels +``` + + -```{r get_file} -f <- get_file("constructionData.tab", "doi:10.7910/DVN/ARKOTI") -# load it into memory -tmp <- tempfile(fileext = ".dta") -writeBin(as.vector(f), tmp) -dat <- foreign::read.dta(tmp) +#### Reading a dataset as a binary file. + +In some cases, you may not want to read in the data in your environment, perhaps because that is not possible (e.g. for a `.docx` file), and you want to simply write these files your local disk. To do this, use the more primitive `get_file_*` commands. The arguments are equivalent, except we no longer need an `.f` argument + +```{r get_file_by_name} +nlsw_raw <- + get_file_by_name( + filename = "nlsw88.tab", + dataset = "10.70122/FK2/PPIAXE", + server = "demo.dataverse.org" + ) +class(nlsw_raw) +``` + +#### Reading file metadata + +The function `get_file_metadata()` can also be used similarly. This will return a metadata format for ingested tabular files in the `ddi` format. The function `get_dataset()` will retrieve the list of files in a dataset. + +```{r, get_dataset} +get_dataset( + dataset = "10.70122/FK2/PPIAXE", + server = "demo.dataverse.org" +) +``` + +### Data Discovery + +Dataverse supplies a robust search API to discover Dataverses, datasets, and files. The simplest searches simply consist of a query string: + +```{r search1, eval = FALSE} +dataverse_search("Gary King") +``` + +More complicated searches might specify metadata fields: + +```{r search2, eval = FALSE} +dataverse_search(author = "Gary King", title = "Ecological Inference") ``` -If you don't know the file name in advance, you can parse the available files returned by `get_dataset()` and retrieve the file using its Dataverse "id" number. +And searches can be restricted to specific types of objects (Dataverse, dataset, or file): +```{r search3, eval = FALSE} +dataverse_search(author = "Gary King", type = "dataset") +``` + +The results are paginated using `per_page` argument. To retrieve subsequent pages, specify `start`. ### Data Archiving Dataverse provides two - basically unrelated - workflows for managing (adding, documenting, and publishing) datasets. The first is built on [SWORD v2.0](http://swordapp.org/sword-v2/). This means that to create a new dataset listing, you will have first initialize a dataset entry with some metadata, add one or more files to the dataset, and then publish it. This looks something like the following: -```R +``` r # retrieve your service document d <- service_document() # create a list of metadata -metadat <- list(title = "My Study", - creator = "Doe, John", - description = "An example study") +metadat <- + list( + title = "My Study", + creator = "Doe, John", + description = "An example study" + ) # create the dataset ds <- initiate_sword_dataset("mydataverse", body = metadat) @@ -101,7 +195,7 @@ list_datasets("mydataverse") The second workflow is called the "native" API and is similar but uses slightly different functions: -```R +``` r # create the dataset ds <- create_dataset("mydataverse") @@ -119,22 +213,6 @@ get_dataverse("mydataverse") Through the native API it is possible to update a dataset by modifying its metadata with `update_dataset()` or file contents using `update_dataset_file()` and then republish a new version using `publish_dataset()`. -## Installation - -[![CRAN Version](https://www.r-pkg.org/badges/version/dataverse)](https://cran.r-project.org/package=dataverse) -![Downloads](https://cranlogs.r-pkg.org/badges/dataverse) -[![Travis-CI Build Status](https://travis-ci.org/IQSS/dataverse-client-r.png?branch=master)](https://travis-ci.org/IQSS/dataverse-client-r) -[![codecov.io](https://codecov.io/github/IQSS/dataverse-client-r/coverage.svg?branch=master)](https://codecov.io/github/IQSS/dataverse-client-r?branch=master) - -You can (eventually) find a stable release on [CRAN](https://cran.r-project.org/package=dataverse), or install the latest development version from GitHub: - -```R -if (!require("remotes")) { - install.packages("remotes") -} -remotes::install_github("iqss/dataverse-client-r") -library("dataverse") -``` - -Users interested in downloading metadata from archives other than Dataverse may be interested in Kurt Hornik's [OAIHarvester](https://cran.r-project.org/package=OAIHarvester) and Scott Chamberlain's [oai](https://cran.r-project.org/package=oai), which offer metadata download from any web repository that is compliant with the [Open Archives Initiative](http://www.openarchives.org/) standards. Additionally, [rdryad](https://cran.r-project.org/package=rdryad) uses OAIHarvester to interface with [Dryad](http://datadryad.org/). The [rfigshare](https://cran.r-project.org/package=rfigshare) package works in a similar spirit to **dataverse** with [https://figshare.com/](https://figshare.com/). +### Other Installations +Users interested in downloading metadata from archives other than Dataverse may be interested in Kurt Hornik's [OAIHarvester](https://cran.r-project.org/package=OAIHarvester) and Scott Chamberlain's [oai](https://cran.r-project.org/package=oai), which offer metadata download from any web repository that is compliant with the [Open Archives Initiative](http://www.openarchives.org/) standards. Additionally, [rdryad](https://cran.r-project.org/package=rdryad) uses OAIHarvester to interface with [Dryad](https://datadryad.org/stash). The [rfigshare](https://cran.r-project.org/package=rfigshare) package works in a similar spirit to **dataverse** with . diff --git a/README.md b/README.md index f709041..724349a 100644 --- a/README.md +++ b/README.md @@ -1,198 +1,279 @@ -# R Client for Dataverse 4 Repositories +R Client for Dataverse 4 Repositories +================ + +[![CRAN +Version](https://www.r-pkg.org/badges/version/dataverse)](https://cran.r-project.org/package=dataverse) +![Downloads](https://cranlogs.r-pkg.org/badges/dataverse) [![Travis-CI +Build +Status](https://travis-ci.org/IQSS/dataverse-client-r.png?branch=master)](https://travis-ci.org/IQSS/dataverse-client-r) +[![codecov.io](https://codecov.io/github/IQSS/dataverse-client-r/coverage.svg?branch=master)](https://codecov.io/github/IQSS/dataverse-client-r?branch=master) +[![Dataverse Project +logo](https://dataverse.org/files/dataverseorg/files/dataverse_project_logo-hp.png +"Dataverse Project")](https://dataverse.org) +The **dataverse** package provides access to +[Dataverse 4](https://dataverse.org/) APIs, enabling data search, +retrieval, and deposit, thus allowing R users to integrate public data +sharing into the reproducible research workflow. **dataverse** is the +next-generation iteration of [the **dvn** +package](https://cran.r-project.org/package=dvn), which works with +Dataverse 3 (“Dataverse Network”) applications. **dataverse** includes +numerous improvements for data search, retrieval, and deposit, including +use of the (currently in development) **sword** package for data deposit +and the **UNF** package for data fingerprinting. -[![Dataverse Project logo](http://dataverse.org/files/dataverseorg/files/dataverse_project_logo-hp.png "Dataverse Project")](http://dataverse.org) +### Getting Started -The **dataverse** package provides access to [Dataverse 4](http://dataverse.org/) APIs, enabling data search, retrieval, and deposit, thus allowing R users to integrate public data sharing into the reproducible research workflow. **dataverse** is the next-generation iteration of [the **dvn** package](https://cran.r-project.org/package=dvn), which works with Dataverse 3 ("Dataverse Network") applications. **dataverse** includes numerous improvements for data search, retrieval, and deposit, including use of the (currently in development) **sword** package for data deposit and the **UNF** package for data fingerprinting. +You can find a stable 2017 release on +[CRAN](https://cran.r-project.org/package=dataverse), or install the +latest development version from GitHub: -Some features of the Dataverse 4 API are public and require no authentication. This means in many cases you can search for and retrieve data without a Dataverse account for that a specific Dataverse installation. But, other features require a Dataverse account for the specific server installation of the Dataverse software, and an API key linked to that account. Instructions for obtaining an account and setting up an API key are available in the [Dataverse User Guide](http://guides.dataverse.org/en/latest/user/account.html). (Note: if your key is compromised, it can be regenerated to preserve security.) Once you have an API key, this should be stored as an environment variable called `DATAVERSE_KEY`. It can be set within R using: +``` r +library("dataverse") +``` -```R +#### Keys + +Some features of the Dataverse 4 API are public and require no +authentication. This means in many cases you can search for and retrieve +data without a Dataverse account for that a specific Dataverse +installation. But, other features require a Dataverse account for the +specific server installation of the Dataverse software, and an API key +linked to that account. Instructions for obtaining an account and +setting up an API key are available in the [Dataverse User +Guide](https://guides.dataverse.org/en/latest/user/account.html). (Note: +if your key is compromised, it can be regenerated to preserve security.) +Once you have an API key, this should be stored as an environment +variable called `DATAVERSE_KEY`. It can be set within R using: + +``` r Sys.setenv("DATAVERSE_KEY" = "examplekey12345") ``` -Because [there are many Dataverse installations](http://dataverse.org/), all functions in the R client require specifying what server installation you are interacting with. This can be set by default with an environment variable, `DATAVERSE_SERVER`. This should be the Dataverse server, without the "https" prefix or the "/api" URL path, etc. For example, the Harvard Dataverse can be used by setting: +#### Server -```R +Because [there are many Dataverse installations](https://dataverse.org/), +all functions in the R client require specifying what server +installation you are interacting with. This can be set by default with +an environment variable, `DATAVERSE_SERVER`. This should be the +Dataverse server, without the “https” prefix or the “/api” URL path, +etc. For example, the Harvard Dataverse can be used by setting: + +``` r Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu") ``` -Note: The package attempts to compensate for any malformed values, though. +Note: The package attempts to compensate for any malformed values, +though. -Currently, the package wraps the data management features of the Dataverse API. Functions for other API features - related to user management and permissions - are not currently exported in the package (but are drafted in the [source code](https://github.com/IQSS/dataverse-client-r)). +Currently, the package wraps the data management features of the +Dataverse API. Functions for other API features - related to user +management and permissions - are not currently exported in the package +(but are drafted in the [source +code](https://github.com/IQSS/dataverse-client-r)). -### Data Discovery +### Data and Metadata Retrieval -Dataverse supplies a pretty robust search API to discover Dataverses, datasets, and files. The simplest searches simply consist of a query string: +The dataverse package provides multiple interfaces to obtain data into +R. Users can supply a file DOI, a dataset DOI combined with a filename, +or a dataverse object. They can read in the file as a raw binary or a +dataset read in with the appropriate R function. +#### Reading data as R objects -```r -library("dataverse") -str(dataverse_search("Gary King"), 1) -``` +Use the `get_dataframe_*()` functions, depending on the input you have. +For example, we will read a survey dataset on Dataverse, +[nlsw88.dta](https://demo.dataverse.org/file.xhtml?persistentId=doi:10.70122/FK2/PPKHI1/ZYATZZ) +(`doi:10.70122/FK2/PPKHI1/ZYATZZ`), originally in Stata dta form. -``` -## 10 of 1043 results retrieved -``` +With a file DOI, we can use the `get_dataframe_by_doi` function: +``` r +nlsw <- + get_dataframe_by_doi( + filedoi = "10.70122/FK2/PPIAXE/MHDB0O", + server = "demo.dataverse.org" + ) ``` -## 'data.frame': 10 obs. of 17 variables: -## $ name : chr "00698McArthur-King-BoxCoverSheets.pdf" "00698McArthur-King-MemoOfAgreement.pdf" "00698McArthur-King-StudyDescription.pdf" "077_mod1_s2m.tab" ... -## $ type : chr "file" "file" "file" "file" ... -## $ url : chr "https://dataverse.harvard.edu/api/access/datafile/101348" "https://dataverse.harvard.edu/api/access/datafile/101349" "https://dataverse.harvard.edu/api/access/datafile/101350" "https://dataverse.harvard.edu/api/access/datafile/2910738" ... -## $ file_id : chr "101348" "101349" "101350" "2910738" ... -## $ description : chr "Describe contents of each box of a paper data set" "Legal agreement between data depositor and Murray Archive" "Overview: abstract, research methodology, publications, and other info." NA ... -## $ published_at : chr "2009-03-05T00:00:00Z" "2009-03-05T00:00:00Z" "2009-03-05T00:00:00Z" "2016-11-09T22:06:10Z" ... -## $ file_type : chr "Adobe PDF" "Adobe PDF" "Adobe PDF" "Tab-Delimited" ... -## $ file_content_type: chr "application/pdf" "application/pdf" "application/pdf" "text/tab-separated-values" ... -## $ size_in_bytes : int 503714 360107 16506 318276 NA NA NA NA NA NA -## $ md5 : chr "" "" "" "af9a6fa00bf29009e9eb5d366ad64660" ... -## $ checksum :'data.frame': 10 obs. of 2 variables: -## $ dataset_citation : chr "Charles C. McArthur; Stanley H. King, 2009, \"Harvard Student Study, 1960-1964\", hdl:1902.1/00698, Harvard Dataverse, V2" "Charles C. McArthur; Stanley H. King, 2009, \"Harvard Student Study, 1960-1964\", hdl:1902.1/00698, Harvard Dataverse, V2" "Charles C. McArthur; Stanley H. King, 2009, \"Harvard Student Study, 1960-1964\", hdl:1902.1/00698, Harvard Dataverse, V2" "International Food Policy Research Institute (IFPRI); Savannah Agricultural Research Institute, 2016, \"Medium "| __truncated__ ... -## $ unf : chr NA NA NA "UNF:6:4mZh78EEGxqFLF71f/Nh/A==" ... -## $ global_id : chr NA NA NA NA ... -## $ citationHtml : chr NA NA NA NA ... -## $ citation : chr NA NA NA NA ... -## $ authors :List of 10 -``` - -More complicated searches might specify metadata fields: - -```r -str(dataverse_search(author = "Gary King", title = "Ecological Inference"), 1) + ## Downloading ingested version of data with readr::read_tsv. To download the original version and remove this message, set original = TRUE. + + ## + ## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────── + ## cols( + ## idcode = col_double(), + ## age = col_double(), + ## race = col_double(), + ## married = col_double(), + ## never_married = col_double(), + ## grade = col_double(), + ## collgrad = col_double(), + ## south = col_double(), + ## smsa = col_double(), + ## c_city = col_double(), + ## industry = col_double(), + ## occupation = col_double(), + ## union = col_double(), + ## wage = col_double(), + ## hours = col_double(), + ## ttl_exp = col_double(), + ## tenure = col_double() + ## ) + +which by default reads in the ingested file (not the original dta) by +the +[`readr::read_tsv`](https://readr.tidyverse.org/reference/read_delim.html) +function. + +Alternatively, we can download the same file by specifying the filename +and the DOI of the “dataset” (in Dataverse, a collection of files is +called a dataset). + +``` r +nlsw_tsv <- + get_dataframe_by_name( + filename = "nlsw88.tab", + dataset = "10.70122/FK2/PPIAXE", + server = "demo.dataverse.org" + ) ``` -``` -## 10 of 1349 results retrieved +Now, Dataverse often translates rectangular data into an ingested, or +“archival” version, which is application-neutral and easily-readable. +`read_dataframe_*()` defaults to taking this ingested version rather +than using the original, through the argument `original = FALSE`. + +This default is safe because you may not have the proprietary software +that was originally used. On the other hand, the data may have lost +information in the process of the ingestation. + +Instead, to read the same file but its original version, specify +`original = TRUE` and set an `.f` argument. In this case, we know that +`nlsw88.tab` is a Stata `.dta` dataset, so we will use the +`haven::read_dta` function. + +``` r +nlsw_original <- + get_dataframe_by_name( + filename = "nlsw88.tab", + dataset = "10.70122/FK2/PPIAXE", + .f = haven::read_dta, + original = TRUE, + server = "demo.dataverse.org" + ) ``` -``` -## 'data.frame': 10 obs. of 17 variables: -## $ name : chr "00531Winter-LiberalArts-Clare-Data.tab" "00698McArthur-King-BoxCoverSheets.pdf" "00698McArthur-King-MemoOfAgreement.pdf" "00698McArthur-King-StudyDescription.pdf" ... -## $ type : chr "file" "file" "file" "file" ... -## $ url : chr "https://dataverse.harvard.edu/api/access/datafile/101725" "https://dataverse.harvard.edu/api/access/datafile/101348" "https://dataverse.harvard.edu/api/access/datafile/101349" "https://dataverse.harvard.edu/api/access/datafile/101350" ... -## $ file_id : chr "101725" "101348" "101349" "101350" ... -## $ description : chr "Clare College data in tab delimited format" "Describe contents of each box of a paper data set" "Legal agreement between data depositor and Murray Archive" "Overview: abstract, research methodology, publications, and other info." ... -## $ published_at : chr "2010-05-10T00:00:00Z" "2009-03-05T00:00:00Z" "2009-03-05T00:00:00Z" "2009-03-05T00:00:00Z" ... -## $ file_type : chr "Tab-Delimited" "Adobe PDF" "Adobe PDF" "Adobe PDF" ... -## $ file_content_type: chr "text/tab-separated-values" "application/pdf" "application/pdf" "application/pdf" ... -## $ size_in_bytes : int 167843 503714 360107 16506 318276 NA 3825612 4012 9054 48213 -## $ md5 : chr "" "" "" "" ... -## $ checksum :'data.frame': 10 obs. of 2 variables: -## $ unf : chr "UNF:3:9ZWOqiilVGnLacm4Qg2EYQ==" NA NA NA ... -## $ dataset_citation : chr "David G. Winter; David C. McClelland; Abigail J. Stewart, 2010, \"New Case for the Liberal Arts, 1974-1978\", h"| __truncated__ "Charles C. McArthur; Stanley H. King, 2009, \"Harvard Student Study, 1960-1964\", hdl:1902.1/00698, Harvard Dataverse, V2" "Charles C. McArthur; Stanley H. King, 2009, \"Harvard Student Study, 1960-1964\", hdl:1902.1/00698, Harvard Dataverse, V2" "Charles C. McArthur; Stanley H. King, 2009, \"Harvard Student Study, 1960-1964\", hdl:1902.1/00698, Harvard Dataverse, V2" ... -## $ global_id : chr NA NA NA NA ... -## $ citationHtml : chr NA NA NA NA ... -## $ citation : chr NA NA NA NA ... -## $ authors :List of 10 -``` +Note that even though the file prefix is “.tab”, we use `read_dta`. -And searches can be restricted to specific types of objects (Dataverse, dataset, or file): +Of course, when the dataset is not ingested (such as a Rds file), users +would always need to specify an `.f` argument for the specific file. +Note the difference between `nls_tsv` and `nls_original`. `nls_original` +preserves the data attributes like value labels, whereas `nls_tsv` has +dropped this or left this in file metadata. -```r -str(dataverse_search(author = "Gary King", type = "dataset"), 1) +``` r +class(nlsw_tsv$race) # tab ingested version only has numeric data ``` -``` -## 10 of 523 results retrieved -``` + ## [1] "numeric" +``` r +attr(nlsw_original$race, "labels") # original dta has value labels ``` -## 'data.frame': 10 obs. of 9 variables: -## $ name : chr "10 Million International Dyadic Events" "A Comparative Study between Gurukul System and Western System of Education" "A Lexicial Index of Electoral Democracy" "A Unified Model of Cabinet Dissolution in Parliamentary Democracies" ... -## $ type : chr "dataset" "dataset" "dataset" "dataset" ... -## $ url : chr "http://hdl.handle.net/1902.1/FYXLAWZRIA" "http://dx.doi.org/10.7910/DVN/329UAV" "http://dx.doi.org/10.7910/DVN/29106" "http://dx.doi.org/10.3886/ICPSR01115.v1" ... -## $ global_id : chr "hdl:1902.1/FYXLAWZRIA" "doi:10.7910/DVN/329UAV" "doi:10.7910/DVN/29106" "doi:10.3886/ICPSR01115.v1" ... -## $ description : chr "When the Palestinians launch a mortar attack into Israel, the Israeli army does not wait until the end of the c"| __truncated__ "India, in ancient times has witnessed students which used to be like the great king Vikramaditya. He followed t"| __truncated__ "We operationalize electoral democracy as a series of necessary-and-sufficient conditions arrayed in an ordinal "| __truncated__ "The literature on cabinet duration is split between two apparently irreconcilable positions. The ATTRIBUTES THE"| __truncated__ ... -## $ published_at: chr "2014-08-21T00:00:00Z" "2016-06-07T13:09:20Z" "2016-08-05T20:42:31Z" "2015-04-09T04:13:54Z" ... -## $ citationHtml: chr "King, Gary; Lowe, Will, 2008, \"10 Million International Dyadic Events\", . diff --git a/_pkgdown.yml b/_pkgdown.yml new file mode 100644 index 0000000..9695ab4 --- /dev/null +++ b/_pkgdown.yml @@ -0,0 +1,72 @@ +url: https://IQSS.github.io/dataverse-client-r + +template: + params: + bootswatch: sandstone + # docsearch: + # api_key: eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee + # index_name: dataverse-client-r + +home: + links: +# - text: Ask a question +# href: http://discourse.mc-stan.org/ + +development: + mode: release + +navbar: + title: "Dataverse R Client" + # type: inverse + +articles: +- title: "Getting Started" + navbar: ~ + desc: > + These vignettes provide an introduction to Dataverse with R. + contents: + - 'A-introduction' + - 'B-search' + - 'C-retrieval' + - 'D-archiving' + +reference: +- title: "Retrieve" + contents: + - '`get_file`' + - '`get_dataframe_by_name`' + - '`get_dataset`' + - '`get_dataverse`' + - '`get_facets`' + - '`get_file_metadata`' + - '`get_user_key`' + +- title: "Create, Add, & Publish" + contents: + - '`create_dataset`' + - '`create_dataverse`' + - '`add_dataset_file`' + - '`add_file`' + - '`publish_dataset`' + - '`publish_dataverse`' + - '`publish_sword_dataset`' + +- title: "Delete" + contents: + - '`delete_dataset`' + - '`delete_dataverse`' + - '`delete_file`' + - '`delete_sword_dataset`' + +- title: Other + contents: + - '`dataset_atom`' + - '`dataset_versions`' + - '`dataverse`' + - '`dataverse_metadata`' + - '`dataverse_search`' + - '`initiate_sword_dataset`' + - '`is_ingested`' + - '`list_datasets`' + - '`service_document`' + - '`set_dataverse_metadata`' diff --git a/docs/404.html b/docs/404.html new file mode 100644 index 0000000..c20f9e2 --- /dev/null +++ b/docs/404.html @@ -0,0 +1,169 @@ + + + + + + + + +Page not found (404) • dataverse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + + +
+ +
+
+ + +Content not found. Please use links in the navbar. + +
+ + + +
+ + + +
+ + +
+

Site built with pkgdown 1.6.1.

+
+ +
+
+ + + + + + + + diff --git a/docs/ISSUE_TEMPLATE.html b/docs/ISSUE_TEMPLATE.html new file mode 100644 index 0000000..c52c35a --- /dev/null +++ b/docs/ISSUE_TEMPLATE.html @@ -0,0 +1,200 @@ + + + + + + + + +NA • dataverse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + + +
+ +
+
+ + + +

Please specify whether your issue is about:

+
    +
  • + +a possible bug
  • +
  • + +a question about package functionality
  • +
  • + +a suggested code or documentation change, improvement to the code, or feature request
  • +
+

If you are reporting (1) a bug or (2) a question about code, please supply:

+ +

Put your code here:

+
+## load package
+library("dataverse")
+
+## code goes here
+
+
+## session info for your system
+sessionInfo()
+ + +
+ + + +
+ + + +
+ + +
+

Site built with pkgdown 1.6.1.

+
+ +
+
+ + + + + + + + diff --git a/docs/PULL_REQUEST_TEMPLATE.html b/docs/PULL_REQUEST_TEMPLATE.html new file mode 100644 index 0000000..6de5746 --- /dev/null +++ b/docs/PULL_REQUEST_TEMPLATE.html @@ -0,0 +1,192 @@ + + + + + + + + +NA • dataverse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + + +
+ +
+
+ + + +

Please ensure the following before submitting a PR:

+
    +
  • + +if suggesting code changes or improvements, open an issue first
  • +
  • + +for all but trivial changes (e.g., typo fixes), add your name to DESCRIPTION +
  • +
  • + +for all but trivial changes (e.g., typo fixes), documentation your change in NEWS.md with a parenthetical reference to the issue number being addressed
  • +
  • + +if changing documentation, edit files in /R not /man and run devtools::document() to update documentation
  • +
  • + +add code or new test files to /tests for any new functionality or bug fix
  • +
  • + +make sure R CMD check runs without error before submitting the PR
  • +
+ + +
+ + + +
+ + + +
+ + +
+

Site built with pkgdown 1.6.1.

+
+ +
+
+ + + + + + + + diff --git a/docs/articles/A-introduction.html b/docs/articles/A-introduction.html new file mode 100644 index 0000000..0bdf2f6 --- /dev/null +++ b/docs/articles/A-introduction.html @@ -0,0 +1,272 @@ + + + + + + + +Introduction to Dataverse • dataverse + + + + + + + + + + +
+
+ + + + +
+
+ + + + +

The dataverse package is the official R client for Dataverse 4 data repositories. The package enables data search, retrieval, and deposit with any Dataverse installation, thus allowing R users to integrate public data sharing into the reproducible research workflow.

+

In addition to this introduction, the package contains three additional vignettes covering:

+ +

They can be accessed from CRAN or from within R using vignettes(package = "dataverse").

+

The dataverse client package can be installed from CRAN, and you can find the latest development version and report any issues on GitHub:

+
+if (!require("remotes")) {
+    install.packages("remotes")
+}
+remotes::install_github("iqss/dataverse-client-r")
+library("dataverse")
+

(Note: dataverse is the next-generation iteration of the dvn package, which works with Dataverse 3 (“Dataverse Network”) applications. See the appendix of this vignette for a cross-walk of functionality between dvn and dataverse.)

+
+

+Quick Start

+

Dataverse has some terminology that is worth quickly reviewing before showing how to work with Dataverse in R. Dataverse is an application that can be installed in many places. As a result, dataverse can work with any instllation but you need to specify which installation you want to work with. This can be set by default with an environment variable, DATAVERSE_SERVER:

+
+library("dataverse")
+Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu")
+

This should be the Dataverse server, without the “https” prefix or the “/api” URL path, etc. The package attempts to compensate for any malformed values, though.

+

Within a given Dataverse installation, organizations or individuals can create objects that are also called “Dataverses”. These Dataverses can then contain other dataverses, which can contain other dataverses, and so on. They can also contain datasets which in turn contain files. You can think of Harvard’s Dataverse as a top-level installation, where an institution might have a dataverse that contains a subsidiary dataverse for each researcher at the organization, who in turn publishes all files relevant to a given study as a dataset.

+

You can search for and retrieve data without a Dataverse account for that a specific Dataverse installation. For example, to search for data files or datasets that mention “ecological inference”, we can just do:

+
+dataverse_search("ecological inference")[c("name", "type", "description")]
+

The search vignette describes this functionality in more detail. To retrieve a data file, we need to investigate the dataset being returned and look at what files it contains using a variety of functions, the last of which - get_file() - can retrieve the files as raw vectors:

+ +

For “native” Dataverse features (such as user account controls) or to create and publish a dataset, you will need an API key linked to a Dataverse installation account. Instructions for obtaining an account and setting up an API key are available in the Dataverse User Guide. (Note: if your key is compromised, it can be regenerated to preserve security.) Once you have an API key, this should be stored as an environment variable called DATAVERSE_KEY. It can be set within R using:

+
+Sys.setenv("DATAVERSE_KEY" = "examplekey12345")
+

With that set, you can easily create a new dataverse, create a dataset within that dataverse, push files to the dataset, and release it:

+
+# create a dataverse
+dat <- create_dataverse("mydataverse")
+
+# create a list of metadata
+metadat <- list(title = "My Study",
+                creator = "Doe, John",
+                description = "An example study")
+
+# create the dataset
+dat <- initiate_dataset("mydataverse", body = metadat)
+
+# add files to dataset
+tmp <- tempfile()
+write.csv(iris, file = tmp)
+f <- add_file(dat, file = tmp)
+
+# publish new dataset
+publish_dataset(dat)
+

Your data are now publicly accessible.

+
+
+

+Appendix: dvn to dataverse Crosswalk

+

The original Dataverse client for R was called dvn; it worked with Dataverse versions <= 3 and was removed from CRAN in favor of dataverse in 2018. dvn provided functionality for searching, retrieving, and depositing data. Here is a cross-walk of functionality in case you were already familiar with the dvn package:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
API Category +dataverse functions +dvn functions
Data Searchdataverse_search()dvSearch()
Data Retrievalget_file_metadata()dvMetadata()
get_file()
Data Depositcreate_dataverse()
initiate_dataset()dvCreateStudy()
update_dataset()dvEditStudy()
add_file()addFile()
delete_file()dvDeleteFile()
publish_sword_dataset()dvReleaseStudy()
delete_sword_dataset()
service_document()dvServiceDoc()
dataset_statement()dvStudyStatement()
list_datasets()dvUserStudies()
+
+
+ + + +
+ + + +
+ +
+

Site built with pkgdown 1.6.1.

+
+ +
+
+ + + + + + diff --git a/docs/articles/A-introduction_files/accessible-code-block-0.0.1/empty-anchor.js b/docs/articles/A-introduction_files/accessible-code-block-0.0.1/empty-anchor.js new file mode 100644 index 0000000..ca349fd --- /dev/null +++ b/docs/articles/A-introduction_files/accessible-code-block-0.0.1/empty-anchor.js @@ -0,0 +1,15 @@ +// Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> +// v0.0.1 +// Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. + +document.addEventListener('DOMContentLoaded', function() { + const codeList = document.getElementsByClassName("sourceCode"); + for (var i = 0; i < codeList.length; i++) { + var linkList = codeList[i].getElementsByTagName('a'); + for (var j = 0; j < linkList.length; j++) { + if (linkList[j].innerHTML === "") { + linkList[j].setAttribute('aria-hidden', 'true'); + } + } + } +}); diff --git a/docs/articles/A-introduction_files/header-attrs-2.6/header-attrs.js b/docs/articles/A-introduction_files/header-attrs-2.6/header-attrs.js new file mode 100644 index 0000000..dd57d92 --- /dev/null +++ b/docs/articles/A-introduction_files/header-attrs-2.6/header-attrs.js @@ -0,0 +1,12 @@ +// Pandoc 2.9 adds attributes on both header and div. We remove the former (to +// be compatible with the behavior of Pandoc < 2.8). +document.addEventListener('DOMContentLoaded', function(e) { + var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); + var i, h, a; + for (i = 0; i < hs.length; i++) { + h = hs[i]; + if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 + a = h.attributes; + while (a.length > 0) h.removeAttribute(a[0].name); + } +}); diff --git a/docs/articles/B-search.html b/docs/articles/B-search.html new file mode 100644 index 0000000..dff2407 --- /dev/null +++ b/docs/articles/B-search.html @@ -0,0 +1,204 @@ + + + + + + + +Data Search and Discovery • dataverse + + + + + + + + + + +
+
+ + + + +
+
+ + + + +

Searching for data within Dataverse is quite easy using the dataverse_search() function. The simplest searches simply consist of a query string:

+
+library("dataverse")
+Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu")
+dataverse_search("Gary King")[c("name")]
+
## 10 of 1043 results retrieved
+
##                                                                          name
+## 1                                       00698McArthur-King-BoxCoverSheets.pdf
+## 2                                      00698McArthur-King-MemoOfAgreement.pdf
+## 3                                     00698McArthur-King-StudyDescription.pdf
+## 4                                                            077_mod1_s2m.tab
+## 5                                      10 Million International Dyadic Events
+## 6             1998 Jewish Community Study of the Coachella Valley, California
+## 7                                               2002 State Legislative Survey
+## 8  A Comparative Study between Gurukul System and Western System of Education
+## 9    A Demographic and Attitudinal Study of the Jewish Community of St. Louis
+## 10       A Demographic Study of the Jewish Community of Atlantic County, 1985
+

The results are paginated, so users can rely upon the per_page and start argument to requested subsequent pages of results. We’ll start at 6 and to show that we retrieve the last five results from the previous query plus 15 more (due to per_page = 20):

+
+dataverse_search("Gary King", start = 6, per_page = 20)[c("name")]
+
## 20 of 1043 results retrieved
+
##                                                                          name
+## 1                                               2002 State Legislative Survey
+## 2  A Comparative Study between Gurukul System and Western System of Education
+## 3    A Demographic and Attitudinal Study of the Jewish Community of St. Louis
+## 4        A Demographic Study of the Jewish Community of Atlantic County, 1985
+## 5          A Demographic Study of the Jewish Community of Greater Kansas City
+## 6     A Demographic Study of the Jewish Community of Greater Washington, 1983
+## 7                                     A Lexicial Index of Electoral Democracy
+## 8         A Population Study of the Jewish Community of Metrowest, New Jersey
+## 9               A Population Study of the Jewish Community of Rochester, 1986
+## 10                    A Population Study of the Jewish Community of Worcester
+## 11                                  A Study of Jewish Culture in the Bay Area
+## 12        A Unified Model of Cabinet Dissolution in Parliamentary Democracies
+## 13                        ABC News / The Washington Post  Poll: January, 1988
+## 14 ABC News / The Washington Post poll # 7925:  Social Security/1984 Election
+## 15                        ABC News / The Washington Post Poll: December, 1987
+## 16                                     ABC News Gary Hart Poll, December 1987
+## 17                                     ABC News Gary Hart Poll, December 1987
+## 18                                            ABC News Iraq Poll, August 1990
+## 19                                   ABC News Kosovo Peace Poll #1, June 1999
+## 20                    ABC News New Hampshire Primary Voter Poll, January 2000
+

More complicated searches can specify metadata fields like title and restrict results to a specific type of Dataverse object (a “dataverse”, “dataset”, or “file”):

+
+ei <- dataverse_search(author = "Gary King", title = "Ecological Inference", type = "dataset", per_page = 20)
+
## 20 of 867 results retrieved
+
+# fields returned
+names(ei)
+# names of datasets
+ei$name
+
## [1] "name"         "type"         "url"          "global_id"    "description"  "published_at" "citationHtml"
+## [8] "citation"     "authors"
+##  [1] "10 Million International Dyadic Events"
+##  [2] "3D Dust map from Green et al. (2015)"
+##  [3] "[KRISNA02]³ New Religious Movements : Case of ISKCON"
+##  [4] "A Comparative Study between Gurukul System and Western System of Education"
+##  [5] "A Lexicial Index of Electoral Democracy"
+##  [6] "A Statistical Inference Engine for Small, Dependent Samples  [Version 2.310]"
+##  [7] "A Unified Model of Cabinet Dissolution in Parliamentary Democracies"
+##  [8] "ABC News / The Washington Post poll # 7925:  Social Security/1984 Election"
+##  [9] "ABC News Iraq Poll, August 1990"
+## [10] "ABC News/The Washington Post Poll:  Los Angeles Race Riots"
+## [11] "ABC News/The Washington Post Poll:  Race Relations"
+## [12] "ABC News/Washington Post Los Angeles Beating Poll, April 1992"
+## [13] "ABC News/Washington Post Poll #1, September 1990"
+## [14] "ABC News/Washington Post Race Relations Poll, May 1992"
+## [15] "ABC News/Washington Post Reagan 100 Days Poll, April 1981"
+## [16] "Afrobarometer Round 3: The Quality of Democracy and Governance in 18 African Countries, 2005-2006"
+## [17] "Afrobarometer Round 3: The Quality of Democracy and Governance in Benin, 2005"
+## [18] "Afrobarometer Round 3: The Quality of Democracy and Governance in Botswana, 2005"
+## [19] "Afrobarometer Round 3: The Quality of Democracy and Governance in Cape Verde, 2005"
+## [20] "Afrobarometer Round 3: The Quality of Democracy and Governance in Ghana, 2005"
+

Once datasets and files are identified, it is easy to download and use them directly in R. See the “Data Retrieval” vignette for details.

+
+ + + +
+ + + +
+ +
+

Site built with pkgdown 1.6.1.

+
+ +
+
+ + + + + + diff --git a/docs/articles/B-search_files/accessible-code-block-0.0.1/empty-anchor.js b/docs/articles/B-search_files/accessible-code-block-0.0.1/empty-anchor.js new file mode 100644 index 0000000..ca349fd --- /dev/null +++ b/docs/articles/B-search_files/accessible-code-block-0.0.1/empty-anchor.js @@ -0,0 +1,15 @@ +// Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> +// v0.0.1 +// Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. + +document.addEventListener('DOMContentLoaded', function() { + const codeList = document.getElementsByClassName("sourceCode"); + for (var i = 0; i < codeList.length; i++) { + var linkList = codeList[i].getElementsByTagName('a'); + for (var j = 0; j < linkList.length; j++) { + if (linkList[j].innerHTML === "") { + linkList[j].setAttribute('aria-hidden', 'true'); + } + } + } +}); diff --git a/docs/articles/B-search_files/header-attrs-2.6/header-attrs.js b/docs/articles/B-search_files/header-attrs-2.6/header-attrs.js new file mode 100644 index 0000000..dd57d92 --- /dev/null +++ b/docs/articles/B-search_files/header-attrs-2.6/header-attrs.js @@ -0,0 +1,12 @@ +// Pandoc 2.9 adds attributes on both header and div. We remove the former (to +// be compatible with the behavior of Pandoc < 2.8). +document.addEventListener('DOMContentLoaded', function(e) { + var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); + var i, h, a; + for (i = 0; i < hs.length; i++) { + h = hs[i]; + if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 + a = h.attributes; + while (a.length > 0) h.removeAttribute(a[0].name); + } +}); diff --git a/docs/articles/C-retrieval.html b/docs/articles/C-retrieval.html new file mode 100644 index 0000000..b5258d7 --- /dev/null +++ b/docs/articles/C-retrieval.html @@ -0,0 +1,339 @@ + + + + + + + +Data Retrieval and Reuse • dataverse + + + + + + + + + + +
+
+ + + + +
+
+ + + + +

This vignette shows how to download data from Dataverse using the dataverse package. We’ll focus on a Dataverse repository that contains supplemental files for Jamie Monogan’s book Political Analysis Using R, which is stored at Harvard University’s IQSS Dataverse Network:

+
+

Monogan, Jamie, 2015, “Political Analysis Using R: Example Code and Data, Plus Data for Practice Problems”, doi:10.7910/DVN/ARKOTI, Harvard Dataverse, V1, UNF:6:+itU9hcUJ8I9E0Kqv8HWHg==

+
+

This study is persistently retrievable by a “Digital Object Identifier (DOI)”: https://doi.org/10.7910/DVN/ARKOTI and the citation above (taken from the Dataverse page) includes a “Universal Numeric Fingerprint (UNF)”: UNF:6:+itU9hcUJ8I9E0Kqv8HWHg==, which provides a versioned, multi-file hash for the entire study, which contains 32 files.

+

If you don’t already know what datasets and files you want to use from Dataverse, see the “Data Search” vignette for guidance on data search and discovery.

+
+

+Retrieving Dataset and File Metadata

+

We will download these files and examine them directly in R using the dataverse package. To begin, we need to loading the package and using the get_dataset() function to retrieve some basic metadata about the dataset:

+
+library("dataverse")
+Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu")
+(dataset <- get_dataset("doi:10.7910/DVN/ARKOTI"))
+
## Dataset (75170):
+## Version: 1.0, RELEASED
+## Release Date: 2015-07-07T02:57:02Z
+## License: CC0
+## 17 Files:
+##                           label version      id                  contentType
+## 1                  alpl2013.tab       2 2692294    text/tab-separated-values
+## 2                   BPchap7.tab       2 2692295    text/tab-separated-values
+## 3                   chapter01.R       2 2692202 text/plain; charset=US-ASCII
+## 4                   chapter02.R       2 2692206 text/plain; charset=US-ASCII
+## 5                   chapter03.R       2 2692210 text/plain; charset=US-ASCII
+## 6                   chapter04.R       2 2692204 text/plain; charset=US-ASCII
+## 7                   chapter05.R       2 2692205 text/plain; charset=US-ASCII
+## 8                   chapter06.R       2 2692212 text/plain; charset=US-ASCII
+## 9                   chapter07.R       2 2692209 text/plain; charset=US-ASCII
+## 10                  chapter08.R       2 2692208 text/plain; charset=US-ASCII
+## 11                  chapter09.R       2 2692211 text/plain; charset=US-ASCII
+## 12                  chapter10.R       1 2692203 text/plain; charset=US-ASCII
+## 13                  chapter11.R       1 2692207 text/plain; charset=US-ASCII
+## 14 comprehensiveJapanEnergy.tab       2 2692296    text/tab-separated-values
+## 15         constructionData.tab       2 2692293    text/tab-separated-values
+## 16             drugCoverage.csv       1 2692233 text/plain; charset=US-ASCII
+## 17         hanmerKalkanANES.tab       2 2692290    text/tab-separated-values
+## 18                 hmnrghts.tab       2 2692298    text/tab-separated-values
+## 19                 hmnrghts.txt       1 2692238                   text/plain
+## 20                   levant.tab       2 2692289    text/tab-separated-values
+## 21                       LL.csv       1 2692228 text/plain; charset=US-ASCII
+## 22                 moneyDem.tab       2 2692292    text/tab-separated-values
+## 23            owsiakJOP2013.tab       2 2692297    text/tab-separated-values
+## 24                PESenergy.csv       1 2692230 text/plain; charset=US-ASCII
+## 25                  pts1994.csv       1 2692229 text/plain; charset=US-ASCII
+## 26                  pts1995.csv       1 2692231 text/plain; charset=US-ASCII
+## 27                 sen113kh.ord       1 2692239 text/plain; charset=US-ASCII
+## 28                SinghEJPR.tab       2 2692299    text/tab-separated-values
+## 29                 SinghJTP.tab       2 2692288    text/tab-separated-values
+## 30                 stdSingh.tab       2 2692291    text/tab-separated-values
+## 31                       UN.csv       1 2692232 text/plain; charset=US-ASCII
+## 32                  war1800.tab       2 2692300    text/tab-separated-values
+

The output prints some basic metadata and then the str() of the files data frame returned by the call. This lists all of the files in the dataset along with a considerable amount of metadata about each. We can see a quick glance at these files using:

+
dataset$files[c("filename", "contentType")]
+

This shows that there are indeed 32 files, a mix of .R code files and tab- and comma-separated data files.

+

You can also retrieve more extensive metadata using dataset_metadata():

+
+str(dataset_metadata("doi:10.7910/DVN/ARKOTI"), 1)
+
## List of 2
+##  $ displayName: chr "Citation Metadata"
+##  $ fields     :'data.frame': 7 obs. of  4 variables:
+

We’ll focus here on the code and data files for Chapter 2 from the book.

+
+
+

+Retrieving Files

+

Let’s start by grabbing the code using get_file() (note that this always returns a raw vector):

+
+code3 <- get_file("chapter03.R", "doi:10.7910/DVN/ARKOTI")
+writeBin(code3, "chapter03.R")
+

Now we’ll get the corresponding data and save it locally. For this code we need two data files:

+
+writeBin(get_file("constructionData.tab", "doi:10.7910/DVN/ARKOTI"),
+         "constructionData.dta")
+writeBin(get_file("PESenergy.csv", "doi:10.7910/DVN/ARKOTI"),
+         "PESenergy.csv")
+

To confirm that the data look the way we want, we can also (perhaps alternatively) load it directly into R:

+
+constructionData <- foreign::read.dta("constructionData.dta")
+str(constructionData)
+PESenergy <- utils::read.table("PESenergy.csv")
+str(PESenergy)
+
## 'data.frame':    50 obs. of  55 variables:
+##  $ year                      : int  1997 1997 1997 1997 1997 1997 1997 1997 1997 1997 ...
+##  $ stno                      : int  1 2 3 4 5 6 7 8 9 10 ...
+##  $ totalreg                  : int  329 500 314 963 2106 643 634 239 1996 880 ...
+##  $ totalhealth               : int  300 424 263 834 1859 554 501 204 1640 732 ...
+##  $ raneyfolded97             : num  0.58 0.69 0.85 0.63 0.5 ...
+##  $ healthagenda97            : int  49 180 137 220 1409 153 324 40 408 157 ...
+##  $ predictedtotalig          : num  51.8 99 81.8 111.2 224.1 ...
+##  $ supplytotalhealth         : int  1168 6991 4666 9194 70014 8847 7845 1438 35363 13471 ...
+##  $ totalhealthsupplysq       : int  136 4887 2177 8453 490196 7827 6154 207 125054 18147 ...
+##  $ partratetotalhealth       : num  2.48 1.09 1.09 1.4 0.35 ...
+##  $ ighealthcare              : int  29 76 51 129 247 89 133 35 356 148 ...
+##  $ supplydirectpatientcare   : int  1137 6687 4458 8785 66960 8320 7439 1365 33793 12760 ...
+##  $ dpcsupplysq               : int  129 4472 1987 7718 448364 6922 5534 186 114197 16282 ...
+##  $ partratedpc               : num  1.14 0.51 0.43 0.68 0.17 ...
+##  $ igdpcare                  : int  13 34 19 60 112 40 67 12 212 74 ...
+##  $ supplypharmprod           : int  0 174 78 229 2288 340 202 36 962 360 ...
+##  $ pharmsupplysq             : int  0 30276 6084 52441 5234944 115600 40804 1296 925444 129600 ...
+##  $ partratepharmprod         : num  0 10.34 19.23 5.24 2.05 ...
+##  $ igpharmprod               : int  4 18 15 12 47 23 22 12 46 32 ...
+##  $ supplybusiness            : int  0 51 28 93 315 55 36 14 317 78 ...
+##  $ businesssupplysq          : int  0 2601 784 8649 99225 3025 1296 196 100489 6084 ...
+##  $ partratebusness           : num  0 1.96 14.29 15.05 6.03 ...
+##  $ igbusiness                : int  2 1 4 14 19 5 4 2 25 6 ...
+##  $ supplygovt                : int  14 26 80 23 70 71 105 2 67 176 ...
+##  $ govsupplysq               : num  0.02 0.07 0.64 0.05 0.49 ...
+##  $ partrategov               : num  0 38.5 2.5 30.4 10 ...
+##  $ iggovt                    : int  0 10 2 7 7 1 8 0 12 2 ...
+##  $ supplyadvocacy            : int  16 37 14 57 344 54 51 18 206 76 ...
+##  $ advossq                   : int  256 1369 196 3249 118336 2916 2601 324 42436 5776 ...
+##  $ partrateadvo              : num  31.25 16.22 28.57 31.58 8.72 ...
+##  $ ig97advoc                 : int  5 6 4 18 30 7 9 4 26 17 ...
+##  $ rnmedschools              : int  1 16 8 7 37 7 12 3 18 21 ...
+##  $ rnmedschoolssq            : int  1 256 64 49 1369 49 144 9 324 441 ...
+##  $ rnmedschoolpartrate       : num  100 0 12.5 28.57 5.41 ...
+##  $ rnmedschooligs            : int  1 0 1 2 2 0 1 0 6 1 ...
+##  $ healthprofessionals       : int  12890 128980 82140 122760 749620 111550 121110 22740 471270 215670 ...
+##  $ healthprofessionalssquared: int  16615 1663584 674698 1507002 56193014 1244340 1466763 51711 22209541 4651355 ...
+##  $ partrateprofessionals     : num  0.03 0.01 0.01 0.01 0 ...
+##  $ ighealthprofessionals     : int  4 7 6 16 30 13 22 5 29 16 ...
+##  $ predictdpcpartrate        : num  1.175 0.915 1.016 0.826 0.348 ...
+##  $ predictdpcig              : num  23.1 49.7 39.4 58.8 103.5 ...
+##  $ predictprofpartrate       : num  0.02475 0.01383 0.01788 0.01434 0.00579 ...
+##  $ predictprofig             : num  7.59 12.58 10.69 12.34 22.47 ...
+##  $ predictmedschoolparttrate : num  17.39 8.08 12.3 12.95 5.02 ...
+##  $ predictmedschoolig        : num  0.355 1.269 0.774 0.713 2.65 ...
+##  $ predictadvopartrate       : num  31.9 26.4 32.5 21.6 13 ...
+##  $ predictadvoig             : num  5.96 7.98 5.76 9.83 28.53 ...
+##  $ predictbuspartrate        : num  25.78 18.08 21.33 13.1 7.27 ...
+##  $ predictbusig              : num  2.58 7.96 5.66 11.66 20.04 ...
+##  $ predictpharmpartrate      : num  21.38 15.22 18.52 13.44 4.14 ...
+##  $ predictpharmig            : num  11.3 18.1 14.4 20.1 45.1 ...
+##  $ predictgovpartrate        : num  14.41 12.61 5.84 13.03 6.93 ...
+##  $ predictgovig              : num  2.06 2.43 3.78 2.35 3.57 ...
+##  $ predicttotalpartrate      : num  2.41 1.823 2.047 1.623 0.752 ...
+##  $ predicttotalig            : num  54.2 99.2 81.9 114.8 228.3 ...
+##  - attr(*, "datalabel")= chr ""
+##  - attr(*, "time.stamp")= chr " 1 Jun 2013 16:59"
+##  - attr(*, "formats")= chr  "%8.0g" "%8.0g" "%8.0g" "%8.0g" ...
+##  - attr(*, "types")= int  252 251 252 252 254 252 254 253 253 254 ...
+##  - attr(*, "val.labels")= chr  "" "" "" "" ...
+##  - attr(*, "var.labels")= chr  "Year" "StNo." "97 TotalReg" "97Total-Health" ...
+##  - attr(*, "version")= int 12
+## 'data.frame':    181 obs. of  1 variable:
+##  $ V1: Factor w/ 181 levels "Apr-69,5,3.4,60,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,39.2",..: 31 62 47 107 1 122 92 77 16 167 ...
+

In addition to visual inspection, we can compare the UNF signatures for each dataset against what is reported by Dataverse to confirm that we received the correct files:

+
+library("UNF")
+unf(constructionData)
+unf(PESenergy)
+dataset$files[c("label", "UNF")]
+
## UNF6:+4pc5114xS0ryr1sSvdX6g==
+## UNF6:TD7TEMZyrX4iGTlTsUKQDg==
+##                           label                            UNF
+## 1                  alpl2013.tab UNF:6:d9ZNXvmiPfiunSAiXRpVfg==
+## 2                   BPchap7.tab UNF:6:B3/HJbnzktaX5eEJA2ItiA==
+## 3                   chapter01.R                           <NA>
+## 4                   chapter02.R                           <NA>
+## 5                   chapter03.R                           <NA>
+## 6                   chapter04.R                           <NA>
+## 7                   chapter05.R                           <NA>
+## 8                   chapter06.R                           <NA>
+## 9                   chapter07.R                           <NA>
+## 10                  chapter08.R                           <NA>
+## 11                  chapter09.R                           <NA>
+## 12                  chapter10.R                           <NA>
+## 13                  chapter11.R                           <NA>
+## 14 comprehensiveJapanEnergy.tab UNF:6:Vhb3oZb9m4Nk9N7s6UAHGg==
+## 15         constructionData.tab UNF:6:+4pc5114xS0ryr1sSvdX6g==
+## 16             drugCoverage.csv                           <NA>
+## 17         hanmerKalkanANES.tab UNF:6:lrQrhDAXFc8lSRP9muJslw==
+## 18                 hmnrghts.tab UNF:6:uEg24jBA2ht0P4WeNLjI+w==
+## 19                 hmnrghts.txt                           <NA>
+## 20                   levant.tab UNF:6:zlgG7+JXsIZYvS383eQOvA==
+## 21                       LL.csv                           <NA>
+## 22                 moneyDem.tab UNF:6:7M/QM5i6IM/VUM94UJjJUQ==
+## 23            owsiakJOP2013.tab UNF:6:0ZEvCFuUQms2zYD57hmwNQ==
+## 24                PESenergy.csv                           <NA>
+## 25                  pts1994.csv                           <NA>
+## 26                  pts1995.csv                           <NA>
+## 27                 sen113kh.ord                           <NA>
+## 28                SinghEJPR.tab UNF:6:iDGp9dXOl4SiR+rCBWo8Tw==
+## 29                 SinghJTP.tab UNF:6:lDCyZ7YQF5O++SRsxh2kGA==
+## 30                 stdSingh.tab UNF:6:A5gwtn5q/ewkTMpcQEQ73w==
+## 31                       UN.csv                           <NA>
+## 32                  war1800.tab UNF:6:jJ++mepKcv9JbJOOPLMf2Q==
+
+
+

+Reusing Files and Reproducing Analysis

+

To reproduce the analysis, we can simply run the code file either as a system() call or directly in R using source() (note this particular file begins with an rm() call so you may want to run it in a new enviroment):

+
+# Option 1
+system("Rscript chapter03.R")
+
+# Option 2
+source("chapter03.R", local=new.env())
+

Any well-produced set of analysis reproduction files, like this one, should run without error once the data and code are in-hand. Troubleshooting anlaysis files is beyond the scope of this vignette, but common sources are

+
    +
  1. The working directory is not set the same as the author intended. This could affect code files not finding the relative position of datasets or of other code files.
  2. +
  3. Your local machine hasn’t downloaded or installed all the necessary datasets and packages.
  4. +
  5. The functions called in the code have changed since the script was developed.
  6. +
+

To archive your own reproducible analyses using Dataverse, see the “Archiving Data” vignette.

+
+
+ + + +
+ + + +
+ +
+

Site built with pkgdown 1.6.1.

+
+ +
+
+ + + + + + diff --git a/docs/articles/C-retrieval_files/accessible-code-block-0.0.1/empty-anchor.js b/docs/articles/C-retrieval_files/accessible-code-block-0.0.1/empty-anchor.js new file mode 100644 index 0000000..ca349fd --- /dev/null +++ b/docs/articles/C-retrieval_files/accessible-code-block-0.0.1/empty-anchor.js @@ -0,0 +1,15 @@ +// Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> +// v0.0.1 +// Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. + +document.addEventListener('DOMContentLoaded', function() { + const codeList = document.getElementsByClassName("sourceCode"); + for (var i = 0; i < codeList.length; i++) { + var linkList = codeList[i].getElementsByTagName('a'); + for (var j = 0; j < linkList.length; j++) { + if (linkList[j].innerHTML === "") { + linkList[j].setAttribute('aria-hidden', 'true'); + } + } + } +}); diff --git a/docs/articles/C-retrieval_files/header-attrs-2.6/header-attrs.js b/docs/articles/C-retrieval_files/header-attrs-2.6/header-attrs.js new file mode 100644 index 0000000..dd57d92 --- /dev/null +++ b/docs/articles/C-retrieval_files/header-attrs-2.6/header-attrs.js @@ -0,0 +1,12 @@ +// Pandoc 2.9 adds attributes on both header and div. We remove the former (to +// be compatible with the behavior of Pandoc < 2.8). +document.addEventListener('DOMContentLoaded', function(e) { + var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); + var i, h, a; + for (i = 0; i < hs.length; i++) { + h = hs[i]; + if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 + a = h.attributes; + while (a.length > 0) h.removeAttribute(a[0].name); + } +}); diff --git a/docs/articles/D-archiving.html b/docs/articles/D-archiving.html new file mode 100644 index 0000000..4316f7c --- /dev/null +++ b/docs/articles/D-archiving.html @@ -0,0 +1,183 @@ + + + + + + + +Data Archiving • dataverse + + + + + + + + + + +
+
+ + + + +
+
+ + + + +

This vignette describes how to archive data into Dataverse directly from R.

+
+library("dataverse")
+Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu")
+
+

+SWORD-based Data Archiving

+

The main data archiving (or “deposit”) workflow for Dataverse is built on SWORD v2.0. This means that to create a new dataset listing, you will have first initialize a dataset entry with some metadata, add one or more files to the dataset, and then publish it. This looks something like the following:

+
+# retrieve your service document
+d <- service_document()
+
+# list current datasets in a dataverse
+list_datasets("mydataverse")
+
+# create a new dataset
+## create a list of metadata
+metadat <- list(title = "My Study",
+                creator = "Doe, John",
+                description = "An example study")
+## initiate the dataset
+dat <- initiate_sword_dataset("mydataverse", body = metadat)
+

Once the dataset is initiated, it is possible to add and delete files:

+
+tmp <- tempfile()
+write.csv(iris, file = tmp)
+f <- add_file(dat, file = tmp)
+

The add_file() function accepts, as its first argument, a character vector of file names, a data.frame, or a list of R objects. Files can be deleted using delete_file(). Once the dataset is finalized, it can be published using publish_dataset():

+ +

And it will then show up in the list of published datasets returned by list_datasets(dat).

+
+
+

+Native API

+

Dataverse also implements a second way to release datasets, called the “native” API. It is similar to to the SWORD API:

+
+# create the dataset
+ds <- create_dataset("mydataverse")
+
+# add files
+tmp <- tempfile()
+write.csv(iris, file = tmp)
+f <- add_dataset_file(file = tmp, dataset = ds)
+
+# publish dataset
+publish_dataset(ds)
+
+# dataset will now be published
+get_dataverse("mydataverse")
+
+
+ + + +
+ + + +
+ +
+

Site built with pkgdown 1.6.1.

+
+ +
+
+ + + + + + diff --git a/docs/articles/D-archiving_files/accessible-code-block-0.0.1/empty-anchor.js b/docs/articles/D-archiving_files/accessible-code-block-0.0.1/empty-anchor.js new file mode 100644 index 0000000..ca349fd --- /dev/null +++ b/docs/articles/D-archiving_files/accessible-code-block-0.0.1/empty-anchor.js @@ -0,0 +1,15 @@ +// Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> +// v0.0.1 +// Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. + +document.addEventListener('DOMContentLoaded', function() { + const codeList = document.getElementsByClassName("sourceCode"); + for (var i = 0; i < codeList.length; i++) { + var linkList = codeList[i].getElementsByTagName('a'); + for (var j = 0; j < linkList.length; j++) { + if (linkList[j].innerHTML === "") { + linkList[j].setAttribute('aria-hidden', 'true'); + } + } + } +}); diff --git a/docs/articles/D-archiving_files/header-attrs-2.6/header-attrs.js b/docs/articles/D-archiving_files/header-attrs-2.6/header-attrs.js new file mode 100644 index 0000000..dd57d92 --- /dev/null +++ b/docs/articles/D-archiving_files/header-attrs-2.6/header-attrs.js @@ -0,0 +1,12 @@ +// Pandoc 2.9 adds attributes on both header and div. We remove the former (to +// be compatible with the behavior of Pandoc < 2.8). +document.addEventListener('DOMContentLoaded', function(e) { + var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); + var i, h, a; + for (i = 0; i < hs.length; i++) { + h = hs[i]; + if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 + a = h.attributes; + while (a.length > 0) h.removeAttribute(a[0].name); + } +}); diff --git a/docs/articles/index.html b/docs/articles/index.html new file mode 100644 index 0000000..682428b --- /dev/null +++ b/docs/articles/index.html @@ -0,0 +1,174 @@ + + + + + + + + +Articles • dataverse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + + +
+ +
+
+ + +
+

Getting Started

+

These vignettes provide an introduction to Dataverse with R.

+ +
+
Introduction to Dataverse
+
+
Data Search and Discovery
+
+
Data Retrieval and Reuse
+
+
Data Archiving
+
+
+
+
+
+ + +
+ + +
+

Site built with pkgdown 1.6.1.

+
+ +
+
+ + + + + + + + diff --git a/docs/authors.html b/docs/authors.html new file mode 100644 index 0000000..9ccabe4 --- /dev/null +++ b/docs/authors.html @@ -0,0 +1,200 @@ + + + + + + + + +Citation and Authors • dataverse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + + +
+ +
+
+ + +

Thomas J. Leeper (). dataverse: R Client for Dataverse 4. R package version 0.2.1.9002.

+
@Manual{,
+  title = {dataverse: R Client for Dataverse 4},
+  author = {Thomas J. Leeper},
+  note = {R package version 0.2.1.9002},
+}
+ + + +
    +
  • +

    Will Beasley. Author, maintainer. +

    +
  • +
  • +

    Thomas J. Leeper. Author. +

    +
  • +
  • +

    Philip Durbin. Author. +

    +
  • +
  • +

    Shiro Kuriwaki. Author. +

    +
  • +
  • +

    Sebastian Karcher. Author. +

    +
  • +
  • +

    Jan Kanis. Contributor. +

    +
  • +
+ +
+ +
+ + + +
+ + +
+

Site built with pkgdown 1.6.1.

+
+ +
+
+ + + + + + + + diff --git a/docs/bootstrap-toc.css b/docs/bootstrap-toc.css new file mode 100644 index 0000000..5a85941 --- /dev/null +++ b/docs/bootstrap-toc.css @@ -0,0 +1,60 @@ +/*! + * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) + * Copyright 2015 Aidan Feldman + * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ + +/* modified from https://github.com/twbs/bootstrap/blob/94b4076dd2efba9af71f0b18d4ee4b163aa9e0dd/docs/assets/css/src/docs.css#L548-L601 */ + +/* All levels of nav */ +nav[data-toggle='toc'] .nav > li > a { + display: block; + padding: 4px 20px; + font-size: 13px; + font-weight: 500; + color: #767676; +} +nav[data-toggle='toc'] .nav > li > a:hover, +nav[data-toggle='toc'] .nav > li > a:focus { + padding-left: 19px; + color: #563d7c; + text-decoration: none; + background-color: transparent; + border-left: 1px solid #563d7c; +} +nav[data-toggle='toc'] .nav > .active > a, +nav[data-toggle='toc'] .nav > .active:hover > a, +nav[data-toggle='toc'] .nav > .active:focus > a { + padding-left: 18px; + font-weight: bold; + color: #563d7c; + background-color: transparent; + border-left: 2px solid #563d7c; +} + +/* Nav: second level (shown on .active) */ +nav[data-toggle='toc'] .nav .nav { + display: none; /* Hide by default, but at >768px, show it */ + padding-bottom: 10px; +} +nav[data-toggle='toc'] .nav .nav > li > a { + padding-top: 1px; + padding-bottom: 1px; + padding-left: 30px; + font-size: 12px; + font-weight: normal; +} +nav[data-toggle='toc'] .nav .nav > li > a:hover, +nav[data-toggle='toc'] .nav .nav > li > a:focus { + padding-left: 29px; +} +nav[data-toggle='toc'] .nav .nav > .active > a, +nav[data-toggle='toc'] .nav .nav > .active:hover > a, +nav[data-toggle='toc'] .nav .nav > .active:focus > a { + padding-left: 28px; + font-weight: 500; +} + +/* from https://github.com/twbs/bootstrap/blob/e38f066d8c203c3e032da0ff23cd2d6098ee2dd6/docs/assets/css/src/docs.css#L631-L634 */ +nav[data-toggle='toc'] .nav > .active > ul { + display: block; +} diff --git a/docs/bootstrap-toc.js b/docs/bootstrap-toc.js new file mode 100644 index 0000000..1cdd573 --- /dev/null +++ b/docs/bootstrap-toc.js @@ -0,0 +1,159 @@ +/*! + * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) + * Copyright 2015 Aidan Feldman + * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ +(function() { + 'use strict'; + + window.Toc = { + helpers: { + // return all matching elements in the set, or their descendants + findOrFilter: function($el, selector) { + // http://danielnouri.org/notes/2011/03/14/a-jquery-find-that-also-finds-the-root-element/ + // http://stackoverflow.com/a/12731439/358804 + var $descendants = $el.find(selector); + return $el.filter(selector).add($descendants).filter(':not([data-toc-skip])'); + }, + + generateUniqueIdBase: function(el) { + var text = $(el).text(); + var anchor = text.trim().toLowerCase().replace(/[^A-Za-z0-9]+/g, '-'); + return anchor || el.tagName.toLowerCase(); + }, + + generateUniqueId: function(el) { + var anchorBase = this.generateUniqueIdBase(el); + for (var i = 0; ; i++) { + var anchor = anchorBase; + if (i > 0) { + // add suffix + anchor += '-' + i; + } + // check if ID already exists + if (!document.getElementById(anchor)) { + return anchor; + } + } + }, + + generateAnchor: function(el) { + if (el.id) { + return el.id; + } else { + var anchor = this.generateUniqueId(el); + el.id = anchor; + return anchor; + } + }, + + createNavList: function() { + return $(''); + }, + + createChildNavList: function($parent) { + var $childList = this.createNavList(); + $parent.append($childList); + return $childList; + }, + + generateNavEl: function(anchor, text) { + var $a = $(''); + $a.attr('href', '#' + anchor); + $a.text(text); + var $li = $('
  • '); + $li.append($a); + return $li; + }, + + generateNavItem: function(headingEl) { + var anchor = this.generateAnchor(headingEl); + var $heading = $(headingEl); + var text = $heading.data('toc-text') || $heading.text(); + return this.generateNavEl(anchor, text); + }, + + // Find the first heading level (`

    `, then `

    `, etc.) that has more than one element. Defaults to 1 (for `

    `). + getTopLevel: function($scope) { + for (var i = 1; i <= 6; i++) { + var $headings = this.findOrFilter($scope, 'h' + i); + if ($headings.length > 1) { + return i; + } + } + + return 1; + }, + + // returns the elements for the top level, and the next below it + getHeadings: function($scope, topLevel) { + var topSelector = 'h' + topLevel; + + var secondaryLevel = topLevel + 1; + var secondarySelector = 'h' + secondaryLevel; + + return this.findOrFilter($scope, topSelector + ',' + secondarySelector); + }, + + getNavLevel: function(el) { + return parseInt(el.tagName.charAt(1), 10); + }, + + populateNav: function($topContext, topLevel, $headings) { + var $context = $topContext; + var $prevNav; + + var helpers = this; + $headings.each(function(i, el) { + var $newNav = helpers.generateNavItem(el); + var navLevel = helpers.getNavLevel(el); + + // determine the proper $context + if (navLevel === topLevel) { + // use top level + $context = $topContext; + } else if ($prevNav && $context === $topContext) { + // create a new level of the tree and switch to it + $context = helpers.createChildNavList($prevNav); + } // else use the current $context + + $context.append($newNav); + + $prevNav = $newNav; + }); + }, + + parseOps: function(arg) { + var opts; + if (arg.jquery) { + opts = { + $nav: arg + }; + } else { + opts = arg; + } + opts.$scope = opts.$scope || $(document.body); + return opts; + } + }, + + // accepts a jQuery object, or an options object + init: function(opts) { + opts = this.helpers.parseOps(opts); + + // ensure that the data attribute is in place for styling + opts.$nav.attr('data-toggle', 'toc'); + + var $topContext = this.helpers.createChildNavList(opts.$nav); + var topLevel = this.helpers.getTopLevel(opts.$scope); + var $headings = this.helpers.getHeadings(opts.$scope, topLevel); + this.helpers.populateNav($topContext, topLevel, $headings); + } + }; + + $(function() { + $('nav[data-toggle="toc"]').each(function(i, el) { + var $nav = $(el); + Toc.init($nav); + }); + }); +})(); diff --git a/docs/docsearch.css b/docs/docsearch.css new file mode 100644 index 0000000..e5f1fe1 --- /dev/null +++ b/docs/docsearch.css @@ -0,0 +1,148 @@ +/* Docsearch -------------------------------------------------------------- */ +/* + Source: https://github.com/algolia/docsearch/ + License: MIT +*/ + +.algolia-autocomplete { + display: block; + -webkit-box-flex: 1; + -ms-flex: 1; + flex: 1 +} + +.algolia-autocomplete .ds-dropdown-menu { + width: 100%; + min-width: none; + max-width: none; + padding: .75rem 0; + background-color: #fff; + background-clip: padding-box; + border: 1px solid rgba(0, 0, 0, .1); + box-shadow: 0 .5rem 1rem rgba(0, 0, 0, .175); +} + +@media (min-width:768px) { + .algolia-autocomplete .ds-dropdown-menu { + width: 175% + } +} + +.algolia-autocomplete .ds-dropdown-menu::before { + display: none +} + +.algolia-autocomplete .ds-dropdown-menu [class^=ds-dataset-] { + padding: 0; + background-color: rgb(255,255,255); + border: 0; + max-height: 80vh; +} + +.algolia-autocomplete .ds-dropdown-menu .ds-suggestions { + margin-top: 0 +} + +.algolia-autocomplete .algolia-docsearch-suggestion { + padding: 0; + overflow: visible +} + +.algolia-autocomplete .algolia-docsearch-suggestion--category-header { + padding: .125rem 1rem; + margin-top: 0; + font-size: 1.3em; + font-weight: 500; + color: #00008B; + border-bottom: 0 +} + +.algolia-autocomplete .algolia-docsearch-suggestion--wrapper { + float: none; + padding-top: 0 +} + +.algolia-autocomplete .algolia-docsearch-suggestion--subcategory-column { + float: none; + width: auto; + padding: 0; + text-align: left +} + +.algolia-autocomplete .algolia-docsearch-suggestion--content { + float: none; + width: auto; + padding: 0 +} + +.algolia-autocomplete .algolia-docsearch-suggestion--content::before { + display: none +} + +.algolia-autocomplete .ds-suggestion:not(:first-child) .algolia-docsearch-suggestion--category-header { + padding-top: .75rem; + margin-top: .75rem; + border-top: 1px solid rgba(0, 0, 0, .1) +} + +.algolia-autocomplete .ds-suggestion .algolia-docsearch-suggestion--subcategory-column { + display: block; + padding: .1rem 1rem; + margin-bottom: 0.1; + font-size: 1.0em; + font-weight: 400 + /* display: none */ +} + +.algolia-autocomplete .algolia-docsearch-suggestion--title { + display: block; + padding: .25rem 1rem; + margin-bottom: 0; + font-size: 0.9em; + font-weight: 400 +} + +.algolia-autocomplete .algolia-docsearch-suggestion--text { + padding: 0 1rem .5rem; + margin-top: -.25rem; + font-size: 0.8em; + font-weight: 400; + line-height: 1.25 +} + +.algolia-autocomplete .algolia-docsearch-footer { + width: 110px; + height: 20px; + z-index: 3; + margin-top: 10.66667px; + float: right; + font-size: 0; + line-height: 0; +} + +.algolia-autocomplete .algolia-docsearch-footer--logo { + background-image: url("data:image/svg+xml;utf8,"); + background-repeat: no-repeat; + background-position: 50%; + background-size: 100%; + overflow: hidden; + text-indent: -9000px; + width: 100%; + height: 100%; + display: block; + transform: translate(-8px); +} + +.algolia-autocomplete .algolia-docsearch-suggestion--highlight { + color: #FF8C00; + background: rgba(232, 189, 54, 0.1) +} + + +.algolia-autocomplete .algolia-docsearch-suggestion--text .algolia-docsearch-suggestion--highlight { + box-shadow: inset 0 -2px 0 0 rgba(105, 105, 105, .5) +} + +.algolia-autocomplete .ds-suggestion.ds-cursor .algolia-docsearch-suggestion--content { + background-color: rgba(192, 192, 192, .15) +} diff --git a/docs/docsearch.js b/docs/docsearch.js new file mode 100644 index 0000000..b35504c --- /dev/null +++ b/docs/docsearch.js @@ -0,0 +1,85 @@ +$(function() { + + // register a handler to move the focus to the search bar + // upon pressing shift + "/" (i.e. "?") + $(document).on('keydown', function(e) { + if (e.shiftKey && e.keyCode == 191) { + e.preventDefault(); + $("#search-input").focus(); + } + }); + + $(document).ready(function() { + // do keyword highlighting + /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */ + var mark = function() { + + var referrer = document.URL ; + var paramKey = "q" ; + + if (referrer.indexOf("?") !== -1) { + var qs = referrer.substr(referrer.indexOf('?') + 1); + var qs_noanchor = qs.split('#')[0]; + var qsa = qs_noanchor.split('&'); + var keyword = ""; + + for (var i = 0; i < qsa.length; i++) { + var currentParam = qsa[i].split('='); + + if (currentParam.length !== 2) { + continue; + } + + if (currentParam[0] == paramKey) { + keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20")); + } + } + + if (keyword !== "") { + $(".contents").unmark({ + done: function() { + $(".contents").mark(keyword); + } + }); + } + } + }; + + mark(); + }); +}); + +/* Search term highlighting ------------------------------*/ + +function matchedWords(hit) { + var words = []; + + var hierarchy = hit._highlightResult.hierarchy; + // loop to fetch from lvl0, lvl1, etc. + for (var idx in hierarchy) { + words = words.concat(hierarchy[idx].matchedWords); + } + + var content = hit._highlightResult.content; + if (content) { + words = words.concat(content.matchedWords); + } + + // return unique words + var words_uniq = [...new Set(words)]; + return words_uniq; +} + +function updateHitURL(hit) { + + var words = matchedWords(hit); + var url = ""; + + if (hit.anchor) { + url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor; + } else { + url = hit.url + '?q=' + escape(words.join(" ")); + } + + return url; +} diff --git a/docs/index.html b/docs/index.html new file mode 100644 index 0000000..2559d3b --- /dev/null +++ b/docs/index.html @@ -0,0 +1,358 @@ + + + + + + + +Client for Dataverse 4 Repositories • dataverse + + + + + + + + + + +
    +
    + + + + +
    +
    +
    + + +

    Dataverse Project logo

    +

    The dataverse package provides access to Dataverse 4 APIs, enabling data search, retrieval, and deposit, thus allowing R users to integrate public data sharing into the reproducible research workflow. dataverse is the next-generation iteration of the dvn package, which works with Dataverse 3 (“Dataverse Network”) applications. dataverse includes numerous improvements for data search, retrieval, and deposit, including use of the (currently in development) sword package for data deposit and the UNF package for data fingerprinting.

    +
    +

    +Getting Started

    +

    You can find a stable 2017 release on CRAN, or install the latest development version from GitHub:

    + +
    +

    +Keys

    +

    Some features of the Dataverse 4 API are public and require no authentication. This means in many cases you can search for and retrieve data without a Dataverse account for that a specific Dataverse installation. But, other features require a Dataverse account for the specific server installation of the Dataverse software, and an API key linked to that account. Instructions for obtaining an account and setting up an API key are available in the Dataverse User Guide. (Note: if your key is compromised, it can be regenerated to preserve security.) Once you have an API key, this should be stored as an environment variable called DATAVERSE_KEY. It can be set within R using:

    +
    +Sys.setenv("DATAVERSE_KEY" = "examplekey12345")
    +
    +
    +

    +Server

    +

    Because there are many Dataverse installations, all functions in the R client require specifying what server installation you are interacting with. This can be set by default with an environment variable, DATAVERSE_SERVER. This should be the Dataverse server, without the “https” prefix or the “/api” URL path, etc. For example, the Harvard Dataverse can be used by setting:

    +
    +Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu")
    +

    Note: The package attempts to compensate for any malformed values, though.

    +

    Currently, the package wraps the data management features of the Dataverse API. Functions for other API features - related to user management and permissions - are not currently exported in the package (but are drafted in the source code).

    +
    +
    +
    +

    +Data and Metadata Retrieval

    +

    The dataverse package provides multiple interfaces to obtain data into R. Users can supply a file DOI, a dataset DOI combined with a filename, or a dataverse object. They can read in the file as a raw binary or a dataset read in with the appropriate R function.

    +
    +

    +Reading data as R objects

    +

    Use the get_dataframe_*() functions, depending on the input you have. For example, we will read a survey dataset on Dataverse, nlsw88.dta (doi:10.70122/FK2/PPKHI1/ZYATZZ), originally in Stata dta form.

    +

    With a file DOI, we can use the get_dataframe_by_doi function:

    +
    +nlsw <-
    +  get_dataframe_by_doi(
    +    filedoi     = "10.70122/FK2/PPIAXE/MHDB0O",
    +    server      = "demo.dataverse.org"
    +  )
    +
    ## Downloading ingested version of data with readr::read_tsv. To download the original version and remove this message, set original = TRUE.
    +
    +##
    +## ── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────
    +## cols(
    +##   idcode = col_double(),
    +##   age = col_double(),
    +##   race = col_double(),
    +##   married = col_double(),
    +##   never_married = col_double(),
    +##   grade = col_double(),
    +##   collgrad = col_double(),
    +##   south = col_double(),
    +##   smsa = col_double(),
    +##   c_city = col_double(),
    +##   industry = col_double(),
    +##   occupation = col_double(),
    +##   union = col_double(),
    +##   wage = col_double(),
    +##   hours = col_double(),
    +##   ttl_exp = col_double(),
    +##   tenure = col_double()
    +## )
    +

    which by default reads in the ingested file (not the original dta) by the readr::read_tsv function.

    +

    Alternatively, we can download the same file by specifying the filename and the DOI of the “dataset” (in Dataverse, a collection of files is called a dataset).

    +
    +nlsw_tsv <-
    +  get_dataframe_by_name(
    +    filename  = "nlsw88.tab",
    +    dataset   = "10.70122/FK2/PPIAXE",
    +    server    = "demo.dataverse.org"
    +  )
    +

    Now, Dataverse often translates rectangular data into an ingested, or “archival” version, which is application-neutral and easily-readable. read_dataframe_*() defaults to taking this ingested version rather than using the original, through the argument original = FALSE.

    +

    This default is safe because you may not have the proprietary software that was originally used. On the other hand, the data may have lost information in the process of the ingestation.

    +

    Instead, to read the same file but its original version, specify original = TRUE and set an .f argument. In this case, we know that nlsw88.tab is a Stata .dta dataset, so we will use the haven::read_dta function.

    +
    +nlsw_original <-
    +  get_dataframe_by_name(
    +    filename    = "nlsw88.tab",
    +    dataset     = "10.70122/FK2/PPIAXE",
    +    .f          = haven::read_dta,
    +    original    = TRUE,
    +    server      = "demo.dataverse.org"
    +  )
    +

    Note that even though the file prefix is “.tab”, we use read_dta.

    +

    Of course, when the dataset is not ingested (such as a Rds file), users would always need to specify an .f argument for the specific file.

    +

    Note the difference between nls_tsv and nls_original. nls_original preserves the data attributes like value labels, whereas nls_tsv has dropped this or left this in file metadata.

    +
    +class(nlsw_tsv$race) # tab ingested version only has numeric data
    +
    ## [1] "numeric"
    +
    +attr(nlsw_original$race, "labels") # original dta has value labels
    +
    ## white black other
    +##     1     2     3
    +
    +
    +

    +Reading a dataset as a binary file.

    +

    In some cases, you may not want to read in the data in your environment, perhaps because that is not possible (e.g. for a .docx file), and you want to simply write these files your local disk. To do this, use the more primitive get_file_* commands. The arguments are equivalent, except we no longer need an .f argument

    +
    +nlsw_raw <-
    +  get_file_by_name(
    +    filename    = "nlsw88.tab",
    +    dataset     = "10.70122/FK2/PPIAXE",
    +    server      = "demo.dataverse.org"
    +  )
    +class(nlsw_raw)
    +
    ## [1] "raw"
    +
    +
    +

    +Reading file metadata

    +

    The function get_file_metadata() can also be used similarly. This will return a metadata format for ingested tabular files in the ddi format. The function get_dataset() will retrieve the list of files in a dataset.

    +
    +get_dataset(
    +  dataset = "10.70122/FK2/PPIAXE",
    +  server  = "demo.dataverse.org"
    +)
    +
    ## Dataset (182162):
    +## Version: 1.1, RELEASED
    +## Release Date: 2020-12-30T00:00:24Z
    +## License: CC0
    +## 22 Files:
    +##                   label version      id               contentType
    +## 1 nlsw88_rds-export.rds       1 1734016  application/octet-stream
    +## 2            nlsw88.tab       3 1734017 text/tab-separated-values
    +
    +
    +
    +

    +Data Discovery

    +

    Dataverse supplies a robust search API to discover Dataverses, datasets, and files. The simplest searches simply consist of a query string:

    +
    +dataverse_search("Gary King")
    +

    More complicated searches might specify metadata fields:

    +
    +dataverse_search(author = "Gary King", title = "Ecological Inference")
    +

    And searches can be restricted to specific types of objects (Dataverse, dataset, or file):

    +
    +dataverse_search(author = "Gary King", type = "dataset")
    +

    The results are paginated using per_page argument. To retrieve subsequent pages, specify start.

    +
    +
    +

    +Data Archiving

    +

    Dataverse provides two - basically unrelated - workflows for managing (adding, documenting, and publishing) datasets. The first is built on SWORD v2.0. This means that to create a new dataset listing, you will have first initialize a dataset entry with some metadata, add one or more files to the dataset, and then publish it. This looks something like the following:

    +
    +# retrieve your service document
    +d <- service_document()
    +
    +# create a list of metadata
    +metadat <-
    +  list(
    +    title       = "My Study",
    +    creator     = "Doe, John",
    +    description = "An example study"
    +  )
    +
    +# create the dataset
    +ds <- initiate_sword_dataset("mydataverse", body = metadat)
    +
    +# add files to dataset
    +tmp <- tempfile()
    +write.csv(iris, file = tmp)
    +f <- add_file(ds, file = tmp)
    +
    +# publish new dataset
    +publish_sword_dataset(ds)
    +
    +# dataset will now be published
    +list_datasets("mydataverse")
    +

    The second workflow is called the “native” API and is similar but uses slightly different functions:

    +
    +# create the dataset
    +ds <- create_dataset("mydataverse")
    +
    +# add files
    +tmp <- tempfile()
    +write.csv(iris, file = tmp)
    +f <- add_dataset_file(file = tmp, dataset = ds)
    +
    +# publish dataset
    +publish_dataset(ds)
    +
    +# dataset will now be published
    +get_dataverse("mydataverse")
    +

    Through the native API it is possible to update a dataset by modifying its metadata with update_dataset() or file contents using update_dataset_file() and then republish a new version using publish_dataset().

    +
    +
    +

    +Other Installations

    +

    Users interested in downloading metadata from archives other than Dataverse may be interested in Kurt Hornik’s OAIHarvester and Scott Chamberlain’s oai, which offer metadata download from any web repository that is compliant with the Open Archives Initiative standards. Additionally, rdryad uses OAIHarvester to interface with Dryad. The rfigshare package works in a similar spirit to dataverse with https://figshare.com/.

    +
    +
    +
    + + +
    + + +
    + +
    +

    Site built with pkgdown 1.6.1.

    +
    + +
    +
    + + + + + + diff --git a/docs/link.svg b/docs/link.svg new file mode 100644 index 0000000..88ad827 --- /dev/null +++ b/docs/link.svg @@ -0,0 +1,12 @@ + + + + + + diff --git a/docs/news/index.html b/docs/news/index.html new file mode 100644 index 0000000..d9988b7 --- /dev/null +++ b/docs/news/index.html @@ -0,0 +1,167 @@ + + + + + + + + +Changelog • dataverse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +
    + +
    +
    + + +
    + + + +
    + + +
    + + +
    +

    Site built with pkgdown 1.6.1.

    +
    + +
    +
    + + + + + + + + diff --git a/docs/pkgdown.css b/docs/pkgdown.css new file mode 100644 index 0000000..1273238 --- /dev/null +++ b/docs/pkgdown.css @@ -0,0 +1,367 @@ +/* Sticky footer */ + +/** + * Basic idea: https://philipwalton.github.io/solved-by-flexbox/demos/sticky-footer/ + * Details: https://github.com/philipwalton/solved-by-flexbox/blob/master/assets/css/components/site.css + * + * .Site -> body > .container + * .Site-content -> body > .container .row + * .footer -> footer + * + * Key idea seems to be to ensure that .container and __all its parents__ + * have height set to 100% + * + */ + +html, body { + height: 100%; +} + +body { + position: relative; +} + +body > .container { + display: flex; + height: 100%; + flex-direction: column; +} + +body > .container .row { + flex: 1 0 auto; +} + +footer { + margin-top: 45px; + padding: 35px 0 36px; + border-top: 1px solid #e5e5e5; + color: #666; + display: flex; + flex-shrink: 0; +} +footer p { + margin-bottom: 0; +} +footer div { + flex: 1; +} +footer .pkgdown { + text-align: right; +} +footer p { + margin-bottom: 0; +} + +img.icon { + float: right; +} + +img { + max-width: 100%; +} + +/* Fix bug in bootstrap (only seen in firefox) */ +summary { + display: list-item; +} + +/* Typographic tweaking ---------------------------------*/ + +.contents .page-header { + margin-top: calc(-60px + 1em); +} + +dd { + margin-left: 3em; +} + +/* Section anchors ---------------------------------*/ + +a.anchor { + margin-left: -30px; + display:inline-block; + width: 30px; + height: 30px; + visibility: hidden; + + background-image: url(./link.svg); + background-repeat: no-repeat; + background-size: 20px 20px; + background-position: center center; +} + +.hasAnchor:hover a.anchor { + visibility: visible; +} + +@media (max-width: 767px) { + .hasAnchor:hover a.anchor { + visibility: hidden; + } +} + + +/* Fixes for fixed navbar --------------------------*/ + +.contents h1, .contents h2, .contents h3, .contents h4 { + padding-top: 60px; + margin-top: -40px; +} + +/* Navbar submenu --------------------------*/ + +.dropdown-submenu { + position: relative; +} + +.dropdown-submenu>.dropdown-menu { + top: 0; + left: 100%; + margin-top: -6px; + margin-left: -1px; + border-radius: 0 6px 6px 6px; +} + +.dropdown-submenu:hover>.dropdown-menu { + display: block; +} + +.dropdown-submenu>a:after { + display: block; + content: " "; + float: right; + width: 0; + height: 0; + border-color: transparent; + border-style: solid; + border-width: 5px 0 5px 5px; + border-left-color: #cccccc; + margin-top: 5px; + margin-right: -10px; +} + +.dropdown-submenu:hover>a:after { + border-left-color: #ffffff; +} + +.dropdown-submenu.pull-left { + float: none; +} + +.dropdown-submenu.pull-left>.dropdown-menu { + left: -100%; + margin-left: 10px; + border-radius: 6px 0 6px 6px; +} + +/* Sidebar --------------------------*/ + +#pkgdown-sidebar { + margin-top: 30px; + position: -webkit-sticky; + position: sticky; + top: 70px; +} + +#pkgdown-sidebar h2 { + font-size: 1.5em; + margin-top: 1em; +} + +#pkgdown-sidebar h2:first-child { + margin-top: 0; +} + +#pkgdown-sidebar .list-unstyled li { + margin-bottom: 0.5em; +} + +/* bootstrap-toc tweaks ------------------------------------------------------*/ + +/* All levels of nav */ + +nav[data-toggle='toc'] .nav > li > a { + padding: 4px 20px 4px 6px; + font-size: 1.5rem; + font-weight: 400; + color: inherit; +} + +nav[data-toggle='toc'] .nav > li > a:hover, +nav[data-toggle='toc'] .nav > li > a:focus { + padding-left: 5px; + color: inherit; + border-left: 1px solid #878787; +} + +nav[data-toggle='toc'] .nav > .active > a, +nav[data-toggle='toc'] .nav > .active:hover > a, +nav[data-toggle='toc'] .nav > .active:focus > a { + padding-left: 5px; + font-size: 1.5rem; + font-weight: 400; + color: inherit; + border-left: 2px solid #878787; +} + +/* Nav: second level (shown on .active) */ + +nav[data-toggle='toc'] .nav .nav { + display: none; /* Hide by default, but at >768px, show it */ + padding-bottom: 10px; +} + +nav[data-toggle='toc'] .nav .nav > li > a { + padding-left: 16px; + font-size: 1.35rem; +} + +nav[data-toggle='toc'] .nav .nav > li > a:hover, +nav[data-toggle='toc'] .nav .nav > li > a:focus { + padding-left: 15px; +} + +nav[data-toggle='toc'] .nav .nav > .active > a, +nav[data-toggle='toc'] .nav .nav > .active:hover > a, +nav[data-toggle='toc'] .nav .nav > .active:focus > a { + padding-left: 15px; + font-weight: 500; + font-size: 1.35rem; +} + +/* orcid ------------------------------------------------------------------- */ + +.orcid { + font-size: 16px; + color: #A6CE39; + /* margins are required by official ORCID trademark and display guidelines */ + margin-left:4px; + margin-right:4px; + vertical-align: middle; +} + +/* Reference index & topics ----------------------------------------------- */ + +.ref-index th {font-weight: normal;} + +.ref-index td {vertical-align: top; min-width: 100px} +.ref-index .icon {width: 40px;} +.ref-index .alias {width: 40%;} +.ref-index-icons .alias {width: calc(40% - 40px);} +.ref-index .title {width: 60%;} + +.ref-arguments th {text-align: right; padding-right: 10px;} +.ref-arguments th, .ref-arguments td {vertical-align: top; min-width: 100px} +.ref-arguments .name {width: 20%;} +.ref-arguments .desc {width: 80%;} + +/* Nice scrolling for wide elements --------------------------------------- */ + +table { + display: block; + overflow: auto; +} + +/* Syntax highlighting ---------------------------------------------------- */ + +pre { + word-wrap: normal; + word-break: normal; + border: 1px solid #eee; +} + +pre, code { + background-color: #f8f8f8; + color: #333; +} + +pre code { + overflow: auto; + word-wrap: normal; + white-space: pre; +} + +pre .img { + margin: 5px 0; +} + +pre .img img { + background-color: #fff; + display: block; + height: auto; +} + +code a, pre a { + color: #375f84; +} + +a.sourceLine:hover { + text-decoration: none; +} + +.fl {color: #1514b5;} +.fu {color: #000000;} /* function */ +.ch,.st {color: #036a07;} /* string */ +.kw {color: #264D66;} /* keyword */ +.co {color: #888888;} /* comment */ + +.message { color: black; font-weight: bolder;} +.error { color: orange; font-weight: bolder;} +.warning { color: #6A0366; font-weight: bolder;} + +/* Clipboard --------------------------*/ + +.hasCopyButton { + position: relative; +} + +.btn-copy-ex { + position: absolute; + right: 0; + top: 0; + visibility: hidden; +} + +.hasCopyButton:hover button.btn-copy-ex { + visibility: visible; +} + +/* headroom.js ------------------------ */ + +.headroom { + will-change: transform; + transition: transform 200ms linear; +} +.headroom--pinned { + transform: translateY(0%); +} +.headroom--unpinned { + transform: translateY(-100%); +} + +/* mark.js ----------------------------*/ + +mark { + background-color: rgba(255, 255, 51, 0.5); + border-bottom: 2px solid rgba(255, 153, 51, 0.3); + padding: 1px; +} + +/* vertical spacing after htmlwidgets */ +.html-widget { + margin-bottom: 10px; +} + +/* fontawesome ------------------------ */ + +.fab { + font-family: "Font Awesome 5 Brands" !important; +} + +/* don't display links in code chunks when printing */ +/* source: https://stackoverflow.com/a/10781533 */ +@media print { + code a:link:after, code a:visited:after { + content: ""; + } +} diff --git a/docs/pkgdown.js b/docs/pkgdown.js new file mode 100644 index 0000000..7e7048f --- /dev/null +++ b/docs/pkgdown.js @@ -0,0 +1,108 @@ +/* http://gregfranko.com/blog/jquery-best-practices/ */ +(function($) { + $(function() { + + $('.navbar-fixed-top').headroom(); + + $('body').css('padding-top', $('.navbar').height() + 10); + $(window).resize(function(){ + $('body').css('padding-top', $('.navbar').height() + 10); + }); + + $('[data-toggle="tooltip"]').tooltip(); + + var cur_path = paths(location.pathname); + var links = $("#navbar ul li a"); + var max_length = -1; + var pos = -1; + for (var i = 0; i < links.length; i++) { + if (links[i].getAttribute("href") === "#") + continue; + // Ignore external links + if (links[i].host !== location.host) + continue; + + var nav_path = paths(links[i].pathname); + + var length = prefix_length(nav_path, cur_path); + if (length > max_length) { + max_length = length; + pos = i; + } + } + + // Add class to parent
  • , and enclosing
  • if in dropdown + if (pos >= 0) { + var menu_anchor = $(links[pos]); + menu_anchor.parent().addClass("active"); + menu_anchor.closest("li.dropdown").addClass("active"); + } + }); + + function paths(pathname) { + var pieces = pathname.split("/"); + pieces.shift(); // always starts with / + + var end = pieces[pieces.length - 1]; + if (end === "index.html" || end === "") + pieces.pop(); + return(pieces); + } + + // Returns -1 if not found + function prefix_length(needle, haystack) { + if (needle.length > haystack.length) + return(-1); + + // Special case for length-0 haystack, since for loop won't run + if (haystack.length === 0) { + return(needle.length === 0 ? 0 : -1); + } + + for (var i = 0; i < haystack.length; i++) { + if (needle[i] != haystack[i]) + return(i); + } + + return(haystack.length); + } + + /* Clipboard --------------------------*/ + + function changeTooltipMessage(element, msg) { + var tooltipOriginalTitle=element.getAttribute('data-original-title'); + element.setAttribute('data-original-title', msg); + $(element).tooltip('show'); + element.setAttribute('data-original-title', tooltipOriginalTitle); + } + + if(ClipboardJS.isSupported()) { + $(document).ready(function() { + var copyButton = ""; + + $(".examples, div.sourceCode").addClass("hasCopyButton"); + + // Insert copy buttons: + $(copyButton).prependTo(".hasCopyButton"); + + // Initialize tooltips: + $('.btn-copy-ex').tooltip({container: 'body'}); + + // Initialize clipboard: + var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', { + text: function(trigger) { + return trigger.parentNode.textContent; + } + }); + + clipboardBtnCopies.on('success', function(e) { + changeTooltipMessage(e.trigger, 'Copied!'); + e.clearSelection(); + }); + + clipboardBtnCopies.on('error', function() { + changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); + }); + }); + } +})(window.jQuery || window.$) diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml new file mode 100644 index 0000000..e7f6cff --- /dev/null +++ b/docs/pkgdown.yml @@ -0,0 +1,13 @@ +pandoc: 2.9.2.1 +pkgdown: 1.6.1 +pkgdown_sha: ~ +articles: + A-introduction: A-introduction.html + B-search: B-search.html + C-retrieval: C-retrieval.html + D-archiving: D-archiving.html +last_built: 2021-01-17T17:13Z +urls: + reference: https://IQSS.github.io/dataverse-client-r/reference + article: https://IQSS.github.io/dataverse-client-r/articles + diff --git a/docs/reference/Rplot001.png b/docs/reference/Rplot001.png new file mode 100644 index 0000000..17a3580 Binary files /dev/null and b/docs/reference/Rplot001.png differ diff --git a/docs/reference/add_dataset_file.html b/docs/reference/add_dataset_file.html new file mode 100644 index 0000000..7400029 --- /dev/null +++ b/docs/reference/add_dataset_file.html @@ -0,0 +1,271 @@ + + + + + + + + +Add or update a file in a dataset — add_dataset_file • dataverse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +
    + +
    +
    + + +
    +

    Add or update a file in a dataset

    +
    + +
    add_dataset_file(
    +  file,
    +  dataset,
    +  description = NULL,
    +  key = Sys.getenv("DATAVERSE_KEY"),
    +  server = Sys.getenv("DATAVERSE_SERVER"),
    +  ...
    +)
    +
    +update_dataset_file(
    +  file,
    +  dataset = NULL,
    +  id,
    +  description = NULL,
    +  force = TRUE,
    +  key = Sys.getenv("DATAVERSE_KEY"),
    +  server = Sys.getenv("DATAVERSE_SERVER"),
    +  ...
    +)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    file

    A character string

    dataset

    A character specifying a persistent identification ID for a dataset, +for example "doi:10.70122/FK2/HXJVJU". Alternatively, an object of class +“dataverse_dataset” obtained by dataverse_contents().

    description

    Optionally, a character string providing a description of the file.

    key

    A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +Sys.setenv("DATAVERSE_KEY" = "examplekey").

    server

    A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (server = "dataverse.harvard.edu"). This can be modified atomically +or globally using Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com").

    ...

    Additional arguments passed to an HTTP request function, such as +GET, POST, or +DELETE.

    id

    An integer specifying a file identifier; or, if doi is specified, a character string specifying a file name within the DOI-identified dataset; or an object of class “dataverse_file” as returned by dataset_files.

    force

    A logical indicating whether to force the update even if the file types differ. Default is TRUE.

    + +

    Value

    + +

    add_dataset_file returns the new file ID.

    +

    Details

    + +

    From Dataverse v4.6.1, the “native” API provides endpoints to add and update files without going through the SWORD workflow. To use SWORD instead, see add_file. add_dataset_file adds a new file to a specified dataset.

    +

    update_dataset_file can be used to replace/update a published file. Note that it only works on published files, so unpublished drafts cannot be updated - the dataset must first either be published (publish_dataset) or deleted (delete_dataset).

    +

    See also

    + + + +

    Examples

    +
    if (FALSE) { +meta <- list() +ds <- create_dataset("mydataverse", body = meta) + +saveRDS(mtcars, tmp <- tempfile(fileext = ".rds")) +f <- add_dataset_file(tmp, dataset = ds, description = "mtcars") + +# publish dataset +publish_dataset(ds) + +# update file and republish +saveRDS(iris, tmp) +update_dataset_file(tmp, dataset = ds, id = f, + description = "Actually iris") +publish_dataset(ds) + +# cleanup +unlink(tmp) +delete_dataset(ds) +} +
    +
    + +
    + + +
    + + +
    +

    Site built with pkgdown 1.6.1.

    +
    + +
    +
    + + + + + + + + diff --git a/docs/reference/add_file.html b/docs/reference/add_file.html new file mode 100644 index 0000000..cd64134 --- /dev/null +++ b/docs/reference/add_file.html @@ -0,0 +1,248 @@ + + + + + + + + +Add file (SWORD) — add_file • dataverse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +
    + +
    +
    + + +
    +

    Add one or more files to a SWORD (possibly unpublished) dataset

    +
    + +
    add_file(
    +  dataset,
    +  file,
    +  key = Sys.getenv("DATAVERSE_KEY"),
    +  server = Sys.getenv("DATAVERSE_SERVER"),
    +  ...
    +)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + + + + + +
    dataset

    A dataset DOI (or other persistent identifier), an object of class “dataset_atom” or “dataset_statement”, or an appropriate and complete SWORD URL.

    file

    A character vector of file names, a data.frame, or a list of R objects.

    key

    A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +Sys.setenv("DATAVERSE_KEY" = "examplekey").

    server

    A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (server = "dataverse.harvard.edu"). This can be modified atomically +or globally using Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com").

    ...

    Additional arguments passed to an HTTP request function, such as +GET, POST, or +DELETE.

    + +

    Value

    + +

    An object of class “dataset_atom”.

    +

    Details

    + +

    This function is used to add files to a dataset. It is part of the SWORD API, which is used to upload data to a Dataverse server. This means this can be used to view unpublished Dataverses and Datasets.

    +

    As of Dataverse v4.6.1, the “native” API also provides endpoints to add and update files without going through the SWORD workflow. This functionality is provided by add_dataset_file and update_dataset_file.

    +

    See also

    + +

    Managing a Dataverse: publish_dataverse; Managing a dataset: dataset_atom, list_datasets, create_dataset, delete_dataset, publish_dataset; Managing files within a dataset: add_file, delete_file

    + +

    Examples

    +
    if (FALSE) { +# retrieve your service document +d <- service_document() + +# create a list of metadata +metadat <- list(title = "My Study", + creator = "Doe, John", + description = "An example study") + +# create the dataset +dat <- initiate_sword_dataset("mydataverse", body = metadat) + +# add files to dataset +tmp <- tempfile() +write.csv(iris, file = tmp) +f <- add_file(dat, file = tmp) + +# publish dataset +publish_dataset(dat) + +# delete a dataset +delete_dataset(dat) +} +
    +
    + +
    + + +
    + + +
    +

    Site built with pkgdown 1.6.1.

    +
    + +
    +
    + + + + + + + + diff --git a/docs/reference/create_dataset.html b/docs/reference/create_dataset.html new file mode 100644 index 0000000..2ffe7e9 --- /dev/null +++ b/docs/reference/create_dataset.html @@ -0,0 +1,249 @@ + + + + + + + + +Create or update a dataset — create_dataset • dataverse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +
    + +
    +
    + + +
    +

    Create or update dataset within a Dataverse

    +
    + +
    create_dataset(
    +  dataverse,
    +  body,
    +  key = Sys.getenv("DATAVERSE_KEY"),
    +  server = Sys.getenv("DATAVERSE_SERVER"),
    +  ...
    +)
    +
    +update_dataset(
    +  dataset,
    +  body,
    +  key = Sys.getenv("DATAVERSE_KEY"),
    +  server = Sys.getenv("DATAVERSE_SERVER"),
    +  ...
    +)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + + + + + + + + + +
    dataverse

    A character string specifying a Dataverse name or an object of class “dataverse”.

    body

    A list describing the dataset.

    key

    A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +Sys.setenv("DATAVERSE_KEY" = "examplekey").

    server

    A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (server = "dataverse.harvard.edu"). This can be modified atomically +or globally using Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com").

    ...

    Additional arguments passed to an HTTP request function, such as +GET, POST, or +DELETE.

    dataset

    A character specifying a persistent identification ID for a dataset, +for example "doi:10.70122/FK2/HXJVJU". Alternatively, an object of class +“dataverse_dataset” obtained by dataverse_contents().

    + +

    Value

    + +

    An object of class “dataverse_dataset”.

    +

    Details

    + +

    create_dataset creates a Dataverse dataset. In Dataverse, a “dataset” is the lowest-level structure in which to organize files. For example, a Dataverse dataset might contain the files used to reproduce a published article, including data, analysis code, and related materials. Datasets can be organized into “Dataverse” objects, which can be further nested within other Dataverses. For someone creating an archive, this would be the first step to producing said archive (after creating a Dataverse, if one does not already exist). Once files and metadata have been added, the dataset can be publised (i.e., made public) using publish_dataset.

    +

    update_dataset updates a Dataverse dataset that has already been created using create_dataset. This creates a draft version of the dataset or modifies the current draft if one is already in-progress. It does not assign a new version number to the dataset nor does it make it publicly visible (which can be done with publish_dataset).

    +

    See also

    + + + +

    Examples

    +
    if (FALSE) { +meta <- list() +ds <- create_dataset("mydataverse", body = meta) + +meta2 <- list() +update_dataset(ds, body = meta2) + +# cleanup +delete_dataset(ds) +} +
    +
    + +
    + + +
    + + +
    +

    Site built with pkgdown 1.6.1.

    +
    + +
    +
    + + + + + + + + diff --git a/docs/reference/create_dataverse.html b/docs/reference/create_dataverse.html new file mode 100644 index 0000000..5d5b83e --- /dev/null +++ b/docs/reference/create_dataverse.html @@ -0,0 +1,226 @@ + + + + + + + + +Create Dataverse — create_dataverse • dataverse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +
    + +
    +
    + + +
    +

    Create a new Dataverse

    +
    + +
    create_dataverse(
    +  dataverse,
    +  key = Sys.getenv("DATAVERSE_KEY"),
    +  server = Sys.getenv("DATAVERSE_SERVER"),
    +  ...
    +)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + +
    dataverse

    A character string specifying a Dataverse name or an object of class “dataverse”. If missing, a top-level Dataverse is created.

    key

    A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +Sys.setenv("DATAVERSE_KEY" = "examplekey").

    server

    A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (server = "dataverse.harvard.edu"). This can be modified atomically +or globally using Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com").

    ...

    Additional arguments passed to an HTTP request function, such as +GET, POST, or +DELETE.

    + +

    Value

    + +

    A list.

    +

    Details

    + +

    This function can create a new Dataverse. In the language of Dataverse, a user has a “root” Dataverse into which they can create further nested Dataverses and/or “datasets” that contain, for example, a set of files for a specific project. Creating a new Dataverse can therefore be a useful way to organize other related Dataverses or sets of related datasets.

    +

    For example, if one were involved in an ongoing project that generated monthly data. One may want to store each month's data and related files in a separate “dataset”, so that each has its own persistent identifier (e.g., DOI), but keep all of these datasets within a named Dataverse so that the project's files are kept separate the user's personal Dataverse records. The flexible nesting of Dataverses allows for a number of possible organizational approaches.

    +

    See also

    + +

    To manage Dataverses: delete_dataverse, publish_dataverse, dataverse_contents; to get datasets: get_dataset; to search for Dataverses, datasets, or files: dataverse_search

    + +

    Examples

    +
    if (FALSE) { +(dv <- create_dataverse("mydataverse")) + +# cleanup +delete_dataverse("mydataverse") +} +
    +
    + +
    + + +
    + + +
    +

    Site built with pkgdown 1.6.1.

    +
    + +
    +
    + + + + + + + + diff --git a/docs/reference/dataset_atom.html b/docs/reference/dataset_atom.html new file mode 100644 index 0000000..3936166 --- /dev/null +++ b/docs/reference/dataset_atom.html @@ -0,0 +1,236 @@ + + + + + + + + +View dataset (SWORD) — dataset_atom • dataverse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +
    + +
    +
    + + +
    +

    View a SWORD (possibly unpublished) dataset “statement”

    +
    + +
    dataset_atom(
    +  dataset,
    +  key = Sys.getenv("DATAVERSE_KEY"),
    +  server = Sys.getenv("DATAVERSE_SERVER"),
    +  ...
    +)
    +
    +dataset_statement(
    +  dataset,
    +  key = Sys.getenv("DATAVERSE_KEY"),
    +  server = Sys.getenv("DATAVERSE_SERVER"),
    +  ...
    +)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + +
    dataset

    A dataset DOI (or other persistent identifier), an object of class “dataset_atom” or “dataset_statement”, or an appropriate and complete SWORD URL.

    key

    A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +Sys.setenv("DATAVERSE_KEY" = "examplekey").

    server

    A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (server = "dataverse.harvard.edu"). This can be modified atomically +or globally using Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com").

    ...

    Additional arguments passed to an HTTP request function, such as +GET, POST, or +DELETE.

    + +

    Value

    + +

    A list. For dataset_atom, an object of class “dataset_atom”.

    +

    Details

    + +

    These functions are used to view a dataset by its persistent identifier. dataset_statement will contain information about the contents of the dataset, whereas dataset_atom contains “metadata” relevant to the SWORD API.

    +

    See also

    + +

    Managing a Dataverse: publish_dataverse; Managing a dataset: dataset_atom, list_datasets, create_dataset, delete_sword_dataset, publish_dataset; Managing files within a dataset: add_file, delete_file

    + +

    Examples

    +
    if (FALSE) { +# retrieve your service document +d <- service_document() + +# retrieve dataset statement (list contents) +dataset_statement(d[[2]]) + +# retrieve dataset atom +dataset_atom(d[[2]]) +} +
    +
    + +
    + + +
    + + +
    +

    Site built with pkgdown 1.6.1.

    +
    + +
    +
    + + + + + + + + diff --git a/docs/reference/dataset_versions.html b/docs/reference/dataset_versions.html new file mode 100644 index 0000000..67c54e1 --- /dev/null +++ b/docs/reference/dataset_versions.html @@ -0,0 +1,230 @@ + + + + + + + + +Dataset versions — dataset_versions • dataverse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +
    + +
    +
    + + +
    +

    View versions of a dataset

    +
    + +
    dataset_versions(
    +  dataset,
    +  key = Sys.getenv("DATAVERSE_KEY"),
    +  server = Sys.getenv("DATAVERSE_SERVER"),
    +  ...
    +)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + +
    dataset

    A character specifying a persistent identification ID for a dataset, +for example "doi:10.70122/FK2/HXJVJU". Alternatively, an object of class +“dataverse_dataset” obtained by dataverse_contents().

    key

    A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +Sys.setenv("DATAVERSE_KEY" = "examplekey").

    server

    A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (server = "dataverse.harvard.edu"). This can be modified atomically +or globally using Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com").

    ...

    Additional arguments passed to an HTTP request function, such as +GET, POST, or +DELETE.

    + +

    Value

    + +

    A list of class “dataverse_dataset_version”.

    +

    Details

    + +

    This returns a list of objects of all versions of a dataset, including metadata. This can be used as a first step for retrieving older versions of files or datasets.

    +

    See also

    + + + +

    Examples

    +
    if (FALSE) { +# download file from: +# https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/ARKOTI +monogan <- get_dataverse("monogan") +monogan_data <- dataverse_contents(monogan) +d1 <- get_dataset(monogan_data[[1]]) +dataset_versions(d1) +dataset_files(d1) +} +
    +
    + +
    + + +
    + + +
    +

    Site built with pkgdown 1.6.1.

    +
    + +
    +
    + + + + + + + + diff --git a/docs/reference/dataverse.html b/docs/reference/dataverse.html new file mode 100644 index 0000000..841223f --- /dev/null +++ b/docs/reference/dataverse.html @@ -0,0 +1,192 @@ + + + + + + + + +Client for Dataverse 4 Repositories — dataverse • dataverse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +
    + +
    +
    + + +
    +

    Provides access to Dataverse 4 APIs, enabling data search, retrieval, and deposit.

    +
    + + + +

    Details

    + +

    Dataverse is open-source data repository management software developed by the Institute for Quantitative Social Science at Harvard University. This package provides an R interface to Dataverse version 4 repositories, including the principal Dataverse hosted at Harvard (https://dataverse.harvard.edu/). Users can use the package to search for data stored in a Dataverse repository, retrieve data and other files, and also use the package to directly create and archive their own research data and software.

    +

    A Dataverse is structured as a nested set of “dataverse” repositories, such that a single dataverse can contain “datasets” (a set of code files, data files, etc.) or other dataverses. Thus, users may want to search for dataverses (sets of dataverses and datasets), datasets (sets of files), or individual files, and retrieve those objects accordingly. To retrieve a given file, a user typically needs to know what dataset it is stored in. All datasets are identified by a persistent identifier (such as an DOI or Handle, depending on the age of the dataset and what Dataverse repository it is hosted in).

    +

    This package provides five main sets of functions to interact with Dataverse:

    + + +

    References

    + +

    Dataverse API Documentation

    +

    Dataverse Homepage

    +

    Harvard IQSS Dataverse

    + +
    + +
    + + +
    + + +
    +

    Site built with pkgdown 1.6.1.

    +
    + +
    +
    + + + + + + + + diff --git a/docs/reference/dataverse_metadata.html b/docs/reference/dataverse_metadata.html new file mode 100644 index 0000000..9d88281 --- /dev/null +++ b/docs/reference/dataverse_metadata.html @@ -0,0 +1,226 @@ + + + + + + + + +Dataverse metadata — dataverse_metadata • dataverse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +
    + +
    +
    + + +
    +

    Get metadata for a named Dataverse.

    +
    + +
    dataverse_metadata(
    +  dataverse,
    +  key = Sys.getenv("DATAVERSE_KEY"),
    +  server = Sys.getenv("DATAVERSE_SERVER"),
    +  ...
    +)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + +
    dataverse

    A character string specifying a Dataverse name or an object of class “dataverse”.

    key

    A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +Sys.setenv("DATAVERSE_KEY" = "examplekey").

    server

    A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (server = "dataverse.harvard.edu"). This can be modified atomically +or globally using Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com").

    ...

    Additional arguments passed to an HTTP request function, such as +GET, POST, or +DELETE.

    + +

    Value

    + +

    A list

    +

    Details

    + +

    This function returns a list of metadata for a named Dataverse. Use dataverse_contents to list Dataverses and/or datasets contained within a Dataverse or use dataset_metadata to get metadata for a specific dataset.

    +

    See also

    + + + +

    Examples

    +
    if (FALSE) { +# download file from: +# https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/ARKOTI +monogan <- get_dataverse("monogan") +monogan_data <- dataverse_contents(monogan) +dataverse_metadata(monogan) +} +
    +
    + +
    + + +
    + + +
    +

    Site built with pkgdown 1.6.1.

    +
    + +
    +
    + + + + + + + + diff --git a/docs/reference/dataverse_search.html b/docs/reference/dataverse_search.html new file mode 100644 index 0000000..943ee2a --- /dev/null +++ b/docs/reference/dataverse_search.html @@ -0,0 +1,282 @@ + + + + + + + + +Search Dataverse server — dataverse_search • dataverse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +
    + +
    +
    + + +
    +

    Search for Dataverses and datasets

    +
    + +
    dataverse_search(
    +  ...,
    +  type = c("dataverse", "dataset", "file"),
    +  subtree = NULL,
    +  sort = c("name", "date"),
    +  order = c("asc", "desc"),
    +  per_page = 10,
    +  start = NULL,
    +  show_relevance = FALSE,
    +  show_facets = FALSE,
    +  fq = NULL,
    +  key = Sys.getenv("DATAVERSE_KEY"),
    +  server = Sys.getenv("DATAVERSE_SERVER"),
    +  verbose = TRUE,
    +  http_opts = NULL
    +)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    ...

    A length-one character vector specifying a search query, a named character vector of search arguments, or a sequence of named character arguments. The specific fields available may vary by server installation.

    type

    A character vector specifying one or more of “dataverse”, “dataset”, and “file”, which is used to restrict the search results. By default, all three types of objects are searched for.

    subtree

    Currently ignored.

    sort

    A character vector specifying whether to sort results by “name” or “date”.

    order

    A character vector specifying either “asc” or “desc” results order.

    per_page

    An integer specifying the page size of results.

    start

    An integer specifying used for pagination.

    show_relevance

    A logical indicating whether or not to show details of which fields were matched by the query

    show_facets

    A logical indicating whether or not to show facets that can be operated on by the fq parameter

    fq

    See API documentation.

    key

    A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +Sys.setenv("DATAVERSE_KEY" = "examplekey").

    server

    A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (server = "dataverse.harvard.edu"). This can be modified atomically +or globally using Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com").

    verbose

    A logical indicating whether to display information about the search query (default is TRUE).

    http_opts

    Currently ignored.

    dataverse

    A character string specifying a Dataverse name or an object of class “dataverse”.

    + +

    Value

    + +

    A list.

    +

    Details

    + +

    This function provides an interface for searching for Dataverses, datasets, and/or files within a Dataverse server.

    +

    See also

    + + + +

    Examples

    +
    if (FALSE) { +# simple string search +dataverse_search("Gary King") + +# search using named arguments +dataverse_search(c(author = "Gary King", title = "Ecological Inference")) +dataverse_search(author = "Gary King", title = "Ecological Inference") + +# search only for datasets +dataverse_search(author = "Gary King", type = "dataset") +} +
    +
    + +
    + + +
    + + +
    +

    Site built with pkgdown 1.6.1.

    +
    + +
    +
    + + + + + + + + diff --git a/docs/reference/delete_dataset.html b/docs/reference/delete_dataset.html new file mode 100644 index 0000000..5c726c7 --- /dev/null +++ b/docs/reference/delete_dataset.html @@ -0,0 +1,226 @@ + + + + + + + + +Delete draft dataset — delete_dataset • dataverse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +
    + +
    +
    + + +
    +

    Delete a dataset draft

    +
    + +
    delete_dataset(
    +  dataset,
    +  key = Sys.getenv("DATAVERSE_KEY"),
    +  server = Sys.getenv("DATAVERSE_SERVER"),
    +  ...
    +)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + +
    dataset

    A character specifying a persistent identification ID for a dataset, +for example "doi:10.70122/FK2/HXJVJU". Alternatively, an object of class +“dataverse_dataset” obtained by dataverse_contents().

    key

    A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +Sys.setenv("DATAVERSE_KEY" = "examplekey").

    server

    A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (server = "dataverse.harvard.edu"). This can be modified atomically +or globally using Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com").

    ...

    Additional arguments passed to an HTTP request function, such as +GET, POST, or +DELETE.

    + +

    Value

    + +

    A logical.

    +

    Details

    + +

    This function can be used to delete a draft (unpublished) Dataverse dataset. Once published, a dataset cannot be deleted. An existing draft can instead be modified using update_dataset.

    +

    See also

    + + + +

    Examples

    +
    if (FALSE) { +meta <- list() +ds <- create_dataset("mydataverse", body = meta) +delete_dataset(ds) +} +
    +
    + +
    + + +
    + + +
    +

    Site built with pkgdown 1.6.1.

    +
    + +
    +
    + + + + + + + + diff --git a/docs/reference/delete_dataverse.html b/docs/reference/delete_dataverse.html new file mode 100644 index 0000000..f8ebedf --- /dev/null +++ b/docs/reference/delete_dataverse.html @@ -0,0 +1,223 @@ + + + + + + + + +Delete Dataverse — delete_dataverse • dataverse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +
    + +
    +
    + + +
    +

    Delete a dataverse

    +
    + +
    delete_dataverse(
    +  dataverse,
    +  key = Sys.getenv("DATAVERSE_KEY"),
    +  server = Sys.getenv("DATAVERSE_SERVER"),
    +  ...
    +)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + +
    dataverse

    A character string specifying a Dataverse name or an object of class “dataverse”.

    key

    A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +Sys.setenv("DATAVERSE_KEY" = "examplekey").

    server

    A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (server = "dataverse.harvard.edu"). This can be modified atomically +or globally using Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com").

    ...

    Additional arguments passed to an HTTP request function, such as +GET, POST, or +DELETE.

    + +

    Value

    + +

    A logical.

    +

    Details

    + +

    This function deletes a Dataverse.

    +

    See also

    + +

    To manage Dataverses: create_dataverse, publish_dataverse, dataverse_contents; to get datasets: get_dataset; to search for Dataverses, datasets, or files: dataverse_search

    + +

    Examples

    +
    if (FALSE) { +dv <- create_dataverse("mydataverse") +delete_dataverse(dv) +} +
    +
    + +
    + + +
    + + +
    +

    Site built with pkgdown 1.6.1.

    +
    + +
    +
    + + + + + + + + diff --git a/docs/reference/delete_file.html b/docs/reference/delete_file.html new file mode 100644 index 0000000..580e178 --- /dev/null +++ b/docs/reference/delete_file.html @@ -0,0 +1,243 @@ + + + + + + + + +Delete file (SWORD) — delete_file • dataverse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +
    + +
    +
    + + +
    +

    Delete a file from a SWORD (possibly unpublished) dataset

    +
    + +
    delete_file(
    +  id,
    +  key = Sys.getenv("DATAVERSE_KEY"),
    +  server = Sys.getenv("DATAVERSE_SERVER"),
    +  ...
    +)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + +
    id

    A file ID, possibly returned by add_file, or a complete “edit-media/file” URL.

    key

    A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +Sys.setenv("DATAVERSE_KEY" = "examplekey").

    server

    A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (server = "dataverse.harvard.edu"). This can be modified atomically +or globally using Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com").

    ...

    Additional arguments passed to an HTTP request function, such as +GET, POST, or +DELETE.

    + +

    Value

    + +

    If successful, a logical TRUE, else possibly some information.

    +

    Details

    + +

    This function is used to delete a file from a dataset by its file ID. It is part of the SWORD API, which is used to upload data to a Dataverse server.

    +

    See also

    + +

    Managing a Dataverse: publish_dataverse; Managing a dataset: dataset_atom, list_datasets, create_dataset, delete_dataset, publish_dataset; Managing files within a dataset: add_file, delete_file

    + +

    Examples

    +
    if (FALSE) { +# retrieve your service document +d <- service_document() + +# create a list of metadata +metadat <- list(title = "My Study", + creator = "Doe, John", + description = "An example study") + +# create the dataset +dat <- initiate_sword_dataset("mydataverse", body = metadat) + +# add files to dataset +tmp <- tempfile() +write.csv(iris, file = tmp) +f <- add_file(dat, file = tmp) + +# delete a file +ds <- dataset_statement(dat) +delete_file(ds$files[[1]]$id) + +# delete a dataset +delete_dataset(dat) +} +
    +
    + +
    + + +
    + + +
    +

    Site built with pkgdown 1.6.1.

    +
    + +
    +
    + + + + + + + + diff --git a/docs/reference/delete_sword_dataset.html b/docs/reference/delete_sword_dataset.html new file mode 100644 index 0000000..4b81c64 --- /dev/null +++ b/docs/reference/delete_sword_dataset.html @@ -0,0 +1,234 @@ + + + + + + + + +Delete dataset (SWORD) — delete_sword_dataset • dataverse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +
    + +
    +
    + + +
    +

    Delete a SWORD (possibly unpublished) dataset

    +
    + +
    delete_sword_dataset(
    +  dataset,
    +  key = Sys.getenv("DATAVERSE_KEY"),
    +  server = Sys.getenv("DATAVERSE_SERVER"),
    +  ...
    +)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + +
    dataset

    A dataset DOI (or other persistent identifier).

    key

    A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +Sys.setenv("DATAVERSE_KEY" = "examplekey").

    server

    A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (server = "dataverse.harvard.edu"). This can be modified atomically +or globally using Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com").

    ...

    Additional arguments passed to an HTTP request function, such as +GET, POST, or +DELETE.

    + +

    Value

    + +

    If successful, a logical TRUE, else possibly some information.

    +

    Details

    + +

    This function is used to delete a dataset by its persistent identifier. It is part of the SWORD API, which is used to upload data to a Dataverse server.

    +

    See also

    + +

    Managing a Dataverse: publish_dataverse; Managing a dataset: dataset_atom, list_datasets, create_dataset, publish_dataset; Managing files within a dataset: add_file, delete_file

    + +

    Examples

    +
    if (FALSE) { +# retrieve your service document +d <- service_document() + +# create a list of metadata +metadat <- list(title = "My Study", + creator = "Doe, John", + description = "An example study") + +# create the dataset in first dataverse +dat <- initiate_sword_dataset(d[[2]], body = metadat) + +# delete a dataset +delete_dataset(dat) +} +
    +
    + +
    + + +
    + + +
    +

    Site built with pkgdown 1.6.1.

    +
    + +
    +
    + + + + + + + + diff --git a/docs/reference/files.html b/docs/reference/files.html new file mode 100644 index 0000000..add2fbe --- /dev/null +++ b/docs/reference/files.html @@ -0,0 +1,359 @@ + + + + + + + + +Download File — get_file • dataverse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +
    + +
    +
    + + +
    +

    Download Dataverse File(s). get_file is a general wrapper, +and can take either dataverse objects, file IDs, or a filename and dataverse. +get_file_by_name is a shorthand for running get_file by +specifying a file name (filename) and dataset (dataset). +get_file_by_doi obtains a file by its file DOI, bypassing the +dataset argument.

    +

    Internally, all functions download each file by get_file_by_id. get_file_* +functions return a raw binary file, which cannot be readily analyzed in R. +To use the objects as dataframes, see the get_dataset_* functions at get_dataset

    +
    + +
    get_file(
    +  file,
    +  dataset = NULL,
    +  format = c("original", "bundle"),
    +  vars = NULL,
    +  key = Sys.getenv("DATAVERSE_KEY"),
    +  server = Sys.getenv("DATAVERSE_SERVER"),
    +  original = TRUE,
    +  ...
    +)
    +
    +get_file_by_name(
    +  filename,
    +  dataset,
    +  format = c("original", "bundle"),
    +  vars = NULL,
    +  key = Sys.getenv("DATAVERSE_KEY"),
    +  server = Sys.getenv("DATAVERSE_SERVER"),
    +  original = TRUE,
    +  ...
    +)
    +
    +get_file_by_id(
    +  fileid,
    +  dataset = NULL,
    +  format = c("original", "bundle"),
    +  vars = NULL,
    +  original = TRUE,
    +  key = Sys.getenv("DATAVERSE_KEY"),
    +  server = Sys.getenv("DATAVERSE_SERVER"),
    +  ...
    +)
    +
    +get_file_by_doi(
    +  filedoi,
    +  dataset = NULL,
    +  format = c("original", "bundle"),
    +  vars = NULL,
    +  original = TRUE,
    +  key = Sys.getenv("DATAVERSE_KEY"),
    +  server = Sys.getenv("DATAVERSE_SERVER"),
    +  ...
    +)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    file

    An integer specifying a file identifier; or a vector of integers +specifying file identifiers; or, if used with the prefix "doi:", a +character with the file-specific DOI; or, if used without the prefix, a +filename accompanied by a dataset DOI in the dataset argument, or an object of +class “dataverse_file” as returned by dataset_files.

    dataset

    A character specifying a persistent identification ID for a dataset, +for example "doi:10.70122/FK2/HXJVJU". Alternatively, an object of class +“dataverse_dataset” obtained by dataverse_contents().

    format

    A character string specifying a file format for download. +by default, this is “original” (the original file format). If NULL, +no query is added, so ingested files are returned in their ingested TSV form. +For tabular datasets, the option “bundle” downloads the bundle +of the original and archival versions, as well as the documentation. +See https://guides.dataverse.org/en/latest/api/dataaccess.html for details.

    vars

    A character vector specifying one or more variable names, used to +extract a subset of the data.

    key

    A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +Sys.setenv("DATAVERSE_KEY" = "examplekey").

    server

    A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (server = "dataverse.harvard.edu"). This can be modified atomically +or globally using Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com").

    original

    A logical, defaulting to TRUE. If a ingested (.tab) version is +available, download the original version instead of the ingested? If there was +no ingested version, is set to NA. Note in get_dataframe_*, +original is set to FALSE by default. Either can be changed.

    ...

    Additional arguments passed to an HTTP request function, such as +GET, POST, or +DELETE.

    filename

    Filename of the dataset, with file extension as shown in Dataverse +(for example, if nlsw88.dta was the original but is displayed as the ingested +nlsw88.tab, use the ingested version.)

    fileid

    A numeric ID internally used for get_file_by_id

    filedoi

    A DOI for a single file (not the entire dataset), of the form +"10.70122/FK2/PPIAXE/MHDB0O" or "doi:10.70122/FK2/PPIAXE/MHDB0O"

    + +

    Value

    + +

    get_file returns a raw vector (or list of raw vectors, +if length(file) > 1), which can be saved locally with the writeBin +function. To load datasets into the R environment dataframe, see +get_dataframe_by_name.

    +

    Details

    + +

    This function provides access to data files from a Dataverse entry.

    +

    See also

    + +

    To load the objects as datasets get_dataframe_by_name.

    + +

    Examples

    +
    if (FALSE) { + +# 1. Using filename and dataverse +f1 <- get_file_by_name( + filename = "nlsw88.tab", + dataset = "10.70122/FK2/PPIAXE", + server = "demo.dataverse.org" +) + +# 2. Using file DOI +f2 <- get_file_by_doi( + filedoi = "10.70122/FK2/PPIAXE/MHDB0O", + server = "demo.dataverse.org" +) + +# 3. Two-steps: Find ID from get_dataset +d3 <- get_dataset("doi:10.70122/FK2/PPIAXE", server = "demo.dataverse.org") +f3 <- get_file(d3$files$id[1], server = "demo.dataverse.org") + +# 4. Retrieve multiple raw data in list +f4_vec <- get_dataset( + "doi:10.70122/FK2/PPIAXE", + server = "demo.dataverse.org" +)$files$id + +f4 <- get_file(f4_vec, server = "demo.dataverse.org") +length(f4) + +# Write binary files +# (see `get_dataframe_by_name` to load in environment) +# The appropriate file extension needs to be assigned by the user. +writeBin(f1, "nlsw88.dta") +writeBin(f2, "nlsw88.dta") + +writeBin(f4[[1]], "nlsw88.rds") # originally a rds file +writeBin(f4[[2]], "nlsw88.dta") # originally a dta file +} + +
    +
    + +
    + + +
    + + +
    +

    Site built with pkgdown 1.6.1.

    +
    + +
    +
    + + + + + + + + diff --git a/docs/reference/get_dataframe.html b/docs/reference/get_dataframe.html new file mode 100644 index 0000000..2703a77 --- /dev/null +++ b/docs/reference/get_dataframe.html @@ -0,0 +1,348 @@ + + + + + + + + +Get file from dataverse and convert it into a dataframe or tibble — get_dataframe_by_name • dataverse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +
    + +
    +
    + + +
    +

    get_dataframe_by_id, if you know the numeric ID of the dataset, or instead +get_dataframe_by_name if you know the filename and doi. The dataset

    +
    + +
    get_dataframe_by_name(
    +  filename,
    +  dataset = NULL,
    +  .f = NULL,
    +  original = FALSE,
    +  ...
    +)
    +
    +get_dataframe_by_id(fileid, .f = NULL, original = FALSE, ...)
    +
    +get_dataframe_by_doi(filedoi, .f = NULL, original = FALSE, ...)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    filename

    The name of the file of interest, with file extension, for example +"roster-bulls-1996.tab".

    dataset

    A character specifying a persistent identification ID for a dataset, +for example "doi:10.70122/FK2/HXJVJU". Alternatively, an object of class +“dataverse_dataset” obtained by dataverse_contents().

    .f

    The function to used for reading in the raw dataset. This user +must choose the appropriate function: for example if the target is a .rds +file, then .f should be readRDS or readr::read_rds`.

    original

    A logical, defaulting to TRUE. Whether to read the ingested, +archival version of the dataset if one exists. The archival versions are tab-delimited +.tab files so if original = FALSE, .f is set to readr::read_tsv. +If functions to read the original version is available, then original = TRUE +with a specified .f is better.

    ...

    Arguments passed on to get_file

    +
    file

    An integer specifying a file identifier; or a vector of integers +specifying file identifiers; or, if used with the prefix "doi:", a +character with the file-specific DOI; or, if used without the prefix, a +filename accompanied by a dataset DOI in the dataset argument, or an object of +class “dataverse_file” as returned by dataset_files.

    +
    format

    A character string specifying a file format for download. +by default, this is “original” (the original file format). If NULL, +no query is added, so ingested files are returned in their ingested TSV form. +For tabular datasets, the option “bundle” downloads the bundle +of the original and archival versions, as well as the documentation. +See https://guides.dataverse.org/en/latest/api/dataaccess.html for details.

    +
    vars

    A character vector specifying one or more variable names, used to +extract a subset of the data.

    +
    key

    A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +Sys.setenv("DATAVERSE_KEY" = "examplekey").

    +
    server

    A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (server = "dataverse.harvard.edu"). This can be modified atomically +or globally using Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com").

    + +
    fileid

    A numeric ID internally used for get_file_by_id

    filedoi

    A DOI for a single file (not the entire dataset), of the form +"10.70122/FK2/PPIAXE/MHDB0O" or "doi:10.70122/FK2/PPIAXE/MHDB0O"

    + + +

    Examples

    +
    +# Retrieve data.frame from dataverse DOI and file name +df_from_rds_ingested <- + get_dataframe_by_name( + filename = "roster-bulls-1996.tab", + dataset = "doi:10.70122/FK2/HXJVJU", + server = "demo.dataverse.org" + ) +
    #> Downloading ingested version of data with readr::read_tsv. To download the original version and remove this message, set original = TRUE.
    #> +#> ── Column specification ──────────────────────────────────────────────────────── +#> cols( +#> number = col_double(), +#> player = col_character(), +#> position = col_character(), +#> height = col_character(), +#> weight = col_double(), +#> dob = col_character(), +#> country_birth = col_character(), +#> experience_years = col_double(), +#> college = col_character() +#> )
    +# Retrieve the same data.frame from dataverse + file DOI +df_from_rds_ingested_by_doi <- + get_dataframe_by_doi( + filedoi = "10.70122/FK2/HXJVJU/SA3Z2V", + server = "demo.dataverse.org" + ) +
    #> Downloading ingested version of data with readr::read_tsv. To download the original version and remove this message, set original = TRUE.
    #> +#> ── Column specification ──────────────────────────────────────────────────────── +#> cols( +#> number = col_double(), +#> player = col_character(), +#> position = col_character(), +#> height = col_character(), +#> weight = col_double(), +#> dob = col_character(), +#> country_birth = col_character(), +#> experience_years = col_double(), +#> college = col_character() +#> )
    +# Retrieve ingested file originally a Stata dta +df_from_stata_ingested <- + get_dataframe_by_name( + filename = "nlsw88.tab", + dataset = "doi:10.70122/FK2/PPIAXE", + server = "demo.dataverse.org" + ) +
    #> Downloading ingested version of data with readr::read_tsv. To download the original version and remove this message, set original = TRUE.
    #> +#> ── Column specification ──────────────────────────────────────────────────────── +#> cols( +#> idcode = col_double(), +#> age = col_double(), +#> race = col_double(), +#> married = col_double(), +#> never_married = col_double(), +#> grade = col_double(), +#> collgrad = col_double(), +#> south = col_double(), +#> smsa = col_double(), +#> c_city = col_double(), +#> industry = col_double(), +#> occupation = col_double(), +#> union = col_double(), +#> wage = col_double(), +#> hours = col_double(), +#> ttl_exp = col_double(), +#> tenure = col_double() +#> )
    + +# To use the original file version, or for non-ingested data, +# please specify `original = TRUE` and specify a function in .f. + +# A data.frame is still returned, but the +if (requireNamespace("readr", quietly = TRUE)) { + df_from_rds_original <- + get_dataframe_by_name( + filename = "nlsw88_rds-export.rds", + dataset = "doi:10.70122/FK2/PPIAXE", + server = "demo.dataverse.org", + original = TRUE, + .f = readr::read_rds + ) +} + +if (requireNamespace("haven", quietly = TRUE)) { + df_from_stata_original <- + get_dataframe_by_name( + filename = "nlsw88.tab", + dataset = "doi:10.70122/FK2/PPIAXE", + server = "demo.dataverse.org", + original = TRUE, + .f = haven::read_dta + ) +} +
    +
    + +
    + + +
    + + +
    +

    Site built with pkgdown 1.6.1.

    +
    + +
    +
    + + + + + + + + diff --git a/docs/reference/get_dataframe_internal.html b/docs/reference/get_dataframe_internal.html new file mode 100644 index 0000000..e449bdc --- /dev/null +++ b/docs/reference/get_dataframe_internal.html @@ -0,0 +1,175 @@ + + + + + + + + +Write to temp and apply function — get_dataframe_internal • dataverse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +
    + +
    +
    + + +
    +

    Write to temp and apply function

    +
    + +
    get_dataframe_internal(raw, filename, .f)
    + + + +
    + +
    + + +
    + + +
    +

    Site built with pkgdown 1.6.1.

    +
    + +
    +
    + + + + + + + + diff --git a/docs/reference/get_dataset.html b/docs/reference/get_dataset.html new file mode 100644 index 0000000..8907f9f --- /dev/null +++ b/docs/reference/get_dataset.html @@ -0,0 +1,278 @@ + + + + + + + + +Get dataset — get_dataset • dataverse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +
    + +
    +
    + + +
    +

    Retrieve a Dataverse dataset or its metadata

    +
    + +
    get_dataset(
    +  dataset,
    +  version = ":latest",
    +  key = Sys.getenv("DATAVERSE_KEY"),
    +  server = Sys.getenv("DATAVERSE_SERVER"),
    +  ...
    +)
    +
    +dataset_metadata(
    +  dataset,
    +  version = ":latest",
    +  block = "citation",
    +  key = Sys.getenv("DATAVERSE_KEY"),
    +  server = Sys.getenv("DATAVERSE_SERVER"),
    +  ...
    +)
    +
    +dataset_files(
    +  dataset,
    +  version = ":latest",
    +  key = Sys.getenv("DATAVERSE_KEY"),
    +  server = Sys.getenv("DATAVERSE_SERVER"),
    +  ...
    +)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + + + + + + + + + +
    dataset

    A character specifying a persistent identification ID for a dataset, +for example "doi:10.70122/FK2/HXJVJU". Alternatively, an object of class +“dataverse_dataset” obtained by dataverse_contents().

    version

    A character string specifying a version of the dataset. This can be one of “:draft” (the current draft), “:latest” (the latest draft, if it exists, or the latest published version), “:latest-published” (the latest published version, ignoring any draft), or “x.y” (where x is a major version and y is a minor version; the .y can be omitted to obtain a major version). In lieu of this, a dataset's version-specific identification number can be used for the dataset argument.

    key

    A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +Sys.setenv("DATAVERSE_KEY" = "examplekey").

    server

    A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (server = "dataverse.harvard.edu"). This can be modified atomically +or globally using Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com").

    ...

    Additional arguments passed to an HTTP request function, such as +GET, POST, or +DELETE.

    block

    A character string specifying a metadata block to retrieve. By default this is “citation”. Other values may be available, depending on the dataset, such as “geospatial” or “socialscience”.

    + +

    Value

    + +

    A list of class “dataverse_dataset” or a list of a form dependent on the specific metadata block retrieved. dataset_files returns a list of objects of class “dataverse_file”.

    +

    Details

    + +

    get_dataset retrieves details about a Dataverse dataset.

    +

    dataset_metadata returns a named metadata block for a dataset. +This is already returned by get_dataset, but this function allows +you to retrieve just a specific block of metadata, such as citation information.

    +

    dataset_files returns a list of files in a dataset, similar to +get_dataset. The difference is that this returns only a list of +“dataverse_dataset” objects, whereas get_dataset returns +metadata and a data.frame of files (rather than a list of file objects).

    +

    See also

    + +

    create_dataset, update_dataset, delete_dataset, publish_dataset, dataset_files, dataset_metadata

    + +

    Examples

    +
    if (FALSE) { +Sys.setenv("DATAVERSE_SERVER" = "demo.dataverse.org") +Sys.setenv("DATAVERSE_KEY" = "c7208dd2-6ec5-469a-bec5-f57e164888d4") + +# Download file from: https://demo.dataverse.org/file.xhtml?fileId=769385 +dv <- get_dataverse("dataverse-client-r") +contents <- dataverse_contents(dv) + +dataset_files(contents[[1]]) # Dataset contains 2 files +dataset_metadata(contents[[1]]) # Easier to query later + +set <- get_dataset(contents[[1]]) # 1st dataset w/n dataverse +f <- get_file(set$files$id[2]) # 2nd file w/n dataset + +# Check the *binary* representation of the file. +length(f) +head(f) + +# Examine the plain-text representation. +tmp <- tempfile(fileext = "svg") +writeBin(as.vector(f), tmp) +svg_lines <- readLines(tmp) +head(svg_lines) +} +
    +
    + +
    + + +
    + + +
    +

    Site built with pkgdown 1.6.1.

    +
    + +
    +
    + + + + + + + + diff --git a/docs/reference/get_dataverse.html b/docs/reference/get_dataverse.html new file mode 100644 index 0000000..74a8ae7 --- /dev/null +++ b/docs/reference/get_dataverse.html @@ -0,0 +1,254 @@ + + + + + + + + +Get Dataverse — get_dataverse • dataverse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +
    + +
    +
    + + +
    +

    Retrieve details of a Dataverse

    +
    + +
    get_dataverse(
    +  dataverse,
    +  key = Sys.getenv("DATAVERSE_KEY"),
    +  server = Sys.getenv("DATAVERSE_SERVER"),
    +  check = TRUE,
    +  ...
    +)
    +
    +dataverse_contents(
    +  dataverse,
    +  key = Sys.getenv("DATAVERSE_KEY"),
    +  server = Sys.getenv("DATAVERSE_SERVER"),
    +  ...
    +)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + + + + + +
    dataverse

    A character string specifying a Dataverse name or an object of class “dataverse”.

    key

    A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +Sys.setenv("DATAVERSE_KEY" = "examplekey").

    server

    A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (server = "dataverse.harvard.edu"). This can be modified atomically +or globally using Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com").

    check

    A logical indicating whether to check that the value of dataverse is actually a numeric

    ...

    Additional arguments passed to an HTTP request function, such as +GET, POST, or +DELETE.

    + +

    Value

    + +

    A list of class “dataverse”.

    +

    Details

    + +

    get_dataverse function retrieves basic information about a Dataverse from a Dataverse server. To see the contents of the Dataverse, use dataverse_contents instead. Contents might include one or more “datasets” and/or further Dataverses that themselves contain Dataverses and/or datasets. To view the file contents of a single Dataset, use get_dataset.

    +

    See also

    + +

    To manage Dataverses: +create_dataverse, +delete_dataverse, +publish_dataverse, +dataverse_contents;

    +

    To get datasets: +get_dataset;

    +

    To search for Dataverses, datasets, or files: +dataverse_search

    + +

    Examples

    +
    if (FALSE) { +# view the root dataverse for a server +get_dataverse(":root") +dataverse_contents(":root") + +Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu") +# download file from: +# https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/ARKOTI +dv <- get_dataverse("monogan") +(contents <- dataverse_contents(dv)) + +# get a dataset from the dataverse +d1 <- get_dataset(contents[[1]]) +f <- get_file(d1$files$id[3]) +} +
    +
    + +
    + + +
    + + +
    +

    Site built with pkgdown 1.6.1.

    +
    + +
    +
    + + + + + + + + diff --git a/docs/reference/get_facets.html b/docs/reference/get_facets.html new file mode 100644 index 0000000..694ae54 --- /dev/null +++ b/docs/reference/get_facets.html @@ -0,0 +1,228 @@ + + + + + + + + +Get Dataverse facets — get_facets • dataverse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +
    + +
    +
    + + +
    +

    Dataverse metadata facets

    +
    + +
    get_facets(
    +  dataverse,
    +  key = Sys.getenv("DATAVERSE_KEY"),
    +  server = Sys.getenv("DATAVERSE_SERVER"),
    +  ...
    +)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + +
    dataverse

    A character string specifying a Dataverse name or an object of class “dataverse”.

    key

    A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +Sys.setenv("DATAVERSE_KEY" = "examplekey").

    server

    A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (server = "dataverse.harvard.edu"). This can be modified atomically +or globally using Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com").

    ...

    Additional arguments passed to an HTTP request function, such as +GET, POST, or +DELETE.

    + +

    Value

    + +

    A list.

    +

    Details

    + +

    Retrieve a list of Dataverse metadata facets.

    +

    See also

    + +

    To manage Dataverses: create_dataverse, delete_dataverse, publish_dataverse, dataverse_contents; to get datasets: get_dataset; to search for Dataverses, datasets, or files: dataverse_search

    + +

    Examples

    +
    if (FALSE) { +# download file from: +# https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/ARKOTI +monogan <- get_dataverse("monogan") +(monogan_data <- dataverse_contents(monogan)) + +# get facets +get_facets(monogan) +} +
    +
    + +
    + + +
    + + +
    +

    Site built with pkgdown 1.6.1.

    +
    + +
    +
    + + + + + + + + diff --git a/docs/reference/get_file_metadata.html b/docs/reference/get_file_metadata.html new file mode 100644 index 0000000..9eaf7d6 --- /dev/null +++ b/docs/reference/get_file_metadata.html @@ -0,0 +1,228 @@ + + + + + + + + +Retrieve a ddi metadata file — get_file_metadata • dataverse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +
    + +
    +
    + + +
    +

    Retrieve a ddi metadata file

    +
    + +
    get_file_metadata(
    +  file,
    +  dataset = NULL,
    +  format = c("ddi", "preprocessed"),
    +  key = Sys.getenv("DATAVERSE_KEY"),
    +  server = Sys.getenv("DATAVERSE_SERVER"),
    +  ...
    +)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + + + + + + + + + +
    file

    An integer specifying a file identifier; or a vector of integers +specifying file identifiers; or, if used with the prefix "doi:", a +character with the file-specific DOI; or, if used without the prefix, a +filename accompanied by a dataset DOI in the dataset argument, or an object of +class “dataverse_file” as returned by dataset_files.

    dataset

    A character specifying a persistent identification ID for a dataset, +for example "doi:10.70122/FK2/HXJVJU". Alternatively, an object of class +“dataverse_dataset” obtained by dataverse_contents().

    format

    Defaults to “ddi” for metadata files

    key

    A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +Sys.setenv("DATAVERSE_KEY" = "examplekey").

    server

    A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (server = "dataverse.harvard.edu"). This can be modified atomically +or globally using Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com").

    ...

    Additional arguments passed to an HTTP request function, such as +GET, POST, or +DELETE.

    + +

    Value

    + +

    A character vector containing a DDI +metadata file.

    + +
    + +
    + + +
    + + +
    +

    Site built with pkgdown 1.6.1.

    +
    + +
    +
    + + + + + + + + diff --git a/docs/reference/get_user_key.html b/docs/reference/get_user_key.html new file mode 100644 index 0000000..c665dca --- /dev/null +++ b/docs/reference/get_user_key.html @@ -0,0 +1,208 @@ + + + + + + + + +Get API Key — get_user_key • dataverse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +
    + +
    +
    + + +
    +

    Get a user's API key

    +
    + +
    get_user_key(user, password, server = Sys.getenv("DATAVERSE_SERVER"), ...)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + +
    user

    A character vector specifying a Dataverse server username.

    password

    A character vector specifying the password for this user.

    server

    A character string specifying a Dataverse server. There are multiple Dataverse installations, but the defaults is to use the Harvard Dataverse. This can be modified atomically or globally using Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com").

    ...

    Additional arguments passed to an HTTP request function, such as +GET, POST, or +DELETE.

    + +

    Value

    + +

    A list.

    +

    Details

    + +

    Use a Dataverse server's username and password login to obtain an API key for the user. This can be used if one does not yet have an API key, or desires to reset the key. This function does not require an API key argument to authenticate, but server must still be specified.

    + +

    Examples

    +
    if (FALSE) { +get_user_key("username", "password") +} +
    +
    + +
    + + +
    + + +
    +

    Site built with pkgdown 1.6.1.

    +
    + +
    +
    + + + + + + + + diff --git a/docs/reference/index.html b/docs/reference/index.html new file mode 100644 index 0000000..af54628 --- /dev/null +++ b/docs/reference/index.html @@ -0,0 +1,395 @@ + + + + + + + + +Function reference • dataverse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +
    + +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +

    Retrieve

    +

    +
    +

    get_file() get_file_by_name() get_file_by_id() get_file_by_doi()

    +

    Download File

    +

    get_dataframe_by_name() get_dataframe_by_id() get_dataframe_by_doi()

    +

    Get file from dataverse and convert it into a dataframe or tibble

    +

    get_dataset() dataset_metadata() dataset_files()

    +

    Get dataset

    +

    get_dataverse() dataverse_contents()

    +

    Get Dataverse

    +

    get_facets()

    +

    Get Dataverse facets

    +

    get_file_metadata()

    +

    Retrieve a ddi metadata file

    +

    get_user_key()

    +

    Get API Key

    +

    Create, Add, & Publish

    +

    +
    +

    create_dataset() update_dataset()

    +

    Create or update a dataset

    +

    create_dataverse()

    +

    Create Dataverse

    +

    add_dataset_file() update_dataset_file()

    +

    Add or update a file in a dataset

    +

    add_file()

    +

    Add file (SWORD)

    +

    publish_dataset()

    +

    Publish dataset

    +

    publish_dataverse()

    +

    Publish Dataverse (SWORD)

    +

    publish_sword_dataset()

    +

    Publish dataset (SWORD)

    +

    Delete

    +

    +
    +

    delete_dataset()

    +

    Delete draft dataset

    +

    delete_dataverse()

    +

    Delete Dataverse

    +

    delete_file()

    +

    Delete file (SWORD)

    +

    delete_sword_dataset()

    +

    Delete dataset (SWORD)

    +

    Other

    +

    +
    +

    dataset_atom() dataset_statement()

    +

    View dataset (SWORD)

    +

    dataset_versions()

    +

    Dataset versions

    +

    dataverse

    +

    Client for Dataverse 4 Repositories

    +

    dataverse_metadata()

    +

    Dataverse metadata

    +

    dataverse_search()

    +

    Search Dataverse server

    +

    initiate_sword_dataset()

    +

    Initiate dataset (SWORD)

    +

    is_ingested()

    +

    Identify if file is an ingested file

    +

    list_datasets()

    +

    List datasets (SWORD)

    +

    service_document()

    +

    SWORD Service Document

    +

    set_dataverse_metadata()

    +

    Set Dataverse metadata

    +
    + + +
    + + +
    + + +
    +

    Site built with pkgdown 1.6.1.

    +
    + +
    +
    + + + + + + + + diff --git a/docs/reference/initiate_sword_dataset.html b/docs/reference/initiate_sword_dataset.html new file mode 100644 index 0000000..77a6ea5 --- /dev/null +++ b/docs/reference/initiate_sword_dataset.html @@ -0,0 +1,266 @@ + + + + + + + + +Initiate dataset (SWORD) — initiate_sword_dataset • dataverse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +
    + +
    +
    + + +
    +

    Initiate a SWORD (possibly unpublished) dataset

    +
    + +
    initiate_sword_dataset(
    +  dataverse,
    +  body,
    +  key = Sys.getenv("DATAVERSE_KEY"),
    +  server = Sys.getenv("DATAVERSE_SERVER"),
    +  ...
    +)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + + + + + +
    dataverse

    A Dataverse alias or ID number, or an object of class “dataverse”, perhaps as returned by service_document.

    body

    A list containing one or more metadata fields. Field names must be valid Dublin Core Terms labels (see details, below). The title, description, and creator fields are required.

    key

    A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +Sys.setenv("DATAVERSE_KEY" = "examplekey").

    server

    A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (server = "dataverse.harvard.edu"). This can be modified atomically +or globally using Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com").

    ...

    Additional arguments passed to an HTTP request function, such as +GET, POST, or +DELETE.

    + +

    Value

    + +

    An object of class “dataset_atom”.

    +

    Details

    + +

    This function is used to initiate a dataset in a (SWORD) Dataverse by supplying relevant metadata. The function is part of the SWORD API (see Atom entry specification), which is used to upload data to a Dataverse server. +Allowed fields are: +“abstract”, “accessRights”, “accrualMethod”, +“accrualPeriodicity”, “accrualPolicy”, “alternative”, +“audience”, “available”, “bibliographicCitation”, +“conformsTo”, “contributor”, “coverage”, “created”, +“creator”, “date”, “dateAccepted”, “dateCopyrighted”, +“dateSubmitted”, “description”, “educationLevel”, “extent”, +“format”, “hasFormat”, “hasPart”, “hasVersion”, +“identifier”, “instructionalMethod”, “isFormatOf”, +“isPartOf”, “isReferencedBy”, “isReplacedBy”, “isRequiredBy”, +“issued”, “isVersionOf”, “language”, “license”, +“mediator”, “medium”, “modified”, “provenance”, +“publisher”, “references”, “relation”, “replaces”, +“requires”, “rights”, “rightsHolder”, “source”, +“spatial”, “subject”, “tableOfContents”, “temporal”, +“title”, “type”, and “valid”.

    +

    Note

    + +

    There are two ways to create dataset: native API (create_dataset) and SWORD API (initiate_sword_dataset).

    +

    References

    + +

    Dublin Core Metadata Terms

    +

    See also

    + +

    Managing a Dataverse: publish_dataverse; Managing a dataset: dataset_atom, list_datasets, create_dataset, delete_sword_dataset, publish_dataset; Managing files within a dataset: add_file, delete_file

    + +

    Examples

    +
    if (FALSE) { +# retrieve your service document (dataverse list) +d <- service_document() + +# create a list of metadata +metadat <- list(title = "My Study", + creator = "Doe, John", + description = "An example study") + +# create the dataset in first dataverse +dat <- initiate_sword_dataset(d[[2]], body = metadat) + +# add files to dataset +tmp <- tempfile(fileext = ".csv") +write.csv(iris, file = tmp) +add_file(dat, file = tmp) + +# publish dataset +publish_dataset(dat) +} +
    +
    + +
    + + +
    + + +
    +

    Site built with pkgdown 1.6.1.

    +
    + +
    +
    + + + + + + + + diff --git a/docs/reference/is_ingested.html b/docs/reference/is_ingested.html new file mode 100644 index 0000000..1ff70da --- /dev/null +++ b/docs/reference/is_ingested.html @@ -0,0 +1,201 @@ + + + + + + + + +Identify if file is an ingested file — is_ingested • dataverse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +
    + +
    +
    + + +
    +

    Identify if file is an ingested file

    +
    + +
    is_ingested(
    +  fileid,
    +  key = Sys.getenv("DATAVERSE_KEY"),
    +  server = Sys.getenv("DATAVERSE_SERVER")
    +)
    + +

    Arguments

    + + + + + + + + + + + + + + +
    fileid

    A numeric fileid or file-specific DOI

    key

    A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +Sys.setenv("DATAVERSE_KEY" = "examplekey").

    server

    A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (server = "dataverse.harvard.edu"). This can be modified atomically +or globally using Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com").

    + + +
    + +
    + + +
    + + +
    +

    Site built with pkgdown 1.6.1.

    +
    + +
    +
    + + + + + + + + diff --git a/docs/reference/list_datasets.html b/docs/reference/list_datasets.html new file mode 100644 index 0000000..22424db --- /dev/null +++ b/docs/reference/list_datasets.html @@ -0,0 +1,225 @@ + + + + + + + + +List datasets (SWORD) — list_datasets • dataverse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +
    + +
    +
    + + +
    +

    List datasets in a SWORD (possibly unpublished) Dataverse

    +
    + +
    list_datasets(
    +  dataverse,
    +  key = Sys.getenv("DATAVERSE_KEY"),
    +  server = Sys.getenv("DATAVERSE_SERVER"),
    +  ...
    +)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + +
    dataverse

    A Dataverse alias or ID number, or an object of class “dataverse”, perhaps as returned by service_document.

    key

    A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +Sys.setenv("DATAVERSE_KEY" = "examplekey").

    server

    A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (server = "dataverse.harvard.edu"). This can be modified atomically +or globally using Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com").

    ...

    Additional arguments passed to an HTTP request function, such as +GET, POST, or +DELETE.

    + +

    Value

    + +

    A list.

    +

    Details

    + +

    This function is used to list datasets in a given Dataverse. It is part of the SWORD API, which is used to upload data to a Dataverse server. This means this can be used to view unpublished Dataverses and Datasets.

    +

    See also

    + +

    Managing a Dataverse: publish_dataverse; Managing a dataset: dataset_atom, list_datasets, create_dataset, delete_dataset, publish_dataset; Managing files within a dataset: add_file, delete_file

    + +

    Examples

    +
    if (FALSE) { +Sys.setenv("DATAVERSE_SERVER" = "demo.dataverse.org") +Sys.setenv("DATAVERSE_KEY" = "c7208dd2-6ec5-469a-bec5-f57e164888d4") +dv <- get_dataverse("dataverse-client-r") +list_datasets(dv) +} +
    +
    + +
    + + +
    + + +
    +

    Site built with pkgdown 1.6.1.

    +
    + +
    +
    + + + + + + + + diff --git a/docs/reference/publish_dataset.html b/docs/reference/publish_dataset.html new file mode 100644 index 0000000..aa139bc --- /dev/null +++ b/docs/reference/publish_dataset.html @@ -0,0 +1,232 @@ + + + + + + + + +Publish dataset — publish_dataset • dataverse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +
    + +
    +
    + + +
    +

    Publish/release Dataverse dataset

    +
    + +
    publish_dataset(
    +  dataset,
    +  minor = TRUE,
    +  key = Sys.getenv("DATAVERSE_KEY"),
    +  server = Sys.getenv("DATAVERSE_SERVER"),
    +  ...
    +)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + + + + + +
    dataset

    A character specifying a persistent identification ID for a dataset, +for example "doi:10.70122/FK2/HXJVJU". Alternatively, an object of class +“dataverse_dataset” obtained by dataverse_contents().

    minor

    A logical specifying whether the new release of the dataset is a “minor” release (TRUE, by default), resulting in a minor version increase (e.g., from 1.1 to 1.2). If FALSE, the dataset is given a “major” release (e.g., from 1.1 to 2.0).

    key

    A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +Sys.setenv("DATAVERSE_KEY" = "examplekey").

    server

    A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (server = "dataverse.harvard.edu"). This can be modified atomically +or globally using Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com").

    ...

    Additional arguments passed to an HTTP request function, such as +GET, POST, or +DELETE.

    + +

    Value

    + +

    A list.

    +

    Details

    + +

    Use this function to “publish” (i.e., publicly release) a draft Dataverse dataset. This creates a publicly visible listing of the dataset, accessible by its DOI, with a numbered version. This action cannot be undone. +There are no requirements for what constitutes a major or minor release, but a minor release might be used to update metadata (e.g., a new linked publication) or the addition of supplemental files. A major release is best used to reflect a substantial change to the dataset, such as would require a published erratum or a substantial change to data or code.

    +

    See also

    + + + +

    Examples

    +
    if (FALSE) { +meta <- list() +ds <- create_dataset("mydataverse", body = meta) +publish_dataset(ds) +} +
    +
    + +
    + + +
    + + +
    +

    Site built with pkgdown 1.6.1.

    +
    + +
    +
    + + + + + + + + diff --git a/docs/reference/publish_dataverse.html b/docs/reference/publish_dataverse.html new file mode 100644 index 0000000..5d1bd92 --- /dev/null +++ b/docs/reference/publish_dataverse.html @@ -0,0 +1,217 @@ + + + + + + + + +Publish Dataverse (SWORD) — publish_dataverse • dataverse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +
    + +
    +
    + + +
    +

    Publish/re-publish a Dataverse via SWORD

    +
    + +
    publish_dataverse(
    +  dataverse,
    +  key = Sys.getenv("DATAVERSE_KEY"),
    +  server = Sys.getenv("DATAVERSE_SERVER"),
    +  ...
    +)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + +
    dataverse

    An object of class “sword_collection”, as returned by service_document.

    key

    A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +Sys.setenv("DATAVERSE_KEY" = "examplekey").

    server

    A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (server = "dataverse.harvard.edu"). This can be modified atomically +or globally using Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com").

    ...

    Additional arguments passed to an HTTP request function, such as +GET, POST, or +DELETE.

    + +

    Value

    + +

    A list.

    +

    Details

    + +

    This function is used to publish a (possibly already published) Dataverse. It is part of the SWORD API, which is used to upload data to a Dataverse server.

    +

    See also

    + +

    Managing a Dataverse: publish_dataverse; Managing a dataset: dataset_atom, list_datasets, create_dataset, delete_dataset, publish_dataset; Managing files within a dataset: add_file, delete_file

    + +
    + +
    + + +
    + + +
    +

    Site built with pkgdown 1.6.1.

    +
    + +
    +
    + + + + + + + + diff --git a/docs/reference/publish_sword_dataset.html b/docs/reference/publish_sword_dataset.html new file mode 100644 index 0000000..194f299 --- /dev/null +++ b/docs/reference/publish_sword_dataset.html @@ -0,0 +1,237 @@ + + + + + + + + +Publish dataset (SWORD) — publish_sword_dataset • dataverse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +
    + +
    +
    + + +
    +

    Publish a SWORD (possibly unpublished) dataset

    +
    + +
    publish_sword_dataset(
    +  dataset,
    +  key = Sys.getenv("DATAVERSE_KEY"),
    +  server = Sys.getenv("DATAVERSE_SERVER"),
    +  ...
    +)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + +
    dataset

    A dataset DOI (or other persistent identifier), an object of class “dataset_atom” or “dataset_statement”, or an appropriate and complete SWORD URL.

    key

    A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +Sys.setenv("DATAVERSE_KEY" = "examplekey").

    server

    A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (server = "dataverse.harvard.edu"). This can be modified atomically +or globally using Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com").

    ...

    Additional arguments passed to an HTTP request function, such as +GET, POST, or +DELETE.

    + +

    Value

    + +

    A list.

    +

    Details

    + +

    This function is used to publish a dataset by its persistent identifier. This cannot be undone. The function is part of the SWORD API, which is used to upload data to a Dataverse server.

    +

    See also

    + +

    Managing a Dataverse: publish_dataverse; Managing a dataset: dataset_atom, list_datasets, create_dataset, delete_sword_dataset, publish_dataset; Managing files within a dataset: add_file, delete_file

    + +

    Examples

    +
    if (FALSE) { +# retrieve your service document +d <- service_document() + +# create a list of metadata +metadat <- list(title = "My Study", + creator = "Doe, John", + description = "An example study") + +# create the dataset in first dataverse +dat <- initiate_sword_dataset(d[[2]], body = metadat) + +# publish dataset +publish_sword_dataset(dat) + +# delete a dataset +delete_dataset(dat) +} +
    +
    + +
    + + +
    + + +
    +

    Site built with pkgdown 1.6.1.

    +
    + +
    +
    + + + + + + + + diff --git a/docs/reference/service_document.html b/docs/reference/service_document.html new file mode 100644 index 0000000..695d136 --- /dev/null +++ b/docs/reference/service_document.html @@ -0,0 +1,221 @@ + + + + + + + + +SWORD Service Document — service_document • dataverse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +
    + +
    +
    + + +
    +

    Obtain a SWORD service document.

    +
    + +
    service_document(
    +  key = Sys.getenv("DATAVERSE_KEY"),
    +  server = Sys.getenv("DATAVERSE_SERVER"),
    +  ...
    +)
    + +

    Arguments

    + + + + + + + + + + + + + + +
    key

    A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +Sys.setenv("DATAVERSE_KEY" = "examplekey").

    server

    A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (server = "dataverse.harvard.edu"). This can be modified atomically +or globally using Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com").

    ...

    Additional arguments passed to an HTTP request function, such as +GET, POST, or +DELETE.

    + +

    Value

    + +

    A list of class “sword_service_document”, possibly with one or more “sword_collection” entries. The latter are SWORD representations of a Dataverse. These can be passed to other SWORD API functions, e.g., for creating a new dataset.

    +

    Details

    + +

    This function can be used to check authentication against the Dataverse SWORD server. It is typically a first step when creating a new Dataverse, a new Dataset, or modifying an existing Dataverse or Dataset.

    +

    See also

    + +

    Managing a Dataverse: publish_dataverse; Managing a dataset: dataset_atom, list_datasets, create_dataset, delete_dataset, publish_dataset; Managing files within a dataset: add_file, delete_file

    + +

    Examples

    +
    if (FALSE) { +# retrieve your service document +d <- service_document() + +# list available datasets in first dataverse +list_datasets(d[[2]]) +} +
    +
    + +
    + + +
    + + +
    +

    Site built with pkgdown 1.6.1.

    +
    + +
    +
    + + + + + + + + diff --git a/docs/reference/set_dataverse_metadata.html b/docs/reference/set_dataverse_metadata.html new file mode 100644 index 0000000..4f97254 --- /dev/null +++ b/docs/reference/set_dataverse_metadata.html @@ -0,0 +1,227 @@ + + + + + + + + +Set Dataverse metadata — set_dataverse_metadata • dataverse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +
    + +
    +
    + + +
    +

    Set Dataverse metadata

    +
    + +
    set_dataverse_metadata(
    +  dataverse,
    +  body,
    +  root = TRUE,
    +  key = Sys.getenv("DATAVERSE_KEY"),
    +  server = Sys.getenv("DATAVERSE_SERVER"),
    +  ...
    +)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + + + + + + + + + +
    dataverse

    A character string specifying a Dataverse name or an object of class “dataverse”.

    body

    A list.

    root

    A logical.

    key

    A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +Sys.setenv("DATAVERSE_KEY" = "examplekey").

    server

    A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (server = "dataverse.harvard.edu"). This can be modified atomically +or globally using Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com").

    ...

    Additional arguments passed to an HTTP request function, such as +GET, POST, or +DELETE.

    + +

    Value

    + +

    A list

    +

    Details

    + +

    This function sets the value of metadata fields for a Dataverse. Use update_dataset to set the metadata fields for a dataset instead.

    +

    See also

    + + + +
    + +
    + + +
    + + +
    +

    Site built with pkgdown 1.6.1.

    +
    + +
    +
    + + + + + + + + diff --git a/for-developers/developer-tasks.R b/for-developers/developer-tasks.R index b308ce8..9b61d04 100644 --- a/for-developers/developer-tasks.R +++ b/for-developers/developer-tasks.R @@ -10,6 +10,17 @@ pkgdown::clean_site() pkgdown::build_site() system("R CMD Rd2pdf --no-preview --force --output=./documentation-peek.pdf ." ) +checks_to_exclude <- c( + "covr", + "lintr_line_length_linter" +) +gp <- + goodpractice::all_checks() %>% + purrr::discard(~(. %in% checks_to_exclude)) %>% + goodpractice::gp(checks = .) +goodpractice::results(gp) +gp + devtools::run_examples(); #dev.off() #This overwrites the NAMESPACE file too # devtools::run_examples(, "redcap_read.Rd") test_results_checked <- devtools::test() diff --git a/inst/dataset-basketball/dataframe-from-tab.rds b/inst/dataset-basketball/dataframe-from-tab.rds new file mode 100644 index 0000000..2850adc Binary files /dev/null and b/inst/dataset-basketball/dataframe-from-tab.rds differ diff --git a/inst/dataset-basketball/expected-metadata.yml b/inst/dataset-basketball/expected-metadata.yml index 8957bac..1a8aa94 100644 --- a/inst/dataset-basketball/expected-metadata.yml +++ b/inst/dataset-basketball/expected-metadata.yml @@ -24,6 +24,19 @@ roster: type: MD5 value: c6feabffac401627b80761c5a1de55f0 creationDate: '2020-12-29' + raw_value: "number,player,position,height,weight,dob,country_birth,experience_years,college\r\n0,Robert + Parish,C,7-0,230,\"August 30, 1953\",us,20,Centenary College of Louisiana\r\n1,Randy + Brown,PG,6-2,190,\"May 22, 1968\",us,5,\"Houston, New Mexico State\"\r\n6,Matt + Steigenga,SF,6-7,225,\"March 27, 1970\",us,0,Michigan State\r\n7,Toni Kukoč,SF,6-10,192,\"September + 18, 1968\",hr,3,\r\n8,Dickey Simpkins,PF,6-9,248,\"April 6, 1972\",us,2,Providence\r\n9,Ron + Harper,PG,6-6,185,\"January 20, 1964\",us,10,Miami University\r\n13,Luc Longley,C,7-2,265,\"January + 19, 1969\",au,5,New Mexico\r\n18,Bison Dele,C,6-9,235,\"April 6, 1969\",us,5,\"Maryland, Arizona\"\r\n23,Michael + Jordan,SG,6-6,195,\"February 17, 1963\",us,11,UNC\r\n25,Steve Kerr,PG,6-3,175,\"September + 27, 1965\",lb,8,Arizona\r\n30,Jud Buechler,SF,6-6,220,\"June 19, 1968\",us,6,Arizona\r\n33,Scottie + Pippen,SF,6-8,210,\"September 25, 1965\",us,9,University of Central Arkansas\r\n34,Bill + Wennington,C,7-0,245,\"April 26, 1963\",ca,9,St. John's\r\n35,Jason Caffey,PF,6-8,255,\"June + 12, 1973\",us,1,Alabama\r\n91,Dennis Rodman,PF,6-7,210,\"May 13, 1961\",us,10,Southeastern + Oklahoma State University\r\n" image: description: 'ID: 1734006. A svg file' label: vector-basketball.svg @@ -46,3 +59,66 @@ image: type: MD5 value: 8038c2efb57dd470e908ae2ad1ff70e0 creationDate: '2020-12-29' + raw_value: |+ + + + + + Created by potrace 1.15, written by Peter Selinger 2001-2017 + + + + + + diff --git a/man-roxygen/dots.R b/man-roxygen/dots.R index bd3f491..cf74d0d 100644 --- a/man-roxygen/dots.R +++ b/man-roxygen/dots.R @@ -1 +1,3 @@ -#' @param ... Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or \code{\link[httr]{DELETE}}. +#' @param ... Additional arguments passed to an HTTP request function, such as +#' \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +#' \code{\link[httr]{DELETE}}. diff --git a/man-roxygen/ds.R b/man-roxygen/ds.R index 5391eaa..efe78d0 100644 --- a/man-roxygen/ds.R +++ b/man-roxygen/ds.R @@ -1 +1,3 @@ -#' @param dataset An integer specifying a dataset identification number or an object of class \dQuote{dataverse_dataset}. The identification number is the dataset's persistent identification number (not the integer specifying a specific version of the dataset, such as returned by \code{\link{dataset_versions}}). +#' @param dataset A character specifying a persistent identification ID for a dataset, +#' for example `"doi:10.70122/FK2/HXJVJU"`. Alternatively, an object of class +#' \dQuote{dataverse_dataset} obtained by `dataverse_contents()`. diff --git a/man-roxygen/envvars.R b/man-roxygen/envvars.R index 7526c1f..35579b9 100644 --- a/man-roxygen/envvars.R +++ b/man-roxygen/envvars.R @@ -1,2 +1,8 @@ -#' @param key A character string specifying a Dataverse server API key. If one is not specified, functions calling authenticated API endpoints will fail. Keys can be specified atomically or globally using \code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}. -#' @param server A character string specifying a Dataverse server. There are multiple Dataverse installations, but the defaults is to use the Harvard Dataverse. This can be modified atomically or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}. +#' @param key A character string specifying a Dataverse server API key. If one +#' is not specified, functions calling authenticated API endpoints will fail. +#' Keys can be specified atomically or globally using +#' \code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}. +#' @param server A character string specifying a Dataverse server. There are +#' multiple Dataverse installations, but the defaults is to use the Harvard +#' Dataverse (`server = "dataverse.harvard.edu"`). This can be modified atomically +#' or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}. diff --git a/man/add_dataset_file.Rd b/man/add_dataset_file.Rd index faa1db7..c0a74d7 100644 --- a/man/add_dataset_file.Rd +++ b/man/add_dataset_file.Rd @@ -28,15 +28,25 @@ update_dataset_file( \arguments{ \item{file}{A character string} -\item{dataset}{An integer specifying a dataset identification number or an object of class \dQuote{dataverse_dataset}. The identification number is the dataset's persistent identification number (not the integer specifying a specific version of the dataset, such as returned by \code{\link{dataset_versions}}).} +\item{dataset}{A character specifying a persistent identification ID for a dataset, +for example \code{"doi:10.70122/FK2/HXJVJU"}. Alternatively, an object of class +\dQuote{dataverse_dataset} obtained by \code{dataverse_contents()}.} \item{description}{Optionally, a character string providing a description of the file.} -\item{key}{A character string specifying a Dataverse server API key. If one is not specified, functions calling authenticated API endpoints will fail. Keys can be specified atomically or globally using \code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} +\item{key}{A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +\code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} -\item{server}{A character string specifying a Dataverse server. There are multiple Dataverse installations, but the defaults is to use the Harvard Dataverse. This can be modified atomically or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} +\item{server}{A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (\code{server = "dataverse.harvard.edu"}). This can be modified atomically +or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} -\item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or \code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, such as +\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}.} \item{id}{An integer specifying a file identifier; or, if \code{doi} is specified, a character string specifying a file name within the DOI-identified dataset; or an object of class \dQuote{dataverse_file} as returned by \code{\link{dataset_files}}.} diff --git a/man/add_file.Rd b/man/add_file.Rd index e1db2ff..848de4b 100644 --- a/man/add_file.Rd +++ b/man/add_file.Rd @@ -17,11 +17,19 @@ add_file( \item{file}{A character vector of file names, a data.frame, or a list of R objects.} -\item{key}{A character string specifying a Dataverse server API key. If one is not specified, functions calling authenticated API endpoints will fail. Keys can be specified atomically or globally using \code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} +\item{key}{A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +\code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} -\item{server}{A character string specifying a Dataverse server. There are multiple Dataverse installations, but the defaults is to use the Harvard Dataverse. This can be modified atomically or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} +\item{server}{A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (\code{server = "dataverse.harvard.edu"}). This can be modified atomically +or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} -\item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or \code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, such as +\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}.} } \value{ An object of class \dQuote{dataset_atom}. diff --git a/man/create_dataset.Rd b/man/create_dataset.Rd index c27eb4f..948af53 100644 --- a/man/create_dataset.Rd +++ b/man/create_dataset.Rd @@ -26,13 +26,23 @@ update_dataset( \item{body}{A list describing the dataset.} -\item{key}{A character string specifying a Dataverse server API key. If one is not specified, functions calling authenticated API endpoints will fail. Keys can be specified atomically or globally using \code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} +\item{key}{A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +\code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} -\item{server}{A character string specifying a Dataverse server. There are multiple Dataverse installations, but the defaults is to use the Harvard Dataverse. This can be modified atomically or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} +\item{server}{A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (\code{server = "dataverse.harvard.edu"}). This can be modified atomically +or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} -\item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or \code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, such as +\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}.} -\item{dataset}{An integer specifying a dataset identification number or an object of class \dQuote{dataverse_dataset}. The identification number is the dataset's persistent identification number (not the integer specifying a specific version of the dataset, such as returned by \code{\link{dataset_versions}}).} +\item{dataset}{A character specifying a persistent identification ID for a dataset, +for example \code{"doi:10.70122/FK2/HXJVJU"}. Alternatively, an object of class +\dQuote{dataverse_dataset} obtained by \code{dataverse_contents()}.} } \value{ An object of class \dQuote{dataverse_dataset}. diff --git a/man/create_dataverse.Rd b/man/create_dataverse.Rd index 1c71ae9..9df42c8 100644 --- a/man/create_dataverse.Rd +++ b/man/create_dataverse.Rd @@ -14,11 +14,19 @@ create_dataverse( \arguments{ \item{dataverse}{A character string specifying a Dataverse name or an object of class \dQuote{dataverse}. If missing, a top-level Dataverse is created.} -\item{key}{A character string specifying a Dataverse server API key. If one is not specified, functions calling authenticated API endpoints will fail. Keys can be specified atomically or globally using \code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} +\item{key}{A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +\code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} -\item{server}{A character string specifying a Dataverse server. There are multiple Dataverse installations, but the defaults is to use the Harvard Dataverse. This can be modified atomically or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} +\item{server}{A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (\code{server = "dataverse.harvard.edu"}). This can be modified atomically +or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} -\item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or \code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, such as +\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}.} } \value{ A list. diff --git a/man/dataset_atom.Rd b/man/dataset_atom.Rd index b13305a..3c3d423 100644 --- a/man/dataset_atom.Rd +++ b/man/dataset_atom.Rd @@ -22,11 +22,19 @@ dataset_statement( \arguments{ \item{dataset}{A dataset DOI (or other persistent identifier), an object of class \dQuote{dataset_atom} or \dQuote{dataset_statement}, or an appropriate and complete SWORD URL.} -\item{key}{A character string specifying a Dataverse server API key. If one is not specified, functions calling authenticated API endpoints will fail. Keys can be specified atomically or globally using \code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} +\item{key}{A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +\code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} -\item{server}{A character string specifying a Dataverse server. There are multiple Dataverse installations, but the defaults is to use the Harvard Dataverse. This can be modified atomically or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} +\item{server}{A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (\code{server = "dataverse.harvard.edu"}). This can be modified atomically +or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} -\item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or \code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, such as +\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}.} } \value{ A list. For \code{dataset_atom}, an object of class \dQuote{dataset_atom}. diff --git a/man/dataset_versions.Rd b/man/dataset_versions.Rd index e1ea475..851177c 100644 --- a/man/dataset_versions.Rd +++ b/man/dataset_versions.Rd @@ -12,13 +12,23 @@ dataset_versions( ) } \arguments{ -\item{dataset}{An integer specifying a dataset identification number or an object of class \dQuote{dataverse_dataset}. The identification number is the dataset's persistent identification number (not the integer specifying a specific version of the dataset, such as returned by \code{\link{dataset_versions}}).} +\item{dataset}{A character specifying a persistent identification ID for a dataset, +for example \code{"doi:10.70122/FK2/HXJVJU"}. Alternatively, an object of class +\dQuote{dataverse_dataset} obtained by \code{dataverse_contents()}.} -\item{key}{A character string specifying a Dataverse server API key. If one is not specified, functions calling authenticated API endpoints will fail. Keys can be specified atomically or globally using \code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} +\item{key}{A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +\code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} -\item{server}{A character string specifying a Dataverse server. There are multiple Dataverse installations, but the defaults is to use the Harvard Dataverse. This can be modified atomically or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} +\item{server}{A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (\code{server = "dataverse.harvard.edu"}). This can be modified atomically +or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} -\item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or \code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, such as +\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}.} } \value{ A list of class \dQuote{dataverse_dataset_version}. diff --git a/man/dataverse.Rd b/man/dataverse.Rd index 2c7313b..651cb6f 100644 --- a/man/dataverse.Rd +++ b/man/dataverse.Rd @@ -15,17 +15,17 @@ A Dataverse is structured as a nested set of \dQuote{dataverse} repositories, su This package provides five main sets of functions to interact with Dataverse: \itemize{ - \item Search: \code{\link{dataverse_search}} - \item Data retrieval: \code{\link{get_dataverse}}, \code{\link{dataverse_contents}}, \code{\link{get_dataset}}, \code{\link{dataset_metadata}}, \code{\link{get_file}} - \item Data archiving (SWORD API): \code{\link{service_document}}, \code{\link{list_datasets}}, \code{\link{initiate_sword_dataset}}, \code{\link{delete_sword_dataset}}, \code{\link{publish_sword_dataset}}, \code{\link{add_file}}, \code{\link{delete_file}} - \item Dataverse management \dQuote{native} API: \code{\link{create_dataverse}}, \code{\link{publish_dataverse}}, \code{\link{delete_dataverse}} - \item Dataset management \dQuote{native} API: \code{\link{create_dataset}}, \code{\link{update_dataset}}, \code{\link{publish_dataset}}, \code{\link{delete_dataset}}, \code{\link{dataset_files}}, \code{\link{dataset_versions}} +\item Search: \code{\link{dataverse_search}} +\item Data retrieval: \code{\link{get_dataverse}}, \code{\link{dataverse_contents}}, \code{\link{get_dataset}}, \code{\link{dataset_metadata}}, \code{\link{get_file}} +\item Data archiving (SWORD API): \code{\link{service_document}}, \code{\link{list_datasets}}, \code{\link{initiate_sword_dataset}}, \code{\link{delete_sword_dataset}}, \code{\link{publish_sword_dataset}}, \code{\link{add_file}}, \code{\link{delete_file}} +\item Dataverse management \dQuote{native} API: \code{\link{create_dataverse}}, \code{\link{publish_dataverse}}, \code{\link{delete_dataverse}} +\item Dataset management \dQuote{native} API: \code{\link{create_dataset}}, \code{\link{update_dataset}}, \code{\link{publish_dataset}}, \code{\link{delete_dataset}}, \code{\link{dataset_files}}, \code{\link{dataset_versions}} } } \references{ -\href{http://guides.dataverse.org/en/latest/api/index.html}{Dataverse API Documentation} +\href{https://guides.dataverse.org/en/latest/api/index.html}{Dataverse API Documentation} - \href{http://dataverse.org/}{Dataverse Homepage} +\href{https://dataverse.org/}{Dataverse Homepage} - \href{https://dataverse.harvard.edu/}{Harvard IQSS Dataverse} +\href{https://dataverse.harvard.edu/}{Harvard IQSS Dataverse} } diff --git a/man/dataverse_metadata.Rd b/man/dataverse_metadata.Rd index 115dc0d..0735899 100644 --- a/man/dataverse_metadata.Rd +++ b/man/dataverse_metadata.Rd @@ -14,11 +14,19 @@ dataverse_metadata( \arguments{ \item{dataverse}{A character string specifying a Dataverse name or an object of class \dQuote{dataverse}.} -\item{key}{A character string specifying a Dataverse server API key. If one is not specified, functions calling authenticated API endpoints will fail. Keys can be specified atomically or globally using \code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} +\item{key}{A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +\code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} -\item{server}{A character string specifying a Dataverse server. There are multiple Dataverse installations, but the defaults is to use the Harvard Dataverse. This can be modified atomically or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} +\item{server}{A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (\code{server = "dataverse.harvard.edu"}). This can be modified atomically +or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} -\item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or \code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, such as +\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}.} } \value{ A list diff --git a/man/dataverse_search.Rd b/man/dataverse_search.Rd index 9030076..0ed2e61 100644 --- a/man/dataverse_search.Rd +++ b/man/dataverse_search.Rd @@ -42,9 +42,15 @@ dataverse_search( \item{fq}{See API documentation.} -\item{key}{A character string specifying a Dataverse server API key. If one is not specified, functions calling authenticated API endpoints will fail. Keys can be specified atomically or globally using \code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} - -\item{server}{A character string specifying a Dataverse server. There are multiple Dataverse installations, but the defaults is to use the Harvard Dataverse. This can be modified atomically or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} +\item{key}{A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +\code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} + +\item{server}{A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (\code{server = "dataverse.harvard.edu"}). This can be modified atomically +or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} \item{verbose}{A logical indicating whether to display information about the search query (default is \code{TRUE}).} diff --git a/man/delete_dataset.Rd b/man/delete_dataset.Rd index 20d5721..ece4252 100644 --- a/man/delete_dataset.Rd +++ b/man/delete_dataset.Rd @@ -12,13 +12,23 @@ delete_dataset( ) } \arguments{ -\item{dataset}{An integer specifying a dataset identification number or an object of class \dQuote{dataverse_dataset}. The identification number is the dataset's persistent identification number (not the integer specifying a specific version of the dataset, such as returned by \code{\link{dataset_versions}}).} +\item{dataset}{A character specifying a persistent identification ID for a dataset, +for example \code{"doi:10.70122/FK2/HXJVJU"}. Alternatively, an object of class +\dQuote{dataverse_dataset} obtained by \code{dataverse_contents()}.} -\item{key}{A character string specifying a Dataverse server API key. If one is not specified, functions calling authenticated API endpoints will fail. Keys can be specified atomically or globally using \code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} +\item{key}{A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +\code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} -\item{server}{A character string specifying a Dataverse server. There are multiple Dataverse installations, but the defaults is to use the Harvard Dataverse. This can be modified atomically or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} +\item{server}{A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (\code{server = "dataverse.harvard.edu"}). This can be modified atomically +or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} -\item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or \code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, such as +\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}.} } \value{ A logical. diff --git a/man/delete_dataverse.Rd b/man/delete_dataverse.Rd index 8421dbd..a34ce4c 100644 --- a/man/delete_dataverse.Rd +++ b/man/delete_dataverse.Rd @@ -14,11 +14,19 @@ delete_dataverse( \arguments{ \item{dataverse}{A character string specifying a Dataverse name or an object of class \dQuote{dataverse}.} -\item{key}{A character string specifying a Dataverse server API key. If one is not specified, functions calling authenticated API endpoints will fail. Keys can be specified atomically or globally using \code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} +\item{key}{A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +\code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} -\item{server}{A character string specifying a Dataverse server. There are multiple Dataverse installations, but the defaults is to use the Harvard Dataverse. This can be modified atomically or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} +\item{server}{A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (\code{server = "dataverse.harvard.edu"}). This can be modified atomically +or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} -\item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or \code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, such as +\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}.} } \value{ A logical. diff --git a/man/delete_file.Rd b/man/delete_file.Rd index 7ca4631..721c1d7 100644 --- a/man/delete_file.Rd +++ b/man/delete_file.Rd @@ -14,11 +14,19 @@ delete_file( \arguments{ \item{id}{A file ID, possibly returned by \code{\link{add_file}}, or a complete \dQuote{edit-media/file} URL.} -\item{key}{A character string specifying a Dataverse server API key. If one is not specified, functions calling authenticated API endpoints will fail. Keys can be specified atomically or globally using \code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} +\item{key}{A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +\code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} -\item{server}{A character string specifying a Dataverse server. There are multiple Dataverse installations, but the defaults is to use the Harvard Dataverse. This can be modified atomically or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} +\item{server}{A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (\code{server = "dataverse.harvard.edu"}). This can be modified atomically +or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} -\item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or \code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, such as +\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}.} } \value{ If successful, a logical \code{TRUE}, else possibly some information. @@ -49,7 +57,7 @@ f <- add_file(dat, file = tmp) # delete a file ds <- dataset_statement(dat) -delete_file(ds$files[[1]]$id +delete_file(ds$files[[1]]$id) # delete a dataset delete_dataset(dat) diff --git a/man/delete_sword_dataset.Rd b/man/delete_sword_dataset.Rd index 0e6facc..7cb8462 100644 --- a/man/delete_sword_dataset.Rd +++ b/man/delete_sword_dataset.Rd @@ -14,11 +14,19 @@ delete_sword_dataset( \arguments{ \item{dataset}{A dataset DOI (or other persistent identifier).} -\item{key}{A character string specifying a Dataverse server API key. If one is not specified, functions calling authenticated API endpoints will fail. Keys can be specified atomically or globally using \code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} +\item{key}{A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +\code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} -\item{server}{A character string specifying a Dataverse server. There are multiple Dataverse installations, but the defaults is to use the Harvard Dataverse. This can be modified atomically or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} +\item{server}{A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (\code{server = "dataverse.harvard.edu"}). This can be modified atomically +or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} -\item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or \code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, such as +\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}.} } \value{ If successful, a logical \code{TRUE}, else possibly some information. diff --git a/man/files.Rd b/man/files.Rd index 941bd21..7137ad5 100644 --- a/man/files.Rd +++ b/man/files.Rd @@ -1,87 +1,166 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_file.R +% Please edit documentation in R/get_file.R, R/get_file_by_id.R \name{get_file} \alias{get_file} -\alias{get_file_metadata} -\title{Download File(s)} +\alias{get_file_by_name} +\alias{get_file_by_id} +\alias{get_file_by_doi} +\title{Download File} \usage{ get_file( file, dataset = NULL, - format = c("original", "RData", "prep", "bundle"), + format = c("original", "bundle"), vars = NULL, key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), + original = TRUE, ... ) -get_file_metadata( - file, +get_file_by_name( + filename, + dataset, + format = c("original", "bundle"), + vars = NULL, + key = Sys.getenv("DATAVERSE_KEY"), + server = Sys.getenv("DATAVERSE_SERVER"), + original = TRUE, + ... +) + +get_file_by_id( + fileid, dataset = NULL, - format = c("ddi", "preprocessed"), + format = c("original", "bundle"), + vars = NULL, + original = TRUE, + key = Sys.getenv("DATAVERSE_KEY"), + server = Sys.getenv("DATAVERSE_SERVER"), + ... +) + +get_file_by_doi( + filedoi, + dataset = NULL, + format = c("original", "bundle"), + vars = NULL, + original = TRUE, key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), ... ) } \arguments{ -\item{file}{An integer specifying a file identifier; or a vector of integers specifying file identifiers; or, if \code{doi} is specified, a character string specifying a file name within the DOI-identified dataset; or an object of class \dQuote{dataverse_file} as returned by \code{\link{dataset_files}}.} +\item{file}{An integer specifying a file identifier; or a vector of integers +specifying file identifiers; or, if used with the prefix \code{"doi:"}, a +character with the file-specific DOI; or, if used without the prefix, a +filename accompanied by a dataset DOI in the \code{dataset} argument, or an object of +class \dQuote{dataverse_file} as returned by \code{\link{dataset_files}}.} -\item{dataset}{An integer specifying a dataset identification number or an object of class \dQuote{dataverse_dataset}. The identification number is the dataset's persistent identification number (not the integer specifying a specific version of the dataset, such as returned by \code{\link{dataset_versions}}).} +\item{dataset}{A character specifying a persistent identification ID for a dataset, +for example \code{"doi:10.70122/FK2/HXJVJU"}. Alternatively, an object of class +\dQuote{dataverse_dataset} obtained by \code{dataverse_contents()}.} -\item{format}{A character string specifying a file format. For \code{get_file}: by default, this is \dQuote{original} (the original file format). If \dQuote{RData} or \dQuote{prep} is used, an alternative is returned. If \dQuote{bundle}, a compressed directory containing a bundle of file formats is returned. For \code{get_file_metadata}, this is \dQuote{ddi}.} +\item{format}{A character string specifying a file format for download. +by default, this is \dQuote{original} (the original file format). If \code{NULL}, +no query is added, so ingested files are returned in their ingested TSV form. +For tabular datasets, the option \dQuote{bundle} downloads the bundle +of the original and archival versions, as well as the documentation. +See \url{https://guides.dataverse.org/en/latest/api/dataaccess.html} for details.} -\item{vars}{A character vector specifying one or more variable names, used to extract a subset of the data.} +\item{vars}{A character vector specifying one or more variable names, used to +extract a subset of the data.} -\item{key}{A character string specifying a Dataverse server API key. If one is not specified, functions calling authenticated API endpoints will fail. Keys can be specified atomically or globally using \code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} +\item{key}{A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +\code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} -\item{server}{A character string specifying a Dataverse server. There are multiple Dataverse installations, but the defaults is to use the Harvard Dataverse. This can be modified atomically or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} +\item{server}{A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (\code{server = "dataverse.harvard.edu"}). This can be modified atomically +or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} -\item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or \code{\link[httr]{DELETE}}.} +\item{original}{A logical, defaulting to TRUE. If a ingested (.tab) version is +available, download the original version instead of the ingested? If there was +no ingested version, is set to NA. Note in \verb{get_dataframe_*}, +\code{original} is set to FALSE by default. Either can be changed.} + +\item{...}{Additional arguments passed to an HTTP request function, such as +\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}.} + +\item{filename}{Filename of the dataset, with file extension as shown in Dataverse +(for example, if nlsw88.dta was the original but is displayed as the ingested +nlsw88.tab, use the ingested version.)} + +\item{fileid}{A numeric ID internally used for \code{get_file_by_id}} + +\item{filedoi}{A DOI for a single file (not the entire dataset), of the form +\code{"10.70122/FK2/PPIAXE/MHDB0O"} or \code{"doi:10.70122/FK2/PPIAXE/MHDB0O"}} } \value{ -\code{get_file_metadata} returns a character vector containing a DDI metadata file. \code{get_file} returns a raw vector (or list of raw vectors, if \code{length(file) > 1}). +\code{get_file} returns a raw vector (or list of raw vectors, +if \code{length(file) > 1}), which can be saved locally with the \code{writeBin} +function. To load datasets into the R environment dataframe, see +\link{get_dataframe_by_name}. } \description{ -Download Dataverse File(s) +Download Dataverse File(s). \code{get_file} is a general wrapper, +and can take either dataverse objects, file IDs, or a filename and dataverse. +\code{get_file_by_name} is a shorthand for running \code{get_file} by +specifying a file name (\code{filename}) and dataset (\code{dataset}). +\code{get_file_by_doi} obtains a file by its file DOI, bypassing the +\code{dataset} argument. + +Internally, all functions download each file by \code{get_file_by_id}. \verb{get_file_*} +functions return a raw binary file, which cannot be readily analyzed in R. +To use the objects as dataframes, see the \verb{get_dataset_*} functions at \link{get_dataset} } \details{ This function provides access to data files from a Dataverse entry. } \examples{ \dontrun{ -# download file from: -# https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/ARKOTI -monogan <- get_dataverse("monogan") -monogan_data <- dataverse_contents(monogan) -d1 <- get_dataset("doi:10.7910/DVN/ARKOTI") -f <- get_file(d1$files$datafile$id[3]) - -# check file metadata -m1 <- get_file_metadata("constructionData.tab", "doi:10.7910/DVN/ARKOTI") -m2 <- get_file_metadata(2437257) - -# retrieve file based on DOI and filename -f2 <- get_file("constructionData.tab", "doi:10.7910/DVN/ARKOTI") -f2 <- get_file(2692202) - -# retrieve file based on "dataverse_file" object -flist <- dataset_files(2692151) -get_file(flist[[2]]) - -# retrieve all files in a dataset in their original format (returns a list of raw vectors) -file_ids <- get_dataset("doi:10.7910/DVN/CXOB4K")[['files']]$id -f3 <- get_file(file_ids, format = "original") -# read file as data.frame -if (require("rio")) { - tmp <- tempfile(fileext = ".dta") - writeBin(f, tmp) - dat <- haven::read_dta(tmp) - - # check UNF match - # if (require("UNF")) { - # unf(dat) \%unf\% d1$files$datafile$UNF[3] - # } + +# 1. Using filename and dataverse +f1 <- get_file_by_name( + filename = "nlsw88.tab", + dataset = "10.70122/FK2/PPIAXE", + server = "demo.dataverse.org" +) + +# 2. Using file DOI +f2 <- get_file_by_doi( + filedoi = "10.70122/FK2/PPIAXE/MHDB0O", + server = "demo.dataverse.org" +) + +# 3. Two-steps: Find ID from get_dataset +d3 <- get_dataset("doi:10.70122/FK2/PPIAXE", server = "demo.dataverse.org") +f3 <- get_file(d3$files$id[1], server = "demo.dataverse.org") + +# 4. Retrieve multiple raw data in list +f4_vec <- get_dataset( + "doi:10.70122/FK2/PPIAXE", + server = "demo.dataverse.org" +)$files$id + +f4 <- get_file(f4_vec, server = "demo.dataverse.org") +length(f4) + +# Write binary files +# (see `get_dataframe_by_name` to load in environment) +# The appropriate file extension needs to be assigned by the user. +writeBin(f1, "nlsw88.dta") +writeBin(f2, "nlsw88.dta") + +writeBin(f4[[1]], "nlsw88.rds") # originally a rds file +writeBin(f4[[2]], "nlsw88.dta") # originally a dta file } + } +\seealso{ +To load the objects as datasets \link{get_dataframe_by_name}. } diff --git a/man/get_dataframe.Rd b/man/get_dataframe.Rd new file mode 100644 index 0000000..6f4f4c1 --- /dev/null +++ b/man/get_dataframe.Rd @@ -0,0 +1,125 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_dataframe.R +\name{get_dataframe_by_name} +\alias{get_dataframe_by_name} +\alias{get_dataframe_by_id} +\alias{get_dataframe_by_doi} +\title{Get file from dataverse and convert it into a dataframe or tibble} +\usage{ +get_dataframe_by_name( + filename, + dataset = NULL, + .f = NULL, + original = FALSE, + ... +) + +get_dataframe_by_id(fileid, .f = NULL, original = FALSE, ...) + +get_dataframe_by_doi(filedoi, .f = NULL, original = FALSE, ...) +} +\arguments{ +\item{filename}{The name of the file of interest, with file extension, for example +\code{"roster-bulls-1996.tab"}.} + +\item{dataset}{A character specifying a persistent identification ID for a dataset, +for example \code{"doi:10.70122/FK2/HXJVJU"}. Alternatively, an object of class +\dQuote{dataverse_dataset} obtained by \code{dataverse_contents()}.} + +\item{.f}{The function to used for reading in the raw dataset. This user +must choose the appropriate function: for example if the target is a .rds +file, then \code{.f} should be \code{readRDS} or \code{readr::read_}rds`.} + +\item{original}{A logical, defaulting to TRUE. Whether to read the ingested, +archival version of the dataset if one exists. The archival versions are tab-delimited +\code{.tab} files so if \code{original = FALSE}, \code{.f} is set to \code{readr::read_tsv}. +If functions to read the original version is available, then \code{original = TRUE} +with a specified \code{.f} is better.} + +\item{...}{ + Arguments passed on to \code{\link[=get_file]{get_file}} + \describe{ + \item{\code{file}}{An integer specifying a file identifier; or a vector of integers +specifying file identifiers; or, if used with the prefix \code{"doi:"}, a +character with the file-specific DOI; or, if used without the prefix, a +filename accompanied by a dataset DOI in the \code{dataset} argument, or an object of +class \dQuote{dataverse_file} as returned by \code{\link{dataset_files}}.} + \item{\code{format}}{A character string specifying a file format for download. +by default, this is \dQuote{original} (the original file format). If \code{NULL}, +no query is added, so ingested files are returned in their ingested TSV form. +For tabular datasets, the option \dQuote{bundle} downloads the bundle +of the original and archival versions, as well as the documentation. +See \url{https://guides.dataverse.org/en/latest/api/dataaccess.html} for details.} + \item{\code{vars}}{A character vector specifying one or more variable names, used to +extract a subset of the data.} + \item{\code{key}}{A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +\code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} + \item{\code{server}}{A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (\code{server = "dataverse.harvard.edu"}). This can be modified atomically +or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} + }} + +\item{fileid}{A numeric ID internally used for \code{get_file_by_id}} + +\item{filedoi}{A DOI for a single file (not the entire dataset), of the form +\code{"10.70122/FK2/PPIAXE/MHDB0O"} or \code{"doi:10.70122/FK2/PPIAXE/MHDB0O"}} +} +\description{ +\code{get_dataframe_by_id}, if you know the numeric ID of the dataset, or instead +\code{get_dataframe_by_name} if you know the filename and doi. The dataset +} +\examples{ + +# Retrieve data.frame from dataverse DOI and file name +df_from_rds_ingested <- + get_dataframe_by_name( + filename = "roster-bulls-1996.tab", + dataset = "doi:10.70122/FK2/HXJVJU", + server = "demo.dataverse.org" + ) + +# Retrieve the same data.frame from dataverse + file DOI +df_from_rds_ingested_by_doi <- + get_dataframe_by_doi( + filedoi = "10.70122/FK2/HXJVJU/SA3Z2V", + server = "demo.dataverse.org" + ) + +# Retrieve ingested file originally a Stata dta +df_from_stata_ingested <- + get_dataframe_by_name( + filename = "nlsw88.tab", + dataset = "doi:10.70122/FK2/PPIAXE", + server = "demo.dataverse.org" + ) + + +# To use the original file version, or for non-ingested data, +# please specify `original = TRUE` and specify a function in .f. + +# A data.frame is still returned, but the +if (requireNamespace("readr", quietly = TRUE)) { + df_from_rds_original <- + get_dataframe_by_name( + filename = "nlsw88_rds-export.rds", + dataset = "doi:10.70122/FK2/PPIAXE", + server = "demo.dataverse.org", + original = TRUE, + .f = readr::read_rds + ) +} + +if (requireNamespace("haven", quietly = TRUE)) { + df_from_stata_original <- + get_dataframe_by_name( + filename = "nlsw88.tab", + dataset = "doi:10.70122/FK2/PPIAXE", + server = "demo.dataverse.org", + original = TRUE, + .f = haven::read_dta + ) +} +} diff --git a/man/get_dataframe_internal.Rd b/man/get_dataframe_internal.Rd new file mode 100644 index 0000000..d53841a --- /dev/null +++ b/man/get_dataframe_internal.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_dataframe.R +\name{get_dataframe_internal} +\alias{get_dataframe_internal} +\title{Write to temp and apply function} +\usage{ +get_dataframe_internal(raw, filename, .f) +} +\description{ +Write to temp and apply function +} +\keyword{internal} diff --git a/man/get_dataset.Rd b/man/get_dataset.Rd index 70175c5..2611456 100644 --- a/man/get_dataset.Rd +++ b/man/get_dataset.Rd @@ -32,15 +32,25 @@ dataset_files( ) } \arguments{ -\item{dataset}{An integer specifying a dataset identification number or an object of class \dQuote{dataverse_dataset}. The identification number is the dataset's persistent identification number (not the integer specifying a specific version of the dataset, such as returned by \code{\link{dataset_versions}}).} +\item{dataset}{A character specifying a persistent identification ID for a dataset, +for example \code{"doi:10.70122/FK2/HXJVJU"}. Alternatively, an object of class +\dQuote{dataverse_dataset} obtained by \code{dataverse_contents()}.} \item{version}{A character string specifying a version of the dataset. This can be one of \dQuote{:draft} (the current draft), \dQuote{:latest} (the latest draft, if it exists, or the latest published version), \dQuote{:latest-published} (the latest published version, ignoring any draft), or \dQuote{x.y} (where \samp{x} is a major version and \samp{y} is a minor version; the \samp{.y} can be omitted to obtain a major version). In lieu of this, a dataset's version-specific identification number can be used for the \code{dataset} argument.} -\item{key}{A character string specifying a Dataverse server API key. If one is not specified, functions calling authenticated API endpoints will fail. Keys can be specified atomically or globally using \code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} +\item{key}{A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +\code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} -\item{server}{A character string specifying a Dataverse server. There are multiple Dataverse installations, but the defaults is to use the Harvard Dataverse. This can be modified atomically or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} +\item{server}{A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (\code{server = "dataverse.harvard.edu"}). This can be modified atomically +or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} -\item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or \code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, such as +\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}.} \item{block}{A character string specifying a metadata block to retrieve. By default this is \dQuote{citation}. Other values may be available, depending on the dataset, such as \dQuote{geospatial} or \dQuote{socialscience}.} } diff --git a/man/get_dataverse.Rd b/man/get_dataverse.Rd index 58a10db..832df18 100644 --- a/man/get_dataverse.Rd +++ b/man/get_dataverse.Rd @@ -23,13 +23,21 @@ dataverse_contents( \arguments{ \item{dataverse}{A character string specifying a Dataverse name or an object of class \dQuote{dataverse}.} -\item{key}{A character string specifying a Dataverse server API key. If one is not specified, functions calling authenticated API endpoints will fail. Keys can be specified atomically or globally using \code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} +\item{key}{A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +\code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} -\item{server}{A character string specifying a Dataverse server. There are multiple Dataverse installations, but the defaults is to use the Harvard Dataverse. This can be modified atomically or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} +\item{server}{A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (\code{server = "dataverse.harvard.edu"}). This can be modified atomically +or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} \item{check}{A logical indicating whether to check that the value of \code{dataverse} is actually a numeric} -\item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or \code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, such as +\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}.} } \value{ A list of class \dQuote{dataverse}. diff --git a/man/get_facets.Rd b/man/get_facets.Rd index 4361c9a..3819735 100644 --- a/man/get_facets.Rd +++ b/man/get_facets.Rd @@ -14,11 +14,19 @@ get_facets( \arguments{ \item{dataverse}{A character string specifying a Dataverse name or an object of class \dQuote{dataverse}.} -\item{key}{A character string specifying a Dataverse server API key. If one is not specified, functions calling authenticated API endpoints will fail. Keys can be specified atomically or globally using \code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} +\item{key}{A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +\code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} -\item{server}{A character string specifying a Dataverse server. There are multiple Dataverse installations, but the defaults is to use the Harvard Dataverse. This can be modified atomically or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} +\item{server}{A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (\code{server = "dataverse.harvard.edu"}). This can be modified atomically +or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} -\item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or \code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, such as +\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}.} } \value{ A list. diff --git a/man/get_file_metadata.Rd b/man/get_file_metadata.Rd new file mode 100644 index 0000000..14fb258 --- /dev/null +++ b/man/get_file_metadata.Rd @@ -0,0 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_file_metadata.R +\name{get_file_metadata} +\alias{get_file_metadata} +\title{Retrieve a ddi metadata file} +\usage{ +get_file_metadata( + file, + dataset = NULL, + format = c("ddi", "preprocessed"), + key = Sys.getenv("DATAVERSE_KEY"), + server = Sys.getenv("DATAVERSE_SERVER"), + ... +) +} +\arguments{ +\item{file}{An integer specifying a file identifier; or a vector of integers +specifying file identifiers; or, if used with the prefix \code{"doi:"}, a +character with the file-specific DOI; or, if used without the prefix, a +filename accompanied by a dataset DOI in the \code{dataset} argument, or an object of +class \dQuote{dataverse_file} as returned by \code{\link{dataset_files}}.} + +\item{dataset}{A character specifying a persistent identification ID for a dataset, +for example \code{"doi:10.70122/FK2/HXJVJU"}. Alternatively, an object of class +\dQuote{dataverse_dataset} obtained by \code{dataverse_contents()}.} + +\item{format}{Defaults to \dQuote{ddi} for metadata files} + +\item{key}{A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +\code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} + +\item{server}{A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (\code{server = "dataverse.harvard.edu"}). This can be modified atomically +or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} + +\item{...}{Additional arguments passed to an HTTP request function, such as +\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}.} +} +\value{ +A character vector containing a DDI +metadata file. +} +\description{ +Retrieve a ddi metadata file +} diff --git a/man/get_user_key.Rd b/man/get_user_key.Rd index 7692cc2..5e6cd43 100644 --- a/man/get_user_key.Rd +++ b/man/get_user_key.Rd @@ -13,7 +13,9 @@ get_user_key(user, password, server = Sys.getenv("DATAVERSE_SERVER"), ...) \item{server}{A character string specifying a Dataverse server. There are multiple Dataverse installations, but the defaults is to use the Harvard Dataverse. This can be modified atomically or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} -\item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or \code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, such as +\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}.} } \value{ A list. diff --git a/man/initiate_sword_dataset.Rd b/man/initiate_sword_dataset.Rd index 86b927d..b9e92c8 100644 --- a/man/initiate_sword_dataset.Rd +++ b/man/initiate_sword_dataset.Rd @@ -17,11 +17,19 @@ initiate_sword_dataset( \item{body}{A list containing one or more metadata fields. Field names must be valid Dublin Core Terms labels (see details, below). The \samp{title}, \samp{description}, and \samp{creator} fields are required.} -\item{key}{A character string specifying a Dataverse server API key. If one is not specified, functions calling authenticated API endpoints will fail. Keys can be specified atomically or globally using \code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} +\item{key}{A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +\code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} -\item{server}{A character string specifying a Dataverse server. There are multiple Dataverse installations, but the defaults is to use the Harvard Dataverse. This can be modified atomically or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} +\item{server}{A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (\code{server = "dataverse.harvard.edu"}). This can be modified atomically +or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} -\item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or \code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, such as +\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}.} } \value{ An object of class \dQuote{dataset_atom}. @@ -30,7 +38,7 @@ An object of class \dQuote{dataset_atom}. Initiate a SWORD (possibly unpublished) dataset } \details{ -This function is used to initiate a dataset in a (SWORD) Dataverse by supplying relevant metadata. The function is part of the SWORD API (see \href{http://www.ietf.org/rfc/rfc5023.txt}{Atom entry specification}), which is used to upload data to a Dataverse server. +This function is used to initiate a dataset in a (SWORD) Dataverse by supplying relevant metadata. The function is part of the SWORD API (see \href{https://www.ietf.org/rfc/rfc5023.txt}{Atom entry specification}), which is used to upload data to a Dataverse server. Allowed fields are: \dQuote{abstract}, \dQuote{accessRights}, \dQuote{accrualMethod}, \dQuote{accrualPeriodicity}, \dQuote{accrualPolicy}, \dQuote{alternative}, diff --git a/man/is_ingested.Rd b/man/is_ingested.Rd new file mode 100644 index 0000000..b46c456 --- /dev/null +++ b/man/is_ingested.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{is_ingested} +\alias{is_ingested} +\title{Identify if file is an ingested file} +\usage{ +is_ingested( + fileid, + key = Sys.getenv("DATAVERSE_KEY"), + server = Sys.getenv("DATAVERSE_SERVER") +) +} +\arguments{ +\item{fileid}{A numeric fileid or file-specific DOI} + +\item{key}{A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +\code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} + +\item{server}{A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (\code{server = "dataverse.harvard.edu"}). This can be modified atomically +or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} +} +\description{ +Identify if file is an ingested file +} diff --git a/man/list_datasets.Rd b/man/list_datasets.Rd index 8bbe410..3d0257f 100644 --- a/man/list_datasets.Rd +++ b/man/list_datasets.Rd @@ -14,11 +14,19 @@ list_datasets( \arguments{ \item{dataverse}{A Dataverse alias or ID number, or an object of class \dQuote{dataverse}, perhaps as returned by \code{\link{service_document}}.} -\item{key}{A character string specifying a Dataverse server API key. If one is not specified, functions calling authenticated API endpoints will fail. Keys can be specified atomically or globally using \code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} +\item{key}{A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +\code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} -\item{server}{A character string specifying a Dataverse server. There are multiple Dataverse installations, but the defaults is to use the Harvard Dataverse. This can be modified atomically or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} +\item{server}{A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (\code{server = "dataverse.harvard.edu"}). This can be modified atomically +or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} -\item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or \code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, such as +\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}.} } \value{ A list. diff --git a/man/publish_dataset.Rd b/man/publish_dataset.Rd index 1a82044..33f0392 100644 --- a/man/publish_dataset.Rd +++ b/man/publish_dataset.Rd @@ -13,15 +13,25 @@ publish_dataset( ) } \arguments{ -\item{dataset}{An integer specifying a dataset identification number or an object of class \dQuote{dataverse_dataset}. The identification number is the dataset's persistent identification number (not the integer specifying a specific version of the dataset, such as returned by \code{\link{dataset_versions}}).} +\item{dataset}{A character specifying a persistent identification ID for a dataset, +for example \code{"doi:10.70122/FK2/HXJVJU"}. Alternatively, an object of class +\dQuote{dataverse_dataset} obtained by \code{dataverse_contents()}.} \item{minor}{A logical specifying whether the new release of the dataset is a \dQuote{minor} release (\code{TRUE}, by default), resulting in a minor version increase (e.g., from 1.1 to 1.2). If \code{FALSE}, the dataset is given a \dQuote{major} release (e.g., from 1.1 to 2.0).} -\item{key}{A character string specifying a Dataverse server API key. If one is not specified, functions calling authenticated API endpoints will fail. Keys can be specified atomically or globally using \code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} +\item{key}{A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +\code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} -\item{server}{A character string specifying a Dataverse server. There are multiple Dataverse installations, but the defaults is to use the Harvard Dataverse. This can be modified atomically or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} +\item{server}{A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (\code{server = "dataverse.harvard.edu"}). This can be modified atomically +or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} -\item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or \code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, such as +\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}.} } \value{ A list. diff --git a/man/publish_dataverse.Rd b/man/publish_dataverse.Rd index 45a0b01..de2bbb2 100644 --- a/man/publish_dataverse.Rd +++ b/man/publish_dataverse.Rd @@ -14,11 +14,19 @@ publish_dataverse( \arguments{ \item{dataverse}{An object of class \dQuote{sword_collection}, as returned by \code{\link{service_document}}.} -\item{key}{A character string specifying a Dataverse server API key. If one is not specified, functions calling authenticated API endpoints will fail. Keys can be specified atomically or globally using \code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} +\item{key}{A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +\code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} -\item{server}{A character string specifying a Dataverse server. There are multiple Dataverse installations, but the defaults is to use the Harvard Dataverse. This can be modified atomically or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} +\item{server}{A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (\code{server = "dataverse.harvard.edu"}). This can be modified atomically +or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} -\item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or \code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, such as +\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}.} } \value{ A list. diff --git a/man/publish_sword_dataset.Rd b/man/publish_sword_dataset.Rd index c65a026..34c2e90 100644 --- a/man/publish_sword_dataset.Rd +++ b/man/publish_sword_dataset.Rd @@ -14,11 +14,19 @@ publish_sword_dataset( \arguments{ \item{dataset}{A dataset DOI (or other persistent identifier), an object of class \dQuote{dataset_atom} or \dQuote{dataset_statement}, or an appropriate and complete SWORD URL.} -\item{key}{A character string specifying a Dataverse server API key. If one is not specified, functions calling authenticated API endpoints will fail. Keys can be specified atomically or globally using \code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} +\item{key}{A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +\code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} -\item{server}{A character string specifying a Dataverse server. There are multiple Dataverse installations, but the defaults is to use the Harvard Dataverse. This can be modified atomically or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} +\item{server}{A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (\code{server = "dataverse.harvard.edu"}). This can be modified atomically +or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} -\item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or \code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, such as +\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}.} } \value{ A list. diff --git a/man/service_document.Rd b/man/service_document.Rd index 45f4c29..a11a7a6 100644 --- a/man/service_document.Rd +++ b/man/service_document.Rd @@ -11,11 +11,19 @@ service_document( ) } \arguments{ -\item{key}{A character string specifying a Dataverse server API key. If one is not specified, functions calling authenticated API endpoints will fail. Keys can be specified atomically or globally using \code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} +\item{key}{A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +\code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} -\item{server}{A character string specifying a Dataverse server. There are multiple Dataverse installations, but the defaults is to use the Harvard Dataverse. This can be modified atomically or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} +\item{server}{A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (\code{server = "dataverse.harvard.edu"}). This can be modified atomically +or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} -\item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or \code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, such as +\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}.} } \value{ A list of class \dQuote{sword_service_document}, possibly with one or more \dQuote{sword_collection} entries. The latter are SWORD representations of a Dataverse. These can be passed to other SWORD API functions, e.g., for creating a new dataset. diff --git a/man/set_dataverse_metadata.Rd b/man/set_dataverse_metadata.Rd index d08a693..77110c3 100644 --- a/man/set_dataverse_metadata.Rd +++ b/man/set_dataverse_metadata.Rd @@ -20,11 +20,19 @@ set_dataverse_metadata( \item{root}{A logical.} -\item{key}{A character string specifying a Dataverse server API key. If one is not specified, functions calling authenticated API endpoints will fail. Keys can be specified atomically or globally using \code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} +\item{key}{A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +\code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} -\item{server}{A character string specifying a Dataverse server. There are multiple Dataverse installations, but the defaults is to use the Harvard Dataverse. This can be modified atomically or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} +\item{server}{A character string specifying a Dataverse server. There are +multiple Dataverse installations, but the defaults is to use the Harvard +Dataverse (\code{server = "dataverse.harvard.edu"}). This can be modified atomically +or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com")}.} -\item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or \code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, such as +\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}.} } \value{ A list diff --git a/tests/testthat/manual/seed/seed-yaml.R b/tests/testthat/manual/seed/seed-yaml.R index a582b95..7b23fb8 100644 --- a/tests/testthat/manual/seed/seed-yaml.R +++ b/tests/testthat/manual/seed/seed-yaml.R @@ -1,8 +1,10 @@ import::from("magrittr", "%>%") dv <- get_dataverse("dataverse-client-r") contents <- dataverse_contents(dv) -ds_1 <- dataset_files(contents[[1]]) +ds_1 <- dataset_files(contents[[1]]) %>% + rlang::set_names(c("roster", "image")) # Manually add friendly names to each file +# ---- seed-dataverses --------------------------------------------------- get_dataverse(":root") %>% base::append(c("testing_name" = ":root")) %>% yaml::write_yaml("inst/expected-dataverse-root.yml") @@ -11,11 +13,53 @@ dv %>% base::append(c("testing_name" = "dataverse-client-r")) %>% yaml::write_yaml("inst/expected-dataverse.yml") + +# ---- seed-basketball-files --------------------------------------------------- +file_csv <- + get_dataframe_by_name( + filename = "roster-bulls-1996.tab", + dataset = "doi:10.70122/FK2/HXJVJU", + original = TRUE, + .f = readr::read_file + ) + +ds_1$roster$raw_value <- + get_dataframe_by_name( + # filename = "roster-bulls-1996.tab", + filename = ds_1$roster$label, + dataset = dirname(ds_1$roster$dataFile$persistentId), + original = TRUE, + .f = readr::read_file + ) + +ds_1$image$raw_value <- + paste0( # The yaml needs a terminal new line to mirror the real content. + get_dataframe_by_name( + # filename = "roster-bulls-1996.tab", + filename = ds_1$image$label, + dataset = dirname(ds_1$image$dataFile$persistentId), + original = TRUE, + .f = readr::read_file + ), + "\n" + ) + ds_1 %>% - rlang::set_names(c("roster", "image")) %>% # Manually add friendly names to each file + # rlang::set_names(c("roster", "image")) %>% # Manually add friendly names to each file yaml::write_yaml("inst/dataset-basketball/expected-metadata.yml") -# retrieve-from-file ------------------------------------------------------ + +# ---- save-expected-dataframe ------------------------------------------------- +ds_1$roster %>% + { + get_dataframe_by_name( + filename = .$label, + dataset = dirname(.$dataFile$persistentId) + ) + } %>% + readr::write_rds("inst/dataset-basketball/dataframe-from-tab.rds") + +# ---- practice-retrieving-from-file ------------------------------------------------------ y <- yaml::read_yaml(system.file("dataset-basketball/expected-metadata.yml", package = "dataverse")) y$roster diff --git a/tests/testthat/tests-get_dataframe-dataframe-basketball.R b/tests/testthat/tests-get_dataframe-dataframe-basketball.R new file mode 100644 index 0000000..290559f --- /dev/null +++ b/tests/testthat/tests-get_dataframe-dataframe-basketball.R @@ -0,0 +1,39 @@ +# See https://demo.dataverse.org/dataverse/dataverse-client-r +# https://doi.org/10.70122/FK2/HXJVJU + +test_that("roster-by-name", { + expected_ds <- retrieve_info_dataset("dataset-basketball/expected-metadata.yml") + expected_file <- readr::read_rds(system.file("dataset-basketball/dataframe-from-tab.rds", package = "dataverse")) + + actual <- + get_dataframe_by_name( + filename = expected_ds$roster$label , # A value like "roster-bulls-1996.tab", + dataset = dirname(expected_ds$roster$dataFile$persistentId)#, # A value like "doi:10.70122/FK2/HXJVJU", + ) + + expect_equal(actual, expected_file) +}) + +test_that("roster-by-doi", { + expected_ds <- retrieve_info_dataset("dataset-basketball/expected-metadata.yml") + expected_file <- readr::read_rds(system.file("dataset-basketball/dataframe-from-tab.rds", package = "dataverse")) + + actual <- + get_dataframe_by_doi( + filedoi = expected_ds$roster$dataFile$persistentId, # A value like "doi:10.70122/FK2/HXJVJU/SA3Z2V", + ) + + expect_equal(actual, expected_file) +}) + +test_that("roster-by-id", { + expected_ds <- retrieve_info_dataset("dataset-basketball/expected-metadata.yml") + expected_file <- readr::read_rds(system.file("dataset-basketball/dataframe-from-tab.rds", package = "dataverse")) + + actual <- + get_dataframe_by_id( + fileid = expected_ds$roster$dataFile$id, # A value like 1734005 + ) + + expect_equal(actual, expected_file) +}) diff --git a/tests/testthat/tests-get_dataframe-original-basketball.R b/tests/testthat/tests-get_dataframe-original-basketball.R new file mode 100644 index 0000000..c26c900 --- /dev/null +++ b/tests/testthat/tests-get_dataframe-original-basketball.R @@ -0,0 +1,106 @@ +# See https://demo.dataverse.org/dataverse/dataverse-client-r +# https://doi.org/10.70122/FK2/HXJVJU + +test_that("roster-by-name", { + expected_ds <- retrieve_info_dataset("dataset-basketball/expected-metadata.yml") + expected_file <- expected_ds$roster$raw_value + + actual <- + get_dataframe_by_name( + filename = expected_ds$roster$label , # A value like "roster-bulls-1996.tab", + dataset = dirname(expected_ds$roster$dataFile$persistentId), # A value like "doi:10.70122/FK2/HXJVJU", + original = TRUE, + .f = readr::read_file + ) + + expect_equal(substr(actual, 1, 30), substr(expected_file, 1, 30)) + expect_equal(nchar( actual ), nchar( expected_file )) + + expect_equal(actual, expected_file) +}) + +test_that("roster-by-doi", { + expected_ds <- retrieve_info_dataset("dataset-basketball/expected-metadata.yml") + expected_file <- expected_ds$roster$raw_value + + actual <- + get_dataframe_by_doi( + filedoi = expected_ds$roster$dataFile$persistentId, # A value like "doi:10.70122/FK2/HXJVJU/SA3Z2V", + original = TRUE, + .f = readr::read_file + ) + + expect_equal(substr(actual, 1, 30), substr(expected_file, 1, 30)) + expect_equal(nchar( actual ), nchar( expected_file )) + + expect_equal(actual, expected_file) +}) + +test_that("roster-by-id", { + expected_ds <- retrieve_info_dataset("dataset-basketball/expected-metadata.yml") + expected_file <- expected_ds$roster$raw_value + + actual <- + get_dataframe_by_id( + fileid = expected_ds$roster$dataFile$id, # A value like 1734005 + original = TRUE, + .f = readr::read_file + ) + + expect_equal(substr(actual, 1, 30), substr(expected_file, 1, 30)) + expect_equal(nchar( actual ), nchar( expected_file )) + + expect_equal(actual, expected_file) +}) + +test_that("image-by-name", { + expected_ds <- retrieve_info_dataset("dataset-basketball/expected-metadata.yml") + expected_file <- expected_ds$image$raw_value + + actual <- + get_dataframe_by_name( + filename = expected_ds$image$label , #"vector-basketball.svg", + dataset = dirname(expected_ds$image$dataFile$persistentId), #"doi:10.70122/FK2/HXJVJU", + original = TRUE, + .f = readr::read_file + ) + + expect_equal(substr(actual, 1, 30), substr(expected_file, 1, 30)) + expect_equal(nchar( actual ), nchar( expected_file )) + + expect_equal(actual, expected_file) +}) + +test_that("image-by-doi", { + expected_ds <- retrieve_info_dataset("dataset-basketball/expected-metadata.yml") + expected_file <- expected_ds$image$raw_value + + actual <- + get_dataframe_by_doi( + filedoi = expected_ds$image$dataFile$persistentId, # A value like "doi:10.70122/FK2/HXJVJU/FHV8ZB", + original = TRUE, + .f = readr::read_file + ) + + expect_equal(substr(actual, 1, 30), substr(expected_file, 1, 30)) + expect_equal(nchar( actual ), nchar( expected_file )) + + expect_equal(actual, expected_file) +}) + +test_that("image-by-id", { + expected_ds <- retrieve_info_dataset("dataset-basketball/expected-metadata.yml") + expected_file <- expected_ds$image$raw_value + + actual <- + get_dataframe_by_id( + fileid = expected_ds$image$dataFile$id, # A value like 1734006 + original = TRUE, + .f = readr::read_file + ) + + expect_equal(substr(actual, 1, 30), substr(expected_file, 1, 30)) + expect_equal(nchar( actual ), nchar( expected_file )) + + expect_equal(actual, expected_file) +}) diff --git a/vignettes/A-introduction.Rmd b/vignettes/A-introduction.Rmd index 3f1501f..9b380cb 100644 --- a/vignettes/A-introduction.Rmd +++ b/vignettes/A-introduction.Rmd @@ -15,7 +15,7 @@ vignette: > %\VignetteEncoding{UTF-8} --- -The **dataverse** package is the official R client for [Dataverse 4](http://dataverse.org/) data repositories. The package enables data search, retrieval, and deposit with any Dataverse installation, thus allowing R users to integrate public data sharing into the reproducible research workflow. +The **dataverse** package is the official R client for [Dataverse 4](https://dataverse.org/) data repositories. The package enables data search, retrieval, and deposit with any Dataverse installation, thus allowing R users to integrate public data sharing into the reproducible research workflow. In addition to this introduction, the package contains three additional vignettes covering: @@ -66,7 +66,7 @@ get_file_metadata() get_file() ``` -For "native" Dataverse features (such as user account controls) or to create and publish a dataset, you will need an API key linked to a Dataverse installation account. Instructions for obtaining an account and setting up an API key are available in the [Dataverse User Guide](http://guides.dataverse.org/en/latest/user/account.html). (Note: if your key is compromised, it can be regenerated to preserve security.) Once you have an API key, this should be stored as an environment variable called `DATAVERSE_KEY`. It can be set within R using: +For "native" Dataverse features (such as user account controls) or to create and publish a dataset, you will need an API key linked to a Dataverse installation account. Instructions for obtaining an account and setting up an API key are available in the [Dataverse User Guide](https://guides.dataverse.org/en/latest/user/account.html). (Note: if your key is compromised, it can be regenerated to preserve security.) Once you have an API key, this should be stored as an environment variable called `DATAVERSE_KEY`. It can be set within R using: ```R Sys.setenv("DATAVERSE_KEY" = "examplekey12345") @@ -100,7 +100,7 @@ Your data are now publicly accessible. ## Appendix: dvn to dataverse Crosswalk -The original Dataverse client for R was called [dvn](https://cran.r-project.org/web/packages/dvn/index.html); it worked with Dataverse versions <= 3 and was removed from CRAN in favor of [dataverse](https://CRAN.R-project.org/package=dataverse) in 2018. dvn provided functionality for searching, retrieving, and depositing data. Here is a cross-walk of functionality in case you were already familiar with the dvn package: +The original Dataverse client for R was called [dvn](https://CRAN.R-project.org/package=dvn); it worked with Dataverse versions <= 3 and was removed from CRAN in favor of [dataverse](https://CRAN.R-project.org/package=dataverse) in 2018. dvn provided functionality for searching, retrieving, and depositing data. Here is a cross-walk of functionality in case you were already familiar with the dvn package: | API Category | **dataverse** functions | **dvn** functions | | ------------ | ----------------------- | ----------------- | diff --git a/vignettes/A-introduction.Rmd2 b/vignettes/A-introduction.Rmd2 index d33d8d3..22413ef 100644 --- a/vignettes/A-introduction.Rmd2 +++ b/vignettes/A-introduction.Rmd2 @@ -15,7 +15,7 @@ vignette: > %\VignetteEncoding{UTF-8} --- -The **dataverse** package is the official R client for [Dataverse 4](http://dataverse.org/) data repositories. The package enables data search, retrieval, and deposit with any Dataverse installation, thus allowing R users to integrate public data sharing into the reproducible research workflow. +The **dataverse** package is the official R client for [Dataverse 4](https://dataverse.org/) data repositories. The package enables data search, retrieval, and deposit with any Dataverse installation, thus allowing R users to integrate public data sharing into the reproducible research workflow. In addition to this introduction, the package contains three additional vignettes covering: @@ -46,7 +46,7 @@ library("dataverse") Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu") ``` -This should be the Dataverse server, without the "https" prefix or the "/api" URL path, etc. The package attempts to compensate for any malformed values, though. +This should be the Dataverse server, without the "https" prefix or the "/api" URL path, etc. The package attempts to compensate for any malformed values, though. Within a given Dataverse installation, organizations or individuals can create objects that are also called "Dataverses". These Dataverses can then contain other *dataverses*, which can contain other *dataverses*, and so on. They can also contain *datasets* which in turn contain files. You can think of Harvard's Dataverse as a top-level installation, where an institution might have a *dataverse* that contains a subsidiary *dataverse* for each researcher at the organization, who in turn publishes all files relevant to a given study as a *dataset*. @@ -65,7 +65,7 @@ get_file_metadata() get_file() ``` -For "native" Dataverse features (such as user account controls) or to create and publish a dataset, you will need an API key linked to a Dataverse installation account. Instructions for obtaining an account and setting up an API key are available in the [Dataverse User Guide](http://guides.dataverse.org/en/latest/user/account.html). (Note: if your key is compromised, it can be regenerated to preserve security.) Once you have an API key, this should be stored as an environment variable called `DATAVERSE_KEY`. It can be set within R using: +For "native" Dataverse features (such as user account controls) or to create and publish a dataset, you will need an API key linked to a Dataverse installation account. Instructions for obtaining an account and setting up an API key are available in the [Dataverse User Guide](https://guides.dataverse.org/en/latest/user/account.html). (Note: if your key is compromised, it can be regenerated to preserve security.) Once you have an API key, this should be stored as an environment variable called `DATAVERSE_KEY`. It can be set within R using: ```R Sys.setenv("DATAVERSE_KEY" = "examplekey12345") diff --git a/vignettes/C-retrieval.Rmd b/vignettes/C-retrieval.Rmd index 71d91ad..2b24ded 100644 --- a/vignettes/C-retrieval.Rmd +++ b/vignettes/C-retrieval.Rmd @@ -17,11 +17,11 @@ vignette: > -This vignette shows how to download data from Dataverse using the dataverse package. We'll focus on a Dataverse repository that contains supplemental files for [Jamie Monogan](http://spia.uga.edu/faculty-member/jamie-monogan/)'s book [*Political Analysis Using R*](http://www.springer.com/gb/book/9783319234458), which is stored at Harvard University's [IQSS Dataverse Network](https://dataverse.harvard.edu/): +This vignette shows how to download data from Dataverse using the dataverse package. We'll focus on a Dataverse repository that contains supplemental files for [Jamie Monogan](https://spia.uga.edu/faculty-member/jamie-monogan/)'s book [*Political Analysis Using R*](https://www.springer.com/gb/book/9783319234458), which is stored at Harvard University's [IQSS Dataverse Network](https://dataverse.harvard.edu/): > Monogan, Jamie, 2015, "Political Analysis Using R: Example Code and Data, Plus Data for Practice Problems", [doi:10.7910/DVN/ARKOTI](https://doi.org/10.7910/DVN/ARKOTI), Harvard Dataverse, V1, UNF:6:+itU9hcUJ8I9E0Kqv8HWHg== -This study is persistently retrievable by a "[Digital Object Identifier (DOI)](https://www.doi.org/)": https://doi.org/10.7910/DVN/ARKOTI and the citation above (taken from the Dataverse page) includes a "[Universal Numeric Fingerprint (UNF)](http://guides.dataverse.org/en/latest/developers/unf/index.html)": `UNF:6:+itU9hcUJ8I9E0Kqv8HWHg==`, which provides a versioned, multi-file hash for the entire study, which contains 32 files. +This study is persistently retrievable by a "[Digital Object Identifier (DOI)](https://www.doi.org/)": https://doi.org/10.7910/DVN/ARKOTI and the citation above (taken from the Dataverse page) includes a "[Universal Numeric Fingerprint (UNF)](https://guides.dataverse.org/en/latest/developers/unf/index.html)": `UNF:6:+itU9hcUJ8I9E0Kqv8HWHg==`, which provides a versioned, multi-file hash for the entire study, which contains 32 files. If you don't already know what datasets and files you want to use from Dataverse, see the ["Data Search" vignette](B-search.html) for guidance on data search and discovery. diff --git a/vignettes/C-retrieval.Rmd2 b/vignettes/C-retrieval.Rmd2 index 15c6b77..a6c5230 100644 --- a/vignettes/C-retrieval.Rmd2 +++ b/vignettes/C-retrieval.Rmd2 @@ -20,11 +20,11 @@ options(width = 120) knitr::opts_chunk$set(results = "hold") ``` -This vignette shows how to download data from Dataverse using the dataverse package. We'll focus on a Dataverse repository that contains supplemental files for [Jamie Monogan](http://spia.uga.edu/faculty-member/jamie-monogan/)'s book [*Political Analysis Using R*](http://www.springer.com/gb/book/9783319234458), which is stored at Harvard University's [IQSS Dataverse Network](https://dataverse.harvard.edu/): +This vignette shows how to download data from Dataverse using the dataverse package. We'll focus on a Dataverse repository that contains supplemental files for [Jamie Monogan](https://spia.uga.edu/faculty-member/jamie-monogan/)'s book [*Political Analysis Using R*](https://www.springer.com/gb/book/9783319234458), which is stored at Harvard University's [IQSS Dataverse Network](https://dataverse.harvard.edu/): > Monogan, Jamie, 2015, "Political Analysis Using R: Example Code and Data, Plus Data for Practice Problems", [doi:10.7910/DVN/ARKOTI](https://doi.org/10.7910/DVN/ARKOTI), Harvard Dataverse, V1, UNF:6:+itU9hcUJ8I9E0Kqv8HWHg== -This study is persistently retrievable by a "[Digital Object Identifier (DOI)](https://www.doi.org/)": https://doi.org/10.7910/DVN/ARKOTI and the citation above (taken from the Dataverse page) includes a "[Universal Numeric Fingerprint (UNF)](http://guides.dataverse.org/en/latest/developers/unf/index.html)": `UNF:6:+itU9hcUJ8I9E0Kqv8HWHg==`, which provides a versioned, multi-file hash for the entire study, which contains 32 files. +This study is persistently retrievable by a "[Digital Object Identifier (DOI)](https://www.doi.org/)": https://doi.org/10.7910/DVN/ARKOTI and the citation above (taken from the Dataverse page) includes a "[Universal Numeric Fingerprint (UNF)](https://guides.dataverse.org/en/latest/developers/unf/index.html)": `UNF:6:+itU9hcUJ8I9E0Kqv8HWHg==`, which provides a versioned, multi-file hash for the entire study, which contains 32 files. If you don't already know what datasets and files you want to use from Dataverse, see the ["Data Search" vignette](B-search.html) for guidance on data search and discovery. @@ -44,7 +44,7 @@ The output prints some basic metadata and then the `str()` of the `files` data f dataset$files[c("filename", "contentType")] ``` -This shows that there are indeed 32 files, a mix of .R code files and tab- and comma-separated data files. +This shows that there are indeed 32 files, a mix of .R code files and tab- and comma-separated data files. You can also retrieve more extensive metadata using `dataset_metadata()`: @@ -67,9 +67,9 @@ writeBin(code3, "chapter03.R") Now we'll get the corresponding data and save it locally. For this code we need two data files: ```{r} -writeBin(get_file("constructionData.tab", "doi:10.7910/DVN/ARKOTI"), +writeBin(get_file("constructionData.tab", "doi:10.7910/DVN/ARKOTI"), "constructionData.dta") -writeBin(get_file("PESenergy.csv", "doi:10.7910/DVN/ARKOTI"), +writeBin(get_file("PESenergy.csv", "doi:10.7910/DVN/ARKOTI"), "PESenergy.csv") ```