diff --git a/NAMESPACE b/NAMESPACE
index 4440fc54c..d020e9c1d 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -325,6 +325,7 @@ exportMethods(filters)
 exportMethods(func)
 exportMethods(funcs)
 exportMethods(geo)
+exportMethods(head)
 exportMethods(hide)
 exportMethods(id)
 exportMethods(ids)
@@ -370,6 +371,7 @@ exportMethods(subset)
 exportMethods(subtotalArray)
 exportMethods(subtotals)
 exportMethods(subvariables)
+exportMethods(tail)
 exportMethods(timestamps)
 exportMethods(toVariable)
 exportMethods(transforms)
diff --git a/R/AllGenerics.R b/R/AllGenerics.R
index 257cf8d44..65ddaa489 100644
--- a/R/AllGenerics.R
+++ b/R/AllGenerics.R
@@ -143,6 +143,8 @@ setGeneric("is.weight<-", function (x, value) standardGeneric("is.weight<-"))
 setGeneric("owner", function (x) standardGeneric("owner"))
 setGeneric("owner<-", function (x, value) standardGeneric("owner<-"))
+setGeneric("head", function(x, n=6L, ...) utils::head(x, n, ...))
+setGeneric("tail", function(x, n=6L, ...) utils::tail(x, n, ...))
 setGeneric("dim")
 setGeneric("ncol")
 setGeneric("mean")
diff --git a/R/as-data-frame.R b/R/as-data-frame.R
index 4874156f9..600d68ebd 100644
--- a/R/as-data-frame.R
+++ b/R/as-data-frame.R
@@ -83,7 +83,7 @@ as.data.frame.CrunchDataFrame <- function (x,
     ds <- attr(x, "crunchDataset")
     tmp <- tempfile()
     on.exit(unlink(tmp))
-    write.csv(ds, tmp, categorical = "id")
+    tmp <- write.csv(ds, tmp, categorical = "id")
     # TODO: use variableMetadata to provide all `colClasses`?
     # meta <- variableMetadata(ds)
     ds_out <- read.csv(tmp, stringsAsFactors = FALSE)
@@ -192,3 +192,48 @@ as.data.frame.FilterCatalog <- function (x, ...) {
     catalogToDataFrame(x, keys = keys, row.names = row.names, ...)
 }
+
+#' Head and tail methods for Crunch objects. See [utils::head()] for more details.
+#'
+#' @param x a CrunchDataset, CrunchDataFrame, or CrunchVariable
+#' @param n a single integer: the number of rows or values to return
+#' @param ... ignored
+#' @name head-tail
+#' @aliases head tail
+NULL
+
+#' @rdname head-tail
+#' @export
+setMethod("head", "CrunchDataset", function (x, n=6L, ...) {
+    as.data.frame(x[head(seq_len(nrow(x)), n),], force=TRUE)
+})
+
+#' @rdname head-tail
+#' @export
+setMethod("head", "CrunchDataFrame", function (x, n=6L, ...) {
+    return(head(attr(x, "crunchDataset"), n))
+})
+
+#' @rdname head-tail
+#' @export
+setMethod("head", "CrunchVariable", function (x, n=6L, ...) {
+    as.vector(x[head(seq_len(length(x)), n)])
+})
+
+#' @rdname head-tail
+#' @export
+setMethod("tail", "CrunchDataset", function (x, n=6L, ...) {
+    as.data.frame(x[tail(seq_len(nrow(x)), n),], force=TRUE)
+})
+
+#' @rdname head-tail
+#' @export
+setMethod("tail", "CrunchDataFrame", function (x, n=6L, ...) {
+    return(tail(attr(x, "crunchDataset"), n))
+})
+
+#' @rdname head-tail
+#' @export
+setMethod("tail", "CrunchVariable", function (x, n=6L, ...) {
+    as.vector(x[tail(seq_len(length(x)), n)])
+})
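For reviewers, a quick usage sketch of the new head/tail methods (the dataset and variable names here are illustrative only, not part of this change):

    library(crunch)
    ds <- loadDataset("Example survey")   # hypothetical dataset name
    head(ds)                  # first 6 rows, returned as a local data.frame
    tail(ds, 10)              # last 10 rows
    head(ds$q1)               # first 6 values of a single variable, via as.vector()
    tail(as.data.frame(ds))   # the CrunchDataFrame method delegates to its dataset

Because the row selection is expressed as a Crunch filter, only the requested rows are exported or fetched, as the new export and values fixtures below show.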
diff --git a/R/dataset-extract.R b/R/dataset-extract.R
index 15cb58ca3..cd985caa7 100644
--- a/R/dataset-extract.R
+++ b/R/dataset-extract.R
@@ -54,6 +54,7 @@ setMethod("[", c("CrunchDataset", "logical", "missing"), function (x, i, j, ...,
         }
         i <- CrunchLogicalExpr(dataset_url=datasetReference(x),
             expression=.dispatchFilter(i))
+        activeFilter(i) <- activeFilter(x)
         return(x[i,])
     } else {
         halt("Logical filter vector is length ", length(i),
@@ -131,6 +132,58 @@ setMethod("[", c("CrunchDataset", "CrunchLogicalExpr", "ANY"), function (x, i, j
     return(x[j])
 })
+#' @rdname dataset-extract
+#' @export
+setMethod("[", c("CrunchDataset", "numeric", "missing"), function (x, i, j, ..., drop=FALSE) {
+    if (nargs() == 2L) {
+        ## x[i]. So subset the variables, list-wise
+        x@variables <- variables(x)[i]
+        return(x)
+    }
+    filt <- activeFilter(x)
+    if (!is.null(filt)) {
+        return(harmonizeFilters(x, filt, i))
+    } else {
+        return(x[seq_len(nrow(x)) %in% i, ])
+    }
+})
+
+
+#' Sometimes you want to subset a filtered Crunch object with a numeric vector.
+#' To do that, we first find the rows that match the active filter and then
+#' index into those rows. For instance, given a filtered dataset
+#' `ds_filt <- ds[ds$var == 5, ]`, `ds_filt[1:5]` should return the first five
+#' rows where `ds$var == 5`. This function takes a filtered object and a numeric
+#' index and returns the correctly subsetted, still-filtered object.
+#' @rdname dataset-extract
+#' @keywords internal
+#' @param x a filtered CrunchDataset or CrunchVariable
+#' @param filt the object's active filter
+#' @param i a numeric vector to harmonize with that filter
+#' @return a properly filtered CrunchDataset or CrunchVariable
+harmonizeFilters <- function (x, filt, i) {
+    filt_lgl <- as.vector(filt)
+    unfiltered <- x
+    activeFilter(unfiltered) <- NULL
+    if (is.dataset(x)) {
+        out <- unfiltered[seq_len(nrow(unfiltered)) %in% which(filt_lgl)[i], ]
+    } else if (is.variable(x)) {
+        out <- unfiltered[seq_len(length(unfiltered)) %in% which(filt_lgl)[i]]
+    } else {
+        halt("Unsupported object type")
+    }
+    activeFilter(out) <- filt & activeFilter(out)
+    return(out)
+}
+
+#' @rdname dataset-extract
+#' @export
+setMethod("[", c("CrunchDataset", "numeric", "ANY"), function (x, i, j, ..., drop=FALSE) {
+    ## Do the filtering of rows, then cols
+    x <- x[i,]
+    return(x[j])
+})
+
 #' @rdname dataset-extract
 #' @export
 setMethod("subset", "CrunchDataset", function (x, ...) {
diff --git a/R/variable.R b/R/variable.R
index 5a0ff229d..ee82a29b5 100644
--- a/R/variable.R
+++ b/R/variable.R
@@ -69,6 +69,15 @@ setMethod("digits<-", "CrunchVariable", function (x, value) {
     halt("digit specifications can only be set for numeric variables")
 })
+#' @rdname describe
+#' @export
+setMethod("length", "CrunchVariable", function (x) {
+    ds <- loadDataset(datasetReference(x))
+    activeFilter(ds) <- activeFilter(x)
+    return(nrow(ds))
+})
+
+
 #' Get and set Categories on Variables
 #'
 #' @param x a Variable
@@ -226,9 +235,12 @@ setMethod("[", c("CrunchVariable", "CrunchExpr"), .updateActiveFilter)
 #' @rdname variable-extract
 #' @export
 setMethod("[", c("CrunchVariable", "numeric"), function (x, i, ...)
{ - i <- CrunchLogicalExpr(dataset_url=datasetReference(x), - expression=.dispatchFilter(i)) - return(x[i]) + filt <- activeFilter(x) + if (!is.null(filt)) { + return(harmonizeFilters(x, filt, i)) + } else { + return(x[seq_len(length(x)) %in% i]) + } }) #' @rdname variable-extract #' @export diff --git a/inst/app.crunch.io/api/datasets/1/export/csv-3a22fd-POST.R b/inst/app.crunch.io/api/datasets/1/export/csv-3a22fd-POST.R new file mode 100644 index 000000000..4a3ecefe3 --- /dev/null +++ b/inst/app.crunch.io/api/datasets/1/export/csv-3a22fd-POST.R @@ -0,0 +1,71 @@ +structure(list(url = "https://app.crunch.io/api/datasets/1/export/csv/", + status_code = 202L, headers = structure(list(allow = "GET, HEAD, OPTIONS, POST", + `content-encoding` = "gzip", `content-type` = "application/json;charset=utf-8", + date = "Fri, 12 Jan 2018 14:23:13 GMT", location = "dataset_exports/test-ds.csv?Signature=SKknqNLKd2vJu3EcxKhOJxOjp4c%3D&Expires=1515770593&AWSAccessKeyId=AKIAJT4CEBNJXNPF3NZA", + server = "nginx", `set-cookie` = "REDACTED", vary = "Cookie, Accept-Encoding", + `x-timing` = "", `content-length` = "141", connection = "keep-alive"), .Names = c("allow", + "content-encoding", "content-type", "date", "location", "server", + "set-cookie", "vary", "x-timing", "content-length", "connection" + ), class = c("insensitive", "list")), all_headers = list( + structure(list(status = 202L, version = "HTTP/1.1", headers = structure(list( + allow = "GET, HEAD, OPTIONS, POST", `content-encoding` = "gzip", + `content-type` = "application/json;charset=utf-8", + date = "Fri, 12 Jan 2018 14:23:13 GMT", location = "dataset_exports/test-ds.csv?Signature=SKknqNLKd2vJu3EcxKhOJxOjp4c%3D&Expires=1515770593&AWSAccessKeyId=AKIAJT4CEBNJXNPF3NZA", + server = "nginx", `set-cookie` = "REDACTED", vary = "Cookie, Accept-Encoding", + `x-timing` = "", `content-length` = "141", connection = "keep-alive"), .Names = c("allow", + "content-encoding", "content-type", "date", "location", + "server", "set-cookie", "vary", "x-timing", "content-length", + "connection"), class = c("insensitive", "list"))), .Names = c("status", + "version", "headers"))), cookies = structure(list(domain = ".crunch.io", + flag = TRUE, path = "/", secure = FALSE, expiration = structure(1547302993, class = c("POSIXct", + "POSIXt")), name = "token", value = "REDACTED"), .Names = c("domain", + "flag", "path", "secure", "expiration", "name", "value"), row.names = c(NA, + -1L), class = "data.frame"), content = as.raw(c(0x7b, 0x22, + 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x22, 0x3a, 0x20, + 0x22, 0x73, 0x68, 0x6f, 0x6a, 0x69, 0x3a, 0x76, 0x69, 0x65, + 0x77, 0x22, 0x2c, 0x20, 0x22, 0x73, 0x65, 0x6c, 0x66, 0x22, + 0x3a, 0x20, 0x22, 0x68, 0x74, 0x74, 0x70, 0x73, 0x3a, 0x2f, + 0x2f, 0x61, 0x70, 0x70, 0x2e, 0x63, 0x72, 0x75, 0x6e, 0x63, + 0x68, 0x2e, 0x69, 0x6f, 0x2f, 0x61, 0x70, 0x69, 0x2f, 0x64, + 0x61, 0x74, 0x61, 0x73, 0x65, 0x74, 0x73, 0x2f, 0x34, 0x31, + 0x36, 0x32, 0x32, 0x35, 0x61, 0x35, 0x63, 0x38, 0x38, 0x37, + 0x34, 0x33, 0x66, 0x65, 0x61, 0x30, 0x37, 0x65, 0x62, 0x36, + 0x38, 0x31, 0x31, 0x37, 0x63, 0x61, 0x34, 0x37, 0x61, 0x64, + 0x2f, 0x65, 0x78, 0x70, 0x6f, 0x72, 0x74, 0x2f, 0x63, 0x73, + 0x76, 0x2f, 0x22, 0x2c, 0x20, 0x22, 0x76, 0x61, 0x6c, 0x75, + 0x65, 0x22, 0x3a, 0x20, 0x22, 0x68, 0x74, 0x74, 0x70, 0x73, + 0x3a, 0x2f, 0x2f, 0x61, 0x70, 0x70, 0x2e, 0x63, 0x72, 0x75, + 0x6e, 0x63, 0x68, 0x2e, 0x69, 0x6f, 0x2f, 0x61, 0x70, 0x69, + 0x2f, 0x70, 0x72, 0x6f, 0x67, 0x72, 0x65, 0x73, 0x73, 0x2f, + 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x2f, 0x22, 0x7d + )), 
date = structure(1515766993, class = c("POSIXct", "POSIXt" + ), tzone = "GMT"), times = structure(c(0, 0.129607, 0.23883, + 0.501342, 0.629068, 0.629116), .Names = c("redirect", "namelookup", + "connect", "pretransfer", "starttransfer", "total")), request = structure(list( + method = "POST", url = "https://app.crunch.io/api/datasets/1/export/csv/", + headers = structure(c("application/json, text/xml, application/xml, */*", + "", "libcurl/7.54.0 curl/3.0 httr/1.3.1 rcrunch/1.19.1" + ), .Names = c("Accept", "Content-Type", "user-agent")), + fields = NULL, options = structure(list(useragent = "libcurl/7.54.0 r-curl/3.0 httr/1.3.1", + post = TRUE, postfieldsize = 131L, postfields = as.raw(c(0x7b, + 0x22, 0x66, 0x69, 0x6c, 0x74, 0x65, 0x72, 0x22, 0x3a, + 0x7b, 0x22, 0x66, 0x75, 0x6e, 0x63, 0x74, 0x69, 0x6f, + 0x6e, 0x22, 0x3a, 0x22, 0x62, 0x65, 0x74, 0x77, 0x65, + 0x65, 0x6e, 0x22, 0x2c, 0x22, 0x61, 0x72, 0x67, 0x73, + 0x22, 0x3a, 0x5b, 0x7b, 0x22, 0x66, 0x75, 0x6e, 0x63, + 0x74, 0x69, 0x6f, 0x6e, 0x22, 0x3a, 0x22, 0x72, 0x6f, + 0x77, 0x22, 0x2c, 0x22, 0x61, 0x72, 0x67, 0x73, 0x22, + 0x3a, 0x5b, 0x5d, 0x7d, 0x2c, 0x7b, 0x22, 0x76, 0x61, + 0x6c, 0x75, 0x65, 0x22, 0x3a, 0x30, 0x7d, 0x2c, 0x7b, + 0x22, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x22, 0x3a, 0x36, + 0x7d, 0x5d, 0x7d, 0x2c, 0x22, 0x6f, 0x70, 0x74, 0x69, + 0x6f, 0x6e, 0x73, 0x22, 0x3a, 0x7b, 0x22, 0x75, 0x73, + 0x65, 0x5f, 0x63, 0x61, 0x74, 0x65, 0x67, 0x6f, 0x72, + 0x79, 0x5f, 0x69, 0x64, 0x73, 0x22, 0x3a, 0x74, 0x72, + 0x75, 0x65, 0x7d, 0x7d)), postredir = 3), .Names = c("useragent", + "post", "postfieldsize", "postfields", "postredir")), + output = structure(list(), class = c("write_memory", + "write_function"))), .Names = c("method", "url", "headers", + "fields", "options", "output"), class = "request")), .Names = c("url", +"status_code", "headers", "all_headers", "cookies", "content", +"date", "times", "request"), class = "response") diff --git a/inst/app.crunch.io/api/datasets/1/summary-10d0ac.json b/inst/app.crunch.io/api/datasets/1/summary-10d0ac.json new file mode 100644 index 000000000..d3eb446f0 --- /dev/null +++ b/inst/app.crunch.io/api/datasets/1/summary-10d0ac.json @@ -0,0 +1,17 @@ +{ + "element": + "shoji:view", + "self": "https://app.crunch.io/api/datasets/1/summary/?filter=%7B%22function%22%3A%22between%22%2C%22args%22%3A%5B%7B%22function%22%3A%22row%22%2C%22args%22%3A%5B%5D%7D%2C%7B%22value%22%3A19%7D%2C%7B%22value%22%3A25%7D%5D%7D", + "value": { + "unweighted": { + "filtered": 6, + "total": 25 + }, + "variables": 6, + "weighted": { + "filtered": 6, + "total": 25 + }, + "columns": 6 + } +} \ No newline at end of file diff --git a/inst/app.crunch.io/api/datasets/1/summary-57a14f.json b/inst/app.crunch.io/api/datasets/1/summary-57a14f.json new file mode 100644 index 000000000..f0e9eeddd --- /dev/null +++ b/inst/app.crunch.io/api/datasets/1/summary-57a14f.json @@ -0,0 +1,16 @@ +{ + "element": "shoji:view", + "self": "https://app.crunch.io/api/datasets/1/summary/?filter=%7B%22function%22%3A%22between%22%2C%22args%22%3A%5B%7B%22function%22%3A%22row%22%2C%22args%22%3A%5B%5D%7D%2C%7B%22value%22%3A0%7D%2C%7B%22value%22%3A6%7D%5D%7D", + "value": { + "unweighted": { + "filtered": 6, + "total": 25 + }, + "variables": 6, + "weighted": { + "filtered": 6, + "total": 25 + }, + "columns": 6 + } +} \ No newline at end of file diff --git a/inst/app.crunch.io/api/datasets/1/variables/gender/values-686a6e.json b/inst/app.crunch.io/api/datasets/1/variables/gender/values-686a6e.json new file mode 100644 index 000000000..fdece5004 --- /dev/null +++ 
b/inst/app.crunch.io/api/datasets/1/variables/gender/values-686a6e.json @@ -0,0 +1,5 @@ +{ + "element": "shoji:view", + "self": "https://app.crunch.io/api/datasets/1/variables/gender/values/?filter=%7B%22function%22%3A%22between%22%2C%22args%22%3A%5B%7B%22function%22%3A%22row%22%2C%22args%22%3A%5B%5D%7D%2C%7B%22value%22%3A0%7D%2C%7B%22value%22%3A6%7D%5D%7D&offset=0&limit=5000", + "value": ["w", "n", "x", "b", "q", "s"] +} \ No newline at end of file diff --git a/inst/app.crunch.io/api/datasets/1/variables/gender/values-80c80e.json b/inst/app.crunch.io/api/datasets/1/variables/gender/values-80c80e.json new file mode 100644 index 000000000..7c943cfcc --- /dev/null +++ b/inst/app.crunch.io/api/datasets/1/variables/gender/values-80c80e.json @@ -0,0 +1,5 @@ +{ + "element": "shoji:view", + "self": "https://app.crunch.io/api/datasets/1/variables/gender/values/?filter=%7B%22function%22%3A%22between%22%2C%22args%22%3A%5B%7B%22function%22%3A%22row%22%2C%22args%22%3A%5B%5D%7D%2C%7B%22value%22%3A0%7D%2C%7B%22value%22%3A4%7D%5D%7D&offset=0&limit=200000", + "value": [2, 2, {"?": -1}, 2] +} \ No newline at end of file diff --git a/inst/app.crunch.io/api/datasets/1/variables/starttime/values-0b0abd.json b/inst/app.crunch.io/api/datasets/1/variables/starttime/values-0b0abd.json new file mode 100644 index 000000000..7cbba47ac --- /dev/null +++ b/inst/app.crunch.io/api/datasets/1/variables/starttime/values-0b0abd.json @@ -0,0 +1,5 @@ +{ + "element": "shoji:view", + "self": "https://app.crunch.io/api/datasets/1/variables/starttime/values/?filter=%7B%22function%22%3A%22in%22%2C%22args%22%3A%5B%7B%22function%22%3A%22row%22%2C%22args%22%3A%5B%5D%7D%2C%7B%22column%22%3A%5B0%2C17%5D%7D%5D%7D&offset=0&limit=100000", + "value": ["1956-02-13", "1956-01-28"] +} \ No newline at end of file diff --git a/inst/app.crunch.io/api/datasets/1/variables/starttime/values-0f7a5b.json b/inst/app.crunch.io/api/datasets/1/variables/starttime/values-0f7a5b.json new file mode 100644 index 000000000..d01569c88 --- /dev/null +++ b/inst/app.crunch.io/api/datasets/1/variables/starttime/values-0f7a5b.json @@ -0,0 +1,5 @@ +{ + "element": "shoji:view", + "self": "https://app.crunch.io/api/datasets/1/variables/starttime/values/?filter=%7B%22function%22%3A%22in%22%2C%22args%22%3A%5B%7B%22function%22%3A%22row%22%2C%22args%22%3A%5B%5D%7D%2C%7B%22column%22%3A%5B0%2C1%2C16%2C17%5D%7D%5D%7D&offset=0&limit=100000", + "value": ["1956-02-13", "1955-12-28", "1955-12-30", "1956-01-28"] +} \ No newline at end of file diff --git a/inst/app.crunch.io/api/datasets/1/variables/starttime/values-ff7207.json b/inst/app.crunch.io/api/datasets/1/variables/starttime/values-ff7207.json new file mode 100644 index 000000000..ac65de4d2 --- /dev/null +++ b/inst/app.crunch.io/api/datasets/1/variables/starttime/values-ff7207.json @@ -0,0 +1,5 @@ +{ + "element": "shoji:view", + "self": "https://app.crunch.io/api/datasets/1/variables/starttime/values/?filter=%7B%22function%22%3A%22between%22%2C%22args%22%3A%5B%7B%22function%22%3A%22row%22%2C%22args%22%3A%5B%5D%7D%2C%7B%22value%22%3A0%7D%2C%7B%22value%22%3A25%7D%5D%7D&offset=0&limit=100000", + "value": ["1956-02-13", "1955-12-28", "1955-11-17", "1956-02-08", "1956-01-17", "1956-01-21", "1956-02-07", "1955-12-25", "1956-01-17", "1955-12-12", "1955-11-21", "1955-12-06", "1956-01-19", "1955-12-15", "1956-02-07", "1956-02-08", "1955-12-30", "1956-01-28", "1956-01-01", "1956-01-15", "1955-11-13", "1955-11-17", "1955-11-09", "1955-12-22", "1955-12-20"] +} \ No newline at end of file diff --git 
a/inst/app.crunch.io/api/datasets/1/variables/textVar/values-0ba86f.json b/inst/app.crunch.io/api/datasets/1/variables/textVar/values-0ba86f.json new file mode 100644 index 000000000..8b3072ef2 --- /dev/null +++ b/inst/app.crunch.io/api/datasets/1/variables/textVar/values-0ba86f.json @@ -0,0 +1,5 @@ +{ + "element": "shoji:view", + "self": "https://app.crunch.io/api/datasets/1/variables/textVar/values/?filter=%7B%22function%22%3A%22between%22%2C%22args%22%3A%5B%7B%22function%22%3A%22row%22%2C%22args%22%3A%5B%5D%7D%2C%7B%22value%22%3A0%7D%2C%7B%22value%22%3A25%7D%5D%7D&offset=0&limit=5000", + "value": ["w", "n", "x", "b", "q", "s", "l", "v", "v", "y", "m", "t", "s", "e", "z", "k", "n", "w", "v", "i", "h", "z", "m", "c", "x"] +} \ No newline at end of file diff --git a/inst/app.crunch.io/api/datasets/1/variables/textVar/values-686a6e.json b/inst/app.crunch.io/api/datasets/1/variables/textVar/values-686a6e.json new file mode 100644 index 000000000..e390a6355 --- /dev/null +++ b/inst/app.crunch.io/api/datasets/1/variables/textVar/values-686a6e.json @@ -0,0 +1,5 @@ +{ + "element": "shoji:view", + "self": "https://app.crunch.io/api/datasets/1/variables/textVar/values/?filter=%7B%22function%22%3A%22between%22%2C%22args%22%3A%5B%7B%22function%22%3A%22row%22%2C%22args%22%3A%5B%5D%7D%2C%7B%22value%22%3A0%7D%2C%7B%22value%22%3A6%7D%5D%7D&offset=0&limit=5000", + "value": ["w", "n", "x", "b", "q", "s"] +} \ No newline at end of file diff --git a/inst/app.crunch.io/api/datasets/1/variables/textVar/values-9d21bb.json b/inst/app.crunch.io/api/datasets/1/variables/textVar/values-9d21bb.json new file mode 100644 index 000000000..a0d4eb4b3 --- /dev/null +++ b/inst/app.crunch.io/api/datasets/1/variables/textVar/values-9d21bb.json @@ -0,0 +1,5 @@ +{ + "element": "shoji:view", + "self": "https://app.crunch.io/api/datasets/1/variables/textVar/values/?filter=%7B%22function%22%3A%22between%22%2C%22args%22%3A%5B%7B%22function%22%3A%22row%22%2C%22args%22%3A%5B%5D%7D%2C%7B%22value%22%3A19%7D%2C%7B%22value%22%3A25%7D%5D%7D&offset=0&limit=5000", + "value": ["i", "h", "z", "m", "c", "x"] +} \ No newline at end of file diff --git a/inst/app.crunch.io/api/datasets/1/variables/textVar/values-a28816.json b/inst/app.crunch.io/api/datasets/1/variables/textVar/values-a28816.json new file mode 100644 index 000000000..1afcabd3b --- /dev/null +++ b/inst/app.crunch.io/api/datasets/1/variables/textVar/values-a28816.json @@ -0,0 +1,4 @@ +{ + "element": "shoji:view", + "self": "https://app.crunch.io/api/datasets/1/variables/textVar/values/?filter=%7B%22function%22%3A%22in%22%2C%22args%22%3A%5B%7B%22function%22%3A%22row%22%2C%22args%22%3A%5B%5D%7D%2C%7B%22column%22%3A%5B0%2C17%5D%7D%5D%7D&offset=0&limit=5000", "value": ["w", "w"] +} \ No newline at end of file diff --git a/inst/app.crunch.io/api/datasets/1/variables/textVar/values-e05e28.json b/inst/app.crunch.io/api/datasets/1/variables/textVar/values-e05e28.json new file mode 100644 index 000000000..e3d540071 --- /dev/null +++ b/inst/app.crunch.io/api/datasets/1/variables/textVar/values-e05e28.json @@ -0,0 +1,3 @@ +{"element": "shoji:view", +"self": "https://app.crunch.io/api/datasets/1/variables/textVar/values/?filter=%7B%22function%22%3A%22in%22%2C%22args%22%3A%5B%7B%22function%22%3A%22row%22%2C%22args%22%3A%5B%5D%7D%2C%7B%22column%22%3A%5B0%2C1%2C16%2C17%5D%7D%5D%7D&offset=0&limit=5000", +"value": ["w", "n", "n", "w"]} \ No newline at end of file diff --git a/inst/app.crunch.io/api/datasets/1streaming/summary-73a614.json 
b/inst/app.crunch.io/api/datasets/1streaming/summary-73a614.json new file mode 100644 index 000000000..1a2d766e0 --- /dev/null +++ b/inst/app.crunch.io/api/datasets/1streaming/summary-73a614.json @@ -0,0 +1,13 @@ +{ + "element": "shoji:view", + "self": "https://app.crunch.io/api/datasets/1streaming/summary/?filter=%7B%7D", + "value": { + "unweighted": { + "filtered": 25, + "total": 25}, + "variables": 6, + "weighted": { + "filtered": 25, + "total": 25}, + "columns": 6} +} \ No newline at end of file diff --git a/inst/app.crunch.io/api/progress/success.json b/inst/app.crunch.io/api/progress/success.json new file mode 100644 index 000000000..10066de13 --- /dev/null +++ b/inst/app.crunch.io/api/progress/success.json @@ -0,0 +1 @@ +{"element": "shoji:view", "self": "https://app.crunch.io/api/progress/success/", "views": {"result": "https://app.crunch.io/api/progress/success/result/"}, "value": {"progress": 100, "message": "complete"}} \ No newline at end of file diff --git a/inst/dataset_exports/test-ds.csv-eadc36.R b/inst/dataset_exports/test-ds.csv-eadc36.R new file mode 100644 index 000000000..fb686f3b7 --- /dev/null +++ b/inst/dataset_exports/test-ds.csv-eadc36.R @@ -0,0 +1,39 @@ +structure(list(url = "dataset_exports/test-ds.csv?Signature=SKknqNLKd2vJu3EcxKhOJxOjp4c%3D&Expires=1515770593&AWSAccessKeyId=AKIAJT4CEBNJXNPF3NZA", + status_code = 200L, headers = structure(list(`x-amz-id-2` = "hbf4NE2bXWgrKx543IbIkQWBNYuW2cYQJUA4IgyRMdh5kx+ycRWH4xpUePxeFf3NCqmTHYcLtjk=", + `x-amz-request-id` = "2FB86A9B8DC42D74", date = "Fri, 12 Jan 2018 14:23:14 GMT", + `last-modified` = "Fri, 12 Jan 2018 14:14:28 GMT", etag = "\"bcb13d034df67ff81550e9cf42e858ab\"", + `accept-ranges` = "bytes", `content-type` = "application/octet-stream", + `content-length` = "266", server = "AmazonS3"), .Names = c("x-amz-id-2", + "x-amz-request-id", "date", "last-modified", "etag", "accept-ranges", + "content-type", "content-length", "server"), class = c("insensitive", + "list")), all_headers = list(structure(list(status = 200L, + version = "HTTP/1.1", headers = structure(list(`x-amz-id-2` = "hbf4NE2bXWgrKx543IbIkQWBNYuW2cYQJUA4IgyRMdh5kx+ycRWH4xpUePxeFf3NCqmTHYcLtjk=", + `x-amz-request-id` = "2FB86A9B8DC42D74", date = "Fri, 12 Jan 2018 14:23:14 GMT", + `last-modified` = "Fri, 12 Jan 2018 14:14:28 GMT", + etag = "\"bcb13d034df67ff81550e9cf42e858ab\"", `accept-ranges` = "bytes", + `content-type` = "application/octet-stream", `content-length` = "266", + server = "AmazonS3"), .Names = c("x-amz-id-2", "x-amz-request-id", + "date", "last-modified", "etag", "accept-ranges", "content-type", + "content-length", "server"), class = c("insensitive", + "list"))), .Names = c("status", "version", "headers"))), + cookies = structure(list(domain = logical(0), flag = logical(0), + path = logical(0), secure = logical(0), expiration = structure(numeric(0), class = c("POSIXct", + "POSIXt")), name = logical(0), value = logical(0)), .Names = c("domain", + "flag", "path", "secure", "expiration", "name", "value"), row.names = integer(0), class = "data.frame"), + content = structure(httptest:::findMockFile("dataset_exports/test-ds.csv-eadc36.R-FILE"), class = "path"), + date = structure(1515766994, class = c("POSIXct", "POSIXt" + ), tzone = "GMT"), times = structure(c(0, 0.009498, 0.040893, + 0.121405, 0.173366, 0.173382), .Names = c("redirect", "namelookup", + "connect", "pretransfer", "starttransfer", "total")), request = structure(list( + method = "GET", url = 
"dataset_exports/test-ds.csv?Signature=SKknqNLKd2vJu3EcxKhOJxOjp4c%3D&Expires=1515770593&AWSAccessKeyId=AKIAJT4CEBNJXNPF3NZA", + headers = structure(c("application/json, text/xml, application/xml, */*", + "libcurl/7.54.0 curl/3.0 httr/1.3.1 rcrunch/1.19.1"), .Names = c("Accept", + "user-agent")), fields = NULL, options = structure(list( + useragent = "libcurl/7.54.0 r-curl/3.0 httr/1.3.1", + postredir = 3, httpget = TRUE), .Names = c("useragent", + "postredir", "httpget")), output = structure(list(path = "/var/folders/p2/jv1mknrj5y1gty91sn7hdsfr0000gn/T//Rtmp3p6SqV/file25eb3c38a274", + file = NULL), .Names = c("path", "file"), class = c("write_disk", + "write_function"))), .Names = c("method", "url", "headers", + "fields", "options", "output"), class = "request")), .Names = c("url", +"status_code", "headers", "all_headers", "cookies", "content", +"date", "times", "request"), class = "response") diff --git a/inst/dataset_exports/test-ds.csv-eadc36.R-FILE b/inst/dataset_exports/test-ds.csv-eadc36.R-FILE new file mode 100644 index 000000000..06a90cdf1 --- /dev/null +++ b/inst/dataset_exports/test-ds.csv-eadc36.R-FILE @@ -0,0 +1,7 @@ +birthyr,gender,location,textVar,starttime,subvar2,subvar1,subvar3 +0.57753,2,2,w,1956-02-13,2,1,1 +-1.0201,2,2,n,1955-12-28,2,1,-1 +-1.4967,-1,-1,x,1955-11-17,1,2,1 +-1.1101,2,2,b,1956-02-08,-1,-1,2 +0.70392,2,2,q,1956-01-17,1,-1,-1 +1.6662,2,2,s,1956-01-21,2,-1,1 diff --git a/man/dataset-extract.Rd b/man/dataset-extract.Rd index aba013e58..2b410a40b 100644 --- a/man/dataset-extract.Rd +++ b/man/dataset-extract.Rd @@ -11,6 +11,9 @@ \alias{[,CrunchDataset,missing,ANY-method} \alias{[,CrunchDataset,CrunchLogicalExpr,missing-method} \alias{[,CrunchDataset,CrunchLogicalExpr,ANY-method} +\alias{[,CrunchDataset,numeric,missing-method} +\alias{harmonizeFilters} +\alias{[,CrunchDataset,numeric,ANY-method} \alias{subset,CrunchDataset-method} \alias{[[,CrunchDataset,ANY-method} \alias{[[,CrunchDataset,character-method} @@ -34,6 +37,12 @@ \S4method{[}{CrunchDataset,CrunchLogicalExpr,ANY}(x, i, j, ..., drop = FALSE) +\S4method{[}{CrunchDataset,numeric,missing}(x, i, j, ..., drop = FALSE) + +harmonizeFilters(x, filt, i) + +\S4method{[}{CrunchDataset,numeric,ANY}(x, i, j, ..., drop = FALSE) + \S4method{subset}{CrunchDataset}(x, ...) \S4method{[[}{CrunchDataset,ANY}(x, i, ..., drop = FALSE) @@ -63,11 +72,27 @@ names); if numeric or logical, extracts variables accordingly. \item{drop}{logical: automatically simplify a 1-column Dataset to a Variable? Default is FALSE, and the TRUE option is in fact not implemented.} +\item{filt}{the object's filter} + \item{name}{columnar extraction for \code{$}} + +\item{x}{a filtered Dataset or vector} + +\item{i}{A numeric vector to harmonize with that filter} } \value{ \code{[} yields a Dataset; \code{[[} and \code{$} return a Variable + +a properly filtered dataset or vector } \description{ Subset datasets and extract variables + +Sometimes you want to subset a filtered object using a numeric vector. In order +to do this on a crunch object we need to first get the rows which match the filter +and then apply the numeric vector. For instance if you have a dataset filter such +that \code{ds_filt <- ds[ds$var == 5, ]} then \code{ds_filt[1:5]} should return the first +five rows where \code{ds$var == 5}. This function takes a filtered object and returns +the correctly subsetted filtered object. 
} +\keyword{internal} diff --git a/man/describe.Rd b/man/describe.Rd index 5ebcda0c5..abbac0402 100644 --- a/man/describe.Rd +++ b/man/describe.Rd @@ -42,6 +42,7 @@ \alias{digits,CrunchVariable-method} \alias{digits<-,NumericVariable-method} \alias{digits<-,CrunchVariable-method} +\alias{length,CrunchVariable-method} \title{Name, alias, and description for Crunch objects} \usage{ \S4method{name}{CrunchDataset}(x) @@ -93,6 +94,8 @@ \S4method{digits}{NumericVariable}(x) <- value \S4method{digits}{CrunchVariable}(x) <- value + +\S4method{length}{CrunchVariable}(x) } \arguments{ \item{x}{a Dataset or Variable.} diff --git a/man/head-tail.Rd b/man/head-tail.Rd new file mode 100644 index 000000000..f80bfbd03 --- /dev/null +++ b/man/head-tail.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/as-data-frame.R +\docType{methods} +\name{head-tail} +\alias{head-tail} +\alias{head} +\alias{tail} +\alias{head,CrunchDataset-method} +\alias{head,CrunchDataFrame-method} +\alias{head,CrunchVariable-method} +\alias{tail,CrunchDataset-method} +\alias{tail,CrunchDataFrame-method} +\alias{tail,CrunchVariable-method} +\title{Head and tail methods for Crunch objects. See \code{\link[utils:head]{utils::head()}} for more details.} +\usage{ +\S4method{head}{CrunchDataset}(x, n = 6L, ...) + +\S4method{head}{CrunchDataFrame}(x, n = 6L, ...) + +\S4method{head}{CrunchVariable}(x, n = 6L, ...) + +\S4method{tail}{CrunchDataset}(x, n = 6L, ...) + +\S4method{tail}{CrunchDataFrame}(x, n = 6L, ...) + +\S4method{tail}{CrunchVariable}(x, n = 6L, ...) +} +\arguments{ +\item{x}{a CrunchDataset, CrunchDataFrame, or CrunchVariable} + +\item{n}{a single integer representing the length of the returning object.} + +\item{...}{ignored} +} +\description{ +Head and tail methods for Crunch objects. See \code{\link[utils:head]{utils::head()}} for more details. 
+}
diff --git a/tests/testthat/test-active-filter.R b/tests/testthat/test-active-filter.R
index e4f26d594..5003451c6 100644
--- a/tests/testthat/test-active-filter.R
+++ b/tests/testthat/test-active-filter.R
@@ -147,6 +147,30 @@
         expect_identical(activeFilter(!is.na(ds2$birthyr)), ds$gender == "Male")
     })
+
+    test_that("harmonizeFilters creates the expected filter", {
+        expected_expr <- list(
+            `function` = "and",
+            args = list(list(
+                `function` = "==",
+                args = list(list(variable = "https://app.crunch.io/api/datasets/1/variables/gender/"),
+                    list(value = 1L)
+                )
+            ), list(
+                `function` = "in",
+                args = list(
+                    list(`function` = "row", args = list()),
+                    list(column = c(6, 10, 12, 20, 21)
+                    )
+                )
+            )
+            )
+        )
+        ds_harmonized <- harmonizeFilters(ds2, activeFilter(ds2), 1:5)
+        expect_is(ds_harmonized, "CrunchDataset")
+        expect_identical(activeFilter(ds_harmonized)@expression,
+                         expected_expr)
+    })
 })
 with_test_authentication({
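To make the expectation above more concrete, this is roughly how the harmonized filter can be inspected interactively (a sketch only: `ds` and the gender filter are illustrative, and the exact row indices depend on the data):

    ds_male <- ds[ds$gender == "Male", ]   # a dataset with an active filter
    ds_first5 <- ds_male[1:5, ]            # numeric subset of the filtered rows
    activeFilter(ds_first5)                # an "and" of the original filter with an
                                           # "in <row indices>" expression

harmonizeFilters() builds that composed expression by translating positions within the filtered object into row positions in the unfiltered object, via which(filt_lgl)[i].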
diff --git a/tests/testthat/test-exclusion-filter.R b/tests/testthat/test-exclusion-filter.R
index eee05835d..d4a9c6ded 100644
--- a/tests/testthat/test-exclusion-filter.R
+++ b/tests/testthat/test-exclusion-filter.R
@@ -297,4 +297,37 @@ with_test_authentication({
             expect_null(exclusion(ds))
         })
     })
+
+
+    with(test.dataset(df), {
+        ds$keep <- rep(1:4, 5)
+        exclusion(ds) <- ds$keep == 2
+        test_that("Exclusion is set", {
+            expect_identical(nrow(ds), 15L)
+        })
+
+        test_that("No problem with harmonized filters", {
+            expect_equivalent(as.vector(ds$v3),
+                              c(8, 10:12, 14:16, 18:20, 22:24, 26, 27))
+            # can subset with indexes
+            expect_equivalent(as.vector(ds$v3[c(1, 2, 3, 4)]),
+                              c(8, 10:12)) # not c(8:11): row 2 is excluded, so its value is skipped
+            expect_equivalent(as.vector(ds$v3[c(15, 14, 13, 12)]),
+                              # Indices are requested in reverse order, but the
+                              # backend always returns rows in dataset order, so
+                              # the expected values (indices 12:15) are ascending.
+                              c(23, 24, 26, 27))
+            # a new filter on top of the exclusion
+            ds <- ds[ds$v3 > 10]
+            expect_identical(nrow(ds), 13L)
+            # can still subset with indexes
+            expect_equivalent(as.vector(ds$v3[c(1, 2, 3, 4)]),
+                              c(11, 12, 14, 15)) # not c(11:14): excluded and filtered-out rows are skipped
+            expect_equivalent(as.vector(ds$v3[c(13, 12, 10, 9)]),
+                              # Again the indices are reversed, but the backend
+                              # always returns rows in dataset order, so the
+                              # values come back ascending.
+                              c(23, 24, 26, 27))
+        })
+    })
 })
diff --git a/tests/testthat/test-head.R b/tests/testthat/test-head.R
new file mode 100644
index 000000000..7a9ee0251
--- /dev/null
+++ b/tests/testthat/test-head.R
@@ -0,0 +1,67 @@
+context("head and tail")
+
+with_mock_crunch({
+    with_silent_progress({
+        ds <- loadDataset("test ds")
+        with_mock(`crunch:::crDownload`=function (url, file) {
+            resp <- GET(url)
+            return(resp$content)
+        }, {
+            test_that("head method works on CrunchDatasets", {
+                head_df <- head(ds[,c("birthyr", "gender", "location", "mymrset", "textVar", "starttime")])
+                expect_is(head_df, "data.frame")
+                expect_equal(nrow(head_df), 6)
+                expect_identical(head_df$textVar, c("w", "n", "x", "b", "q", "s"))
+            })
+            test_that("tail generates the correct request", {
+                expect_POST(tail(as.data.frame(ds)),
+                    'https://app.crunch.io/api/datasets/1/export/csv/',
+                    '{"filter":{"function":"between","args":[{"function":"row","args":[]},{"value":19},{"value":25}]},"options":{"use_category_ids":true}')
+            })
+            test_that("head and tail work on CrunchVariables", {
+                expect_identical(head(ds$textVar), c("w", "n", "x", "b", "q", "s"))
+                expect_identical(tail(ds$textVar), c("i", "h", "z", "m", "c", "x"))
+            })
+        })
+    })
+})
+
+with_test_authentication({
+    ds <- newDatasetFromFixture("apidocs")
+    ds_df <- as.data.frame(ds, force = TRUE)
+    dfhead <- head(ds)
+
+    test_that("head on dataset", {
+        expect_identical(dim(dfhead), c(6L, ncol(ds_df)))
+        expect_identical(dfhead$q1, ds_df$q1[1:6])
+
+        expect_identical(dim(tail(ds)), c(6L, ncol(ds_df)))
+        expect_identical(tail(ds)$ndogs, tail(ds_df$ndogs))
+
+    })
+    test_that("head and tail on CrunchDataFrame", {
+        expect_identical(dim(head(as.data.frame(ds))), c(6L, ncol(ds_df)))
+        expect_identical(head(as.data.frame(ds)$q1), head(ds_df$q1))
+
+        expect_identical(dim(tail(as.data.frame(ds))), c(6L, ncol(ds_df)))
+        expect_identical(tail(as.data.frame(ds)$q1), tail(ds_df$q1))
+    })
+
+    test_that("head on Variable", {
+        expect_identical(head(ds$q1), head(ds_df$q1))
+        expect_identical(tail(ds$q1), tail(ds_df$q1))
+    })
+    test_that("head and tail on subsets of Dataset", {
+        ds_filt <- ds[ds$ndogs > 1, ]
+        df_filt <- as.data.frame(ds_filt, force = TRUE)
+        expect_identical(dim(head(ds_filt)), c(6L, ncol(ds_df)))
+        expect_identical(head(ds_filt)$q1, df_filt$q1[1:6])
+
+        expect_identical(dim(tail(ds_filt)), c(6L, ncol(ds_df)))
+        expect_identical(tail(ds_filt)$ndogs, tail(df_filt$ndogs))
+
+        # Filtered variables
+        expect_identical(head(ds_filt$ndogs), head(df_filt$ndogs))
+        expect_identical(tail(ds_filt$ndogs), tail(df_filt$ndogs))
+    })
+})
diff --git a/tests/testthat/test-variable-summary.R b/tests/testthat/test-variable-summary.R
index 3774c9cd2..67a53e6c0 100644
--- a/tests/testthat/test-variable-summary.R
+++ b/tests/testthat/test-variable-summary.R
@@ -46,6 +46,9 @@
     test_that("min", {
         expect_equal(min(ds$birthyr), -1.4967)
     })
+    test_that("length", {
+        expect_identical(length(ds$birthyr), nrow(ds))
+    })
 })
 with_test_authentication({