Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Combine mrs #8

Open
wants to merge 14 commits into
base: master
Choose a base branch
from
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ Description: The Crunch.io service (http://crunch.io/) provides a cloud-based
datasets from within R. Importantly, this allows technical researchers to
collaborate naturally with team members, managers, and clients who prefer a
point-and-click interface.
Version: 1.7.12
Date: 2016-01-28
Version: 1.7.13
Date: 2016-01-29
Authors@R: c(person("Neal", "Richardson", role = c("aut", "cre"),
email = "[email protected]"))
Author: Neal Richardson [aut, cre]
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ export(batches)
export(bin)
export(catalogToDataFrame)
export(checkForNewVersion)
export(combineArrays)
export(combineMRs)
export(consent)
export(copy)
export(copyVariable)
Expand Down Expand Up @@ -85,6 +87,7 @@ export(login)
export(logout)
export(makeArray)
export(makeMR)
export(makeMRfromCat)
export(newDataset)
export(newDatasetByCSV)
export(newDatasetByColumn)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
### crunch 1.7.13

### crunch 1.7.12
* Add method for R logical &/| Crunch expression
* Upgrade for compatibility with httr 1.1
Expand Down
18 changes: 17 additions & 1 deletion R/add-subvariable.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,21 @@
##' @param subvariable the subvariable to add
##' @return a new version of \code{variable} with the indicated subvariables
##' @export
addSubvariable <- function (variable, subvariable){
addSubvariable <- function(variable, subvariable, dataset=NULL){
## Store some metadata up front
payload <- copyVariableReferences(variable)
subvars <- subvariables(variable)
subvar.urls <- urls(subvars)
subvar.names <- names(subvars)

## Identify subvariable URLs
if (inherits(subvariable, 'VariableDefinition')) {
dataset <- addVariables(dataset, subvariable)
subvariable <- dataset[[subvariable$alias]]
}
new_subvar.url <- self(subvariable)
new_subvar.name <- name(subvariable)

# TODO: could support taking a VariableDefinition for subvariable
# if (inherits(subvariable, 'VariableDefinition')) {
# ds <- addVariables(ds, subvariable)
Expand All @@ -31,11 +39,19 @@ addSubvariable <- function (variable, subvariable){

## Prune subvariable name prefix, or otherwise reset the names
subvars <- Subvariables(crGET(absoluteURL("subvariables/", new_url)))
print(names(subvars))
print(urls(subvars))
print(c(subvar.urls, new_subvar.url))
subvars <- subvars[match(urls(subvars),c(subvar.urls, new_subvar.url))]
names(subvars) <- c(subvar.names[na.omit(match(urls(subvars), subvar.urls))], new_subvar.name)
print(names(subvars))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can delete these 6 lines. The three lines below do this more robustly.


subvar.urls <- c(subvar.urls, self(subvariable))
subvar.names <- c(subvar.names, name(subvariable))
names(subvars) <- subvar.names[match(urls(subvars), subvar.urls)]

## What to return? This function is kind of a hack.
## maybe it should return a variable definition. so ds$var <- addSubvariable(ds$var, subvariable)
invisible(new_url)
}

Expand Down
51 changes: 51 additions & 0 deletions R/combine-arrays.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
##' Combine array variables into a new array
##'
##' This function lets you combine array variables into a new array
##' Arguments include at least two array variables as well as any metadata
##' for the new variable that is being created. This function uses the data from
##' the arrays in the order that they are given. If the first variable is NA for a given
##' row of a subvariable, it will use the data from the second variable and so on.
##' @param variable 1 to combine
##' @param variable 2 to combine
##' @param other desired attributes such as new variable alias, name,
##' description as well as any other variables you would like to combine
##' @return a new variable with combine values from the arrays
##' @export
combineArrays <- function(var1, var2, ...){
vars <- c(var1, var2, list(...)[sapply(1:length(list(...)), function(i) is.variable(list(...)[[i]]))])
stopifnot(is.variable(var1) & is.variable(var2))
stopifnot(any(sapply(vars, is.MultipleResponse)))

newbody <- list()
newbody$alias <- paste0(sapply(vars, alias), collapse = '_')
newbody$name <- paste0(paste0(sapply(vars, name), collapse = ', '), ", combine")
newbody$type <- 'categorical_array'

newbody <- updateList(newbody, list(...)[!sapply(1:length(list(...)), function(i) is.variable(list(...)[[i]]))])

subs <- unique(unlist(lapply(vars, function(var) names(subvariables(var)))))

nrows <- length(as.vector(var1[[1]]))
values <- sapply(subs, function(subvar) {
tmp <- rep(NA, nrows)
for (var in vars){
if (is.na(alias))
if (subvar %in% names(subvariables(var))) {
tmp[is.na(tmp)] <- as.character(as.vector(var[[subvar]][is.na(tmp)]))
}
}
return(tmp)
})
colnames(values) <- subs

newbody$subvariables <- lapply(subs, function(sub){
VarDef(data=factor(values[[sub]]),
alias=paste0(newbody$alias, "_", gsub(" ", "_", gsub("[[:punct:]]", "", tolower(sub)))),name=sub)
})

class(newbody) <- 'VariableDefinition'
return(newbody)
}



60 changes: 60 additions & 0 deletions R/combine-mrs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
##' Combine multiple_response variables into a new multiple_response
##'
##' This function lets you combine multiple_response variables into a new multiple_response
##' Arguments include at least two multiple_response variables as well as any metadata
##' for the new variable that is being created. The argument useNAsubs
##' is given the defult value FALSE. If it is made TRUE, any subvariables that
##' are NA in the categorical variables will become subvariables.
##' This function uses the data from the arrays in the order that they are
##' given. If the first variable is NA for a given row of a subvariable,
##' it will use the data from the second variable and so on.
##' @param variable 1 to combine
##' @param variable 2 to combine
##' @param other desired attributes such as new variable alias, name,
##' description as well as any other variables you would like to combine
##' @param a boolean describing whether subvariables that are NA in the
##' original variables should be made into subvariables in the new
##' variable
##' @param a boolean describing whether 'selected' should be given priority
##' if false, 'not selected' will be given priority
##' @return a new variable with combined responses from the given variables
##' @export
combineMRs <- function(var1, var2, ..., selectedFirst=TRUE){
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I read the tests for this correctly, the purpose of this function is to take the subvariables from two or more MR variables and make a single MR from them--is that right? If so, how about we keep the first MR, unbind the rest, and use addSubvariables to add them to the first? Like:

combineMRs <- function (variable, ...) {
    newsubvars <- unlist(lapply(list(...), unbind)) ## unbind returns the URLs of the unbound variables
    return(addSubvariables(variable, newsubvars))
}

addSubvariables would need to take more than just one subvar, and it would need to accept URLs, but neither are too challenging (and they're more generally useful in that function itself too). And also this way, you wouldn't need separate functions to add to categorical arrays and multiple responses--same function would work.

## NOTE THAT THIS IS ONLY GOING TO WORK FOR VARIABLES WHERE THE CATEGORIES ARE SELECTED AND NOT SELECTED. IF THE CATEGORIES ARE MADE BY SPSS, THIS WON'T WORK.
## I've thought of a fix but it'll take more time (and is not currently useful to me).
if (length(list(...)) > 0) vars <- c(var1, var2, list(...)[sapply(1:length(list(...)), function(i) is.variable(list(...)[[i]]))])
else vars <- c(var1, var2)
stopifnot(is.variable(var1) & is.variable(var2))
stopifnot(any(sapply(vars, is.MultipleResponse)))

newbody <- list()
newbody$alias <- paste0(sapply(vars, alias), collapse = '_')
newbody$name <- paste0(paste0(sapply(vars, name), collapse = ', '), ", combine")
newbody$type <- 'categorical_array'

if (length(list(...)) > 0) newbody <- updateList(newbody, list(...)[!sapply(1:length(list(...)), function(i) is.variable(list(...)[[i]]))])

subs <- unique(unlist(lapply(vars, function(var) names(subvariables(var)))))

values <- sapply(subs, function(subvar) {
tmp <- list()
for (var in vars){
if (subvar %in% names(subvariables(var))) tmp[[alias(var)]] <- as.character(as.vector(var[[subvar]]))
}
tmp <- matrix(unlist(tmp), ncol=length(tmp))
if (selectedFirst) tmp2 <- ifelse(rowSums(tmp == 'selected', na.rm = TRUE) > 0, 'selected', ifelse(rowSums(tmp == 'not selected', na.rm = TRUE) > 0, 'not selected', NA))
if (!selectedFirst) tmp2 <- ifelse(rowSums(tmp == 'not selected', na.rm = TRUE) > 0, 'not selected', ifelse(rowSums(tmp == 'selected', na.rm = TRUE) > 0, 'selected', NA))
return(tmp2)
}, simplify=FALSE)

newbody$subvariables <- lapply(subs, function(sub){
VarDef(data=factor(values[[sub]]),
alias=paste0(newbody$alias, "_", gsub(" ", "_", gsub("[[:punct:]]", "", tolower(sub)))),name=sub)
})
print(3)
class(newbody) <- 'VariableDefinition'
return(newbody)


}

42 changes: 42 additions & 0 deletions R/make-mr-from-cat.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
##' Combine categorical variables into an MR
##'
##' This function lets you combine categorical variables into an MR variable
##' Arguments include at least two categorical variables as well as any metadata
##' for the new variable that is being created. The final argument useNAcats
##' is given the defult value FALSE. If it is made TRUE, any categories that
##' are NA in the categorical variables will become subvariables.
##' @param variable 1 to combine
##' @param variable 2 to combine
##' @param other desired attributes such as new variable alias, name,
##' description as well as any other variables you would like to combine
##' @param a boolean describing whether categories that are NA in the
##' original variables should be made into subvariables in the new
##' variable
##' @return a new multiple_response variable with subvariables
##' that are the union of the categories of variables given
##' @export
makeMRfromCat <- function(var1, var2, ..., useNAcats = FALSE){
vars <- c(var1, var2, list(...)[sapply(1:length(list(...)), function(i) is.variable(list(...)[[i]]))])
stopifnot(is.variable(var1) & is.variable(var2))
stopifnot(any(sapply(vars, is.Categorical)))

newbody <- list()
newbody$alias <- paste0(sapply(vars, alias), collapse = '_')
newbody$name <- paste0(paste0(sapply(vars, name), collapse = ', '), ", combine")
newbody$type <- 'categorical_array'

newbody <- updateList(newbody, list(...)[!sapply(1:length(list(...)), function(i) is.variable(list(...)[[i]]))])

values <- matrix(sapply(vars, function(var) names(categories(var))[as.vector(var, mode='id')]), ncol=length(vars))
if (useNAcats) subs <- unique(unlist(lapply(vars, function(var) names(categories(var)))))
else subs <- unique(unlist(lapply(vars, function(var) names(categories(var))[!is.na(categories(var))])))

newbody$subvariables <- lapply(subs, function(sub){
VarDef(data=factor(ifelse(rowSums(values == sub,na.rm = TRUE) > 0, 'selected',
ifelse(rowSums(is.na(values)) == length(vars), NA, 'not selected'))),
alias=paste0(newbody$alias, "_", gsub(" ", "_", gsub("[[:punct:]]", "", tolower(sub)))),name=sub)
})

class(newbody) <- 'VariableDefinition'
return(newbody)
}
2 changes: 1 addition & 1 deletion man/addSubvariable.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
\alias{addSubvariable}
\title{Add subvariable to an array}
\usage{
addSubvariable(variable, subvariable)
addSubvariable(variable, subvariable, dataset = NULL)
}
\arguments{
\item{variable}{the array variable to modify}
Expand Down
27 changes: 27 additions & 0 deletions man/combineArrays.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/combine-arrays.R
\name{combineArrays}
\alias{combineArrays}
\title{Combine array variables into a new array}
\usage{
combineArrays(var1, var2, ...)
}
\arguments{
\item{variable}{1 to combine}

\item{variable}{2 to combine}

\item{other}{desired attributes such as new variable alias, name,
description as well as any other variables you would like to combine}
}
\value{
a new variable with combine values from the arrays
}
\description{
This function lets you combine array variables into a new array
Arguments include at least two array variables as well as any metadata
for the new variable that is being created. This function uses the data from
the arrays in the order that they are given. If the first variable is NA for a given
row of a subvariable, it will use the data from the second variable and so on.
}

37 changes: 37 additions & 0 deletions man/combineMRs.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/combine-mrs.R
\name{combineMRs}
\alias{combineMRs}
\title{Combine multiple_response variables into a new multiple_response}
\usage{
combineMRs(var1, var2, ..., selectedFirst = TRUE)
}
\arguments{
\item{variable}{1 to combine}

\item{variable}{2 to combine}

\item{other}{desired attributes such as new variable alias, name,
description as well as any other variables you would like to combine}

\item{a}{boolean describing whether subvariables that are NA in the
original variables should be made into subvariables in the new
variable}

\item{a}{boolean describing whether 'selected' should be given priority
if false, 'not selected' will be given priority}
}
\value{
a new variable with combined responses from the given variables
}
\description{
This function lets you combine multiple_response variables into a new multiple_response
Arguments include at least two multiple_response variables as well as any metadata
for the new variable that is being created. The argument useNAsubs
is given the defult value FALSE. If it is made TRUE, any subvariables that
are NA in the categorical variables will become subvariables.
This function uses the data from the arrays in the order that they are
given. If the first variable is NA for a given row of a subvariable,
it will use the data from the second variable and so on.
}

32 changes: 32 additions & 0 deletions man/makeMRfromCat.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/make-mr-from-cat.R
\name{makeMRfromCat}
\alias{makeMRfromCat}
\title{Combine categorical variables into an MR}
\usage{
makeMRfromCat(var1, var2, ..., useNAcats = FALSE)
}
\arguments{
\item{variable}{1 to combine}

\item{variable}{2 to combine}

\item{other}{desired attributes such as new variable alias, name,
description as well as any other variables you would like to combine}

\item{a}{boolean describing whether categories that are NA in the
original variables should be made into subvariables in the new
variable}
}
\value{
a new multiple_response variable with subvariables
that are the union of the categories of variables given
}
\description{
This function lets you combine categorical variables into an MR variable
Arguments include at least two categorical variables as well as any metadata
for the new variable that is being created. The final argument useNAcats
is given the defult value FALSE. If it is made TRUE, any categories that
are NA in the categorical variables will become subvariables.
}

4 changes: 2 additions & 2 deletions man/variable-to-R.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
\alias{variable-to-R}
\title{Convert Variables to local R objects}
\usage{
\S4method{as.vector}{CrunchExpr}(x, mode)
\S4method{as.vector}{CrunchExpr}(x, mode = "any")

\S4method{as.vector}{CrunchVariable}(x, mode)
\S4method{as.vector}{CrunchVariable}(x, mode = "any")
}
\arguments{
\item{x}{a CrunchVariable subclass}
Expand Down
20 changes: 20 additions & 0 deletions tests/testthat/test-add-subvariable.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,25 @@ context('Adding subvariables')
if (run.integration.tests) {
with(test.authentication, {
with(test.dataset(newDatasetFromFixture("apidocs")), {
<<<<<<< HEAD
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another merge conflict. I reworked the test we wrote a little bit, which is the part starting on L25. That should replace your L7-13, and I recommend adapting it to cover the new case you added of supplying a VarDef.

expect_false("doggy daycare" %in% names(subvariables(ds$petloc)))
ds$petloc_daycare <- VariableDefinition(factor(rep(c("Cat", "Dog"), 10)), name="doggy daycare")
expect_true('petloc_daycare' %in% aliases(variables(ds)))
addSubvariable(ds$petloc, ds$petloc_daycare)
ds <- refresh(ds)
expect_true("doggy daycare" %in% names(subvariables(ds$petloc)))
expect_identical(c("Home", "Work", "doggy daycare"), names(subvariables(ds$petloc)))
})
with(test.dataset(newDatasetFromFixture("apidocs")), {
expect_false("doggy daycare" %in% names(subvariables(ds$petloc)))
petloc_daycare <- VariableDefinition(factor(rep(c("Cat", "Dog"), 10)), name="doggy daycare", alias='petloc_daycare')
expect_false('petloc_daycare' %in% aliases(variables(ds)))
addSubvariable(ds$petloc, petloc_daycare, ds)
ds <- refresh(ds)
print(names(subvariables(ds$petloc)))
expect_true("doggy daycare" %in% names(subvariables(ds$petloc)))
expect_identical(c("Home", "Work", "doggy daycare"), names(subvariables(ds$petloc)))
=======
test_that("Adding a subvariable to an array", {
ds$petloc_daycare <- VariableDefinition(factor(rep(c("Cat",
"Dog"), 10)), name="doggy daycare")
Expand All @@ -14,6 +33,7 @@ if (run.integration.tests) {
expect_identical(c("Home", "Work", "doggy daycare"),
names(subvariables(ds$petloc)))
})
>>>>>>> upstream/master
})
})
}
Loading