diff --git a/DESCRIPTION b/DESCRIPTION
index 7775bb1..115bff9 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: tidyseurat
 Title: Brings Seurat to the Tidyverse
-Version: 0.7.9
+Version: 0.8.0
 Authors@R: c(person("Stefano", "Mangiola", email = "mangiolastefano@gmail.com",
     role = c("aut", "cre")),
     person("Maria", "Doyle", email = "Maria.Doyle@petermac.org",
diff --git a/NAMESPACE b/NAMESPACE
index 61112e0..0cbbe39 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -13,6 +13,7 @@ S3method(full_join,Seurat)
 S3method(ggplot,Seurat)
 S3method(glimpse,tidyseurat)
 S3method(group_by,Seurat)
+S3method(group_split,Seurat)
 S3method(inner_join,Seurat)
 S3method(join_transcripts,Seurat)
 S3method(join_transcripts,default)
@@ -71,6 +72,8 @@ importFrom(dplyr,filter)
 importFrom(dplyr,full_join)
 importFrom(dplyr,group_by)
 importFrom(dplyr,group_by_drop_default)
+importFrom(dplyr,group_rows)
+importFrom(dplyr,group_split)
 importFrom(dplyr,inner_join)
 importFrom(dplyr,left_join)
 importFrom(dplyr,mutate)
diff --git a/R/dplyr_methods.R b/R/dplyr_methods.R
index 72ff940..78ce347 100755
--- a/R/dplyr_methods.R
+++ b/R/dplyr_methods.R
@@ -966,3 +966,42 @@ pull.Seurat <- function(.data, var=-1, name=NULL, ...) {
     as_tibble() %>%
     dplyr::pull( var=!!var, name=!!name, ...)
 }
+
+#' @name group_split
+#' @rdname group_split
+#' @inherit dplyr::group_split
+#'
+#' @examples
+#' data(pbmc_small)
+#' pbmc_small |> group_split(groups)
+#'
+#' @importFrom ellipsis check_dots_used
+#' @importFrom dplyr group_by
+#' @importFrom dplyr group_rows
+#' @importFrom dplyr group_split
+#' @export
+group_split.Seurat <- function(.tbl, ..., .keep = TRUE) {
+
+  # Capture the grouping expressions unevaluated; they are reused below.
+  var_list <- enquos(...)
+
+  # One integer vector of tibble row indices per group.
+  groups <- .tbl |>
+    as_tibble() |>
+    dplyr::group_by(!!!var_list) |>
+    dplyr::group_rows()
+
+  lapply(groups, function(idx) {
+    # The tibble's row indices are used as column indices of `.tbl`.
+    piece <- .tbl[, idx]
+
+    # Wrap the spliced variables in c(): `(` accepts one expression only,
+    # so `!(!!!var_list)` cannot drop more than one grouping variable.
+    if (!.keep) {
+      piece <- select(piece, !c(!!!var_list))
+    }
+
+    piece
+  })
+
+}
diff --git a/man/group_split.Rd b/man/group_split.Rd
new file mode 100644
index 0000000..2035eb8
--- /dev/null
+++ b/man/group_split.Rd
@@ -0,0 +1,64 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dplyr_methods.R
+\name{group_split}
+\alias{group_split}
+\alias{group_split.Seurat}
+\title{Split data frame by groups}
+\usage{
+\method{group_split}{Seurat}(.tbl, ..., .keep = TRUE)
+}
+\arguments{
+\item{.tbl}{A tbl.}
+
+\item{...}{If \code{.tbl} is an ungrouped data frame, a grouping specification,
+forwarded to \code{\link[dplyr:group_by]{group_by()}}.}
+
+\item{.keep}{Should the grouping columns be kept?}
+}
+\value{
+A list of tibbles. Each tibble contains the rows of \code{.tbl} for the
+associated group and all the columns, including the grouping variables.
+Note that this returns a \link[vctrs:list_of]{list_of} which is slightly
+stricter than a simple list but is useful for representing lists where
+every element has the same type.
+}
+\description{
+\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
+
+\code{\link[dplyr:group_split]{group_split()}} works like \code{\link[base:split]{base::split()}} but:
+\itemize{
+\item It uses the grouping structure from \code{\link[dplyr:group_by]{group_by()}} and therefore is subject
+to the data mask
+\item It does not name the elements of the list based on the grouping as this
+only works well for a single character grouping variable. Instead,
+use \code{\link[dplyr:group_keys]{group_keys()}} to access a data frame that defines the groups.
+}
+
+\code{group_split()} is primarily designed to work with grouped data frames.
+You can pass \code{...} to group and split an ungrouped data frame, but this
+is generally not very useful as you won't have easy access to the group
+metadata.
+}
+\section{Lifecycle}{
+
+
+\code{group_split()} is not stable because you can achieve very similar results by
+manipulating the nested column returned from
+\code{\link[tidyr:nest]{tidyr::nest(.by =)}}. That also retains the group keys all
+within a single data structure. \code{group_split()} may be deprecated in the
+future.
+
+}
+
+\examples{
+data(pbmc_small)
+pbmc_small |> group_split(groups)
+
+}
+\seealso{
+Other grouping functions:
+\code{\link[dplyr]{group_by}()},
+\code{\link[dplyr]{group_map}()},
+\code{\link[dplyr]{group_nest}()},
+\code{\link[dplyr]{group_trim}()}
+}
diff --git a/man/slice.Rd b/man/slice.Rd
index 27dcc51..51d53f7 100644
--- a/man/slice.Rd
+++ b/man/slice.Rd
@@ -125,8 +125,8 @@ helpers for common use cases:
 \itemize{
 \item \code{slice_head()} and \code{slice_tail()} select the first or last rows.
 \item \code{slice_sample()} randomly selects rows.
-\item \code{slice_min()} and \code{slice_max()} select rows with the smallest or largest
-values of a variable.
+\item \code{slice_min()} and \code{slice_max()} select rows with the lowest or highest values
+of a variable.
} If \code{.data} is a \link[dplyr]{grouped_df}, the operation will be performed on each group, diff --git a/tests/testthat/test-dplyr.R b/tests/testthat/test-dplyr.R index 250548a..639998f 100755 --- a/tests/testthat/test-dplyr.R +++ b/tests/testthat/test-dplyr.R @@ -214,3 +214,32 @@ test_that("rowwise", { ((pbmc_small[, 1]$nCount_RNA + pbmc_small[, 1]$nFeature_RNA) / 2) |> unname() ) }) + +test_that("group_split() works for one variable", { + fd <- pbmc_small |> + group_split(groups) + expect_equal(length(fd), length(unique(pbmc_small$groups))) +}) + +test_that("group_split() works for combination of variables", { + fd <- pbmc_small |> + group_split(groups, letter.idents) + expect_equal(length(fd), length(unique(pbmc_small$groups)) * + length(unique(pbmc_small$letter.idents))) +}) + +test_that("group_split() works for one logical statement", { + fd_log <- pbmc_small |> + group_split(groups=="g1") + fd_var <- pbmc_small |> + group_split(groups=="g1") + expect_equal(lapply(fd_var, count), lapply(fd_log, count)) +}) + +test_that("group_split() works for two logical statements", { + fd <- pbmc_small |> + group_split(PC_1>0 & groups=="g1") + fd_counts <- lapply(fd, count) + expect_equal(c(fd_counts[[1]], fd_counts[[2]], use.names = FALSE), + list(75, 5)) +})