From 75fdd599f99232fbf91885c989cd379ebebb1513 Mon Sep 17 00:00:00 2001
From: stemangiola <mangiolastefano@gmail.com>
Date: Tue, 22 Aug 2023 22:51:26 +1000
Subject: [PATCH 01/12] add argument

---
 R/functions_SE.R | 168 ++++++++++++++++++++++++++---------------------
 1 file changed, 92 insertions(+), 76 deletions(-)

diff --git a/R/functions_SE.R b/R/functions_SE.R
index ede03fef..fcee85d0 100755
--- a/R/functions_SE.R
+++ b/R/functions_SE.R
@@ -3,8 +3,8 @@
 #' @keywords internal
 #' @noRd
 #'
-#' 
-#' 
+#'
+#'
 #' @import tibble
 #' @importFrom stats kmeans
 #' @importFrom rlang :=
@@ -55,8 +55,8 @@ get_clusters_kmeans_bulk_SE <-
 #' @keywords internal
 #' @noRd
 #'
-#' 
-#' 
+#'
+#'
 #' @import tibble
 #' @importFrom rlang :=
 #' @importFrom utils install.packages
@@ -113,8 +113,8 @@ get_clusters_SNN_bulk_SE <-
 #' @keywords internal
 #' @noRd
 #'
-#' 
-#' 
+#'
+#'
 #' @import tibble
 #' @importFrom purrr map_dfr
 #' @importFrom rlang :=
@@ -207,8 +207,8 @@ get_reduced_dimensions_MDS_bulk_SE <-
 #' @keywords internal
 #' @noRd
 #'
-#' 
-#' 
+#'
+#'
 #' @import tibble
 #' @importFrom rlang :=
 #' @importFrom stats prcomp
@@ -309,8 +309,8 @@ we suggest to partition the dataset for sample clusters.
 #' @keywords internal
 #' @noRd
 #'
-#' 
-#' 
+#'
+#'
 #' @import tibble
 #' @importFrom rlang :=
 #' @importFrom stats setNames
@@ -392,8 +392,8 @@ get_reduced_dimensions_TSNE_bulk_SE <-
 #'
 #' @keywords internal
 #'
-#' 
-#' 
+#'
+#'
 #' @import tibble
 #' @importFrom rlang :=
 #' @importFrom stats setNames
@@ -564,8 +564,8 @@ keep_variable_transcripts_SE = function(.data,
 #' @keywords internal
 #' @noRd
 #'
-#' 
-#' 
+#'
+#'
 #' @import tibble
 #' @importFrom rlang :=
 #'
@@ -694,8 +694,8 @@ remove_redundancy_elements_though_reduced_dimensions_SE <-
 #' @keywords internal
 #' @noRd
 #'
-#' 
-#' 
+#'
+#'
 #' @import tibble
 #' @importFrom magrittr set_colnames
 #' @importFrom stats model.matrix
@@ -727,7 +727,7 @@ get_differential_transcript_abundance_bulk_SE <- function(.data,
 																											 ...) {
 
   .abundance = enquo(.abundance)
-  
+
 	# Check if omit_contrast_in_colnames is correctly setup
 	if(omit_contrast_in_colnames & length(.contrasts) > 1){
 		warning("tidybulk says: you can omit contrasts in column names only when maximum one contrast is present")
@@ -765,16 +765,16 @@ get_differential_transcript_abundance_bulk_SE <- function(.data,
 
 	# If no assay is specified take first
 	my_assay = ifelse(
-	  quo_is_symbol(.abundance), 
-	  quo_name(.abundance), 
+	  quo_is_symbol(.abundance),
+	  quo_name(.abundance),
 	  .data |>
 	    assayNames() |>
 	    extract2(1)
 	 )
-	
+
 	edgeR_object =
-		.data |> 
-	  assay(my_assay) |> 
+		.data |>
+	  assay(my_assay) |>
 		edgeR::DGEList() %>%
 
 		# Scale data if method is not "none"
@@ -869,8 +869,8 @@ get_differential_transcript_abundance_bulk_SE <- function(.data,
 #' @keywords internal
 #' @noRd
 #'
-#' 
-#' 
+#'
+#'
 #' @import tibble
 #' @importFrom magrittr set_colnames
 #' @importFrom stats model.matrix
@@ -938,17 +938,17 @@ get_differential_transcript_abundance_bulk_voom_SE <- function(.data,
 
 	# If no assay is specified take first
 	my_assay = ifelse(
-	  quo_is_symbol(.abundance), 
-	  quo_name(.abundance), 
+	  quo_is_symbol(.abundance),
+	  quo_name(.abundance),
 	  .data |>
 	    assayNames() |>
 	    extract2(1)
 	)
-	
+
 	voom_object =
 		.data %>%
 
-	  assay(my_assay) |> 
+	  assay(my_assay) |>
 	  edgeR::DGEList() %>%
 
 		# Scale data if method is not "none"
@@ -1050,8 +1050,8 @@ get_differential_transcript_abundance_bulk_voom_SE <- function(.data,
 #' @keywords internal
 #' @noRd
 #'
-#' 
-#' 
+#'
+#'
 #' @import tibble
 #' @importFrom magrittr set_colnames
 #' @importFrom stats model.matrix
@@ -1074,30 +1074,32 @@ get_differential_transcript_abundance_glmmSeq_SE <- function(.data,
                                                             .abundance = NULL,
                                                             .contrasts = NULL,
                                                             sample_annotation ,
-                                                            method = "deseq2",
-                                                            
+                                                            method,
+
                                                             test_above_log2_fold_change = NULL,
-                                                            
+
                                                             scaling_method = "TMM",
                                                             omit_contrast_in_colnames = FALSE,
                                                             prefix = "",
+                                                            .dispersion = NULL,
                                                             ...) {
-  
+
   .abundance = enquo(.abundance)
-  
+  .dispersion = enquo(.dispersion)
+
   # Check if contrasts are of the same form
   if(
     .contrasts %>% is.null %>% not() &
     .contrasts %>% class %>% equals("list") %>% not()
   )
     stop("tidybulk says: for DESeq2 the list of constrasts should be given in the form list(c(\"condition_column\",\"condition1\",\"condition2\")) i.e. list(c(\"genotype\",\"knockout\",\"wildtype\"))")
-  
+
   # Check if omit_contrast_in_colnames is correctly setup
   if(omit_contrast_in_colnames & length(.contrasts) > 1){
     warning("tidybulk says: you can omit contrasts in column names only when maximum one contrast is present")
     omit_contrast_in_colnames = FALSE
   }
-  
+
   # Check if package is installed, otherwise install
   if (find.package("edgeR", quiet = TRUE) %>% length %>% equals(0)) {
     message("tidybulk says: Installing edgeR needed for differential transcript abundance analyses")
@@ -1105,7 +1107,7 @@ get_differential_transcript_abundance_glmmSeq_SE <- function(.data,
       install.packages("BiocManager", repos = "https://cloud.r-project.org")
     BiocManager::install("edgeR", ask = FALSE)
   }
-  
+
   # Check if package is installed, otherwise install
   if (find.package("glmmSeq", quiet = TRUE) %>% length %>% equals(0)) {
     message("tidybulk says: Installing glmmSeq needed for differential transcript abundance analyses")
@@ -1113,65 +1115,79 @@ get_differential_transcript_abundance_glmmSeq_SE <- function(.data,
       install.packages("BiocManager", repos = "https://cloud.r-project.org")
     BiocManager::install("glmmSeq", ask = FALSE)
   }
-  
+
   # If no assay is specified take first
   my_assay = ifelse(
-    quo_is_symbol(.abundance), 
-    quo_name(.abundance), 
+    quo_is_symbol(.abundance),
+    quo_name(.abundance),
     .data |>
       assayNames() |>
       extract2(1)
   )
-  
-  metadata = 
-    .data |> 
-    colData() 
-  
-  counts = 
+
+  metadata =
+    .data |>
+    colData()
+
+  counts =
     .data %>%
     assay(my_assay)
-  
-  glmmSeq_object = 
+
+  if(quo_is_symbolic(.dispersion))
+    dispersion = rowData(.data)[,quo_name(.dispersion),drop=FALSE] |> as_tibble(rownames = feature__$name) |> deframe()
+  else
+    dispersion = setNames(edgeR::estimateDisp(counts)$tagwise.dispersion, rownames(counts))
+
+  # # Check dispersion
+  # if(!names(dispersion) |> sort() |> identical(
+  #   rownames(counts) |>
+  #   sort()
+  # )) stop("tidybulk says: The features in the dispersion vector do not overlap with the feature in the assay")
+
+  # Make sure the order matches the counts
+  dispersion = dispersion[rownames(counts)]
+
+  glmmSeq_object =
     glmmSeq( .formula,
                       countdata = counts ,
                       metadata =   metadata |> as.data.frame(),
-                      dispersion = setNames(edgeR::estimateDisp(counts)$tagwise.dispersion, rownames(counts)),
-                      progress = TRUE, 
+                      dispersion = dispersion,
+                      progress = TRUE,
                       method = method |> str_remove("(?i)^glmmSeq_" ),
                       ...
-    ) 
-  
-  glmmSeq_object |> 
-    summary() |> 
+    )
+
+  glmmSeq_object |>
+    summary() |>
     as_tibble(rownames = "transcript") |>
-    mutate(across(starts_with("P_"), list(adjusted = function(x) p.adjust(x, method="BH")), .names = "{.col}_{.fn}")) |> 
-    
+    mutate(across(starts_with("P_"), list(adjusted = function(x) p.adjust(x, method="BH")), .names = "{.col}_{.fn}")) |>
+
     # Attach attributes
     reattach_internals(.data) %>%
-    
+
     # select method
-    memorise_methods_used("glmmSeq") %>% 
-    
+    memorise_methods_used("glmmSeq") %>%
+
     # # Add raw object
     # attach_to_internals(glmmSeq_object, "glmmSeq") %>%
-    
+
     # Communicate the attribute added
     {
       rlang::inform("tidybulk says: to access the raw results (fitted GLM) do `attr(..., \"internals\")$glmmSeq`", .frequency_id = "Access DE results glmmSeq",  .frequency = "once")
       (.)
     }  %>%
-    
+
     # Attach prefix
     setNames(c(
       colnames(.)[1],
       sprintf("%s%s", prefix, colnames(.)[2:ncol(.)])
-    )) |> 
-    
-    list() |> 
-    setNames("result") |> 
+    )) |>
+
+    list() |>
+    setNames("result") |>
     c(list(result_raw = glmmSeq_object))
-  
-  
+
+
 }
 
 
@@ -1180,8 +1196,8 @@ get_differential_transcript_abundance_glmmSeq_SE <- function(.data,
 #' @keywords internal
 #' @noRd
 #'
-#' 
-#' 
+#'
+#'
 #' @import tibble
 #' @importFrom magrittr set_colnames
 #' @importFrom stats model.matrix
@@ -1213,7 +1229,7 @@ get_differential_transcript_abundance_deseq2_SE <- function(.data,
                                                             ...) {
 
   .abundance = enquo(.abundance)
-  
+
 	# Check if contrasts are of the same form
 	if(
 		.contrasts %>% is.null %>% not() &
@@ -1238,20 +1254,20 @@ get_differential_transcript_abundance_deseq2_SE <- function(.data,
         if (is.null(test_above_log2_fold_change)) {
           test_above_log2_fold_change <- 0
         }
-  
+
 	my_contrasts = .contrasts
 
 	# If no assay is specified take first
 	my_assay = ifelse(
-	  quo_is_symbol(.abundance), 
-	  quo_name(.abundance), 
+	  quo_is_symbol(.abundance),
+	  quo_name(.abundance),
 	  .data |>
 	    assayNames() |>
 	    extract2(1)
 	)
-	
+
 	deseq2_object =
-	  
+
 		# DESeq2
 		DESeq2::DESeqDataSetFromMatrix(
 		  countData = .data |> assay(my_assay),

From 91ddd0c8c2440677c9d024e4ecb3a57ac40d3d01 Mon Sep 17 00:00:00 2001
From: Stefano Mangiola <mangiolastefano@gmail.com>
Date: Wed, 23 Aug 2023 12:49:45 +1000
Subject: [PATCH 02/12] make sure matrix is maintained when selecting rows

---
 R/glmmSeq.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/glmmSeq.R b/R/glmmSeq.R
index 7fa7d1dd..a5658000 100644
--- a/R/glmmSeq.R
+++ b/R/glmmSeq.R
@@ -185,7 +185,7 @@ glmmSeq = function (modelFormula, countdata, metadata, id = NULL, dispersion = N
   fullFormula <- update.formula(modelFormula, count ~ ., simplify = FALSE)
   subFormula <- lme4::subbars(modelFormula)
   variables <- rownames(attr(terms(subFormula), "factors"))
-  subsetMetadata <- metadata[, variables]
+  subsetMetadata <- metadata[, variables, drop=FALSE]
   if (is.null(id)) {
     fb <- lme4::findbars(modelFormula)
     id <- sub(".*[|]", "", fb)
@@ -195,9 +195,9 @@ glmmSeq = function (modelFormula, countdata, metadata, id = NULL, dispersion = N
   if (removeSingles) {
     nonSingles <- names(table(ids))[table(ids) > 1]
     nonSingleIDs <- ids %in% nonSingles
-    countdata <- countdata[, nonSingleIDs]
+    countdata <- countdata[, nonSingleIDs, drop=FALSE]
     sizeFactors <- sizeFactors[nonSingleIDs]
-    subsetMetadata <- subsetMetadata[nonSingleIDs, ]
+    subsetMetadata <- subsetMetadata[nonSingleIDs, , drop=FALSE]
     ids <- ids[nonSingleIDs]
   }
   if (!is.null(sizeFactors))

From 5f139e8f36037d462955e091588af6e17c33c654 Mon Sep 17 00:00:00 2001
From: Stefano Mangiola <mangiolastefano@gmail.com>
Date: Mon, 28 Aug 2023 12:04:05 +1000
Subject: [PATCH 03/12] allow one gene for DE

---
 R/methods_SE.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/methods_SE.R b/R/methods_SE.R
index 8d4af0ff..5238012d 100755
--- a/R/methods_SE.R
+++ b/R/methods_SE.R
@@ -1336,7 +1336,7 @@ such as batch effects (if applicable) in the formula.
 	rowData(.data) = rowData(.data) %>% cbind(
 		my_differential_abundance$result %>%
 			as_matrix(rownames = "transcript") %>%
-			.[match(rownames(rowData(.data)), rownames(.)),]
+			.[match(rownames(rowData(.data)), rownames(.)),,drop=FALSE]
 	)
 
 

From 23666f42d02c0b4421e61003e64f4fb453112a16 Mon Sep 17 00:00:00 2001
From: stemangiola <mangiolastefano@gmail.com>
Date: Tue, 22 Aug 2023 22:51:26 +1000
Subject: [PATCH 04/12] add argument

---
 R/functions_SE.R | 168 ++++++++++++++++++++++++++---------------------
 1 file changed, 92 insertions(+), 76 deletions(-)

diff --git a/R/functions_SE.R b/R/functions_SE.R
index ede03fef..fcee85d0 100755
--- a/R/functions_SE.R
+++ b/R/functions_SE.R
@@ -3,8 +3,8 @@
 #' @keywords internal
 #' @noRd
 #'
-#' 
-#' 
+#'
+#'
 #' @import tibble
 #' @importFrom stats kmeans
 #' @importFrom rlang :=
@@ -55,8 +55,8 @@ get_clusters_kmeans_bulk_SE <-
 #' @keywords internal
 #' @noRd
 #'
-#' 
-#' 
+#'
+#'
 #' @import tibble
 #' @importFrom rlang :=
 #' @importFrom utils install.packages
@@ -113,8 +113,8 @@ get_clusters_SNN_bulk_SE <-
 #' @keywords internal
 #' @noRd
 #'
-#' 
-#' 
+#'
+#'
 #' @import tibble
 #' @importFrom purrr map_dfr
 #' @importFrom rlang :=
@@ -207,8 +207,8 @@ get_reduced_dimensions_MDS_bulk_SE <-
 #' @keywords internal
 #' @noRd
 #'
-#' 
-#' 
+#'
+#'
 #' @import tibble
 #' @importFrom rlang :=
 #' @importFrom stats prcomp
@@ -309,8 +309,8 @@ we suggest to partition the dataset for sample clusters.
 #' @keywords internal
 #' @noRd
 #'
-#' 
-#' 
+#'
+#'
 #' @import tibble
 #' @importFrom rlang :=
 #' @importFrom stats setNames
@@ -392,8 +392,8 @@ get_reduced_dimensions_TSNE_bulk_SE <-
 #'
 #' @keywords internal
 #'
-#' 
-#' 
+#'
+#'
 #' @import tibble
 #' @importFrom rlang :=
 #' @importFrom stats setNames
@@ -564,8 +564,8 @@ keep_variable_transcripts_SE = function(.data,
 #' @keywords internal
 #' @noRd
 #'
-#' 
-#' 
+#'
+#'
 #' @import tibble
 #' @importFrom rlang :=
 #'
@@ -694,8 +694,8 @@ remove_redundancy_elements_though_reduced_dimensions_SE <-
 #' @keywords internal
 #' @noRd
 #'
-#' 
-#' 
+#'
+#'
 #' @import tibble
 #' @importFrom magrittr set_colnames
 #' @importFrom stats model.matrix
@@ -727,7 +727,7 @@ get_differential_transcript_abundance_bulk_SE <- function(.data,
 																											 ...) {
 
   .abundance = enquo(.abundance)
-  
+
 	# Check if omit_contrast_in_colnames is correctly setup
 	if(omit_contrast_in_colnames & length(.contrasts) > 1){
 		warning("tidybulk says: you can omit contrasts in column names only when maximum one contrast is present")
@@ -765,16 +765,16 @@ get_differential_transcript_abundance_bulk_SE <- function(.data,
 
 	# If no assay is specified take first
 	my_assay = ifelse(
-	  quo_is_symbol(.abundance), 
-	  quo_name(.abundance), 
+	  quo_is_symbol(.abundance),
+	  quo_name(.abundance),
 	  .data |>
 	    assayNames() |>
 	    extract2(1)
 	 )
-	
+
 	edgeR_object =
-		.data |> 
-	  assay(my_assay) |> 
+		.data |>
+	  assay(my_assay) |>
 		edgeR::DGEList() %>%
 
 		# Scale data if method is not "none"
@@ -869,8 +869,8 @@ get_differential_transcript_abundance_bulk_SE <- function(.data,
 #' @keywords internal
 #' @noRd
 #'
-#' 
-#' 
+#'
+#'
 #' @import tibble
 #' @importFrom magrittr set_colnames
 #' @importFrom stats model.matrix
@@ -938,17 +938,17 @@ get_differential_transcript_abundance_bulk_voom_SE <- function(.data,
 
 	# If no assay is specified take first
 	my_assay = ifelse(
-	  quo_is_symbol(.abundance), 
-	  quo_name(.abundance), 
+	  quo_is_symbol(.abundance),
+	  quo_name(.abundance),
 	  .data |>
 	    assayNames() |>
 	    extract2(1)
 	)
-	
+
 	voom_object =
 		.data %>%
 
-	  assay(my_assay) |> 
+	  assay(my_assay) |>
 	  edgeR::DGEList() %>%
 
 		# Scale data if method is not "none"
@@ -1050,8 +1050,8 @@ get_differential_transcript_abundance_bulk_voom_SE <- function(.data,
 #' @keywords internal
 #' @noRd
 #'
-#' 
-#' 
+#'
+#'
 #' @import tibble
 #' @importFrom magrittr set_colnames
 #' @importFrom stats model.matrix
@@ -1074,30 +1074,32 @@ get_differential_transcript_abundance_glmmSeq_SE <- function(.data,
                                                             .abundance = NULL,
                                                             .contrasts = NULL,
                                                             sample_annotation ,
-                                                            method = "deseq2",
-                                                            
+                                                            method,
+
                                                             test_above_log2_fold_change = NULL,
-                                                            
+
                                                             scaling_method = "TMM",
                                                             omit_contrast_in_colnames = FALSE,
                                                             prefix = "",
+                                                            .dispersion = NULL,
                                                             ...) {
-  
+
   .abundance = enquo(.abundance)
-  
+  .dispersion = enquo(.dispersion)
+
   # Check if contrasts are of the same form
   if(
     .contrasts %>% is.null %>% not() &
     .contrasts %>% class %>% equals("list") %>% not()
   )
     stop("tidybulk says: for DESeq2 the list of constrasts should be given in the form list(c(\"condition_column\",\"condition1\",\"condition2\")) i.e. list(c(\"genotype\",\"knockout\",\"wildtype\"))")
-  
+
   # Check if omit_contrast_in_colnames is correctly setup
   if(omit_contrast_in_colnames & length(.contrasts) > 1){
     warning("tidybulk says: you can omit contrasts in column names only when maximum one contrast is present")
     omit_contrast_in_colnames = FALSE
   }
-  
+
   # Check if package is installed, otherwise install
   if (find.package("edgeR", quiet = TRUE) %>% length %>% equals(0)) {
     message("tidybulk says: Installing edgeR needed for differential transcript abundance analyses")
@@ -1105,7 +1107,7 @@ get_differential_transcript_abundance_glmmSeq_SE <- function(.data,
       install.packages("BiocManager", repos = "https://cloud.r-project.org")
     BiocManager::install("edgeR", ask = FALSE)
   }
-  
+
   # Check if package is installed, otherwise install
   if (find.package("glmmSeq", quiet = TRUE) %>% length %>% equals(0)) {
     message("tidybulk says: Installing glmmSeq needed for differential transcript abundance analyses")
@@ -1113,65 +1115,79 @@ get_differential_transcript_abundance_glmmSeq_SE <- function(.data,
       install.packages("BiocManager", repos = "https://cloud.r-project.org")
     BiocManager::install("glmmSeq", ask = FALSE)
   }
-  
+
   # If no assay is specified take first
   my_assay = ifelse(
-    quo_is_symbol(.abundance), 
-    quo_name(.abundance), 
+    quo_is_symbol(.abundance),
+    quo_name(.abundance),
     .data |>
       assayNames() |>
       extract2(1)
   )
-  
-  metadata = 
-    .data |> 
-    colData() 
-  
-  counts = 
+
+  metadata =
+    .data |>
+    colData()
+
+  counts =
     .data %>%
     assay(my_assay)
-  
-  glmmSeq_object = 
+
+  if(quo_is_symbolic(.dispersion))
+    dispersion = rowData(.data)[,quo_name(.dispersion),drop=FALSE] |> as_tibble(rownames = feature__$name) |> deframe()
+  else
+    dispersion = setNames(edgeR::estimateDisp(counts)$tagwise.dispersion, rownames(counts))
+
+  # # Check dispersion
+  # if(!names(dispersion) |> sort() |> identical(
+  #   rownames(counts) |>
+  #   sort()
+  # )) stop("tidybulk says: The features in the dispersion vector do not overlap with the feature in the assay")
+
+  # Make sure the order matches the counts
+  dispersion = dispersion[rownames(counts)]
+
+  glmmSeq_object =
     glmmSeq( .formula,
                       countdata = counts ,
                       metadata =   metadata |> as.data.frame(),
-                      dispersion = setNames(edgeR::estimateDisp(counts)$tagwise.dispersion, rownames(counts)),
-                      progress = TRUE, 
+                      dispersion = dispersion,
+                      progress = TRUE,
                       method = method |> str_remove("(?i)^glmmSeq_" ),
                       ...
-    ) 
-  
-  glmmSeq_object |> 
-    summary() |> 
+    )
+
+  glmmSeq_object |>
+    summary() |>
     as_tibble(rownames = "transcript") |>
-    mutate(across(starts_with("P_"), list(adjusted = function(x) p.adjust(x, method="BH")), .names = "{.col}_{.fn}")) |> 
-    
+    mutate(across(starts_with("P_"), list(adjusted = function(x) p.adjust(x, method="BH")), .names = "{.col}_{.fn}")) |>
+
     # Attach attributes
     reattach_internals(.data) %>%
-    
+
     # select method
-    memorise_methods_used("glmmSeq") %>% 
-    
+    memorise_methods_used("glmmSeq") %>%
+
     # # Add raw object
     # attach_to_internals(glmmSeq_object, "glmmSeq") %>%
-    
+
     # Communicate the attribute added
     {
       rlang::inform("tidybulk says: to access the raw results (fitted GLM) do `attr(..., \"internals\")$glmmSeq`", .frequency_id = "Access DE results glmmSeq",  .frequency = "once")
       (.)
     }  %>%
-    
+
     # Attach prefix
     setNames(c(
       colnames(.)[1],
       sprintf("%s%s", prefix, colnames(.)[2:ncol(.)])
-    )) |> 
-    
-    list() |> 
-    setNames("result") |> 
+    )) |>
+
+    list() |>
+    setNames("result") |>
     c(list(result_raw = glmmSeq_object))
-  
-  
+
+
 }
 
 
@@ -1180,8 +1196,8 @@ get_differential_transcript_abundance_glmmSeq_SE <- function(.data,
 #' @keywords internal
 #' @noRd
 #'
-#' 
-#' 
+#'
+#'
 #' @import tibble
 #' @importFrom magrittr set_colnames
 #' @importFrom stats model.matrix
@@ -1213,7 +1229,7 @@ get_differential_transcript_abundance_deseq2_SE <- function(.data,
                                                             ...) {
 
   .abundance = enquo(.abundance)
-  
+
 	# Check if contrasts are of the same form
 	if(
 		.contrasts %>% is.null %>% not() &
@@ -1238,20 +1254,20 @@ get_differential_transcript_abundance_deseq2_SE <- function(.data,
         if (is.null(test_above_log2_fold_change)) {
           test_above_log2_fold_change <- 0
         }
-  
+
 	my_contrasts = .contrasts
 
 	# If no assay is specified take first
 	my_assay = ifelse(
-	  quo_is_symbol(.abundance), 
-	  quo_name(.abundance), 
+	  quo_is_symbol(.abundance),
+	  quo_name(.abundance),
 	  .data |>
 	    assayNames() |>
 	    extract2(1)
 	)
-	
+
 	deseq2_object =
-	  
+
 		# DESeq2
 		DESeq2::DESeqDataSetFromMatrix(
 		  countData = .data |> assay(my_assay),

From 898918e74574520346142b06d350f4ab7666a4fb Mon Sep 17 00:00:00 2001
From: Stefano Mangiola <mangiolastefano@gmail.com>
Date: Wed, 23 Aug 2023 12:49:45 +1000
Subject: [PATCH 05/12] make sure matrix is maintained when selecting rows

---
 R/glmmSeq.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/glmmSeq.R b/R/glmmSeq.R
index 7fa7d1dd..a5658000 100644
--- a/R/glmmSeq.R
+++ b/R/glmmSeq.R
@@ -185,7 +185,7 @@ glmmSeq = function (modelFormula, countdata, metadata, id = NULL, dispersion = N
   fullFormula <- update.formula(modelFormula, count ~ ., simplify = FALSE)
   subFormula <- lme4::subbars(modelFormula)
   variables <- rownames(attr(terms(subFormula), "factors"))
-  subsetMetadata <- metadata[, variables]
+  subsetMetadata <- metadata[, variables, drop=FALSE]
   if (is.null(id)) {
     fb <- lme4::findbars(modelFormula)
     id <- sub(".*[|]", "", fb)
@@ -195,9 +195,9 @@ glmmSeq = function (modelFormula, countdata, metadata, id = NULL, dispersion = N
   if (removeSingles) {
     nonSingles <- names(table(ids))[table(ids) > 1]
     nonSingleIDs <- ids %in% nonSingles
-    countdata <- countdata[, nonSingleIDs]
+    countdata <- countdata[, nonSingleIDs, drop=FALSE]
     sizeFactors <- sizeFactors[nonSingleIDs]
-    subsetMetadata <- subsetMetadata[nonSingleIDs, ]
+    subsetMetadata <- subsetMetadata[nonSingleIDs, , drop=FALSE]
     ids <- ids[nonSingleIDs]
   }
   if (!is.null(sizeFactors))

From 3aeee9a394e712e70d8e67af60acb2e7eab592f4 Mon Sep 17 00:00:00 2001
From: Stefano Mangiola <mangiolastefano@gmail.com>
Date: Mon, 28 Aug 2023 12:04:05 +1000
Subject: [PATCH 06/12] allow one gene for DE

---
 R/methods_SE.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/methods_SE.R b/R/methods_SE.R
index 8d4af0ff..5238012d 100755
--- a/R/methods_SE.R
+++ b/R/methods_SE.R
@@ -1336,7 +1336,7 @@ such as batch effects (if applicable) in the formula.
 	rowData(.data) = rowData(.data) %>% cbind(
 		my_differential_abundance$result %>%
 			as_matrix(rownames = "transcript") %>%
-			.[match(rownames(rowData(.data)), rownames(.)),]
+			.[match(rownames(rowData(.data)), rownames(.)),,drop=FALSE]
 	)
 
 

From fca9009e9fac64e2a1d7ff0dac817a0903cbebe8 Mon Sep 17 00:00:00 2001
From: Stefano Mangiola <mangiolastefano@gmail.com>
Date: Tue, 29 Aug 2023 15:17:11 +1000
Subject: [PATCH 07/12] add dispersion to tibble input for glmmseq

---
 R/functions.R                                 | 16 +++++++++++-
 .../test-bulk_methods_SummarizedExperiment.R  | 25 ++++++++++++++++++-
 2 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/R/functions.R b/R/functions.R
index 4e1c8ded..cab2fc8f 100755
--- a/R/functions.R
+++ b/R/functions.R
@@ -756,11 +756,25 @@ get_differential_transcript_abundance_glmmSeq <- function(.data,
   # Reorder counts
   counts = counts[,rownames(metadata),drop=FALSE]
   
+  if(quo_is_symbolic(.dispersion))
+    dispersion = .data |> pivot_transcript() |> select(!!feature__$symbol, !!.dispersion) |> deframe()
+  else
+    dispersion = setNames(edgeR::estimateDisp(counts)$tagwise.dispersion, rownames(counts))
+  
+  # # Check dispersion
+  # if(!names(dispersion) |> sort() |> identical(
+  #   rownames(counts) |>
+  #   sort()
+  # )) stop("tidybulk says: The features in the dispersion vector do not overlap with the feature in the assay")
+  
+  # Make sure the order matches the counts
+  dispersion = dispersion[rownames(counts)]
+  
   glmmSeq_object = 
     glmmSeq( .formula,
           countdata = counts ,
           metadata =   metadata,
-          dispersion = setNames(edgeR::estimateDisp(counts)$tagwise.dispersion, rownames(counts)),
+          dispersion = dispersion,
           progress = TRUE, 
           method = method |> str_remove("(?i)^glmmSeq_"),
           ...
diff --git a/tests/testthat/test-bulk_methods_SummarizedExperiment.R b/tests/testthat/test-bulk_methods_SummarizedExperiment.R
index bf56bf63..86cbe7df 100755
--- a/tests/testthat/test-bulk_methods_SummarizedExperiment.R
+++ b/tests/testthat/test-bulk_methods_SummarizedExperiment.R
@@ -443,7 +443,30 @@ test_that("differential trancript abundance - random effects SE",{
       tolerance=1e-3
     )
   
-  
+ # Custom dispersion
+ se_mini = 
+   se_mini |>
+   identify_abundant(factor_of_interest = condition)
+ 
+ rowData(se_mini)$disp_ = rnorm(nrow(se_mini))
+ 
+ res = 
+   se_mini |>
+   identify_abundant(factor_of_interest = condition) |> 
+   #mutate(time = time |> stringr::str_replace_all(" ", "_")) |> 
+   test_differential_abundance(
+     ~ condition + (1 + condition | time),
+     method = "glmmseq_lme4", 
+     dispersion = disp_
+   ) 
+ 
+ rowData(res)[,"P_condition_adjusted"] |> 
+   head(4) |> 
+   expect_equal(
+     c(0.1441371, 0.1066183, 0.1370748, NA),
+     tolerance=1e-3
+   )
+ 
 })
 
 

From 9c8c7874cfc62cc7bf43aa6f309476ef092f83f5 Mon Sep 17 00:00:00 2001
From: stemangiola <mangiolastefano@gmail.com>
Date: Tue, 22 Aug 2023 22:51:26 +1000
Subject: [PATCH 08/12] add argument

---
 R/functions_SE.R | 168 ++++++++++++++++++++++++++---------------------
 1 file changed, 92 insertions(+), 76 deletions(-)

diff --git a/R/functions_SE.R b/R/functions_SE.R
index ede03fef..fcee85d0 100755
--- a/R/functions_SE.R
+++ b/R/functions_SE.R
@@ -3,8 +3,8 @@
 #' @keywords internal
 #' @noRd
 #'
-#' 
-#' 
+#'
+#'
 #' @import tibble
 #' @importFrom stats kmeans
 #' @importFrom rlang :=
@@ -55,8 +55,8 @@ get_clusters_kmeans_bulk_SE <-
 #' @keywords internal
 #' @noRd
 #'
-#' 
-#' 
+#'
+#'
 #' @import tibble
 #' @importFrom rlang :=
 #' @importFrom utils install.packages
@@ -113,8 +113,8 @@ get_clusters_SNN_bulk_SE <-
 #' @keywords internal
 #' @noRd
 #'
-#' 
-#' 
+#'
+#'
 #' @import tibble
 #' @importFrom purrr map_dfr
 #' @importFrom rlang :=
@@ -207,8 +207,8 @@ get_reduced_dimensions_MDS_bulk_SE <-
 #' @keywords internal
 #' @noRd
 #'
-#' 
-#' 
+#'
+#'
 #' @import tibble
 #' @importFrom rlang :=
 #' @importFrom stats prcomp
@@ -309,8 +309,8 @@ we suggest to partition the dataset for sample clusters.
 #' @keywords internal
 #' @noRd
 #'
-#' 
-#' 
+#'
+#'
 #' @import tibble
 #' @importFrom rlang :=
 #' @importFrom stats setNames
@@ -392,8 +392,8 @@ get_reduced_dimensions_TSNE_bulk_SE <-
 #'
 #' @keywords internal
 #'
-#' 
-#' 
+#'
+#'
 #' @import tibble
 #' @importFrom rlang :=
 #' @importFrom stats setNames
@@ -564,8 +564,8 @@ keep_variable_transcripts_SE = function(.data,
 #' @keywords internal
 #' @noRd
 #'
-#' 
-#' 
+#'
+#'
 #' @import tibble
 #' @importFrom rlang :=
 #'
@@ -694,8 +694,8 @@ remove_redundancy_elements_though_reduced_dimensions_SE <-
 #' @keywords internal
 #' @noRd
 #'
-#' 
-#' 
+#'
+#'
 #' @import tibble
 #' @importFrom magrittr set_colnames
 #' @importFrom stats model.matrix
@@ -727,7 +727,7 @@ get_differential_transcript_abundance_bulk_SE <- function(.data,
 																											 ...) {
 
   .abundance = enquo(.abundance)
-  
+
 	# Check if omit_contrast_in_colnames is correctly setup
 	if(omit_contrast_in_colnames & length(.contrasts) > 1){
 		warning("tidybulk says: you can omit contrasts in column names only when maximum one contrast is present")
@@ -765,16 +765,16 @@ get_differential_transcript_abundance_bulk_SE <- function(.data,
 
 	# If no assay is specified take first
 	my_assay = ifelse(
-	  quo_is_symbol(.abundance), 
-	  quo_name(.abundance), 
+	  quo_is_symbol(.abundance),
+	  quo_name(.abundance),
 	  .data |>
 	    assayNames() |>
 	    extract2(1)
 	 )
-	
+
 	edgeR_object =
-		.data |> 
-	  assay(my_assay) |> 
+		.data |>
+	  assay(my_assay) |>
 		edgeR::DGEList() %>%
 
 		# Scale data if method is not "none"
@@ -869,8 +869,8 @@ get_differential_transcript_abundance_bulk_SE <- function(.data,
 #' @keywords internal
 #' @noRd
 #'
-#' 
-#' 
+#'
+#'
 #' @import tibble
 #' @importFrom magrittr set_colnames
 #' @importFrom stats model.matrix
@@ -938,17 +938,17 @@ get_differential_transcript_abundance_bulk_voom_SE <- function(.data,
 
 	# If no assay is specified take first
 	my_assay = ifelse(
-	  quo_is_symbol(.abundance), 
-	  quo_name(.abundance), 
+	  quo_is_symbol(.abundance),
+	  quo_name(.abundance),
 	  .data |>
 	    assayNames() |>
 	    extract2(1)
 	)
-	
+
 	voom_object =
 		.data %>%
 
-	  assay(my_assay) |> 
+	  assay(my_assay) |>
 	  edgeR::DGEList() %>%
 
 		# Scale data if method is not "none"
@@ -1050,8 +1050,8 @@ get_differential_transcript_abundance_bulk_voom_SE <- function(.data,
 #' @keywords internal
 #' @noRd
 #'
-#' 
-#' 
+#'
+#'
 #' @import tibble
 #' @importFrom magrittr set_colnames
 #' @importFrom stats model.matrix
@@ -1074,30 +1074,32 @@ get_differential_transcript_abundance_glmmSeq_SE <- function(.data,
                                                             .abundance = NULL,
                                                             .contrasts = NULL,
                                                             sample_annotation ,
-                                                            method = "deseq2",
-                                                            
+                                                            method,
+
                                                             test_above_log2_fold_change = NULL,
-                                                            
+
                                                             scaling_method = "TMM",
                                                             omit_contrast_in_colnames = FALSE,
                                                             prefix = "",
+                                                            .dispersion = NULL,
                                                             ...) {
-  
+
   .abundance = enquo(.abundance)
-  
+  .dispersion = enquo(.dispersion)
+
   # Check if contrasts are of the same form
   if(
     .contrasts %>% is.null %>% not() &
     .contrasts %>% class %>% equals("list") %>% not()
   )
     stop("tidybulk says: for DESeq2 the list of constrasts should be given in the form list(c(\"condition_column\",\"condition1\",\"condition2\")) i.e. list(c(\"genotype\",\"knockout\",\"wildtype\"))")
-  
+
   # Check if omit_contrast_in_colnames is correctly setup
   if(omit_contrast_in_colnames & length(.contrasts) > 1){
     warning("tidybulk says: you can omit contrasts in column names only when maximum one contrast is present")
     omit_contrast_in_colnames = FALSE
   }
-  
+
   # Check if package is installed, otherwise install
   if (find.package("edgeR", quiet = TRUE) %>% length %>% equals(0)) {
     message("tidybulk says: Installing edgeR needed for differential transcript abundance analyses")
@@ -1105,7 +1107,7 @@ get_differential_transcript_abundance_glmmSeq_SE <- function(.data,
       install.packages("BiocManager", repos = "https://cloud.r-project.org")
     BiocManager::install("edgeR", ask = FALSE)
   }
-  
+
   # Check if package is installed, otherwise install
   if (find.package("glmmSeq", quiet = TRUE) %>% length %>% equals(0)) {
     message("tidybulk says: Installing glmmSeq needed for differential transcript abundance analyses")
@@ -1113,65 +1115,79 @@ get_differential_transcript_abundance_glmmSeq_SE <- function(.data,
       install.packages("BiocManager", repos = "https://cloud.r-project.org")
     BiocManager::install("glmmSeq", ask = FALSE)
   }
-  
+
   # If no assay is specified take first
   my_assay = ifelse(
-    quo_is_symbol(.abundance), 
-    quo_name(.abundance), 
+    quo_is_symbol(.abundance),
+    quo_name(.abundance),
     .data |>
       assayNames() |>
       extract2(1)
   )
-  
-  metadata = 
-    .data |> 
-    colData() 
-  
-  counts = 
+
+  metadata =
+    .data |>
+    colData()
+
+  counts =
     .data %>%
     assay(my_assay)
-  
-  glmmSeq_object = 
+
+  if(quo_is_symbolic(.dispersion))
+    dispersion = rowData(.data)[,quo_name(.dispersion),drop=FALSE] |> as_tibble(rownames = feature__$name) |> deframe()
+  else
+    dispersion = setNames(edgeR::estimateDisp(counts)$tagwise.dispersion, rownames(counts))
+
+  # # Check dispersion
+  # if(!names(dispersion) |> sort() |> identical(
+  #   rownames(counts) |>
+  #   sort()
+  # )) stop("tidybulk says: The features in the dispersion vector do not overlap with the feature in the assay")
+
+  # Make sure the order matches the counts
+  dispersion = dispersion[rownames(counts)]
+
+  glmmSeq_object =
     glmmSeq( .formula,
                       countdata = counts ,
                       metadata =   metadata |> as.data.frame(),
-                      dispersion = setNames(edgeR::estimateDisp(counts)$tagwise.dispersion, rownames(counts)),
-                      progress = TRUE, 
+                      dispersion = dispersion,
+                      progress = TRUE,
                       method = method |> str_remove("(?i)^glmmSeq_" ),
                       ...
-    ) 
-  
-  glmmSeq_object |> 
-    summary() |> 
+    )
+
+  glmmSeq_object |>
+    summary() |>
     as_tibble(rownames = "transcript") |>
-    mutate(across(starts_with("P_"), list(adjusted = function(x) p.adjust(x, method="BH")), .names = "{.col}_{.fn}")) |> 
-    
+    mutate(across(starts_with("P_"), list(adjusted = function(x) p.adjust(x, method="BH")), .names = "{.col}_{.fn}")) |>
+
     # Attach attributes
     reattach_internals(.data) %>%
-    
+
     # select method
-    memorise_methods_used("glmmSeq") %>% 
-    
+    memorise_methods_used("glmmSeq") %>%
+
     # # Add raw object
     # attach_to_internals(glmmSeq_object, "glmmSeq") %>%
-    
+
     # Communicate the attribute added
     {
       rlang::inform("tidybulk says: to access the raw results (fitted GLM) do `attr(..., \"internals\")$glmmSeq`", .frequency_id = "Access DE results glmmSeq",  .frequency = "once")
       (.)
     }  %>%
-    
+
     # Attach prefix
     setNames(c(
       colnames(.)[1],
       sprintf("%s%s", prefix, colnames(.)[2:ncol(.)])
-    )) |> 
-    
-    list() |> 
-    setNames("result") |> 
+    )) |>
+
+    list() |>
+    setNames("result") |>
     c(list(result_raw = glmmSeq_object))
-  
-  
+
+
 }
 
 
@@ -1180,8 +1196,8 @@ get_differential_transcript_abundance_glmmSeq_SE <- function(.data,
 #' @keywords internal
 #' @noRd
 #'
-#' 
-#' 
+#'
+#'
 #' @import tibble
 #' @importFrom magrittr set_colnames
 #' @importFrom stats model.matrix
@@ -1213,7 +1229,7 @@ get_differential_transcript_abundance_deseq2_SE <- function(.data,
                                                             ...) {
 
   .abundance = enquo(.abundance)
-  
+
 	# Check if contrasts are of the same form
 	if(
 		.contrasts %>% is.null %>% not() &
@@ -1238,20 +1254,20 @@ get_differential_transcript_abundance_deseq2_SE <- function(.data,
         if (is.null(test_above_log2_fold_change)) {
           test_above_log2_fold_change <- 0
         }
-  
+
 	my_contrasts = .contrasts
 
 	# If no assay is specified take first
 	my_assay = ifelse(
-	  quo_is_symbol(.abundance), 
-	  quo_name(.abundance), 
+	  quo_is_symbol(.abundance),
+	  quo_name(.abundance),
 	  .data |>
 	    assayNames() |>
 	    extract2(1)
 	)
-	
+
 	deseq2_object =
-	  
+
 		# DESeq2
 		DESeq2::DESeqDataSetFromMatrix(
 		  countData = .data |> assay(my_assay),

From 80bc6702273cc245a886c50c7dae7e72c096914f Mon Sep 17 00:00:00 2001
From: Stefano Mangiola <mangiolastefano@gmail.com>
Date: Wed, 23 Aug 2023 12:49:45 +1000
Subject: [PATCH 09/12] make sure matrix is maintained when selecting rows

---
 R/glmmSeq.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/glmmSeq.R b/R/glmmSeq.R
index d82499d1..4e30737c 100644
--- a/R/glmmSeq.R
+++ b/R/glmmSeq.R
@@ -185,7 +185,7 @@ glmmSeq = function (modelFormula, countdata, metadata, id = NULL, dispersion = N
   fullFormula <- update.formula(modelFormula, count ~ ., simplify = FALSE)
   subFormula <- lme4::subbars(modelFormula)
   variables <- rownames(attr(terms(subFormula), "factors"))
-  subsetMetadata <- metadata[, variables]
+  subsetMetadata <- metadata[, variables, drop=FALSE]
   if (is.null(id)) {
     fb <- lme4::findbars(modelFormula)
     id <- sub(".*[|]", "", fb)
@@ -195,9 +195,9 @@ glmmSeq = function (modelFormula, countdata, metadata, id = NULL, dispersion = N
   if (removeSingles) {
     nonSingles <- names(table(ids))[table(ids) > 1]
     nonSingleIDs <- ids %in% nonSingles
-    countdata <- countdata[, nonSingleIDs]
+    countdata <- countdata[, nonSingleIDs, drop=FALSE]
     sizeFactors <- sizeFactors[nonSingleIDs]
-    subsetMetadata <- subsetMetadata[nonSingleIDs, ]
+    subsetMetadata <- subsetMetadata[nonSingleIDs, , drop=FALSE]
     ids <- ids[nonSingleIDs]
   }
   if (!is.null(sizeFactors))

From 31291279538ba7103f5021fb9d8840f11b7f96b3 Mon Sep 17 00:00:00 2001
From: Stefano Mangiola <mangiolastefano@gmail.com>
Date: Mon, 28 Aug 2023 12:04:05 +1000
Subject: [PATCH 10/12] allow one gene for DE

---
 R/methods_SE.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/methods_SE.R b/R/methods_SE.R
index 4851b652..85a9970f 100755
--- a/R/methods_SE.R
+++ b/R/methods_SE.R
@@ -1430,7 +1430,7 @@ such as batch effects (if applicable) in the formula.
 	rowData(.data) = rowData(.data) %>% cbind(
 		my_differential_abundance$result %>%
 			as_matrix(rownames = "transcript") %>%
-			.[match(rownames(rowData(.data)), rownames(.)),]
+			.[match(rownames(rowData(.data)), rownames(.)),,drop=FALSE]
 	)
 
 

From a1779458ba01fcb3406ba729904af4d76af4f983 Mon Sep 17 00:00:00 2001
From: Stefano Mangiola <mangiolastefano@gmail.com>
Date: Tue, 29 Aug 2023 15:17:11 +1000
Subject: [PATCH 11/12] add dispersion to tibble input for glmmseq

---
 R/functions.R                                 | 22 ++++++++++++---
 .../test-bulk_methods_SummarizedExperiment.R  | 27 +++++++++++++++++--
 2 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/R/functions.R b/R/functions.R
index 6ffa5add..271483d6 100755
--- a/R/functions.R
+++ b/R/functions.R
@@ -755,13 +755,27 @@ get_differential_transcript_abundance_glmmSeq <- function(.data,
 
   # Reorder counts
   counts = counts[,rownames(metadata),drop=FALSE]
-
-  glmmSeq_object =
+  
+  if(quo_is_symbolic(.dispersion))
+    dispersion = .data |> pivot_transcript() |> select(!!feature__$symbol, !!.dispersion) |> deframe()
+  else
+    dispersion = setNames(edgeR::estimateDisp(counts)$tagwise.dispersion, rownames(counts))
+  
+  # # Check dispersion
+  # if(!names(dispersion) |> sort() |> identical(
+  #   rownames(counts) |>
+  #   sort()
+  # )) stop("tidybulk says: The features in the dispersion vector do not overlap with the feature in the assay")
+  
+  # Make sure the order matches the counts
+  dispersion = dispersion[rownames(counts)]
+  
+  glmmSeq_object = 
     glmmSeq( .formula,
           countdata = counts ,
           metadata =   metadata,
-          dispersion = setNames(edgeR::estimateDisp(counts)$tagwise.dispersion, rownames(counts)),
-          progress = TRUE,
+          dispersion = dispersion,
+          progress = TRUE, 
           method = method |> str_remove("(?i)^glmmSeq_"),
           ...
   )
diff --git a/tests/testthat/test-bulk_methods_SummarizedExperiment.R b/tests/testthat/test-bulk_methods_SummarizedExperiment.R
index c9946657..453f0496 100755
--- a/tests/testthat/test-bulk_methods_SummarizedExperiment.R
+++ b/tests/testthat/test-bulk_methods_SummarizedExperiment.R
@@ -474,8 +474,31 @@ test_that("differential trancript abundance - random effects SE",{
       c(0.1065866, 0.1109067, 0.1116562 , NA),
       tolerance=1e-2
     )
-
-
+  
+ # Custom dispersion
+ se_mini = 
+   se_mini |>
+   identify_abundant(factor_of_interest = condition)
+ 
+ rowData(se_mini)$disp_ = rnorm(nrow(se_mini))
+ 
+ res = 
+   se_mini |>
+   identify_abundant(factor_of_interest = condition) |> 
+   #mutate(time = time |> stringr::str_replace_all(" ", "_")) |> 
+   test_differential_abundance(
+     ~ condition + (1 + condition | time),
+     method = "glmmseq_lme4", 
+     dispersion = disp_
+   ) 
+ 
+ rowData(res)[,"P_condition_adjusted"] |> 
+   head(4) |> 
+   expect_equal(
+     c(0.1441371, 0.1066183, 0.1370748, NA),
+     tolerance=1e-3
+   )
+ 
 })
 
 

From cb96f3245ee1ef561d7a49e55516b632c7c143f8 Mon Sep 17 00:00:00 2001
From: Stefano Mangiola <mangiolastefano@gmail.com>
Date: Wed, 30 Aug 2023 00:00:46 +1000
Subject: [PATCH 12/12] add custom dispersion for tibble input

---
 R/functions.R                                 |  6 ++-
 tests/testthat/test-bulk_methods.R            | 38 +++++++++++++++++--
 .../test-bulk_methods_SummarizedExperiment.R  | 13 ++++---
 3 files changed, 45 insertions(+), 12 deletions(-)

diff --git a/R/functions.R b/R/functions.R
index 271483d6..82f9fc2a 100755
--- a/R/functions.R
+++ b/R/functions.R
@@ -685,13 +685,15 @@ get_differential_transcript_abundance_glmmSeq <- function(.data,
                                                                omit_contrast_in_colnames = FALSE,
                                                                prefix = "",
                                                                .sample_total_read_count = NULL,
+                                                          .dispersion = NULL,
                                                           ...) {
   # Get column names
   .sample = enquo(.sample)
   .transcript = enquo(.transcript)
   .abundance = enquo(.abundance)
   .sample_total_read_count = enquo(.sample_total_read_count)
-
+  .dispersion = enquo(.dispersion)
+  
   # Check if omit_contrast_in_colnames is correctly setup
   if(omit_contrast_in_colnames & length(.contrasts) > 1){
     warning("tidybulk says: you can omit contrasts in column names only when maximum one contrast is present")
@@ -757,7 +759,7 @@ get_differential_transcript_abundance_glmmSeq <- function(.data,
   counts = counts[,rownames(metadata),drop=FALSE]
   
   if(quo_is_symbolic(.dispersion))
-    dispersion = .data |> pivot_transcript() |> select(!!feature__$symbol, !!.dispersion) |> deframe()
+    dispersion = .data |> pivot_transcript(!!.transcript) |> select(!!.transcript, !!.dispersion) |> deframe()
   else
     dispersion = setNames(edgeR::estimateDisp(counts)$tagwise.dispersion, rownames(counts))
   
diff --git a/tests/testthat/test-bulk_methods.R b/tests/testthat/test-bulk_methods.R
index 6db6d34f..417c5827 100755
--- a/tests/testthat/test-bulk_methods.R
+++ b/tests/testthat/test-bulk_methods.R
@@ -835,18 +835,22 @@ test_that("DESeq2 differential trancript abundance - no object",{
 
 test_that("differential trancript abundance - random effects",{
 
-  input_df |>
+  my_input = 
+    input_df |>
     identify_abundant(a, b, c, factor_of_interest = condition) |>
     mutate(time = time |> stringr::str_replace_all(" ", "_")) |>
-
-    filter(b %in% c("ABCB4" , "ABCB9" , "ACAP1",  "ACHE",   "ACP5" ,  "ADAM28"))|>
+    
+    filter(b %in% c("ABCB4" , "ABCB9" , "ACAP1",  "ACHE",   "ACP5" ,  "ADAM28"))
+  
+  my_input |>
     test_differential_abundance(
       ~ condition + (1 + condition | time),
       .sample = a,
       .transcript = b,
       .abundance = c,
       method = "glmmseq_lme4",
-      action="only"
+      action="only", 
+      cores = 1
     ) |>
     pull(P_condition_adjusted) |>
     head(4) |>
@@ -855,6 +859,32 @@ test_that("differential trancript abundance - random effects",{
       tolerance=1e-3
     )
 
+  # Custom dispersion
+  my_input = 
+    my_input |>
+    left_join(
+      my_input |> pivot_transcript(b) |> mutate(disp_ = 2 ), 
+      by = join_by(b, entrez, .abundant)
+    )
+   
+
+    my_input |>
+    test_differential_abundance(
+      ~ condition + (1 + condition | time),
+      .sample = a,
+      .transcript = b,
+      .abundance = c,
+      method = "glmmseq_lme4",
+      action="only", 
+      cores = 1, 
+      .dispersion = disp_
+    ) |>
+    pull(P_condition_adjusted) |>
+    head(4) |>
+    expect_equal(
+      c(0.1081176, 0.1303558, 0.1303558, 0.1693276),
+      tolerance=1e-3
+    )
 
 })
 
diff --git a/tests/testthat/test-bulk_methods_SummarizedExperiment.R b/tests/testthat/test-bulk_methods_SummarizedExperiment.R
index 453f0496..aa67e9d5 100755
--- a/tests/testthat/test-bulk_methods_SummarizedExperiment.R
+++ b/tests/testthat/test-bulk_methods_SummarizedExperiment.R
@@ -465,7 +465,8 @@ test_that("differential trancript abundance - random effects SE",{
     #mutate(time = time |> stringr::str_replace_all(" ", "_")) |>
     test_differential_abundance(
       ~ condition + (1 + condition | time),
-      method = "glmmseq_lme4"
+      method = "glmmseq_lme4", 
+      cores = 1
     )
 
  rowData(res)[,"P_condition_adjusted"] |>
@@ -480,22 +481,22 @@ test_that("differential trancript abundance - random effects SE",{
    se_mini |>
    identify_abundant(factor_of_interest = condition)
  
- rowData(se_mini)$disp_ = rnorm(nrow(se_mini))
+ rowData(se_mini)$disp_ = rep(2,nrow(se_mini))
  
  res = 
-   se_mini |>
-   identify_abundant(factor_of_interest = condition) |> 
+   se_mini[1:10,] |>
    #mutate(time = time |> stringr::str_replace_all(" ", "_")) |> 
    test_differential_abundance(
      ~ condition + (1 + condition | time),
      method = "glmmseq_lme4", 
-     dispersion = disp_
+     .dispersion = disp_, 
+     cores = 1
    ) 
  
  rowData(res)[,"P_condition_adjusted"] |> 
    head(4) |> 
    expect_equal(
-     c(0.1441371, 0.1066183, 0.1370748, NA),
+     c(0.1153254, 0.1668555, 0.1668555 ,       NA),
      tolerance=1e-3
    )