diff --git a/_quarto.yml b/_quarto.yml index ffe70f2..4a35e9f 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -29,4 +29,3 @@ format: # cache computations execute: freeze: auto - cache: true diff --git a/notebooks/usecase.qmd b/notebooks/usecase.qmd index 4e09cad..8a2065d 100644 --- a/notebooks/usecase.qmd +++ b/notebooks/usecase.qmd @@ -55,8 +55,21 @@ adata <- read_h5ad("usecase_data/sc_counts_reannotated_with_counts.h5ad") adata ``` +## 3. Subset data -## 3. Compute pseudobulk +Subset the data to a single small molecule and the control condition to keep the computation efficient: + +```{python select_sm_celltype} +sm_name = "Belinostat" +control_name = "Dimethyl Sulfoxide" + +adata = adata[ + adata.obs["sm_name"].isin([sm_name, control_name]) +] +``` + + +## 4. Compute pseudobulk ```{python import_pandas} import pandas as pd @@ -97,7 +110,7 @@ Store to disk: pb_adata.write_h5ad("usecase_data/pseudobulk.h5ad") ``` -## 4. Compute DE +## 5. Compute DE ```{r load_pbdata} library(anndata) @@ -118,13 +131,10 @@ Create DESeq dataset: count_data <- t(pb_adata$X) storage.mode(count_data) <- "integer" -# subset to small molecule and control -ix <- pb_adata$obs$sm_name %in% c(sm_name, control_name) - # create dataset dds <- DESeq2::DESeqDataSetFromMatrix( - countData = count_data[, ix], - colData = pb_adata$obs[ix, ], + countData = count_data, + colData = pb_adata$obs, design = ~ sm_name + cell_type + plate_name, ) ```