diff --git a/.github/workflows/run_cell-type-nonETP-ALL-03.yml b/.github/workflows/run_cell-type-nonETP-ALL-03.yml index 1adf05214..7880d3596 100644 --- a/.github/workflows/run_cell-type-nonETP-ALL-03.yml +++ b/.github/workflows/run_cell-type-nonETP-ALL-03.yml @@ -89,4 +89,5 @@ jobs: # run module script(s) here Rscript scripts/00-01_processing_rds.R Rscript scripts/02-03_annotation.R - Rscript scripts/multipanel_plot.R + Rscript scripts/04_multipanel_plot.R + Rscript scripts/05_cluster_evaluation.R diff --git a/analyses/cell-type-nonETP-ALL-03/plots/SCPCL000077-SCPCL000703_splitPlot.png b/analyses/cell-type-nonETP-ALL-03/plots/SCPCL000077-SCPCL000703_splitPlot.png new file mode 100644 index 000000000..685182a77 Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/SCPCL000077-SCPCL000703_splitPlot.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/SCPCL000082-SCPCL000703_splitPlot.png b/analyses/cell-type-nonETP-ALL-03/plots/SCPCL000082-SCPCL000703_splitPlot.png new file mode 100644 index 000000000..1ea03531a Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/SCPCL000082-SCPCL000703_splitPlot.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/SCPCL000704-SCPCL000703_splitPlot.png b/analyses/cell-type-nonETP-ALL-03/plots/SCPCL000704-SCPCL000703_splitPlot.png new file mode 100644 index 000000000..5d2bbee49 Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/SCPCL000704-SCPCL000703_splitPlot.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/SCPCL000706-SCPCL000703_splitPlot.png b/analyses/cell-type-nonETP-ALL-03/plots/SCPCL000706-SCPCL000703_splitPlot.png new file mode 100644 index 000000000..8eda777ce Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/SCPCL000706-SCPCL000703_splitPlot.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/SCPCL000710-SCPCL000703_splitPlot.png b/analyses/cell-type-nonETP-ALL-03/plots/SCPCL000710-SCPCL000703_splitPlot.png new 
file mode 100644 index 000000000..aa1b42242 Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/SCPCL000710-SCPCL000703_splitPlot.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000076_blastModuleScore.png b/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000076_blastModuleScore.png new file mode 100644 index 000000000..c17dd4f95 Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000076_blastModuleScore.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000076_celltypeVScopykat.png b/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000076_celltypeVScopykat.png new file mode 100644 index 000000000..c6a20075a Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000076_celltypeVScopykat.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000078_blastModuleScore.png b/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000078_blastModuleScore.png new file mode 100644 index 000000000..4f7e0581c Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000078_blastModuleScore.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000078_celltypeVScopykat.png b/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000078_celltypeVScopykat.png new file mode 100644 index 000000000..2a4ace4e0 Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000078_celltypeVScopykat.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000079_blastModuleScore.png b/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000079_blastModuleScore.png new file mode 100644 index 000000000..c3c82e1bb Binary files /dev/null and 
b/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000079_blastModuleScore.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000079_celltypeVScopykat.png b/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000079_celltypeVScopykat.png new file mode 100644 index 000000000..c4e8beb0d Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000079_celltypeVScopykat.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000080_blastModuleScore.png b/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000080_blastModuleScore.png new file mode 100644 index 000000000..f68b799eb Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000080_blastModuleScore.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000080_celltypeVScopykat.png b/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000080_celltypeVScopykat.png new file mode 100644 index 000000000..bc87e54db Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000080_celltypeVScopykat.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000081_blastModuleScore.png b/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000081_blastModuleScore.png new file mode 100644 index 000000000..eae6b7b59 Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000081_blastModuleScore.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000081_celltypeVScopykat.png b/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000081_celltypeVScopykat.png new file mode 100644 index 000000000..3b6385348 Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000081_celltypeVScopykat.png differ diff --git 
a/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000703_blastModuleScore.png b/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000703_blastModuleScore.png new file mode 100644 index 000000000..8312e8808 Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000703_blastModuleScore.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000703_celltypeVScopykat.png b/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000703_celltypeVScopykat.png new file mode 100644 index 000000000..d49cf1548 Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000703_celltypeVScopykat.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000706_blastModuleScore.png b/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000706_blastModuleScore.png new file mode 100644 index 000000000..a52926f25 Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000706_blastModuleScore.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000706_celltypeVScopykat.png b/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000706_celltypeVScopykat.png new file mode 100644 index 000000000..c154287ad Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/copykat_exploration/SCPCL000706_celltypeVScopykat.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/multipanels_SCPCL000077-SCPCL000703.png b/analyses/cell-type-nonETP-ALL-03/plots/multipanels_SCPCL000077-SCPCL000703.png new file mode 100644 index 000000000..62adbb336 Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/multipanels_SCPCL000077-SCPCL000703.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/multipanels_SCPCL000082-SCPCL000703.png 
b/analyses/cell-type-nonETP-ALL-03/plots/multipanels_SCPCL000082-SCPCL000703.png new file mode 100644 index 000000000..b8afd806b Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/multipanels_SCPCL000082-SCPCL000703.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/multipanels_SCPCL000704-SCPCL000703.png b/analyses/cell-type-nonETP-ALL-03/plots/multipanels_SCPCL000704-SCPCL000703.png new file mode 100644 index 000000000..66b08dcca Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/multipanels_SCPCL000704-SCPCL000703.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/multipanels_SCPCL000706-SCPCL000703.png b/analyses/cell-type-nonETP-ALL-03/plots/multipanels_SCPCL000706-SCPCL000703.png new file mode 100644 index 000000000..49484ab8b Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/multipanels_SCPCL000706-SCPCL000703.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/multipanels_SCPCL000710-SCPCL000703.png b/analyses/cell-type-nonETP-ALL-03/plots/multipanels_SCPCL000710-SCPCL000703.png new file mode 100644 index 000000000..938a02e7e Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/multipanels_SCPCL000710-SCPCL000703.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000076_Bcells.png b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000076_Bcells.png new file mode 100644 index 000000000..e08add940 Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000076_Bcells.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000076_newBcells.png b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000076_newBcells.png new file mode 100644 index 000000000..e50ac8303 Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000076_newBcells.png differ diff --git 
a/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000077_Bcells.png b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000077_Bcells.png new file mode 100644 index 000000000..942a8d975 Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000077_Bcells.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000077_newBcells.png b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000077_newBcells.png new file mode 100644 index 000000000..00ed4aa07 Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000077_newBcells.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000078_Bcells.png b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000078_Bcells.png new file mode 100644 index 000000000..f1f85fd90 Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000078_Bcells.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000078_newBcells.png b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000078_newBcells.png new file mode 100644 index 000000000..615d19fd6 Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000078_newBcells.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000079_Bcells.png b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000079_Bcells.png new file mode 100644 index 000000000..a9b552ba1 Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000079_Bcells.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000079_newBcells.png b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000079_newBcells.png new file mode 100644 index 000000000..5313da728 Binary files /dev/null and 
b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000079_newBcells.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000080_Bcells.png b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000080_Bcells.png new file mode 100644 index 000000000..2a2bd77dd Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000080_Bcells.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000080_newBcells.png b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000080_newBcells.png new file mode 100644 index 000000000..e6966704a Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000080_newBcells.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000081_Bcells.png b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000081_Bcells.png new file mode 100644 index 000000000..e00d7964f Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000081_Bcells.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000081_newBcells.png b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000081_newBcells.png new file mode 100644 index 000000000..0e8c05333 Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000081_newBcells.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000703_Bcells.png b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000703_Bcells.png new file mode 100644 index 000000000..362e5ca32 Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000703_Bcells.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000703_newBcells.png 
b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000703_newBcells.png new file mode 100644 index 000000000..d93b1c7d9 Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000703_newBcells.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000704_Bcells.png b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000704_Bcells.png new file mode 100644 index 000000000..07078cbc7 Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000704_Bcells.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000704_newBcells.png b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000704_newBcells.png new file mode 100644 index 000000000..fdb7479f4 Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000704_newBcells.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000706_Bcells.png b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000706_Bcells.png new file mode 100644 index 000000000..ae9601cce Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000706_Bcells.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000706_newBcells.png b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000706_newBcells.png new file mode 100644 index 000000000..9b0bac173 Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000706_newBcells.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000710_Bcells.png b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000710_Bcells.png new file mode 100644 index 000000000..8a55b0d2e Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000710_Bcells.png differ diff --git 
a/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000710_newBcells.png b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000710_newBcells.png new file mode 100644 index 000000000..d6f9286f7 Binary files /dev/null and b/analyses/cell-type-nonETP-ALL-03/plots/sctype_exploration/SCPCL000710_newBcells.png differ diff --git a/analyses/cell-type-nonETP-ALL-03/renv.lock b/analyses/cell-type-nonETP-ALL-03/renv.lock index 607a8b2de..708c6c33b 100644 --- a/analyses/cell-type-nonETP-ALL-03/renv.lock +++ b/analyses/cell-type-nonETP-ALL-03/renv.lock @@ -76,6 +76,41 @@ ], "Hash": "3aec5928ca10897d7a0a1205aae64627" }, + "BiocNeighbors": { + "Package": "BiocNeighbors", + "Version": "1.22.0", + "Source": "Bioconductor", + "Repository": "Bioconductor 3.19", + "Requirements": [ + "BiocParallel", + "Matrix", + "Rcpp", + "RcppHNSW", + "S4Vectors", + "methods", + "stats" + ], + "Hash": "da9f332c88453734623406dcca13ee03" + }, + "BiocParallel": { + "Package": "BiocParallel", + "Version": "1.38.0", + "Source": "Bioconductor", + "Repository": "Bioconductor 3.19", + "Requirements": [ + "BH", + "R", + "codetools", + "cpp11", + "futile.logger", + "methods", + "parallel", + "snow", + "stats", + "utils" + ], + "Hash": "7b6e79f86e3d1c23f62c5e2052e848d4" + }, "BiocVersion": { "Package": "BiocVersion", "Version": "3.19.1", @@ -433,6 +468,13 @@ ], "Hash": "c92ba8b9a2c5c9ff600a1062a3b7b727" }, + "RhpcBLASctl": { + "Package": "RhpcBLASctl", + "Version": "0.23-42", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "c966ea2957ff75e77afa5c908dfc89e1" + }, "Rtsne": { "Package": "Rtsne", "Version": "0.17", @@ -717,6 +759,25 @@ "Repository": "CRAN", "Hash": "da69e6b6f8feebec0827205aad3fdbd8" }, + "bluster": { + "Package": "bluster", + "Version": "1.14.0", + "Source": "Bioconductor", + "Repository": "Bioconductor 3.19", + "Requirements": [ + "BiocNeighbors", + "BiocParallel", + "Matrix", + "Rcpp", + "S4Vectors", + "cluster", + "igraph", + "methods", + "stats", + 
"utils" + ], + "Hash": "ed9597168d850071aa9abbbef7be7204" + }, "bslib": { "Package": "bslib", "Version": "0.8.0", @@ -840,6 +901,7 @@ "RemoteSha": "d7d6569ae9e30bf774908301af312f626de4cbd5", "Requirements": [ "MCMCpack", + "R", "RColorBrewer", "cluster", "dlm", @@ -848,7 +910,7 @@ "parallel", "parallelDist" ], - "Hash": "efd05c69dffe1128eb4843f3107eb606" + "Hash": "e356046a6ab19635791f7ce46ecd5991" }, "cowplot": { "Package": "cowplot", @@ -1083,6 +1145,32 @@ ], "Hash": "c2efdd5f0bcd1ea861c2d4e2a883a67d" }, + "forcats": { + "Package": "forcats", + "Version": "1.0.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "cli", + "glue", + "lifecycle", + "magrittr", + "rlang", + "tibble" + ], + "Hash": "1a0a9a3d5083d0d573c4214576f1e690" + }, + "formatR": { + "Package": "formatR", + "Version": "1.14", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "63cb26d12517c7863f5abb006c5e0f25" + }, "fs": { "Package": "fs", "Version": "1.6.4", @@ -1094,6 +1182,29 @@ ], "Hash": "15aeb8c27f5ea5161f9f6a641fafd93a" }, + "futile.logger": { + "Package": "futile.logger", + "Version": "1.4.3", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "futile.options", + "lambda.r", + "utils" + ], + "Hash": "99f0ace8c05ec7d3683d27083c4f1e7e" + }, + "futile.options": { + "Package": "futile.options", + "Version": "1.0.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "0d9bf02413ddc2bbe8da9ce369dcdd2b" + }, "future": { "Package": "future", "Version": "1.34.0", @@ -1134,6 +1245,21 @@ ], "Hash": "15e9634c0fcd294799e9b2e929ed1b86" }, + "geometry": { + "Package": "geometry", + "Version": "0.5.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "Rcpp", + "RcppProgress", + "linprog", + "lpSolve", + "magic" + ], + "Hash": "b052bd270aeddeca332c20feecfb039d" + }, "ggplot2": { "Package": "ggplot2", "Version": "3.5.1", @@ -1189,6 +1315,19 
@@ ], "Hash": "66488692cb8621bc78df1b9b819497a6" }, + "ggsci": { + "Package": "ggsci", + "Version": "3.2.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "ggplot2", + "grDevices", + "scales" + ], + "Hash": "0c3268cddf4d3a3ce4e7e6330f8e92c8" + }, "globals": { "Package": "globals", "Version": "0.16.3", @@ -1278,6 +1417,27 @@ ], "Hash": "588d091c35389f1f4a9d533c8d709b35" }, + "harmony": { + "Package": "harmony", + "Version": "1.2.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "Matrix", + "R", + "Rcpp", + "RcppArmadillo", + "RcppProgress", + "RhpcBLASctl", + "cowplot", + "dplyr", + "ggplot2", + "methods", + "rlang", + "tibble" + ], + "Hash": "1baaf521d42483d6d36d5c2f425e1ad9" + }, "here": { "Package": "here", "Version": "1.0.1", @@ -1475,6 +1635,17 @@ ], "Hash": "b64ec208ac5bc1852b285f665d6368b3" }, + "lambda.r": { + "Package": "lambda.r", + "Version": "1.2.4", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "formatR" + ], + "Hash": "b1e925c4b9ffeb901bacf812cbe9a6ad" + }, "later": { "Package": "later", "Version": "1.3.2", @@ -1537,6 +1708,17 @@ ], "Hash": "b8552d117e1b808b09a832f589b79035" }, + "linprog": { + "Package": "linprog", + "Version": "0.9-4", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "lpSolve" + ], + "Hash": "66e9d4ebd71ddcd6f86a2a9a34f5cdc5" + }, "listenv": { "Package": "listenv", "Version": "0.9.1", @@ -1560,6 +1742,24 @@ ], "Hash": "c6fafa6cccb1e1dfe7f7d122efd6e6a7" }, + "lpSolve": { + "Package": "lpSolve", + "Version": "5.6.21", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "730a90bdc519fb0caff03df11218ddd8" + }, + "magic": { + "Package": "magic", + "Version": "1.6-1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "abind" + ], + "Hash": "1da6217cea8a3ef496258819b80770e1" + }, "magrittr": { "Package": "magrittr", "Version": "2.0.3", @@ -1748,6 +1948,17 @@ ], "Hash": 
"68a2d681e10cf72f0afa1d84d45380e5" }, + "pdfCluster": { + "Package": "pdfCluster", + "Version": "1.0-4", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "geometry", + "methods" + ], + "Hash": "51e3a7a4af0b863e5d380575cbd33cda" + }, "pillar": { "Package": "pillar", "Version": "1.9.0", @@ -1899,6 +2110,30 @@ ], "Hash": "017561f17632c065388b7062da030952" }, + "rOpenScPCA": { + "Package": "rOpenScPCA", + "Version": "0.1.0", + "Source": "GitHub", + "RemoteType": "github", + "RemoteHost": "api.github.com", + "RemoteUsername": "AlexsLemonade", + "RemoteRepo": "OpenScPCA-analysis", + "RemoteSubdir": "packages/rOpenScPCA", + "RemoteRef": "main", + "RemoteSha": "a5c36235eff73530cf496c4d82715e33e79fc90e", + "Requirements": [ + "BiocParallel", + "SingleCellExperiment", + "bluster", + "dplyr", + "methods", + "pdfCluster", + "purrr", + "tibble", + "tidyr" + ], + "Hash": "f320ea9842ac8f8eb77685aa3155751d" + }, "rappdirs": { "Package": "rappdirs", "Version": "0.3.3", @@ -2165,6 +2400,17 @@ ], "Hash": "c956d93f6768a9789edbc13072b70c78" }, + "snow": { + "Package": "snow", + "Version": "0.4-4", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "utils" + ], + "Hash": "40b74690debd20c57d93d8c246b305d4" + }, "sourcetools": { "Package": "sourcetools", "Version": "0.1.7-1", diff --git a/analyses/cell-type-nonETP-ALL-03/results/README.md b/analyses/cell-type-nonETP-ALL-03/results/README.md index 9829d6637..11080bc8d 100644 --- a/analyses/cell-type-nonETP-ALL-03/results/README.md +++ b/analyses/cell-type-nonETP-ALL-03/results/README.md @@ -12,4 +12,8 @@ These are the generated outputs for each sample in the S3 bucket: - `rds` objects: `s3://researcher-650251722463-us-east-2/cell-type-nonETP-ALL-03/results/rds` - metadata and ScType results: `s3://researcher-650251722463-us-east-2/cell-type-nonETP-ALL-03/results/` - CopyKat results: `s3://researcher-650251722463-us-east-2/cell-type-nonETP-ALL-03/results/copykat_output` -- umap and dot 
plots: `s3://researcher-650251722463-us-east-2/cell-type-nonETP-ALL-03/plots` (are also found in the repository plots/). +- evaluating cluster separation, stability, and purity: `s3://researcher-650251722463-us-east-2/cell-type-nonETP-ALL-03/results/evalClus` +- umap and dot plots: `s3://researcher-650251722463-us-east-2/cell-type-nonETP-ALL-03/plots` +- violin and stacked bar plots for exploring the results of CopyKat prediction: `s3://researcher-650251722463-us-east-2/cell-type-nonETP-ALL-03/plots/copykat_exploration` + +\*\*All the plots are also found in the repository plots/. diff --git a/analyses/cell-type-nonETP-ALL-03/scripts/02-03_annotation.R b/analyses/cell-type-nonETP-ALL-03/scripts/02-03_annotation.R index d7ba5bedf..b98eae564 100644 --- a/analyses/cell-type-nonETP-ALL-03/scripts/02-03_annotation.R +++ b/analyses/cell-type-nonETP-ALL-03/scripts/02-03_annotation.R @@ -17,48 +17,6 @@ library(ggplot2) # load cell type annotation function source("https://github.com/IanevskiAleksandr/sc-type/raw/6db9eef49f185cf4d79bfec92a20fcf1edcccafb/R/sctype_score_.R") -gene_sets_prepare <- function(path_to_db_file, cell_type){ - cell_markers = read.csv(path_to_db_file, header = T) - cell_markers = cell_markers[cell_markers$tissueType == cell_type,] - cell_markers$ensembl_id_positive_marker = gsub(" ","",cell_markers$ensembl_id_positive_marker); cell_markers$ensembl_id_negative_marker = gsub(" ","",cell_markers$ensembl_id_negative_marker) - - # correct gene symbols from the given DB (up-genes) - cell_markers$ensembl_id_positive_marker = sapply(1:nrow(cell_markers), function(i){ - markers_all = gsub(" ", "", unlist(strsplit(cell_markers$ensembl_id_positive_marker[i],","))) - markers_all = toupper(markers_all[markers_all != "NA" & markers_all != ""]) - markers_all = sort(markers_all) - - if(length(markers_all) > 0){ - suppressMessages({markers_all = unique(na.omit(markers_all))}) #since the markers are provided in Ensembl ID, I removed checkGeneSymbols function here - 
paste0(markers_all, collapse=",") - } else { - "" - } - }) - - # correct gene symbols from the given DB (down-genes) - cell_markers$ensembl_id_negative_marker = sapply(1:nrow(cell_markers), function(i){ - markers_all = gsub(" ", "", unlist(strsplit(cell_markers$ensembl_id_negative_marker[i],","))) - markers_all = toupper(markers_all[markers_all != "NA" & markers_all != ""]) - markers_all = sort(markers_all) - - if(length(markers_all) > 0){ - suppressMessages({markers_all = unique(na.omit(markers_all))}) #since the markers are provided in Ensembl ID, I removed checkGeneSymbols function here - paste0(markers_all, collapse=",") - } else { - "" - } - }) - - cell_markers$ensembl_id_positive_marker = gsub("///",",",cell_markers$ensembl_id_positive_marker);cell_markers$ensembl_id_positive_marker = gsub(" ","",cell_markers$ensembl_id_positive_marker) - cell_markers$ensembl_id_negative_marker = gsub("///",",",cell_markers$ensembl_id_negative_marker);cell_markers$ensembl_id_negative_marker = gsub(" ","",cell_markers$ensembl_id_negative_marker) - - gs = lapply(1:nrow(cell_markers), function(j) gsub(" ","",unlist(strsplit(toString(cell_markers$ensembl_id_positive_marker[j]),",")))); names(gs) = cell_markers$cellName - gs2 = lapply(1:nrow(cell_markers), function(j) gsub(" ","",unlist(strsplit(toString(cell_markers$ensembl_id_negative_marker[j]),",")))); names(gs2) = cell_markers$cellName - - list(gs_positive = gs, gs_negative = gs2) -} - running_scType <- function(gs_list, annot.obj, assay = "RNA", thres = 4){ # check Seurat object version (scRNA-seq matrix extracted differently in Seurat v4/v5) seurat_package_v5 <- isFALSE('counts' %in% names(attributes(annot.obj[[assay]]))); @@ -85,7 +43,7 @@ running_scType <- function(gs_list, annot.obj, assay = "RNA", thres = 4){ cl_type = sctype_scores[sctype_scores$cluster==cluster_num,]; annot.obj@meta.data$sctype_classification[annot.obj@meta.data$leiden_clusters == cluster_num] = as.character(cl_type$type[1]) } - return 
(list(annot.obj, cL_results)) + return (list(annot.obj, cL_results, es.max)) } plot_modulescore <- function(gs_list, seu, sample.name){ @@ -99,6 +57,7 @@ plot_modulescore <- function(gs_list, seu, sample.name){ ggtitle(paste0(sample.name,": cell type module score")) + scale_color_gradient2(low = scales::muted("blue"), mid = "whitesmoke", high = scales::muted("red"), midpoint = 0) ggsave(file.path(out_loc,"plots",paste0(sample.name,"_features_dotplot.png")), width = 7, height = 7, bg = "white", dpi = 150) + return (seu) } run_annot <- function(ind.lib){ @@ -112,10 +71,12 @@ run_annot <- function(ind.lib){ res <- running_scType(gs_list, seu) seu <- res[[1]] #res[[2]] - A table with top 10 cell types with the highest scores for each cluster + #res[[3]] - A cell type x cells table tabulating the ScType score of each cell for all cell types write.table(res[[2]], file = file.path(out_loc,"results",paste0(ind.lib,"_sctype_top10_celltypes_perCluster.txt")), row.names = F, sep = "\t", quote = F) + write.table(t(res[[3]]), file = file.path(out_loc,"results",paste0(ind.lib,"_sctype_scores.txt")), sep = "\t", quote = F) - plot_modulescore(gs_list, seu, ind.lib) + seu <- plot_modulescore(gs_list, seu, ind.lib) #using copykat for tumor cells identification norm.cells <- colnames(seu)[which(seu$sctype_classification=="B")] @@ -154,6 +115,8 @@ metadata <- read.table(file.path(data_loc,"single_cell_metadata.tsv"), sep = "\t metadata <- metadata[which(metadata$scpca_project_id == projectID & metadata$diagnosis == "Non-early T-cell precursor T-cell acute lymphoblastic leukemia"), ] libraryID <- metadata$scpca_library_id + +source(file.path(out_loc, "scripts/util/gene-set-functions.R")) # DB file db <- file.path(out_loc,"Azimuth_BM_level1.csv") tissue <- "Immune system" diff --git a/analyses/cell-type-nonETP-ALL-03/scripts/multipanel_plot.R b/analyses/cell-type-nonETP-ALL-03/scripts/04_multipanel_plot.R similarity index 100% rename from 
analyses/cell-type-nonETP-ALL-03/scripts/multipanel_plot.R
rename to analyses/cell-type-nonETP-ALL-03/scripts/04_multipanel_plot.R
diff --git a/analyses/cell-type-nonETP-ALL-03/scripts/05_cluster_evaluation.R b/analyses/cell-type-nonETP-ALL-03/scripts/05_cluster_evaluation.R
new file mode 100644
index 000000000..11c31a42b
--- /dev/null
+++ b/analyses/cell-type-nonETP-ALL-03/scripts/05_cluster_evaluation.R
@@ -0,0 +1,53 @@
+#!/usr/bin/env Rscript
+
+## Calculates the silhouette width and purity of each cell, averages them per cluster,
+## and evaluates the stability of the clusters of each library,
+## using the cluster evaluation functions of the rOpenScPCA package
+## (called below as rOpenScPCA::calculate_silhouette/_purity/_stability).
+
+library(Seurat)
+library(dplyr)
+
+## Evaluates the Leiden clusters of a single library.
+##   ind.lib: library ID; the Seurat object is read from results/rds/<ind.lib>.rds
+## Relies on the global `out_loc` that is defined further down in this script.
+## Writes three tab-delimited tables to results/evalClus: per-cell silhouette width and
+## purity, their per-cluster averages, and the output of calculate_stability().
+run_eval <- function(ind.lib){
+  seu <- readRDS(file.path(out_loc,"results/rds",paste0(ind.lib,".rds")))
+  # one row per cell: row names become `cell_id`, the Leiden assignment becomes `cluster`
+  clusID.df <- data.frame(FetchData(seu, vars = "leiden_clusters"))|> tibble::rownames_to_column(var = "cell_id")
+  colnames(clusID.df)[2] <- "cluster"
+  cluster_df1 <- rOpenScPCA::calculate_silhouette(x = seu, cluster_df = clusID.df, pc_name = "Xpca_")
+  cluster_df2 <- rOpenScPCA::calculate_purity(x = seu, cluster_df = clusID.df, pc_name = "Xpca_")
+  final_df <- merge(cluster_df1, cluster_df2, by = c("cell_id","cluster"))
+  perClus_df <- final_df %>% group_by(cluster) %>%
+    summarise(avgSil = mean(silhouette_width), avgPur = mean(purity)) %>%
+    data.frame()
+  stability_df <- rOpenScPCA::calculate_stability(x = seu, clusters = clusID.df$cluster,
+                                                  pc_name = "Xpca_",algorithm = "leiden",
+                                                  resolution = 1.0, objective_function = "modularity",
+                                                  seed = 10)
+  write.table(final_df, sep = "\t", row.names = FALSE, quote = FALSE,
+              file = file.path(out_loc,"results/evalClus/",paste0(ind.lib,"_sil-purity_perCell.txt")))
+  write.table(stability_df, sep = "\t", row.names = FALSE, quote = FALSE,
+              file = file.path(out_loc,"results/evalClus/",paste0(ind.lib,"_stability.txt")))
+  write.table(perClus_df, sep = "\t", row.names = FALSE, quote = FALSE,
+              file = file.path(out_loc,"results/evalClus/",paste0(ind.lib,"_avgSil-purity_perClus.txt")))
+}
+
+project_root <- rprojroot::find_root(rprojroot::is_git_root)
+projectID <- "SCPCP000003"
+out_loc <- file.path(project_root, "analyses/cell-type-nonETP-ALL-03")
+data_loc <- file.path(project_root, "data/current",projectID)
+dir.create(file.path(out_loc, "results/evalClus"), showWarnings = FALSE, recursive = TRUE)
+
+# the cluster evaluation functions come from the installed rOpenScPCA package (see renv.lock)
+
+metadata <- read.table(file.path(data_loc,"single_cell_metadata.tsv"), sep = "\t", header = TRUE)
+metadata <- metadata[which(metadata$scpca_project_id == projectID &
+                             metadata$diagnosis == "Non-early T-cell precursor T-cell acute lymphoblastic leukemia"), ]
+libraryID <- metadata$scpca_library_id
+
+purrr::walk(libraryID, run_eval)
+
diff --git a/analyses/cell-type-nonETP-ALL-03/scripts/exploratory-analyses/copykat_exploration.R b/analyses/cell-type-nonETP-ALL-03/scripts/exploratory-analyses/copykat_exploration.R
new file mode 100644
index 000000000..89ef14248
--- /dev/null
+++ b/analyses/cell-type-nonETP-ALL-03/scripts/exploratory-analyses/copykat_exploration.R
@@ -0,0 +1,77 @@
+#!/usr/bin/env Rscript
+
+#This script explores the results of CopyKat prediction with respect to cell types and blast module scores
+
+library(Seurat)
+library(ggplot2)
+library(dplyr)
+
+# Plots the CopyKat predictions of one library against its cell type annotations.
+#   annot.obj : Seurat object with the metadata columns `sctype_classification`,
+#               `copykat.pred` and `lowConfidence_annot`, and the feature `Blast_Features1`
+#   library.id: library ID, used in the figure labels and the output file names
+#   ct.colors : named vector of fill colors passed to scale_fill_manual()
+# Relies on the global `out_loc`; saves two PNG files under plots/copykat_exploration.
+# Best effort: if anything fails for a library (presumably one without CopyKat results),
+# the reason is reported with message() and the library is skipped.
+copykatInterpret <- function(annot.obj, library.id, ct.colors){
+  tryCatch({
+    exprs <- data.frame(FetchData(annot.obj, vars = c("sctype_classification","copykat.pred","lowConfidence_annot")))
+    df <- exprs %>%
+      group_by(copykat.pred, sctype_classification) %>%
+      count(name = "Frequency")
+    # number of cells per CopyKat prediction, used as the text label of each bar
+    total_df <- df %>%
+      group_by(copykat.pred) %>%
+      summarise(Total = sum(Frequency))
+    p1 <- ggplot() +
+      geom_bar(data = df, aes(x = copykat.pred, y = Frequency, fill = sctype_classification), width = 0.5, stat = "identity", position = "fill") +
+      geom_text(data = total_df, aes(y = 100, x = copykat.pred, label = Total), size = 4, position = position_fill(vjust = 1.02)) +
+      scale_fill_manual(values = ct.colors)
+
+    df <- exprs %>%
+      dplyr::group_by(copykat.pred, lowConfidence_annot) %>%
+      dplyr::count(name = "Frequency")
+    p2 <- ggplot() +
+      geom_bar(data = df, aes(x = copykat.pred, y = Frequency, fill = lowConfidence_annot), width = 0.5, stat = "identity", position = "fill") +
+      geom_text(data = total_df, aes(y = 100, x = copykat.pred, label = Total), size = 4, position = position_fill(vjust = 1.02)) +
+      scale_fill_manual(values = ct.colors)
+    # keep the combined figure in a variable and pass it to ggsave() explicitly
+    # instead of depending on the implicit last_plot()
+    p_bar <- cowplot::plot_grid(plotlist = list(p1,p2), nrow = 1) +
+      cowplot::draw_figure_label(library.id, position = "top", size = 14, fontface = "bold")
+    ggsave(file.path(out_loc,"plots/copykat_exploration",paste0(library.id,"_celltypeVScopykat.png")), plot = p_bar, width = 10, height = 5, bg = "white", dpi = 150)
+
+    ### plotting blast module scores
+    Idents(annot.obj) <- factor(annot.obj$copykat.pred, levels = c("aneuploid","diploid","not.defined"))
+    p_vln <- VlnPlot(annot.obj, features = "Blast_Features1") + ggtitle(paste0(library.id,": Blast module score")) + NoLegend()
+    ggsave(file.path(out_loc,"plots/copykat_exploration",paste0(library.id,"_blastModuleScore.png")), plot = p_vln, width = 6, height = 6, bg = "white", dpi = 150)
+  }, error = function(e){
+    # do not stop the run, but leave a trace of which library failed and why
+    message("Skipping ", library.id, ": ", conditionMessage(e))
+  })
+}
+
+project_root <- rprojroot::find_root(rprojroot::is_git_root)
+projectID <- "SCPCP000003"
+out_loc <- file.path(project_root, "analyses/cell-type-nonETP-ALL-03")
+data_loc <- file.path(project_root, "data/current",projectID)
+dir.create(file.path(out_loc, "plots/copykat_exploration"), showWarnings = FALSE, recursive = TRUE)
+
+metadata <- read.table(file.path(data_loc,"single_cell_metadata.tsv"), sep = "\t", header = TRUE)
+metadata <- metadata[which(metadata$scpca_project_id == projectID &
+                             metadata$diagnosis == "Non-early T-cell precursor T-cell acute lymphoblastic leukemia"), ]
+libraryID <- metadata$scpca_library_id
+
+ct_color <- c("darkorchid","skyblue2","dodgerblue2","gold","beige","sienna1","green4","navy",
+              "chocolate4","red","darkred","#6A3D9A","maroon","yellow4","grey35","black","lightpink","grey80")
+names(ct_color) <- c("B","CD4 T","CD8 T","DC","HSPC","Mono","NK","Other T","Macrophage",
+                     "Early Eryth","Late Eryth","Plasma","Platelet","Stromal","Blast","Cancer","Pre Eryth","Unknown")
+
+seu.list <- list()
+for (lib_iter in seq_along(libraryID)){
+  seu.list[[lib_iter]] <- readRDS(file.path(out_loc,"results/rds",paste0(libraryID[lib_iter],".rds")))
+  names(seu.list)[lib_iter] <- libraryID[lib_iter]
+}
+
+purrr::walk2(seu.list, names(seu.list), ~ copykatInterpret (annot.obj = .x, library.id = .y, ct.colors = ct_color))
diff --git a/analyses/cell-type-nonETP-ALL-03/scripts/exploratory-analyses/merging.R b/analyses/cell-type-nonETP-ALL-03/scripts/exploratory-analyses/merging.R
new file mode 100644
index 000000000..a61cf8590
--- /dev/null
+++ b/analyses/cell-type-nonETP-ALL-03/scripts/exploratory-analyses/merging.R
@@ -0,0 +1,67 @@
+#!/usr/bin/env Rscript
+
+## This script merges sample without B cells with SCPCL000710 (the best annotated sample)
+## and corrects the batch effect with Harmony to see if there are any cells from the sample
+## without B cells cluster with SCPCL000710, deducing their tumor states.
+
+library(Seurat)
+library(ggplot2)
+library(dplyr)
+
+## Merges `ind.lib` with `norm.lib`, re-normalizes, re-scores the gene sets, integrates with Harmony, clusters on
+## `num_PC` harmony dimensions at resolution `res`, and saves results/rds/<ind.lib>-<norm.lib>.rds (uses global `out_loc`).
+merging <- function(ind.lib, norm.lib = "SCPCL000703", num_PC = 20, res = 0.8){
+  seu.list <- list()
+  for (lib in c(ind.lib,norm.lib)){
+    seu.list[[lib]] <- readRDS(file.path(out_loc,"results/rds",paste0(lib,".rds")))
+    seu.list[[lib]]$libraryID <- lib
+  }
+  sample.combined <- merge(seu.list[[ind.lib]], y = seu.list[[norm.lib]], add.cell.ids = c(ind.lib, norm.lib))
+  sample.combined <- NormalizeData(sample.combined, verbose = FALSE) #LogNormalize
+
+  ## adding module score again because re-normalize
+  gs_list <- gene_sets_prepare(db, tissue) #prepare gene sets
+  for (i in seq_along(gs_list$gs_positive)){
+    sample.combined <- AddModuleScore(object = sample.combined, name = paste0(gsub(" ","",names(gs_list$gs_positive[i])),"_Features"),
+                                      features = list(gs_list$gs_positive[[i]]))
+  }
+  final.obj <- sample.combined #avoid saving scale.data slot
+
+  sample.combined <- FindVariableFeatures(sample.combined, selection.method = "vst", nfeatures = 2000, verbose = FALSE)
+  sample.combined <- ScaleData(sample.combined, features = rownames(sample.combined))
+  sample.combined <- RunPCA(sample.combined, features = VariableFeatures(object = sample.combined))
+
+  set.seed(42)
+  sample.combined <- harmony::RunHarmony(sample.combined, "libraryID", plot_convergence = TRUE, nclust = 50, max_iter = 10, early_stop = TRUE)
+  sample.combined <- sample.combined %>%
+    RunUMAP(reduction = "harmony", dims = 1:num_PC) %>%
+    FindNeighbors(reduction = "harmony", dims = 1:num_PC) %>%
+    FindClusters(resolution = res)
+  final.obj$seurat_clusters <- sample.combined$seurat_clusters
+  final.obj[['pca']] <- CreateDimReducObject(embeddings = sample.combined@reductions[["pca"]]@cell.embeddings, key = "PCA_", global = FALSE, assay = "RNA")
+  final.obj@reductions[["pca"]]@feature.loadings <- sample.combined@reductions[["pca"]]@feature.loadings
+  final.obj@reductions[["pca"]]@stdev <- sample.combined@reductions[["pca"]]@stdev
+  final.obj[['umap']] <- CreateDimReducObject(embeddings = sample.combined@reductions[["umap"]]@cell.embeddings, key = "UMAP_", global = TRUE, assay = "RNA")
+  final.obj[['harmony']] <- CreateDimReducObject(embeddings = sample.combined@reductions[["harmony"]]@cell.embeddings, key = "harmony_", global = FALSE, assay = "RNA")
+  final.obj@reductions[["harmony"]]@feature.loadings <- sample.combined@reductions[["harmony"]]@feature.loadings
+  final.obj@reductions[["harmony"]]@stdev <- sample.combined@reductions[["harmony"]]@stdev
+
+  saveRDS(final.obj, file.path(out_loc,"results/rds",paste0(ind.lib,"-",norm.lib,".rds")))
+}
+
+project_root <- rprojroot::find_root(rprojroot::is_git_root)
+projectID <- "SCPCP000003"
+out_loc <- file.path(project_root, "analyses/cell-type-nonETP-ALL-03")
+data_loc <- file.path(project_root, "data/current",projectID)
+
+source(file.path(out_loc, "scripts/util/gene-set-functions.R"))
+db <- file.path(out_loc,"Azimuth_BM_level1.csv")
+tissue <- "Immune system"
+metadata <- read.table(file.path(data_loc,"single_cell_metadata.tsv"), sep = "\t", header = TRUE)
+metadata <- metadata[which(metadata$scpca_project_id == projectID &
+                             metadata$diagnosis == "Non-early T-cell precursor T-cell acute lymphoblastic leukemia"), ]
+libraryID <- metadata$scpca_library_id
+
+# libraries read back by merging.multipanel_plot.R; named explicitly instead of by position (was libraryID[c(2,3,9,10,11)])
+libraryID <- intersect(c("SCPCL000077","SCPCL000082","SCPCL000704","SCPCL000706","SCPCL000710"), libraryID)
+purrr::walk(libraryID, merging)
diff --git a/analyses/cell-type-nonETP-ALL-03/scripts/exploratory-analyses/merging.multipanel_plot.R b/analyses/cell-type-nonETP-ALL-03/scripts/exploratory-analyses/merging.multipanel_plot.R
new file mode 100644
index 000000000..d5d6fb061
--- /dev/null
+++ b/analyses/cell-type-nonETP-ALL-03/scripts/exploratory-analyses/merging.multipanel_plot.R
@@ -0,0 +1,85 @@
+#!/usr/bin/env Rscript
+
+## This script combines multiple UMAP plots of a merged object (see merging.R) into multi-panel plots.
+## Outputs per merged object, written to plots/:
+##   multipanels_<merged id>.png  one UMAP per metadata variable
+##   <merged id>_splitPlot.png    clusters and cell types, split by library
+
+library(Seurat)
+library(ggplot2)
+
+## Draws one UMAP per entry of `variables.to.plot` and saves all of them as one grid.
+##   annot.obj:         merged Seurat object with a "umap" reduction
+##   library.id:        ID of the merged object, used as figure label and in the file name
+##   colors:            list; [[1]] colors for sctype_classification, [[2]] colors for libraryID
+##   n.row:             number of rows of the grid
+##   variables.to.plot: metadata columns used to color the UMAPs
+## Relies on the global `out_loc`.
+multiplot <- function(annot.obj, library.id, colors, n.row = 2,
+                      variables.to.plot = c("seurat_clusters","sctype_classification","libraryID","copykat.pred")){
+  plot.list <- list()
+  for (plot.type in variables.to.plot){
+    # variables without their own palette use the default DimPlot colors
+    clrs <- switch(plot.type,
+                   sctype_classification = colors[[1]],
+                   libraryID = colors[[2]],
+                   NULL)
+    plot.list[[plot.type]] <- DimPlot(annot.obj, reduction = "umap", group.by = plot.type,
+                                      label = TRUE, cols = clrs, repel = TRUE) + NoLegend()
+  }
+  panels <- cowplot::plot_grid(plotlist = plot.list, nrow = n.row) +
+    cowplot::draw_figure_label(library.id, position = "top", size = 18, fontface = "bold")
+  ggsave(file.path(out_loc,"plots",paste0("multipanels_",library.id,".png")), plot = panels,
+         width = 12, height = 12, bg = "white", dpi = 150)
+}
+
+## Draws the UMAP colored by leiden_clusters (top) and by sctype_classification (bottom),
+## each split by libraryID, and saves both as plots/<library.id>_splitPlot.png.
+##   colors: list; [[1]] colors for sctype_classification, [[2]] colors for leiden_clusters
+multi_splitPlot <- function(annot.obj, library.id, colors){
+  p1 <- DimPlot(annot.obj, reduction = "umap", group.by = "leiden_clusters",
+                split.by = "libraryID", cols = colors[[2]])
+  p2 <- DimPlot(annot.obj, reduction = "umap", group.by = "sctype_classification",
+                split.by = "libraryID", cols = colors[[1]])
+  panels <- cowplot::plot_grid(plotlist = list(p1,p2), nrow = 2)
+  ggsave(file.path(out_loc,"plots",paste0(library.id,"_splitPlot.png")), plot = panels,
+         width = 12, height = 12, bg = "white", dpi = 150)
+}
+
+project_root <- rprojroot::find_root(rprojroot::is_git_root)
+projectID <- "SCPCP000003"
+out_loc <- file.path(project_root, "analyses/cell-type-nonETP-ALL-03")
+data_loc <- file.path(project_root, "data/current",projectID)
+
+metadata <- read.table(file.path(data_loc,"single_cell_metadata.tsv"), sep = "\t", header = TRUE)
+metadata <- metadata[which(metadata$scpca_project_id == projectID &
+                             metadata$diagnosis == "Non-early T-cell precursor T-cell acute lymphoblastic leukemia"), ]
+libraryID <- metadata$scpca_library_id
+
+ct_color <- c("darkorchid","skyblue2","dodgerblue2","gold","beige","sienna1","green4","navy",
+              "chocolate4","red","darkred","#6A3D9A","maroon","yellow4","grey35","black","lightpink","grey80")
+names(ct_color) <- c("B","CD4 T","CD8 T","DC","HSPC","Mono","NK","Other T","Macrophage",
+                     "Early Eryth","Late Eryth","Plasma","Platelet","Stromal","Blast","Cancer","Pre Eryth","Unknown")
+# one color per library; the palette holds 11 colors and is cut down to the number of libraries
+lib_color <- c(ggsci::pal_jco("default",alpha = 0.4)(10),"purple")
+stopifnot(length(libraryID) <= length(lib_color))
+lib_color <- setNames(lib_color[seq_along(libraryID)], sort(libraryID))
+c30 <- c("dodgerblue2", "#E31A1C","green4","#6A3D9A", "#FF7F00","orchid1", "gold1","skyblue2", "#FB9A99", # lt pink
+         "palegreen2","#CAB2D6", # lt purple
+         "darkorange4", "brown","#FDBF6F", # lt orange
+         "gray70", "khaki2","maroon", "black", "deeppink1", "blue1", "steelblue4","darkturquoise",
+         "green1", "yellow4", "yellow3",
+         "beige","cyan","darkgreen","navy","darkorchid")
+
+merged.libraryID <- c("SCPCL000077-SCPCL000703","SCPCL000082-SCPCL000703","SCPCL000704-SCPCL000703",
+                      "SCPCL000706-SCPCL000703","SCPCL000710-SCPCL000703")
+# one merged Seurat object per entry, named by merged ID
+seu.list <- lapply(merged.libraryID, function(merged.id){
+  readRDS(file.path(out_loc,"results/rds",paste0(merged.id,".rds")))
+})
+names(seu.list) <- merged.libraryID
+
+purrr::walk2(seu.list, names(seu.list),
+             ~ multiplot(annot.obj = .x, library.id = .y, colors = list(ct_color, lib_color)))
+purrr::walk2(seu.list, names(seu.list),
+             ~ multi_splitPlot(annot.obj = .x, library.id = .y, colors = list(ct_color, c30)))
diff --git a/analyses/cell-type-nonETP-ALL-03/scripts/exploratory-analyses/sctype_exploration.R b/analyses/cell-type-nonETP-ALL-03/scripts/exploratory-analyses/sctype_exploration.R
new file mode 100644
index 000000000..7bf582d6b
--- /dev/null
+++ b/analyses/cell-type-nonETP-ALL-03/scripts/exploratory-analyses/sctype_exploration.R
@@ -0,0 +1,140 @@
+#!/usr/bin/env Rscript
+
+## This script investigates how solid the B cell annotations are, by checking the ScType scores
+## and the purity of the cells annotated as B cells.
+## Inputs per library: results/rds/<library>.rds, results/<library>_sctype_scores.txt and
+## results/evalClus/<library>_sil-purity_perCell.txt (written by 05_cluster_evaluation.R).
+
+library(ggridges)
+library(ggplot2)
+library(dplyr)
+library(Seurat)
+
+## Ridge plot of the ScType scores of `cells` for every cell type.
+##   sctype.score: score table with cell IDs as row names and one column per cell type
+##   cells:        IDs of the cells to plot
+##   y.title:      title of the y axis
+## Relies on the global `ct_color`.
+scoreRidgePlot <- function(sctype.score, cells, y.title){
+  df <- sctype.score[match(cells, rownames(sctype.score)),] %>%
+    tidyr::pivot_longer(cols = colnames(sctype.score), names_to = "celltype", values_to = "ScType.score")
+  # presumably undoes the dots that read.table() substitutes for spaces in column names
+  df$celltype <- gsub("\\."," ", df$celltype)
+  ggplot(df, aes(x = ScType.score, y = forcats::fct_reorder(celltype,ScType.score), fill = celltype)) +
+    geom_density_ridges() + theme_ridges() +
+    theme(legend.position = "none", axis.title.x = element_text(hjust=0.5), axis.title.y = element_text(vjust=0.5)) +
+    scale_fill_manual(values = ct_color) + xlab ("ScType score") +
+    ylab(y.title)
+}
+
+## Compares the cells that each annotation method calls B cells.
+## For every method with at least one B cell, draws the ScType score distribution of those
+## cells and their B cell ScType score against their purity, colored by Leiden cluster.
+##   ind.lib:           library ID
+##   methods:           method names shown in the plots, parallel to `variables.to.plot`
+##   variables.to.plot: metadata columns holding the annotation of each method
+## Relies on the global `out_loc`; writes plots/sctype_exploration/<ind.lib>_Bcells.png.
+Bcell_check <- function(ind.lib, methods = c("ScType","SingleR","CellAssign"),
+                        variables.to.plot = c("sctype_classification","singler_celltype_annotation","cellassign_celltype_annotation")){
+  seu <- readRDS(file.path(out_loc,"results/rds",paste0(ind.lib,".rds")))
+  sctype.score <- read.table(file.path(out_loc,"results",paste0(ind.lib,"_sctype_scores.txt")),
+                             sep = "\t", header = TRUE)
+  eval.clus <- read.table(file.path(out_loc,"results/evalClus/",paste0(ind.lib,"_sil-purity_perCell.txt")),
+                          sep = "\t", header = TRUE)
+
+  plot.list <- list()
+  for (var_iter in seq_along(variables.to.plot)){
+    plot.type <- variables.to.plot[var_iter]
+    if (plot.type == "sctype_classification"){
+      Bcell.names <- colnames(seu)[which(seu$sctype_classification=="B")]
+    } else{
+      # for the other methods, keep every cell whose label contains "B cell"
+      Bcell.names <- colnames(seu)[grep("B cell", seu@meta.data[[plot.type]])]
+    }
+    if (length(Bcell.names) == 0){next}
+
+    p1 <- scoreRidgePlot(sctype.score, Bcell.names,
+                         expr(bold(!!methods[var_iter])*~"("*!!length(Bcell.names)*")"))
+
+    plot.df <- data.frame(cell_id=Bcell.names,
+                          purity=eval.clus$purity[match(Bcell.names, eval.clus$cell_id)],
+                          sctypeScore=sctype.score$B[match(Bcell.names, rownames(sctype.score))],
+                          leidenCluster=as.factor(seu$leiden_clusters[match(Bcell.names,colnames(seu))]))
+    p2 <- ggplot(plot.df, aes(x = purity, y = sctypeScore, color = leidenCluster)) +
+      geom_point(size = 0.5) + theme_classic() +
+      ylab("B cell ScType score") + xlim(0,1)
+
+    plot.list <- c(plot.list, list(p1, p2))
+  }
+  if (length(plot.list) == 0){return()}
+  panels <- cowplot::plot_grid(plotlist = plot.list, nrow = 3) +
+    patchwork::plot_annotation(title = paste0(ind.lib,": B cells identified in different methods")) &
+    theme(plot.title = element_text(hjust = 0.5, face="bold"))
+  ggsave(file.path(out_loc,"plots/sctype_exploration",paste0(ind.lib,"_Bcells.png")), plot = panels,
+         width = 10, height = 15, bg = "white", dpi = 150)
+}
+
+## Tries to find which cells are indeed B cells by looking at the B cell ScType score,
+## using the 99th percentile of the non-B ScType scores of the annotated B cells as cutoff.
+## Writes plots/sctype_exploration/<ind.lib>_newBcells.png and
+## results/<ind.lib>_newB-normal-annotation.txt. Relies on the globals `out_loc` and `ct_color`.
+plot_Bscore <- function(ind.lib){
+  seu <- readRDS(file.path(out_loc,"results/rds",paste0(ind.lib,".rds")))
+  sctype.score <- read.table(file.path(out_loc,"results",paste0(ind.lib,"_sctype_scores.txt")),
+                             sep = "\t", header = TRUE)
+  #shown from the B cells annotated by SingleR and dotplot (B_Features1)
+  special.lib <- c("SCPCL000077","SCPCL000704","SCPCL000710")
+  special.clus <- c(12,5,7)
+  if (ind.lib %in% special.lib){
+    Bcell.names <- colnames(seu)[which(seu$leiden_clusters==special.clus[match(ind.lib,special.lib)])]
+  }else if (ind.lib == "SCPCL000082"){
+    return()
+  }else{
+    Bcell.names <- colnames(seu)[which(seu$sctype_classification=="B")]
+  }
+  # without annotated B cells there is nothing to derive a cutoff from
+  if (length(Bcell.names) == 0){return()}
+
+  df <- sctype.score[match(Bcell.names, rownames(sctype.score)),]
+  nonB.df <- df[,which(!colnames(df) %in% "B")]
+  cutoff <- quantile(unlist(nonB.df, use.names = FALSE), probs = 0.99) #99 percentile of non-B ScType score for annotated B
+
+  # look up the score of every cell of `seu` by cell ID, so the score table need not share the row order of `seu`
+  seu$B_SctypeScore <- sctype.score$B[match(colnames(seu), rownames(sctype.score))]
+  p1 <- FeaturePlot(seu, features = "B_SctypeScore") +
+    scale_color_gradient2(low = "blue", mid = "whitesmoke", high = "red", midpoint = cutoff)
+
+  new.B <- colnames(seu)[which(seu$B_SctypeScore > cutoff)]
+  p2 <- scoreRidgePlot(sctype.score, new.B, expr(bold(ScType)*~"("*!!length(new.B)*")"))
+  panels <- p2 + p1 + patchwork::plot_annotation(title = paste0(ind.lib,": new B cells by 99 percentile cutoff of non-B ScType score")) &
+    theme(plot.title = element_text(hjust = 0.5, face="bold"))
+  ggsave(file.path(out_loc,"plots/sctype_exploration",paste0(ind.lib,"_newBcells.png")), plot = panels,
+         width = 10, height = 5, bg = "white", dpi = 150)
+
+  #writing annotation file for normal vs unknown cells
+  annot.df <- data.frame(FetchData(seu, vars = "sctype_classification"))
+  # character, so that the new label can be assigned even if the column is a factor
+  annot.df$sctype_classification <- as.character(annot.df$sctype_classification)
+  annot.df$sctype_classification[match(new.B, rownames(annot.df))] <- "new B"
+  write.table(annot.df, file = file.path(out_loc,"results",paste0(ind.lib,"_newB-normal-annotation.txt")),
+              sep = "\t", quote = FALSE, col.names = FALSE)
+}
+
+project_root <- rprojroot::find_root(rprojroot::is_git_root)
+projectID <- "SCPCP000003"
+out_loc <- file.path(project_root, "analyses/cell-type-nonETP-ALL-03")
+data_loc <- file.path(project_root, "data/current",projectID)
+dir.create(file.path(out_loc, "plots/sctype_exploration"), showWarnings = FALSE, recursive = TRUE)
+
+metadata <- read.table(file.path(data_loc,"single_cell_metadata.tsv"), sep = "\t", header = TRUE)
+metadata <- metadata[which(metadata$scpca_project_id == projectID &
+                             metadata$diagnosis == "Non-early T-cell precursor T-cell acute lymphoblastic leukemia"), ]
+libraryID <- metadata$scpca_library_id
+
+ct_color <- c("darkorchid","skyblue2","dodgerblue2","gold","beige","sienna1","green4","navy",
+              "chocolate4","red","darkred","#6A3D9A","maroon","yellow4","grey35","black","lightpink","grey80")
+names(ct_color) <- c("B","CD4 T","CD8 T","DC","HSPC","Mono","NK","Other T","Macrophage",
+                     "Early Eryth","Late Eryth","Plasma","Platelet","Stromal","Blast","Cancer","Pre Eryth","Unknown")
+
+purrr::walk(libraryID, Bcell_check)
+purrr::walk(libraryID, plot_Bscore)
diff --git a/analyses/cell-type-nonETP-ALL-03/scripts/util/gene-set-functions.R b/analyses/cell-type-nonETP-ALL-03/scripts/util/gene-set-functions.R
new file mode 100644
index 000000000..756128789
--- /dev/null
+++ b/analyses/cell-type-nonETP-ALL-03/scripts/util/gene-set-functions.R
@@ -0,0 +1,44 @@
+#!/usr/bin/env Rscript
+
+#This script prepares the gene set for each cell type, extracting them from the marker list
+
+## Cleans a vector of comma-separated marker strings (one string per cell type):
+## removes spaces, drops empty and "NA" entries, upper-cases, sorts and de-duplicates the IDs.
+## Returns a character vector of the same length; a cell type without markers becomes "".
+clean_marker_ids <- function(marker_strings){
+  vapply(gsub(" ", "", marker_strings), function(marker_string){
+    markers_all <- gsub(" ", "", unlist(strsplit(marker_string, ",")))
+    markers_all <- toupper(markers_all[markers_all != "NA" & markers_all != ""])
+    #since the markers are provided in Ensembl ID, I removed checkGeneSymbols function here
+    paste0(unique(sort(markers_all)), collapse = ",") # sort() also drops missing values
+  }, character(1), USE.NAMES = FALSE)
+}
+
+## Splits every cleaned marker string into a character vector of IDs ("///" also separates IDs).
+## Returns a list with one element per marker string, named by `set_names`.
+split_marker_ids <- function(marker_strings, set_names){
+  marker_strings <- gsub(" ", "", gsub("///", ",", marker_strings))
+  gs <- lapply(marker_strings, function(marker_string){
+    gsub(" ", "", unlist(strsplit(toString(marker_string), ",")))
+  })
+  names(gs) <- set_names
+  gs
+}
+
+## Reads the marker table and returns the positive and negative marker sets of one tissue.
+##   path_to_db_file: CSV file with the columns tissueType, cellName,
+##                    ensembl_id_positive_marker and ensembl_id_negative_marker
+##   cell_type:       value of tissueType to keep (e.g. "Immune system")
+## Returns list(gs_positive, gs_negative); each is a list of character vectors named by cellName.
+## A tissue without any row yields two empty lists.
+gene_sets_prepare <- function(path_to_db_file, cell_type){
+  cell_markers <- read.csv(path_to_db_file, header = TRUE)
+  # which() drops rows whose tissueType is missing instead of turning them into NA rows
+  cell_markers <- cell_markers[which(cell_markers$tissueType == cell_type),]
+
+  positive <- clean_marker_ids(cell_markers$ensembl_id_positive_marker)
+  negative <- clean_marker_ids(cell_markers$ensembl_id_negative_marker)
+
+  list(gs_positive = split_marker_ids(positive, cell_markers$cellName),
+       gs_negative = split_marker_ids(negative, cell_markers$cellName))
+}