diff --git a/.github/workflows/docker_cell-type-wilms-tumor-14.yml b/.github/workflows/docker_cell-type-wilms-tumor-14.yml index 51c992e57..0553cc4c5 100644 --- a/.github/workflows/docker_cell-type-wilms-tumor-14.yml +++ b/.github/workflows/docker_cell-type-wilms-tumor-14.yml @@ -13,22 +13,22 @@ concurrency: cancel-in-progress: true on: - # pull_request: - # branches: - # - main - # paths: - # - "analyses/cell-type-wilms-tumor-14/Dockerfile" - # - "analyses/cell-type-wilms-tumor-14/.dockerignore" - # - "analyses/cell-type-wilms-tumor-14/renv.lock" - # - "analyses/cell-type-wilms-tumor-14/conda-lock.yml" - # push: - # branches: - # - main - # paths: - # - "analyses/cell-type-wilms-tumor-14/Dockerfile" - # - "analyses/cell-type-wilms-tumor-14/.dockerignore" - # - "analyses/cell-type-wilms-tumor-14/renv.lock" - # - "analyses/cell-type-wilms-tumor-14/conda-lock.yml" + pull_request: + branches: + - main + paths: + - "analyses/cell-type-wilms-tumor-14/Dockerfile" + - "analyses/cell-type-wilms-tumor-14/.dockerignore" + - "analyses/cell-type-wilms-tumor-14/renv.lock" + - "analyses/cell-type-wilms-tumor-14/conda-lock.yml" + push: + branches: + - main + paths: + - "analyses/cell-type-wilms-tumor-14/Dockerfile" + - "analyses/cell-type-wilms-tumor-14/.dockerignore" + - "analyses/cell-type-wilms-tumor-14/renv.lock" + - "analyses/cell-type-wilms-tumor-14/conda-lock.yml" workflow_dispatch: inputs: push-ecr: diff --git a/analyses/cell-type-dsrct/.Rprofile b/analyses/cell-type-dsrct/.Rprofile new file mode 100644 index 000000000..45301dbc7 --- /dev/null +++ b/analyses/cell-type-dsrct/.Rprofile @@ -0,0 +1,4 @@ +# Don't activate renv in an OpenScPCA docker image +if(Sys.getenv('OPENSCPCA_DOCKER') != 'TRUE'){ + source('renv/activate.R') +} diff --git a/analyses/cell-type-dsrct/cell-type-dsrct.Rproj b/analyses/cell-type-dsrct/cell-type-dsrct.Rproj new file mode 100644 index 000000000..efd491bc5 --- /dev/null +++ b/analyses/cell-type-dsrct/cell-type-dsrct.Rproj @@ -0,0 +1,13 @@ 
+Version: 1.0
+
+RestoreWorkspace: No
+SaveWorkspace: No
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
diff --git a/analyses/cell-type-dsrct/exploratory_analysis/01-marker-gene-tumor-classification.Rmd b/analyses/cell-type-dsrct/exploratory_analysis/01-marker-gene-tumor-classification.Rmd
new file mode 100644
index 000000000..821e3c781
--- /dev/null
+++ b/analyses/cell-type-dsrct/exploratory_analysis/01-marker-gene-tumor-classification.Rmd
@@ -0,0 +1,437 @@
+---
+title: "EDA of DSRCT Samples"
+author: Danh Truong
+date: "`r Sys.Date()`"
+output:
+  html_document:
+    toc: true
+    toc_depth: 3
+---
+
+
+## Introduction
+
+This notebook explores the data from the DSRCT sample set, `SCPCP000013`.
+We then see if we can use expression of DSRCT-specific genes to manually classify tumor and normal cells.
+The main goal of this notebook is only to identify tumor cells; identification and labeling of the other cells is a separate question that we do not answer here.
+
+- First we look at expression of each of the DSRCT-specific genes across all cells.
+- Then we use a z-transform prior to summing expression of all DSRCT-specific genes.
+  Cells with a summed z-score > 0 across the DSRCT-specific genes are classified as tumor cells.
+- We anticipate that normal cells will not express DSRCT-specific genes.
+
+## Setup
+```{r packages}
+suppressPackageStartupMessages({
+  # load required packages
+  library(SingleCellExperiment)
+  library(ggplot2)
+})
+
+# Set default ggplot theme
+theme_set(
+  theme_bw()
+)
+```
+
+
+```{r base paths}
+# The base path for the OpenScPCA repository, found by its (hidden) .git directory
+repository_base <- rprojroot::find_root(rprojroot::is_git_root)
+
+# The current data directory, found within the repository base directory
+data_dir <- file.path(repository_base, "data", "current")
+#sample_dir <- file.path(data_dir, "SCPCP000013", params$sample_id)
+
+# The path to this module.
+# The module directory is all lowercase (`analyses/cell-type-dsrct`); the spelling
+# has to match exactly or every path below breaks on a case-sensitive file system.
+module_base <- file.path(repository_base, "analyses", "cell-type-dsrct")
+```
+
+
+```{r}
+# project-level metadata, restricted to the DSRCT rows
+metadata_file <- file.path(data_dir, "SCPCP000013", "single_cell_metadata.tsv")
+metadata <- read.csv(metadata_file, sep = "\t")
+metadata_DSRCT <- dplyr::filter(metadata, diagnosis == "Desmoplastic small round cell tumor")
+
+# everything downstream iterates over these rows, so fail early if the filter matched nothing
+stopifnot("No DSRCT libraries found in the metadata" = nrow(metadata_DSRCT) > 0)
+```
+
+
+```{r paths}
+# one processed file per library:
+# <data_dir>/SCPCP000013/<scpca_sample_id>/<scpca_library_id>_processed.rds
+sce_file_list <- file.path(
+  data_dir, "SCPCP000013",
+  metadata_DSRCT$scpca_sample_id,
+  paste0(metadata_DSRCT$scpca_library_id, "_processed.rds")
+)
+
+# path to the marker gene table
+# (the name `marker_genes` is reused further down for the vector of Ensembl ids)
+marker_genes <- file.path(module_base, "references", "tumor-marker-genes.tsv")
+
+# output tumor/normal classifications
+results_dir <- file.path(module_base, "results", "marker_gene_analysis")
+fs::dir_create(results_dir)
+
+# the plotting chunks write PNG files into <module_base>/plots, so make sure it exists
+fs::dir_create(file.path(module_base, "plots"))
+
+#classifications_filename <- glue::glue("{params$library_id}_tumor_normal_classifications.tsv")
+#output_classifications_file <- file.path(results_dir, classifications_filename)
+```
+
+Read in each library as a separate `SingleCellExperiment` object in a list.
+```{r}
+sce_list <- lapply(sce_file_list, readr::read_rds)
+
+# name each SingleCellExperiment by its library id
+names(sce_list) <- metadata_DSRCT$scpca_library_id
+
+# read in marker genes table
+marker_genes_df <- readr::read_tsv(marker_genes) |>
+  # account for genes being from multiple sources
+  dplyr::select(cell_type, ensembl_gene_id, gene_symbol) |>
+  dplyr::distinct()
+
+marker_genes_df
+
+# Ensembl ids of the tumor marker genes
+# (from here on `marker_genes` holds the ids, no longer the path to the table)
+marker_genes <- marker_genes_df |>
+  dplyr::filter(cell_type == "tumor") |>
+  dplyr::pull(ensembl_gene_id)
+```
+
+
+## Analysis content
+
+### Explore marker gene expression
+
+The first thing we do here is just create a faceted UMAP showing the expression of each marker gene for tumor cells.
+
+```{r}
+# One long-format data frame per library: one row per cell and marker gene, holding the
+# UMAP coordinates, the cluster assignment and the expression value of that gene.
+umap_df_list <- lapply(sce_list, function(sce) {
+  # pull out the UMAP coordinates and genes and make a data frame to use for plotting
+  sce |>
+    scuttle::makePerCellDF(features = marker_genes, use.dimred = "UMAP") |>
+    # replace UMAP.1 with UMAP1
+    dplyr::rename_with(\(x) stringr::str_replace(x, "^UMAP\\.", "UMAP")) |>
+    # combine all genes into a single column for easy faceting
+    tidyr::pivot_longer(
+      cols = starts_with("ENSG"),
+      names_to = "ensembl_gene_id",
+      values_to = "gene_expression"
+    ) |>
+    # join with marker gene df to get gene symbols for plotting
+    dplyr::left_join(marker_genes_df, by = c("ensembl_gene_id")) |>
+    dplyr::select(
+      barcodes,
+      UMAP1,
+      UMAP2,
+      gene_symbol,
+      ensembl_gene_id,
+      gene_expression,
+      cluster
+    )
+})
+
+```
+
+
+```{r, fig.width=8}
+# seq_along() instead of 1:length() so that an empty list yields zero iterations
+for (i in seq_along(umap_df_list)) {
+  library_id <- names(umap_df_list)[i]
+
+  # faceted umap showing a umap panel for each marker gene
+  p <- ggplot(umap_df_list[[i]], aes(x = UMAP1, y = UMAP2, color = gene_expression)) +
+    geom_point(alpha = 0.8, size = 0.1) +
+    facet_wrap(vars(gene_symbol)) +
+    scale_color_viridis_c() +
+    labs(color = "Log-normalized gene expression") +
+    # remove axis numbers and background grid
+    scale_x_continuous(labels = NULL, breaks = NULL) +
+    scale_y_continuous(labels = NULL, breaks = NULL) +
+    theme(
+      aspect.ratio = 1,
+      legend.position = "bottom",
+      axis.title = element_text(size = 9, color = "black"),
+      strip.text = element_text(size = 8),
+      legend.title = element_text(size = 9),
+      legend.text = element_text(size = 8)
+    ) +
+    guides(colour = guide_colorbar(title.position = "bottom", title.hjust = 0.5)) +
+    ggtitle(library_id)
+  # show the plot in the rendered notebook, like the other plotting chunks do;
+  # the text below refers to these figures
+  print(p)
+
+  # save plots
+  ggsave(
+    filename = file.path(
+      module_base,
+      "plots",
+      paste0("marker_expression_", library_id, ".png")
+    ),
+    plot = p,
+    width = 6,
+    height = 6,
+    units = "in",
+    dpi = 150
+  )
+}
+
+
+```
+
+
+In my experience, ST6GALNAC5 is a strong marker for DSRCT in single-cell data. As can be seen, several of the samples have expression of ST6GALNAC5, but some do not, such as SCPCS000731 and SCPCS000729. In fact, SCPCS000729 contains a low number of cells. These samples in particular are the PDX samples and I will be excluding SCPCS000729 from further analyses.
+
+```{r}
+# samples to drop from the downstream analysis
+# NOTE(review): the text above names SCPCS000729, this list holds SCPCS000731, and the
+# conclusions still refer to SCPCS000729 -- confirm which sample is meant to be excluded
+excluded_samples <- c("SCPCS000731")
+
+# `umap_df_list` is named by library id (see names(sce_list) above), so a sample id can
+# only match after it has been translated into its library id(s) through the metadata
+excluded_libraries <- metadata_DSRCT$scpca_library_id[
+  metadata_DSRCT$scpca_sample_id %in% excluded_samples
+]
+umap_df_list_excluded <- umap_df_list[
+  !(names(umap_df_list) %in% c(excluded_samples, excluded_libraries))
+]
+```
+
+
+We can also look at the distributions for each marker gene.
+I would expect to see some sort of bimodal distribution separating cells that do and do not have expression of the marker gene. What is clear from these plots is that ST6GALNAC5, CACNA2D2, PTPRQ, and IQCJ-SCHIP1 show a bimodal distribution among the cells. To some extent, we do see expression of the other markers.
+
+```{r}
+# density of the expression values of every marker gene, one figure per library
+for (i in seq_along(umap_df_list_excluded)) {
+  # take the label from the list that is being plotted, so that title and file name
+  # stay aligned with the data after libraries have been excluded
+  library_id <- names(umap_df_list_excluded)[i]
+
+  p <- ggplot(umap_df_list_excluded[[i]],
+              aes(x = gene_expression, fill = gene_symbol)) +
+    geom_density() +
+    facet_wrap(vars(gene_symbol), scales = "free_y") +
+    theme(legend.position = "none") +
+    ggtitle(library_id)
+  print(p)
+
+  # save plots
+  ggsave(
+    filename = file.path(
+      module_base,
+      "plots",
+      paste0("marker_distribution_", library_id, ".png")
+    ),
+    plot = p,
+    width = 6,
+    height = 6,
+    units = "in",
+    dpi = 150
+  )
+}
+```
+
+Although we see some slight semblance of a bimodal distribution for most marker genes, it is hard to see and we cannot directly compare gene expression values across genes.
+This would make it hard to identify a cut off to categorize cells as expressing or not expressing the marker gene.
+
+Now we will transform each of the gene expression vectors by generating z-scores.
+Then we might be able to find a cut off we can use across samples for if marker genes are present in a cell or not.
+
+```{r}
+# z-score the expression of every marker gene within each library
+umap_df_list_excluded_scaled <- lapply(umap_df_list_excluded, function(x) {
+  x |>
+    dplyr::group_by(gene_symbol) |>
+    # get z-scores for each gene
+    # (a gene with identical values in every cell has sd = 0 and therefore gets NaN)
+    dplyr::mutate(transformed_gene_expression = scale(gene_expression)[, 1]) |>
+    dplyr::ungroup()
+})
+
+```
+
+
+Now we can create the same density plot but looking at z-scores. Interestingly, in some samples, like SCPCS000489, we see that ST6GALNAC5 has negative values. Presumably, these are tumor cells but there may be a skew due to the number of tumor cells compared to normal.
+
+```{r}
+
+# density of the z-scored expression of every marker gene, one figure per library
+for (i in seq_along(umap_df_list_excluded_scaled)) {
+  # label from the plotted list, so the file name always matches the data shown
+  library_id <- names(umap_df_list_excluded_scaled)[i]
+
+  p <- ggplot(umap_df_list_excluded_scaled[[i]],
+              aes(x = transformed_gene_expression, fill = gene_symbol)) +
+    geom_density() +
+    facet_wrap(vars(gene_symbol), scales = "free_y") +
+    theme(legend.position = "none") +
+    ggtitle(library_id)
+
+  print(p)
+
+  # save plots
+  ggsave(
+    filename = file.path(
+      module_base,
+      "plots",
+      paste0("marker_distribution_zscaled_", library_id, ".png")
+    ),
+    plot = p,
+    width = 6,
+    height = 6,
+    units = "in",
+    dpi = 150
+  )
+
+}
+```
+
+It looks like some marker genes have distinct groups of cells with z-score > 0, while other marker genes may not be as informative.
+
+### Classify tumor cells using marker genes only
+
+Let's try and use the marker gene expression to classify tumor cells.
+It looks like we could use a cutoff of z-score > 0 to count that cell as a tumor cell.
+
+We could either count any cell that expresses at least one marker gene > 0 as a tumor cell, or look at the combined expression.
+Let's start with the combined expression and classify a cell as tumor if the sum of its marker gene z-scores is > 0.
+
+Below, we can get the sum of the transformed gene expression of all marker genes and plot in a single UMAP.
+
+```{r}
+# per cell: sum of the z-scores of all marker genes (NA/NaN z-scores are ignored)
+marker_sum_exp <- lapply(umap_df_list_excluded_scaled, function(x) {
+  x |>
+    dplyr::group_by(barcodes) |>
+    dplyr::mutate(sum_exp = sum(transformed_gene_expression, na.rm = TRUE)) |>
+    # drop the grouping again so later steps do not run once per cell
+    dplyr::ungroup() |>
+    # collapse the one-row-per-gene layout to one row per cell
+    dplyr::select(barcodes, UMAP1, UMAP2, sum_exp, cluster) |>
+    dplyr::distinct()
+})
+
+
+# plot the summed z-scores on the UMAP
+
+for (i in seq_along(marker_sum_exp)) {
+  library_id <- names(marker_sum_exp)[i]
+
+  p <- ggplot(marker_sum_exp[[i]], aes(x = UMAP1, y = UMAP2, color = sum_exp)) +
+    geom_point(size = 0.5, alpha = 0.5) +
+    scale_color_viridis_c() +
+    ggtitle(library_id)
+  print(p)
+
+  # save plots
+  ggsave(
+    filename = file.path(
+      module_base,
+      "plots",
+      paste0("UMAP_marker_expression_", library_id, ".png")
+    ),
+    plot = p,
+    width = 6,
+    height = 6,
+    units = "in",
+    dpi = 150
+  )
+}
+
+```
+
+Similar to the individual plots, it looks like there is one group of cells on the bottom right that has the highest marker gene expression.
+We would anticipate that these are most likely to be the tumor cells.
+
+Now let's classify any cell that has a sum of marker genes > 0 (after z-transformation) as tumor cells.
+
+```{r}
+# classify a cell as tumor when its summed marker gene z-score is above 0
+marker_sum_exp <- lapply(marker_sum_exp, function(x) {
+  x |>
+    dplyr::mutate(sum_classification = dplyr::if_else(sum_exp > 0, "Tumor", "Normal"))
+})
+
+
+for (i in seq_along(marker_sum_exp)) {
+  library_id <- names(marker_sum_exp)[i]
+
+  p <- ggplot(marker_sum_exp[[i]], aes(x = UMAP1, y = UMAP2, color = sum_classification)) +
+    geom_point(size = 0.5, alpha = 1) +
+    ggtitle(library_id)
+  print(p)
+
+  # save plots
+  ggsave(
+    filename = file.path(
+      module_base,
+      "plots",
+      paste0("UMAP_classification_", library_id, ".png")
+    ),
+    plot = p,
+    width = 6,
+    height = 6,
+    units = "in",
+    dpi = 150
+  )
+}
+
+```
+
+This gives us a rough idea of cells that may be classified as tumor cells. However, I believe re-doing this analysis using the cluster level data may give us better results. This is highly dependent on whether the clusters were generated with enough granularity.
+
+```{r}
+# per cluster: sum of the z-scores of all marker genes over all cells of the cluster
+# (this replaces the per-cell `marker_sum_exp` from above)
+# NOTE(review): a sum grows with the number of cells in a cluster; its sign equals the
+# sign of the mean, but the magnitudes on the colour scale are not comparable between
+# clusters of different size -- consider mean() if magnitudes matter
+marker_sum_exp <- lapply(umap_df_list_excluded_scaled, function(x) {
+  x |>
+    dplyr::group_by(cluster) |> #change to cluster
+    dplyr::mutate(sum_exp = sum(transformed_gene_expression, na.rm = TRUE)) |>
+    dplyr::ungroup() |>
+    # collapse the one-row-per-gene layout to one row per cell
+    dplyr::select(barcodes, UMAP1, UMAP2, sum_exp, cluster) |>
+    dplyr::distinct()
+})
+
+
+# plot the cluster-level summed z-scores on the UMAP
+
+for (i in seq_along(marker_sum_exp)) {
+  library_id <- names(marker_sum_exp)[i]
+
+  p <- ggplot(marker_sum_exp[[i]], aes(x = UMAP1, y = UMAP2, color = sum_exp)) +
+    geom_point(size = 0.5, alpha = 0.5) +
+    scale_color_viridis_c() +
+    ggtitle(library_id)
+  print(p)
+
+  # save plots
+  ggsave(
+    filename = file.path(
+      module_base,
+      "plots",
+      paste0("UMAP_marker_expression_cluster_", library_id, ".png")
+    ),
+    plot = p,
+    width = 6,
+    height = 6,
+    units = "in",
+    dpi = 150
+  )
+}
+
+```
+
+It looks like there are groups of cells with high marker gene expression. We anticipate that these are most likely to be the tumor cells. The groups with low or negative values are likely normal cells.
+
+Now let's classify clusters that have a sum of marker genes > 0 (after z-transformation) as tumor cell clusters.
+
+
+```{r}
+# classify every cell from the summed marker gene z-score stored in `sum_exp`:
+# above 0 is "Tumor", otherwise "Normal"
+# (the density estimate that used to be computed here was never used and is removed)
+marker_sum_exp <- lapply(marker_sum_exp, function(x) {
+  x |>
+    dplyr::mutate(sum_classification = dplyr::if_else(sum_exp > 0, "Tumor", "Normal"))
+})
+
+
+for (i in seq_along(marker_sum_exp)) {
+  # label from the plotted list, so the file name always matches the data shown
+  library_id <- names(marker_sum_exp)[i]
+
+  p <- ggplot(marker_sum_exp[[i]], aes(x = UMAP1, y = UMAP2, color = sum_classification)) +
+    geom_point(size = 0.5, alpha = 1) +
+    ggtitle(library_id)
+  print(p)
+
+  # save plots
+  ggsave(
+    filename = file.path(
+      module_base,
+      "plots",
+      paste0("UMAP_classification_cluster_", library_id, ".png")
+    ),
+    plot = p,
+    width = 6,
+    height = 6,
+    units = "in",
+    dpi = 150
+  )
+
+}
+
+```
+
+In my experience, the above plots show that we are unable to adequately distinguish the tumor cells from the normal cells. This can be seen in SCPCS000729, which is most likely all tumor cells, since it was collected from a patient-derived xenograft.
+
+## Conclusions
+
+- We do see variation in DSRCT-specific gene expression across cells, suggesting that there may be gene drop-out reducing the quality of the data.
+- `ST6GALNAC5`, `PTPRQ`, and `IQCJ-SCHIP1` are good markers for the DSRCT cells within this data. The other markers are not as highly expressed either due to heterogeneity or data quality.
+- For the next steps, we may try to use `SingleR` or `CellAssign` to identify the normal cells and then work to identify remaining cells as tumor cells. Normal cells may have more definitive markers than tumor cells.
+
+## Save outputs
+```{r}
+# save the paths of the processed SCE files used in this notebook
+# (`sce_file_list` holds the file paths, not the SingleCellExperiment objects);
+# the target is built from `module_base` like the second file below, so it no longer
+# depends on the working directory the notebook is run from
+saveRDS(sce_file_list, file.path(module_base, "results", "SCPCP000013_sce_file_list.rds"))
+# get an RDS of the UMAP data with marker genes
+saveRDS(umap_df_list, file.path(module_base, "results", "SCPCP000013_umap_df_list.rds"))
+```
+
+
+## Session Info
+
+```{r session info}
+# record the versions of the packages used in this analysis and other environment information
+sessionInfo()
+```
diff --git a/analyses/cell-type-dsrct/exploratory_analysis/01-marker-gene-tumor-classification.html b/analyses/cell-type-dsrct/exploratory_analysis/01-marker-gene-tumor-classification.html
new file mode 100644
index 000000000..e8d3f7226
--- /dev/null
+++ b/analyses/cell-type-dsrct/exploratory_analysis/01-marker-gene-tumor-classification.html
@@ -0,0 +1,930 @@
+ + + + + + + + + + + + + + + +EDA of DSRCT Samples + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + +
+ +
+ +
+

Introduction

+

This notebook looks explores the data from the DSRCT sample set, +SCPCP000013. We then see if we can use expression of +DSRCT-specific genes to manually classify tumor and normal cells. The +main goal of this notebook is only to identify tumor cells, +identification and labeling of the other cells is a separate question +that we do not answer here.

+ +
+
+

Setup

+
suppressPackageStartupMessages({
+  # load required packages
+  library(SingleCellExperiment)
+  library(ggplot2)
+})
+
## Warning: package 'matrixStats' was built under R version 4.4.1
+
## Warning: package 'GenomicRanges' was built under R version 4.4.1
+
## Warning: package 'S4Vectors' was built under R version 4.4.1
+
## Warning: package 'IRanges' was built under R version 4.4.1
+
# Set default ggplot theme
+theme_set(
+  theme_bw()
+)
+
# The base path for the OpenScPCA repository, found by its (hidden) .git directory
+repository_base <- rprojroot::find_root(rprojroot::is_git_root)
+
+# The current data directory, found within the repository base directory
+data_dir <- file.path(repository_base, "data", "current")
+#sample_dir <- file.path(data_dir, "SCPCP000013", params$sample_id)
+
+# The path to this module
+module_base <- file.path(repository_base, "analyses", "cell-type-DSRCT")
+
metadata_file <- file.path(data_dir, "SCPCP000013", "single_cell_metadata.tsv")
+metadata <- read.csv(metadata_file,  sep = '\t')
+metadata_DSRCT <- dplyr::filter(metadata, diagnosis == 'Desmoplastic small round cell tumor')
+
sce_file_list <- file.path(
+  data_dir, "SCPCP000013",  
+  metadata_DSRCT$scpca_sample_id, 
+  paste0(metadata_DSRCT$scpca_library_id, "_processed.rds")
+)
+
+marker_genes <- file.path(module_base, "references", "tumor-marker-genes.tsv")
+
+# output tumor/normal classifications
+results_dir <- file.path(module_base, "results", "marker_gene_analysis")
+fs::dir_create(results_dir)
+
+#classifications_filename <- glue::glue("{params$library_id}_tumor_normal_classifications.tsv")
+#output_classifications_file <- file.path(results_dir, classifications_filename)
+

Read in each data as a separate object +SingleCellExperiment in the list.

+
sce_list <- lapply(sce_file_list, readr::read_rds)
+
+#adding the sample id to the name of each SingleCellExperiment
+names(sce_list) <- metadata_DSRCT$scpca_library_id
+
+# read in marker genes table 
+marker_genes_df <- readr::read_tsv(marker_genes) |> 
+  # account for genes being from multiple sources
+  dplyr::select(cell_type, ensembl_gene_id, gene_symbol) |> 
+  dplyr::distinct()
+
## Rows: 7 Columns: 4
+## ── Column specification ────────────────────────────────────────────────────────
+## Delimiter: "\t"
+## chr (4): cell_type, gene_symbol, ensembl_gene_id, source
+## 
+## ℹ Use `spec()` to retrieve the full column specification for this data.
+## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
+
marker_genes_df
+
## # A tibble: 7 × 3
+##   cell_type ensembl_gene_id gene_symbol
+##   <chr>     <chr>           <chr>      
+## 1 tumor     ENSG00000117069 ST6GALNAC5 
+## 2 tumor     ENSG00000007402 CACNA2D2   
+## 3 tumor     ENSG00000283154 IQCJ-SCHIP1
+## 4 tumor     ENSG00000139304 PTPRQ      
+## 5 tumor     ENSG00000069482 GAL        
+## 6 tumor     ENSG00000197487 GALP       
+## 7 tumor     ENSG00000165474 GJB2
+
marker_genes <- marker_genes_df |> 
+  dplyr::filter(cell_type == "tumor") |> 
+  dplyr::pull(ensembl_gene_id)
+
+
+

Analysis content

+
+

Explore marker gene expression

+

The first thing we do here is just create a faceted UMAP showing the +expression of each marker gene for tumor cells.

+
umap_df_list <- lapply(sce_list, function(x) {
+# pull out the UMAP coordinates and genes and make a data frame to use for plotting
+  umap_df <- x |>
+    scuttle::makePerCellDF(features = marker_genes, use.dimred = "UMAP") |>
+    # replace UMAP.1 with UMAP1
+    dplyr::rename_with(\(x) stringr::str_replace(x, "^UMAP\\.", "UMAP")) |>
+    # combine all genes into a single column for easy faceting
+    tidyr::pivot_longer(cols = starts_with("ENSG"),
+                        names_to = "ensembl_gene_id",
+                        values_to = "gene_expression") |>
+    # join with marker gene df to get gene symbols for plotting
+    dplyr::left_join(marker_genes_df, by = c("ensembl_gene_id")) |>
+    dplyr::select(barcodes,
+                  UMAP1,
+                  UMAP2,
+                  gene_symbol,
+                  ensembl_gene_id,
+                  gene_expression,
+                  cluster)
+})
+
for(i in 1:length(umap_df_list)){
+  # faceted umap showing a umap panel for each marker gene
+  p <- ggplot(umap_df_list[[i]], aes(x = UMAP1, y = UMAP2, color = gene_expression)) +
+    geom_point(alpha = 0.8, size = 0.1) +
+    facet_wrap(vars(gene_symbol)) +
+    scale_color_viridis_c() +
+    labs(color = "Log-normalized gene expression") +
+    # remove axis numbers and background grid
+    scale_x_continuous(labels = NULL, breaks = NULL) +
+    scale_y_continuous(labels = NULL, breaks = NULL) +
+    theme(
+      aspect.ratio = 1,
+      legend.position = "bottom",
+      axis.title = element_text(size = 9, color = "black"),
+      strip.text = element_text(size = 8),
+      legend.title = element_text(size = 9),
+      legend.text = element_text(size = 8)
+    ) +
+    guides(colour = guide_colorbar(title.position = "bottom", title.hjust = 0.5)) +
+    ggtitle(names(umap_df_list)[i])
+  
+  #save plots
+  ggsave(
+    filename = paste0(
+      module_base,
+      '/plots/marker_expression_',
+      names(umap_df_list)[i],
+      '.png'
+    ),
+    plot = p,
+    width = 6,
+    height = 6,
+    units = 'in',
+    dpi = 150
+  )
+}
+

In my experience, ST6GALNAC5 is a strong marker for DSRCT in +single-cell data. As can be seen, several of the samples have expression +of ST6GALNAC5, but some do not, such as SCPCS000731 and SCPCS000729. In +fact, these SCPCS000729 contains low number of cells. These samples in +particular are the PDX samples and I will be excluding SCPCS000729 from +further analyses.

+
umap_df_list_excluded <- umap_df_list[!(names(umap_df_list) %in% c('SCPCS000731'))]
+

We can also look at the distributions for each marker gene. I would +expect to see some sort of bimodal distribution separating cells that do +and do not have expression of the marker gene. What is clear from these +plots that ST6GALNAC5, CACNA2D2, PTPRQ, IQCJ-SCHIP1, show a bi modal +distribution among the cells. To some extent, we do see expression of +the other markers.

+
for(i in 1:length(umap_df_list_excluded)) {
+  p <- ggplot(umap_df_list_excluded[[i]],
+              aes(x = gene_expression, fill = gene_symbol)) +
+    geom_density() +
+    facet_wrap(vars(gene_symbol), scales = 'free_y') +
+    theme(legend.position = "none") + 
+    ggtitle(names(umap_df_list_excluded)[i])
+  print(p)
+  
+  #save plots
+  ggsave(
+    filename = paste0(
+      module_base,
+      '/plots/marker_distribution_',
+      names(umap_df_list)[i],
+      '.png'
+    ),
+    plot = p,
+    width = 6,
+    height = 6,
+    units = 'in',
+    dpi = 150
+  )
+}
+

+

Although we see some slight semblance of a bimodal distribution for +most marker genes, it is hard to see and we cannot directly compare gene +expression values across genes. This would make it hard to identify a +cut off to categorize cells as expression or not expressing the marker +gene.

+

Now we will transform each of the gene expression vectors by +generating z-scores. Then we might be able to find a cut off we can use +across samples for if marker genes are present in a cell or not.

+
umap_df_list_excluded_scaled <- lapply(umap_df_list_excluded, function(x){
+  x   |>
+  dplyr::group_by(gene_symbol) |>
+  # get z-scores for each gene
+  dplyr::mutate(transformed_gene_expression = scale(gene_expression)[, 1]) |>
+  dplyr::ungroup()
+})
+

Now we can create the same density plot but looking at z-scores. +Interestingly, in some samples, like SPCS000489, we see that ST6GALNAC5 +has negative values. Presumably, these are tumor cells but there may be +a skew due to the number of tumor cells compared to normal.

+
for(i in 1:length(umap_df_list_excluded_scaled)) {
+  p <- ggplot(umap_df_list_excluded_scaled[[i]],
+              aes(x = transformed_gene_expression, fill = gene_symbol)) +
+    geom_density() +
+    facet_wrap(vars(gene_symbol), scales = 'free_y') +
+    theme(legend.position = "none") + 
+    ggtitle(names(umap_df_list_excluded_scaled)[i])
+  
+  print(p)
+  
+  #save plots
+  ggsave(
+    filename = paste0(
+      module_base,
+      '/plots/marker_distribution_zscaled_',
+      names(umap_df_list)[i],
+      '.png'
+    ),
+    plot = p,
+    width = 6,
+    height = 6,
+    units = 'in',
+    dpi = 150
+  )
+  
+}
+
## Warning: Removed 1324 rows containing non-finite outside the scale range
+## (`stat_density()`).
+## Removed 1324 rows containing non-finite outside the scale range
+## (`stat_density()`).
+

+
## Warning: Removed 6140 rows containing non-finite outside the scale range
+## (`stat_density()`).
+
## Warning: Removed 6140 rows containing non-finite outside the scale range
+## (`stat_density()`).
+

+
## Warning: Removed 164 rows containing non-finite outside the scale range
+## (`stat_density()`).
+
## Warning: Removed 164 rows containing non-finite outside the scale range
+## (`stat_density()`).
+

+

It looks like some marker genes have distinct groups of cells with +z-score > 0, while other marker genes may not be as informative.

+
+
+

Classify tumor cells using marker genes only

+

Let’s try and use the marker gene expression to classify tumor cells. +It looks like we could use a cutoff of z-score > 0 to count that cell +as a tumor cell.

+

We could either count any cell that expresses at least one marker +gene > 0 as a tumor cell, or look at the combined expression. Let’s +start with classifying tumor cells as tumor if any marker gene is +present (z-score > 0).

+

Below, we can get the sum of the transformed gene expression of all +marker genes and plot in a single UMAP.

+
# calculate sum gene expression across all marker genes in list 
+marker_sum_exp <- lapply(umap_df_list_excluded_scaled, function(x){
+  x |>
+  dplyr::group_by(barcodes) |> 
+  dplyr::mutate(sum_exp = sum(transformed_gene_expression, na.rm = T)) |> 
+  dplyr::select(barcodes, UMAP1, UMAP2, sum_exp, cluster) |> 
+  dplyr::distinct()
+})
+
+
+# plot mean gene expression 
+
+for(i in 1:length(marker_sum_exp)) {
+  p <- ggplot(marker_sum_exp[[i]], aes(x = UMAP1, y = UMAP2, color = sum_exp)) +
+  geom_point(size = 0.5, alpha = 0.5) +
+  scale_color_viridis_c() +
+    ggtitle(names(umap_df_list_excluded_scaled)[i])
+  print(p)
+  
+  #save plots
+  ggsave(
+    filename = paste0(
+      module_base,
+      '/plots/UMAP_marker_expression_',
+      names(umap_df_list)[i],
+      '.png'
+    ),
+    plot = p,
+    width = 6,
+    height = 6,
+    units = 'in',
+    dpi = 150
+  )
+}
+

+Similar to the individual plots, it looks like there is one group of +cells on the bottom right that has the highest marker gene expression. +We would anticipate that these are most likely to be the tumor +cells.

+

Now let’s classify any cell that has a sum of marker genes > 0 +(after z-transformation) as tumor cells.

+
# classify tumor cells based on presence of any marker genes 
+marker_sum_exp <- lapply(marker_sum_exp, function(x){
+  x |> 
+  dplyr::mutate(sum_classification = dplyr::if_else(sum_exp > 0, "Tumor", "Normal"))})
+
+
+for(i in 1:length(marker_sum_exp)) {
+  p <- ggplot(marker_sum_exp[[i]], aes(x = UMAP1, y = UMAP2, color = sum_classification)) +
+  geom_point(size = 0.5, alpha = 1) +
+  ggtitle(names(umap_df_list_excluded_scaled)[i])
+  print(p)
+  
+  #save plots
+  ggsave(
+    filename = paste0(
+      module_base,
+      '/plots/UMAP_classification_',
+      names(umap_df_list)[i],
+      '.png'
+    ),
+    plot = p,
+    width = 6,
+    height = 6,
+    units = 'in',
+    dpi = 150
+  )
+}
+

+

This gives us a rough idea of cells that may be classified as tumor +cells. However, I believe re-doing this analysis using the cluster level +data may give us better results. This is highly dependent on whether the +clusters were generated with enough granularity.

+
# calculate sum gene expression across all marker genes in list 
+marker_sum_exp <- lapply(umap_df_list_excluded_scaled, function(x){
+  x |> 
+  dplyr::group_by(cluster) |> #change to cluster
+  dplyr::mutate(sum_exp = sum(transformed_gene_expression, na.rm = T)) |> 
+  dplyr::select(barcodes, UMAP1, UMAP2, sum_exp, cluster) |> 
+  dplyr::distinct()
+})
+
+
+# plot mean gene expression 
+
+for(i in 1:length(marker_sum_exp)) {
+  p <- ggplot(marker_sum_exp[[i]], aes(x = UMAP1, y = UMAP2, color = sum_exp)) +
+  geom_point(size = 0.5, alpha = 0.5) +
+  scale_color_viridis_c() +
+    ggtitle(names(umap_df_list_excluded_scaled)[i])
+  print(p)
+  
+  #save plots
+  ggsave(
+    filename = paste0(module_base, '/plots/UMAP_marker_expression_cluster_', names(umap_df_list)[i], '.png'),
+    plot = p,
+    width = 6,
+    height = 6,
+    units = 'in',
+    dpi = 150
+  )
+}
+

+It looks like there are group of cells with high marker gene expression. +We anticipate that these are most likely to be the tumor cells. The +groups with low or negative values are likely normal cells.

+

Now let’s classify clusters that has a sum of marker genes > 0 +(after z-transformation) as tumor cell clusters.

+
# classify tumor cells based on presence of any marker genes 
+marker_sum_exp <- lapply(marker_sum_exp, function(x){
+  d <- density(x$sum_exp)
+  
+  x |> 
+  dplyr::mutate(sum_classification = dplyr::if_else(sum_exp > 0, "Tumor", "Normal"))})
+
+
+for(i in 1:length(marker_sum_exp)) {
+  p <- ggplot(marker_sum_exp[[i]], aes(x = UMAP1, y = UMAP2, color = sum_classification)) +
+  geom_point(size = 0.5, alpha = 1) +
+  ggtitle(names(umap_df_list_excluded_scaled)[i])
+  print(p)
+
+  #save plots
+  ggsave(
+    filename = paste0(
+      module_base,
+      '/plots/UMAP_classification_cluster_',
+      names(umap_df_list)[i],
+      '.png'
+    ),
+    plot = p,
+    width = 6,
+    height = 6,
+    units = 'in',
+    dpi = 150
+  )
+
+}
+

+

Based on my experiences, the above plots show that we are unable to +adequately determine the tumor cells from the normal cells. This can be +seen in SCPCS000729, which is most likely all tumor cells, since it is +collected from a patient-derived xenograft.

+
+
+
+

Conclusions

+ +
+
+

Save outputs

+
# get an RDS of the processed data
+saveRDS(sce_file_list, '../results/SCPCP000013_sce_file_list.rds')
+# get an RDS of the UMAP data with marker genes
+saveRDS(umap_df_list, file.path(module_base, 'results/SCPCP000013_umap_df_list.rds'))
+
+
+

Session Info

+
# record the versions of the packages used in this analysis and other environment information
+sessionInfo()
+
## R version 4.4.0 (2024-04-24)
+## Platform: aarch64-apple-darwin20
+## Running under: macOS Ventura 13.6.8
+## 
+## Matrix products: default
+## BLAS:   /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib 
+## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
+## 
+## locale:
+## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
+## 
+## time zone: America/Chicago
+## tzcode source: internal
+## 
+## attached base packages:
+## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
+## [8] base     
+## 
+## other attached packages:
+##  [1] ggplot2_3.5.1               SingleCellExperiment_1.26.0
+##  [3] SummarizedExperiment_1.34.0 Biobase_2.64.0             
+##  [5] GenomicRanges_1.56.2        GenomeInfoDb_1.40.1        
+##  [7] IRanges_2.38.1              S4Vectors_0.42.1           
+##  [9] BiocGenerics_0.50.0         MatrixGenerics_1.16.0      
+## [11] matrixStats_1.4.1          
+## 
+## loaded via a namespace (and not attached):
+##  [1] gtable_0.3.5              xfun_0.48                
+##  [3] bslib_0.8.0               lattice_0.22-6           
+##  [5] tzdb_0.4.0                vctrs_0.6.5              
+##  [7] tools_4.4.0               generics_0.1.3           
+##  [9] parallel_4.4.0            tibble_3.2.1             
+## [11] fansi_1.0.6               highr_0.11               
+## [13] pkgconfig_2.0.3           Matrix_1.7-0             
+## [15] sparseMatrixStats_1.16.0  lifecycle_1.0.4          
+## [17] GenomeInfoDbData_1.2.12   farver_2.1.2             
+## [19] stringr_1.5.1             compiler_4.4.0           
+## [21] textshaping_0.4.0         munsell_0.5.1            
+## [23] codetools_0.2-20          htmltools_0.5.8.1        
+## [25] sass_0.4.9                yaml_2.3.10              
+## [27] pillar_1.9.0              crayon_1.5.3             
+## [29] jquerylib_0.1.4           tidyr_1.3.1              
+## [31] BiocParallel_1.38.0       DelayedArray_0.30.1      
+## [33] cachem_1.1.0              abind_1.4-8              
+## [35] tidyselect_1.2.1          digest_0.6.37            
+## [37] stringi_1.8.4             purrr_1.0.2              
+## [39] dplyr_1.1.4               labeling_0.4.3           
+## [41] rprojroot_2.0.4           fastmap_1.2.0            
+## [43] grid_4.4.0                colorspace_2.1-1         
+## [45] cli_3.6.3                 SparseArray_1.4.8        
+## [47] magrittr_2.0.3            S4Arrays_1.4.1           
+## [49] utf8_1.2.4                readr_2.1.5              
+## [51] withr_3.0.1               DelayedMatrixStats_1.26.0
+## [53] scales_1.3.0              UCSC.utils_1.0.0         
+## [55] bit64_4.5.2               rmarkdown_2.28           
+## [57] XVector_0.44.0            httr_1.4.7               
+## [59] bit_4.5.0                 ragg_1.3.3               
+## [61] hms_1.1.3                 beachmat_2.20.0          
+## [63] evaluate_1.0.1            knitr_1.48               
+## [65] viridisLite_0.4.2         rlang_1.1.4              
+## [67] Rcpp_1.0.13               scuttle_1.14.0           
+## [69] glue_1.8.0                rstudioapi_0.17.0        
+## [71] vroom_1.6.5               jsonlite_1.8.9           
+## [73] R6_2.5.1                  systemfonts_1.1.0        
+## [75] fs_1.6.4                  zlibbioc_1.50.0
+
+ + + + +
+ + + + + + + + + + + + + + + diff --git a/analyses/cell-type-dsrct/plots/UMAP_classification_SCPCS000486.png b/analyses/cell-type-dsrct/plots/UMAP_classification_SCPCS000486.png new file mode 100644 index 000000000..3717929b0 Binary files /dev/null and b/analyses/cell-type-dsrct/plots/UMAP_classification_SCPCS000486.png differ diff --git a/analyses/cell-type-dsrct/plots/UMAP_classification_SCPCS000487.png b/analyses/cell-type-dsrct/plots/UMAP_classification_SCPCS000487.png new file mode 100644 index 000000000..92d6322e7 Binary files /dev/null and b/analyses/cell-type-dsrct/plots/UMAP_classification_SCPCS000487.png differ diff --git a/analyses/cell-type-dsrct/plots/UMAP_classification_SCPCS000488.png b/analyses/cell-type-dsrct/plots/UMAP_classification_SCPCS000488.png new file mode 100644 index 000000000..d83d4b759 Binary files /dev/null and b/analyses/cell-type-dsrct/plots/UMAP_classification_SCPCS000488.png differ diff --git a/analyses/cell-type-dsrct/plots/UMAP_classification_SCPCS000489.png b/analyses/cell-type-dsrct/plots/UMAP_classification_SCPCS000489.png new file mode 100644 index 000000000..db356b795 Binary files /dev/null and b/analyses/cell-type-dsrct/plots/UMAP_classification_SCPCS000489.png differ diff --git a/analyses/cell-type-dsrct/plots/UMAP_classification_SCPCS000729.png b/analyses/cell-type-dsrct/plots/UMAP_classification_SCPCS000729.png new file mode 100644 index 000000000..2d2efe15d Binary files /dev/null and b/analyses/cell-type-dsrct/plots/UMAP_classification_SCPCS000729.png differ diff --git a/analyses/cell-type-dsrct/plots/UMAP_classification_SCPCS000731.png b/analyses/cell-type-dsrct/plots/UMAP_classification_SCPCS000731.png new file mode 100644 index 000000000..2c2cc5a5c Binary files /dev/null and b/analyses/cell-type-dsrct/plots/UMAP_classification_SCPCS000731.png differ diff --git a/analyses/cell-type-dsrct/plots/UMAP_classification_cluster_SCPCS000486.png b/analyses/cell-type-dsrct/plots/UMAP_classification_cluster_SCPCS000486.png new 
file mode 100644 index 000000000..e59012467 Binary files /dev/null and b/analyses/cell-type-dsrct/plots/UMAP_classification_cluster_SCPCS000486.png differ diff --git a/analyses/cell-type-dsrct/plots/UMAP_classification_cluster_SCPCS000487.png b/analyses/cell-type-dsrct/plots/UMAP_classification_cluster_SCPCS000487.png new file mode 100644 index 000000000..1c31f2d11 Binary files /dev/null and b/analyses/cell-type-dsrct/plots/UMAP_classification_cluster_SCPCS000487.png differ diff --git a/analyses/cell-type-dsrct/plots/UMAP_classification_cluster_SCPCS000488.png b/analyses/cell-type-dsrct/plots/UMAP_classification_cluster_SCPCS000488.png new file mode 100644 index 000000000..6a6c5a29e Binary files /dev/null and b/analyses/cell-type-dsrct/plots/UMAP_classification_cluster_SCPCS000488.png differ diff --git a/analyses/cell-type-dsrct/plots/UMAP_classification_cluster_SCPCS000489.png b/analyses/cell-type-dsrct/plots/UMAP_classification_cluster_SCPCS000489.png new file mode 100644 index 000000000..92013aec2 Binary files /dev/null and b/analyses/cell-type-dsrct/plots/UMAP_classification_cluster_SCPCS000489.png differ diff --git a/analyses/cell-type-dsrct/plots/UMAP_classification_cluster_SCPCS000729.png b/analyses/cell-type-dsrct/plots/UMAP_classification_cluster_SCPCS000729.png new file mode 100644 index 000000000..64c346bee Binary files /dev/null and b/analyses/cell-type-dsrct/plots/UMAP_classification_cluster_SCPCS000729.png differ diff --git a/analyses/cell-type-dsrct/plots/UMAP_classification_cluster_SCPCS000731.png b/analyses/cell-type-dsrct/plots/UMAP_classification_cluster_SCPCS000731.png new file mode 100644 index 000000000..77602ad39 Binary files /dev/null and b/analyses/cell-type-dsrct/plots/UMAP_classification_cluster_SCPCS000731.png differ diff --git a/analyses/cell-type-dsrct/plots/UMAP_marker_expression_SCPCS000486.png b/analyses/cell-type-dsrct/plots/UMAP_marker_expression_SCPCS000486.png new file mode 100644 index 000000000..8215087cd Binary files 
/dev/null and b/analyses/cell-type-dsrct/plots/UMAP_marker_expression_SCPCS000486.png differ diff --git a/analyses/cell-type-dsrct/plots/UMAP_marker_expression_SCPCS000487.png b/analyses/cell-type-dsrct/plots/UMAP_marker_expression_SCPCS000487.png new file mode 100644 index 000000000..53ac9688d Binary files /dev/null and b/analyses/cell-type-dsrct/plots/UMAP_marker_expression_SCPCS000487.png differ diff --git a/analyses/cell-type-dsrct/plots/UMAP_marker_expression_SCPCS000488.png b/analyses/cell-type-dsrct/plots/UMAP_marker_expression_SCPCS000488.png new file mode 100644 index 000000000..782be163c Binary files /dev/null and b/analyses/cell-type-dsrct/plots/UMAP_marker_expression_SCPCS000488.png differ diff --git a/analyses/cell-type-dsrct/plots/UMAP_marker_expression_SCPCS000489.png b/analyses/cell-type-dsrct/plots/UMAP_marker_expression_SCPCS000489.png new file mode 100644 index 000000000..40dc3a7a3 Binary files /dev/null and b/analyses/cell-type-dsrct/plots/UMAP_marker_expression_SCPCS000489.png differ diff --git a/analyses/cell-type-dsrct/plots/UMAP_marker_expression_SCPCS000729.png b/analyses/cell-type-dsrct/plots/UMAP_marker_expression_SCPCS000729.png new file mode 100644 index 000000000..763358cb5 Binary files /dev/null and b/analyses/cell-type-dsrct/plots/UMAP_marker_expression_SCPCS000729.png differ diff --git a/analyses/cell-type-dsrct/plots/UMAP_marker_expression_SCPCS000731.png b/analyses/cell-type-dsrct/plots/UMAP_marker_expression_SCPCS000731.png new file mode 100644 index 000000000..397645de2 Binary files /dev/null and b/analyses/cell-type-dsrct/plots/UMAP_marker_expression_SCPCS000731.png differ diff --git a/analyses/cell-type-dsrct/plots/UMAP_marker_expression_cluster_SCPCS000486.png b/analyses/cell-type-dsrct/plots/UMAP_marker_expression_cluster_SCPCS000486.png new file mode 100644 index 000000000..f84ea0b7c Binary files /dev/null and b/analyses/cell-type-dsrct/plots/UMAP_marker_expression_cluster_SCPCS000486.png differ diff --git 
a/analyses/cell-type-dsrct/plots/UMAP_marker_expression_cluster_SCPCS000487.png b/analyses/cell-type-dsrct/plots/UMAP_marker_expression_cluster_SCPCS000487.png new file mode 100644 index 000000000..b2c182d90 Binary files /dev/null and b/analyses/cell-type-dsrct/plots/UMAP_marker_expression_cluster_SCPCS000487.png differ diff --git a/analyses/cell-type-dsrct/plots/UMAP_marker_expression_cluster_SCPCS000488.png b/analyses/cell-type-dsrct/plots/UMAP_marker_expression_cluster_SCPCS000488.png new file mode 100644 index 000000000..880e2a133 Binary files /dev/null and b/analyses/cell-type-dsrct/plots/UMAP_marker_expression_cluster_SCPCS000488.png differ diff --git a/analyses/cell-type-dsrct/plots/UMAP_marker_expression_cluster_SCPCS000489.png b/analyses/cell-type-dsrct/plots/UMAP_marker_expression_cluster_SCPCS000489.png new file mode 100644 index 000000000..92fda3a1b Binary files /dev/null and b/analyses/cell-type-dsrct/plots/UMAP_marker_expression_cluster_SCPCS000489.png differ diff --git a/analyses/cell-type-dsrct/plots/UMAP_marker_expression_cluster_SCPCS000729.png b/analyses/cell-type-dsrct/plots/UMAP_marker_expression_cluster_SCPCS000729.png new file mode 100644 index 000000000..dc5856303 Binary files /dev/null and b/analyses/cell-type-dsrct/plots/UMAP_marker_expression_cluster_SCPCS000729.png differ diff --git a/analyses/cell-type-dsrct/plots/UMAP_marker_expression_cluster_SCPCS000731.png b/analyses/cell-type-dsrct/plots/UMAP_marker_expression_cluster_SCPCS000731.png new file mode 100644 index 000000000..e9440fa86 Binary files /dev/null and b/analyses/cell-type-dsrct/plots/UMAP_marker_expression_cluster_SCPCS000731.png differ diff --git a/analyses/cell-type-dsrct/plots/marker_distribution_SCPCS000486.png b/analyses/cell-type-dsrct/plots/marker_distribution_SCPCS000486.png new file mode 100644 index 000000000..80a808436 Binary files /dev/null and b/analyses/cell-type-dsrct/plots/marker_distribution_SCPCS000486.png differ diff --git 
a/analyses/cell-type-dsrct/plots/marker_distribution_SCPCS000487.png b/analyses/cell-type-dsrct/plots/marker_distribution_SCPCS000487.png new file mode 100644 index 000000000..6cfa03c16 Binary files /dev/null and b/analyses/cell-type-dsrct/plots/marker_distribution_SCPCS000487.png differ diff --git a/analyses/cell-type-dsrct/plots/marker_distribution_SCPCS000488.png b/analyses/cell-type-dsrct/plots/marker_distribution_SCPCS000488.png new file mode 100644 index 000000000..28ac79c44 Binary files /dev/null and b/analyses/cell-type-dsrct/plots/marker_distribution_SCPCS000488.png differ diff --git a/analyses/cell-type-dsrct/plots/marker_distribution_SCPCS000489.png b/analyses/cell-type-dsrct/plots/marker_distribution_SCPCS000489.png new file mode 100644 index 000000000..7e6b467ef Binary files /dev/null and b/analyses/cell-type-dsrct/plots/marker_distribution_SCPCS000489.png differ diff --git a/analyses/cell-type-dsrct/plots/marker_distribution_SCPCS000729.png b/analyses/cell-type-dsrct/plots/marker_distribution_SCPCS000729.png new file mode 100644 index 000000000..54e0c848e Binary files /dev/null and b/analyses/cell-type-dsrct/plots/marker_distribution_SCPCS000729.png differ diff --git a/analyses/cell-type-dsrct/plots/marker_distribution_SCPCS000731.png b/analyses/cell-type-dsrct/plots/marker_distribution_SCPCS000731.png new file mode 100644 index 000000000..2bad66495 Binary files /dev/null and b/analyses/cell-type-dsrct/plots/marker_distribution_SCPCS000731.png differ diff --git a/analyses/cell-type-dsrct/plots/marker_distribution_zscaled_SCPCS000486.png b/analyses/cell-type-dsrct/plots/marker_distribution_zscaled_SCPCS000486.png new file mode 100644 index 000000000..db85ab5fe Binary files /dev/null and b/analyses/cell-type-dsrct/plots/marker_distribution_zscaled_SCPCS000486.png differ diff --git a/analyses/cell-type-dsrct/plots/marker_distribution_zscaled_SCPCS000487.png b/analyses/cell-type-dsrct/plots/marker_distribution_zscaled_SCPCS000487.png new file mode 100644 
index 000000000..7e889efb8 Binary files /dev/null and b/analyses/cell-type-dsrct/plots/marker_distribution_zscaled_SCPCS000487.png differ diff --git a/analyses/cell-type-dsrct/plots/marker_distribution_zscaled_SCPCS000488.png b/analyses/cell-type-dsrct/plots/marker_distribution_zscaled_SCPCS000488.png new file mode 100644 index 000000000..da6086bf9 Binary files /dev/null and b/analyses/cell-type-dsrct/plots/marker_distribution_zscaled_SCPCS000488.png differ diff --git a/analyses/cell-type-dsrct/plots/marker_distribution_zscaled_SCPCS000489.png b/analyses/cell-type-dsrct/plots/marker_distribution_zscaled_SCPCS000489.png new file mode 100644 index 000000000..62260f521 Binary files /dev/null and b/analyses/cell-type-dsrct/plots/marker_distribution_zscaled_SCPCS000489.png differ diff --git a/analyses/cell-type-dsrct/plots/marker_distribution_zscaled_SCPCS000729.png b/analyses/cell-type-dsrct/plots/marker_distribution_zscaled_SCPCS000729.png new file mode 100644 index 000000000..661a005da Binary files /dev/null and b/analyses/cell-type-dsrct/plots/marker_distribution_zscaled_SCPCS000729.png differ diff --git a/analyses/cell-type-dsrct/plots/marker_distribution_zscaled_SCPCS000731.png b/analyses/cell-type-dsrct/plots/marker_distribution_zscaled_SCPCS000731.png new file mode 100644 index 000000000..efe78f3bd Binary files /dev/null and b/analyses/cell-type-dsrct/plots/marker_distribution_zscaled_SCPCS000731.png differ diff --git a/analyses/cell-type-dsrct/plots/marker_expression_SCPCS000486.png b/analyses/cell-type-dsrct/plots/marker_expression_SCPCS000486.png new file mode 100644 index 000000000..15be82fbf Binary files /dev/null and b/analyses/cell-type-dsrct/plots/marker_expression_SCPCS000486.png differ diff --git a/analyses/cell-type-dsrct/plots/marker_expression_SCPCS000487.png b/analyses/cell-type-dsrct/plots/marker_expression_SCPCS000487.png new file mode 100644 index 000000000..b97e24082 Binary files /dev/null and 
b/analyses/cell-type-dsrct/plots/marker_expression_SCPCS000487.png differ diff --git a/analyses/cell-type-dsrct/plots/marker_expression_SCPCS000488.png b/analyses/cell-type-dsrct/plots/marker_expression_SCPCS000488.png new file mode 100644 index 000000000..1bc26dbb0 Binary files /dev/null and b/analyses/cell-type-dsrct/plots/marker_expression_SCPCS000488.png differ diff --git a/analyses/cell-type-dsrct/plots/marker_expression_SCPCS000489.png b/analyses/cell-type-dsrct/plots/marker_expression_SCPCS000489.png new file mode 100644 index 000000000..f35662d9d Binary files /dev/null and b/analyses/cell-type-dsrct/plots/marker_expression_SCPCS000489.png differ diff --git a/analyses/cell-type-dsrct/plots/marker_expression_SCPCS000729.png b/analyses/cell-type-dsrct/plots/marker_expression_SCPCS000729.png new file mode 100644 index 000000000..f85f3765f Binary files /dev/null and b/analyses/cell-type-dsrct/plots/marker_expression_SCPCS000729.png differ diff --git a/analyses/cell-type-dsrct/plots/marker_expression_SCPCS000731.png b/analyses/cell-type-dsrct/plots/marker_expression_SCPCS000731.png new file mode 100644 index 000000000..97248c460 Binary files /dev/null and b/analyses/cell-type-dsrct/plots/marker_expression_SCPCS000731.png differ diff --git a/analyses/cell-type-dsrct/plots/marker_expression_SCPCS000732.png b/analyses/cell-type-dsrct/plots/marker_expression_SCPCS000732.png new file mode 100644 index 000000000..10fd4790a Binary files /dev/null and b/analyses/cell-type-dsrct/plots/marker_expression_SCPCS000732.png differ diff --git a/analyses/cell-type-dsrct/references/tumor-marker-genes.tsv b/analyses/cell-type-dsrct/references/tumor-marker-genes.tsv new file mode 100644 index 000000000..c9e073a6a --- /dev/null +++ b/analyses/cell-type-dsrct/references/tumor-marker-genes.tsv @@ -0,0 +1,9 @@ +cell_type gene_symbol ensembl_gene_id source +tumor ST6GALNAC5 ENSG00000117069 Truong & Ludwig;10.3390/cancers13236072 +tumor CACNA2D2 ENSG00000007402 Truong & 
Ludwig;10.3390/cancers13236072 +tumor IQCJ-SCHIP1 ENSG00000283154 Truong & Ludwig;10.3390/cancers13236072 +tumor PTPRQ ENSG00000139304 Truong & Ludwig;10.3390/cancers13236072 +tumor GAL ENSG00000069482 https://doi.org/10.1002/gcc.22955 +tumor GALP ENSG00000197487 https://doi.org/10.1002/gcc.22955 +tumor GJB2 ENSG00000165474 https://doi.org/10.1002/gcc.22955 + diff --git a/analyses/cell-type-dsrct/renv.lock b/analyses/cell-type-dsrct/renv.lock new file mode 100644 index 000000000..02bf8cf92 --- /dev/null +++ b/analyses/cell-type-dsrct/renv.lock @@ -0,0 +1,1453 @@ +{ + "R": { + "Version": "4.4.0", + "Repositories": [ + { + "Name": "CRAN", + "URL": "https://packagemanager.posit.co/cran/latest" + } + ] + }, + "Bioconductor": { + "Version": "3.19" + }, + "Packages": { + "BH": { + "Package": "BH", + "Version": "1.84.0-0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "a8235afbcd6316e6e91433ea47661013" + }, + "Biobase": { + "Package": "Biobase", + "Version": "2.64.0", + "Source": "Bioconductor", + "Repository": "Bioconductor 3.19", + "Requirements": [ + "BiocGenerics", + "R", + "methods", + "utils" + ], + "Hash": "9bc4cabd3bfda461409172213d932813" + }, + "BiocGenerics": { + "Package": "BiocGenerics", + "Version": "0.50.0", + "Source": "Bioconductor", + "Repository": "Bioconductor 3.19", + "Requirements": [ + "R", + "graphics", + "methods", + "stats", + "utils" + ], + "Hash": "ef32d07aafdd12f24c5827374ae3590d" + }, + "BiocManager": { + "Package": "BiocManager", + "Version": "1.30.25", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "utils" + ], + "Hash": "3aec5928ca10897d7a0a1205aae64627" + }, + "BiocParallel": { + "Package": "BiocParallel", + "Version": "1.38.0", + "Source": "Bioconductor", + "Repository": "Bioconductor 3.19", + "Requirements": [ + "BH", + "R", + "codetools", + "cpp11", + "futile.logger", + "methods", + "parallel", + "snow", + "stats", + "utils" + ], + "Hash": "7b6e79f86e3d1c23f62c5e2052e848d4" + }, + "BiocVersion": { 
+ "Package": "BiocVersion", + "Version": "3.19.1", + "Source": "Bioconductor", + "Repository": "Bioconductor 3.19", + "Requirements": [ + "R" + ], + "Hash": "b892e27fc9659a4c8f8787d34c37b8b2" + }, + "DelayedArray": { + "Package": "DelayedArray", + "Version": "0.30.1", + "Source": "Bioconductor", + "Repository": "Bioconductor 3.19", + "Requirements": [ + "BiocGenerics", + "IRanges", + "Matrix", + "MatrixGenerics", + "R", + "S4Arrays", + "S4Vectors", + "SparseArray", + "methods", + "stats", + "stats4" + ], + "Hash": "395472c65cd9d606a1a345687102f299" + }, + "DelayedMatrixStats": { + "Package": "DelayedMatrixStats", + "Version": "1.26.0", + "Source": "Bioconductor", + "Repository": "Bioconductor 3.19", + "Requirements": [ + "DelayedArray", + "IRanges", + "Matrix", + "MatrixGenerics", + "S4Vectors", + "methods", + "sparseMatrixStats" + ], + "Hash": "5d9536664ccddb0eaa68a90afe4ee76e" + }, + "GenomeInfoDb": { + "Package": "GenomeInfoDb", + "Version": "1.40.1", + "Source": "Bioconductor", + "Repository": "Bioconductor 3.19", + "Requirements": [ + "BiocGenerics", + "GenomeInfoDbData", + "IRanges", + "R", + "S4Vectors", + "UCSC.utils", + "methods", + "stats", + "stats4", + "utils" + ], + "Hash": "171e9becd9bb948b9e64eb3759208c94" + }, + "GenomeInfoDbData": { + "Package": "GenomeInfoDbData", + "Version": "1.2.12", + "Source": "Bioconductor", + "Requirements": [ + "R" + ], + "Hash": "c3c792a7b7f2677be56e8632c5b7543d" + }, + "GenomicRanges": { + "Package": "GenomicRanges", + "Version": "1.56.2", + "Source": "Bioconductor", + "Repository": "Bioconductor 3.19", + "Requirements": [ + "BiocGenerics", + "GenomeInfoDb", + "IRanges", + "R", + "S4Vectors", + "XVector", + "methods", + "stats", + "stats4", + "utils" + ], + "Hash": "fecd026026c4d45e3b57eee97bbbba92" + }, + "IRanges": { + "Package": "IRanges", + "Version": "2.38.1", + "Source": "Bioconductor", + "Repository": "Bioconductor 3.19", + "Requirements": [ + "BiocGenerics", + "R", + "S4Vectors", + "methods", + "stats", + 
"stats4", + "utils" + ], + "Hash": "066f3c5d6b022ed62c91ce49e4d8f619" + }, + "MASS": { + "Package": "MASS", + "Version": "7.3-61", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "grDevices", + "graphics", + "methods", + "stats", + "utils" + ], + "Hash": "0cafd6f0500e5deba33be22c46bf6055" + }, + "Matrix": { + "Package": "Matrix", + "Version": "1.7-0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "grDevices", + "graphics", + "grid", + "lattice", + "methods", + "stats", + "utils" + ], + "Hash": "1920b2f11133b12350024297d8a4ff4a" + }, + "MatrixGenerics": { + "Package": "MatrixGenerics", + "Version": "1.16.0", + "Source": "Bioconductor", + "Repository": "Bioconductor 3.19", + "Requirements": [ + "matrixStats", + "methods" + ], + "Hash": "152dbbcde6a9a7c7f3beef79b68cd76a" + }, + "R6": { + "Package": "R6", + "Version": "2.5.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "470851b6d5d0ac559e9d01bb352b4021" + }, + "RColorBrewer": { + "Package": "RColorBrewer", + "Version": "1.1-3", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "45f0398006e83a5b10b72a90663d8d8c" + }, + "Rcpp": { + "Package": "Rcpp", + "Version": "1.0.13", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "methods", + "utils" + ], + "Hash": "f27411eb6d9c3dada5edd444b8416675" + }, + "S4Arrays": { + "Package": "S4Arrays", + "Version": "1.4.1", + "Source": "Bioconductor", + "Repository": "Bioconductor 3.19", + "Requirements": [ + "BiocGenerics", + "IRanges", + "Matrix", + "R", + "S4Vectors", + "abind", + "crayon", + "methods", + "stats" + ], + "Hash": "deeed4802c5132e88f24a432a1caf5e0" + }, + "S4Vectors": { + "Package": "S4Vectors", + "Version": "0.42.1", + "Source": "Bioconductor", + "Repository": "Bioconductor 3.19", + "Requirements": [ + "BiocGenerics", + "R", + "methods", + "stats", + "stats4", + "utils" + ], + "Hash": 
"86398fc7c5f6be4ba29fe23ed08c2da6" + }, + "SingleCellExperiment": { + "Package": "SingleCellExperiment", + "Version": "1.26.0", + "Source": "Bioconductor", + "Repository": "Bioconductor 3.19", + "Requirements": [ + "BiocGenerics", + "DelayedArray", + "GenomicRanges", + "S4Vectors", + "SummarizedExperiment", + "methods", + "stats", + "utils" + ], + "Hash": "4476ad434a5e7887884521417cab3764" + }, + "SparseArray": { + "Package": "SparseArray", + "Version": "1.4.8", + "Source": "Bioconductor", + "Repository": "Bioconductor 3.19", + "Requirements": [ + "BiocGenerics", + "IRanges", + "Matrix", + "MatrixGenerics", + "R", + "S4Arrays", + "S4Vectors", + "XVector", + "matrixStats", + "methods", + "stats", + "utils" + ], + "Hash": "97f70ff11c14edd379ee2429228cbb60" + }, + "SummarizedExperiment": { + "Package": "SummarizedExperiment", + "Version": "1.34.0", + "Source": "Bioconductor", + "Repository": "Bioconductor 3.19", + "Requirements": [ + "Biobase", + "BiocGenerics", + "DelayedArray", + "GenomeInfoDb", + "GenomicRanges", + "IRanges", + "Matrix", + "MatrixGenerics", + "R", + "S4Arrays", + "S4Vectors", + "methods", + "stats", + "tools", + "utils" + ], + "Hash": "2f6c8cc972ed6aee07c96e3dff729d15" + }, + "UCSC.utils": { + "Package": "UCSC.utils", + "Version": "1.0.0", + "Source": "Bioconductor", + "Repository": "Bioconductor 3.19", + "Requirements": [ + "S4Vectors", + "httr", + "jsonlite", + "methods", + "stats" + ], + "Hash": "83d45b690bffd09d1980c224ef329f5b" + }, + "XVector": { + "Package": "XVector", + "Version": "0.44.0", + "Source": "Bioconductor", + "Repository": "Bioconductor 3.19", + "Requirements": [ + "BiocGenerics", + "IRanges", + "R", + "S4Vectors", + "methods", + "tools", + "utils", + "zlibbioc" + ], + "Hash": "4245b9938ac74c0dbddbebbec6036ab4" + }, + "abind": { + "Package": "abind", + "Version": "1.4-8", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "methods", + "utils" + ], + "Hash": "2288423bb0f20a457800d7fc47f6aa54" + }, + 
"askpass": { + "Package": "askpass", + "Version": "1.2.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "sys" + ], + "Hash": "c39f4155b3ceb1a9a2799d700fbd4b6a" + }, + "base64enc": { + "Package": "base64enc", + "Version": "0.1-3", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "543776ae6848fde2f48ff3816d0628bc" + }, + "beachmat": { + "Package": "beachmat", + "Version": "2.20.0", + "Source": "Bioconductor", + "Repository": "Bioconductor 3.19", + "Requirements": [ + "BiocGenerics", + "DelayedArray", + "Matrix", + "Rcpp", + "SparseArray", + "methods" + ], + "Hash": "10e94b1bce9070632a40c6b873f8b2d4" + }, + "bit": { + "Package": "bit", + "Version": "4.5.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "5dc7b2677d65d0e874fc4aaf0e879987" + }, + "bit64": { + "Package": "bit64", + "Version": "4.5.2", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "bit", + "methods", + "stats", + "utils" + ], + "Hash": "e84984bf5f12a18628d9a02322128dfd" + }, + "bslib": { + "Package": "bslib", + "Version": "0.8.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "base64enc", + "cachem", + "fastmap", + "grDevices", + "htmltools", + "jquerylib", + "jsonlite", + "lifecycle", + "memoise", + "mime", + "rlang", + "sass" + ], + "Hash": "b299c6741ca9746fb227debcb0f9fb6c" + }, + "cachem": { + "Package": "cachem", + "Version": "1.1.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "fastmap", + "rlang" + ], + "Hash": "cd9a672193789068eb5a2aad65a0dedf" + }, + "cli": { + "Package": "cli", + "Version": "3.6.3", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "utils" + ], + "Hash": "b21916dd77a27642b447374a5d30ecf3" + }, + "clipr": { + "Package": "clipr", + "Version": "0.8.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "utils" + ], + "Hash": 
"3f038e5ac7f41d4ac41ce658c85e3042" + }, + "codetools": { + "Package": "codetools", + "Version": "0.2-20", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "61e097f35917d342622f21cdc79c256e" + }, + "colorspace": { + "Package": "colorspace", + "Version": "2.1-1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "grDevices", + "graphics", + "methods", + "stats" + ], + "Hash": "d954cb1c57e8d8b756165d7ba18aa55a" + }, + "cpp11": { + "Package": "cpp11", + "Version": "0.5.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "91570bba75d0c9d3f1040c835cee8fba" + }, + "crayon": { + "Package": "crayon", + "Version": "1.5.3", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "grDevices", + "methods", + "utils" + ], + "Hash": "859d96e65ef198fd43e82b9628d593ef" + }, + "curl": { + "Package": "curl", + "Version": "5.2.3", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "d91263322a58af798f6cf3b13fd56dde" + }, + "digest": { + "Package": "digest", + "Version": "0.6.37", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "utils" + ], + "Hash": "33698c4b3127fc9f506654607fb73676" + }, + "dplyr": { + "Package": "dplyr", + "Version": "1.1.4", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "R6", + "cli", + "generics", + "glue", + "lifecycle", + "magrittr", + "methods", + "pillar", + "rlang", + "tibble", + "tidyselect", + "utils", + "vctrs" + ], + "Hash": "fedd9d00c2944ff00a0e2696ccf048ec" + }, + "evaluate": { + "Package": "evaluate", + "Version": "1.0.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "3fd29944b231036ad67c3edb32e02201" + }, + "fansi": { + "Package": "fansi", + "Version": "1.0.6", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "grDevices", + "utils" + ], + "Hash": 
"962174cf2aeb5b9eea581522286a911f" + }, + "farver": { + "Package": "farver", + "Version": "2.1.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "680887028577f3fa2a81e410ed0d6e42" + }, + "fastmap": { + "Package": "fastmap", + "Version": "1.2.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "aa5e1cd11c2d15497494c5292d7ffcc8" + }, + "fontawesome": { + "Package": "fontawesome", + "Version": "0.5.2", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "htmltools", + "rlang" + ], + "Hash": "c2efdd5f0bcd1ea861c2d4e2a883a67d" + }, + "formatR": { + "Package": "formatR", + "Version": "1.14", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "63cb26d12517c7863f5abb006c5e0f25" + }, + "fs": { + "Package": "fs", + "Version": "1.6.4", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "methods" + ], + "Hash": "15aeb8c27f5ea5161f9f6a641fafd93a" + }, + "futile.logger": { + "Package": "futile.logger", + "Version": "1.4.3", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "futile.options", + "lambda.r", + "utils" + ], + "Hash": "99f0ace8c05ec7d3683d27083c4f1e7e" + }, + "futile.options": { + "Package": "futile.options", + "Version": "1.0.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "0d9bf02413ddc2bbe8da9ce369dcdd2b" + }, + "generics": { + "Package": "generics", + "Version": "0.1.3", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "methods" + ], + "Hash": "15e9634c0fcd294799e9b2e929ed1b86" + }, + "ggplot2": { + "Package": "ggplot2", + "Version": "3.5.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "MASS", + "R", + "cli", + "glue", + "grDevices", + "grid", + "gtable", + "isoband", + "lifecycle", + "mgcv", + "rlang", + "scales", + "stats", + "tibble", + "vctrs", + "withr" + ], + "Hash": "44c6a2f8202d5b7e878ea274b1092426" + }, + 
"glue": { + "Package": "glue", + "Version": "1.8.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "methods" + ], + "Hash": "5899f1eaa825580172bb56c08266f37c" + }, + "gtable": { + "Package": "gtable", + "Version": "0.3.5", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "cli", + "glue", + "grid", + "lifecycle", + "rlang" + ], + "Hash": "e18861963cbc65a27736e02b3cd3c4a0" + }, + "highr": { + "Package": "highr", + "Version": "0.11", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "xfun" + ], + "Hash": "d65ba49117ca223614f71b60d85b8ab7" + }, + "hms": { + "Package": "hms", + "Version": "1.1.3", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "lifecycle", + "methods", + "pkgconfig", + "rlang", + "vctrs" + ], + "Hash": "b59377caa7ed00fa41808342002138f9" + }, + "htmltools": { + "Package": "htmltools", + "Version": "0.5.8.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "base64enc", + "digest", + "fastmap", + "grDevices", + "rlang", + "utils" + ], + "Hash": "81d371a9cc60640e74e4ab6ac46dcedc" + }, + "httr": { + "Package": "httr", + "Version": "1.4.7", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "R6", + "curl", + "jsonlite", + "mime", + "openssl" + ], + "Hash": "ac107251d9d9fd72f0ca8049988f1d7f" + }, + "isoband": { + "Package": "isoband", + "Version": "0.2.7", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "grid", + "utils" + ], + "Hash": "0080607b4a1a7b28979aecef976d8bc2" + }, + "jquerylib": { + "Package": "jquerylib", + "Version": "0.1.4", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "htmltools" + ], + "Hash": "5aab57a3bd297eee1c1d862735972182" + }, + "jsonlite": { + "Package": "jsonlite", + "Version": "1.8.9", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "methods" + ], + "Hash": 
"4e993b65c2c3ffbffce7bb3e2c6f832b" + }, + "knitr": { + "Package": "knitr", + "Version": "1.48", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "evaluate", + "highr", + "methods", + "tools", + "xfun", + "yaml" + ], + "Hash": "acf380f300c721da9fde7df115a5f86f" + }, + "labeling": { + "Package": "labeling", + "Version": "0.4.3", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "graphics", + "stats" + ], + "Hash": "b64ec208ac5bc1852b285f665d6368b3" + }, + "lambda.r": { + "Package": "lambda.r", + "Version": "1.2.4", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "formatR" + ], + "Hash": "b1e925c4b9ffeb901bacf812cbe9a6ad" + }, + "lattice": { + "Package": "lattice", + "Version": "0.22-6", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "grDevices", + "graphics", + "grid", + "stats", + "utils" + ], + "Hash": "cc5ac1ba4c238c7ca9fa6a87ca11a7e2" + }, + "lifecycle": { + "Package": "lifecycle", + "Version": "1.0.4", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "cli", + "glue", + "rlang" + ], + "Hash": "b8552d117e1b808b09a832f589b79035" + }, + "magrittr": { + "Package": "magrittr", + "Version": "2.0.3", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "7ce2733a9826b3aeb1775d56fd305472" + }, + "matrixStats": { + "Package": "matrixStats", + "Version": "1.4.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "8885ffb1f46e820dede6b2ca9442abca" + }, + "memoise": { + "Package": "memoise", + "Version": "2.0.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "cachem", + "rlang" + ], + "Hash": "e2817ccf4a065c5d9d7f2cfbe7c1d78c" + }, + "mgcv": { + "Package": "mgcv", + "Version": "1.9-1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "Matrix", + "R", + "graphics", + "methods", + "nlme", + "splines", + 
"stats", + "utils" + ], + "Hash": "110ee9d83b496279960e162ac97764ce" + }, + "mime": { + "Package": "mime", + "Version": "0.12", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "tools" + ], + "Hash": "18e9c28c1d3ca1560ce30658b22ce104" + }, + "munsell": { + "Package": "munsell", + "Version": "0.5.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "colorspace", + "methods" + ], + "Hash": "4fd8900853b746af55b81fda99da7695" + }, + "nlme": { + "Package": "nlme", + "Version": "3.1-166", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "graphics", + "lattice", + "stats", + "utils" + ], + "Hash": "ccbb8846be320b627e6aa2b4616a2ded" + }, + "openssl": { + "Package": "openssl", + "Version": "2.2.2", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "askpass" + ], + "Hash": "d413e0fef796c9401a4419485f709ca1" + }, + "pillar": { + "Package": "pillar", + "Version": "1.9.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "cli", + "fansi", + "glue", + "lifecycle", + "rlang", + "utf8", + "utils", + "vctrs" + ], + "Hash": "15da5a8412f317beeee6175fbc76f4bb" + }, + "pkgconfig": { + "Package": "pkgconfig", + "Version": "2.0.3", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "utils" + ], + "Hash": "01f28d4278f15c76cddbea05899c5d6f" + }, + "prettyunits": { + "Package": "prettyunits", + "Version": "1.2.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "6b01fc98b1e86c4f705ce9dcfd2f57c7" + }, + "progress": { + "Package": "progress", + "Version": "1.2.3", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "R6", + "crayon", + "hms", + "prettyunits" + ], + "Hash": "f4625e061cb2865f111b47ff163a5ca6" + }, + "purrr": { + "Package": "purrr", + "Version": "1.0.2", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "cli", + "lifecycle", + "magrittr", + 
"rlang", + "vctrs" + ], + "Hash": "1cba04a4e9414bdefc9dcaa99649a8dc" + }, + "rappdirs": { + "Package": "rappdirs", + "Version": "0.3.3", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "5e3c5dc0b071b21fa128676560dbe94d" + }, + "readr": { + "Package": "readr", + "Version": "2.1.5", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "R6", + "cli", + "clipr", + "cpp11", + "crayon", + "hms", + "lifecycle", + "methods", + "rlang", + "tibble", + "tzdb", + "utils", + "vroom" + ], + "Hash": "9de96463d2117f6ac49980577939dfb3" + }, + "renv": { + "Package": "renv", + "Version": "1.0.11", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "utils" + ], + "Hash": "47623f66b4e80b3b0587bc5d7b309888" + }, + "rlang": { + "Package": "rlang", + "Version": "1.1.4", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "utils" + ], + "Hash": "3eec01f8b1dee337674b2e34ab1f9bc1" + }, + "rmarkdown": { + "Package": "rmarkdown", + "Version": "2.28", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "bslib", + "evaluate", + "fontawesome", + "htmltools", + "jquerylib", + "jsonlite", + "knitr", + "methods", + "tinytex", + "tools", + "utils", + "xfun", + "yaml" + ], + "Hash": "062470668513dcda416927085ee9bdc7" + }, + "rprojroot": { + "Package": "rprojroot", + "Version": "2.0.4", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "4c8415e0ec1e29f3f4f6fc108bef0144" + }, + "sass": { + "Package": "sass", + "Version": "0.4.9", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R6", + "fs", + "htmltools", + "rappdirs", + "rlang" + ], + "Hash": "d53dbfddf695303ea4ad66f86e99b95d" + }, + "scales": { + "Package": "scales", + "Version": "1.3.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "R6", + "RColorBrewer", + "cli", + "farver", + "glue", + "labeling", + 
"lifecycle", + "munsell", + "rlang", + "viridisLite" + ], + "Hash": "c19df082ba346b0ffa6f833e92de34d1" + }, + "scuttle": { + "Package": "scuttle", + "Version": "1.14.0", + "Source": "Bioconductor", + "Repository": "Bioconductor 3.19", + "Requirements": [ + "BiocGenerics", + "BiocParallel", + "DelayedArray", + "DelayedMatrixStats", + "GenomicRanges", + "Matrix", + "Rcpp", + "S4Vectors", + "SingleCellExperiment", + "SummarizedExperiment", + "beachmat", + "methods", + "stats", + "utils" + ], + "Hash": "6d94b72071aefd6e8b041c34ee83ebd0" + }, + "snow": { + "Package": "snow", + "Version": "0.4-4", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "utils" + ], + "Hash": "40b74690debd20c57d93d8c246b305d4" + }, + "sparseMatrixStats": { + "Package": "sparseMatrixStats", + "Version": "1.16.0", + "Source": "Bioconductor", + "Repository": "Bioconductor 3.19", + "Requirements": [ + "Matrix", + "MatrixGenerics", + "Rcpp", + "matrixStats", + "methods" + ], + "Hash": "7e500a5a527460ca0406473bdcade286" + }, + "stringi": { + "Package": "stringi", + "Version": "1.8.4", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "stats", + "tools", + "utils" + ], + "Hash": "39e1144fd75428983dc3f63aa53dfa91" + }, + "stringr": { + "Package": "stringr", + "Version": "1.5.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "cli", + "glue", + "lifecycle", + "magrittr", + "rlang", + "stringi", + "vctrs" + ], + "Hash": "960e2ae9e09656611e0b8214ad543207" + }, + "sys": { + "Package": "sys", + "Version": "3.4.3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "de342ebfebdbf40477d0758d05426646" + }, + "tibble": { + "Package": "tibble", + "Version": "3.2.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "fansi", + "lifecycle", + "magrittr", + "methods", + "pillar", + "pkgconfig", + "rlang", + "utils", + "vctrs" + ], + "Hash": "a84e2cc86d07289b3b6f5069df7a004c" + }, + 
"tidyr": { + "Package": "tidyr", + "Version": "1.3.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "cli", + "cpp11", + "dplyr", + "glue", + "lifecycle", + "magrittr", + "purrr", + "rlang", + "stringr", + "tibble", + "tidyselect", + "utils", + "vctrs" + ], + "Hash": "915fb7ce036c22a6a33b5a8adb712eb1" + }, + "tidyselect": { + "Package": "tidyselect", + "Version": "1.2.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "cli", + "glue", + "lifecycle", + "rlang", + "vctrs", + "withr" + ], + "Hash": "829f27b9c4919c16b593794a6344d6c0" + }, + "tinytex": { + "Package": "tinytex", + "Version": "0.53", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "xfun" + ], + "Hash": "9db859e8aabbb474293dde3097839420" + }, + "tzdb": { + "Package": "tzdb", + "Version": "0.4.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "cpp11" + ], + "Hash": "f561504ec2897f4d46f0c7657e488ae1" + }, + "utf8": { + "Package": "utf8", + "Version": "1.2.4", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "62b65c52671e6665f803ff02954446e9" + }, + "vctrs": { + "Package": "vctrs", + "Version": "0.6.5", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "cli", + "glue", + "lifecycle", + "rlang" + ], + "Hash": "c03fa420630029418f7e6da3667aac4a" + }, + "viridisLite": { + "Package": "viridisLite", + "Version": "0.4.2", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "c826c7c4241b6fc89ff55aaea3fa7491" + }, + "vroom": { + "Package": "vroom", + "Version": "1.6.5", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "bit64", + "cli", + "cpp11", + "crayon", + "glue", + "hms", + "lifecycle", + "methods", + "progress", + "rlang", + "stats", + "tibble", + "tidyselect", + "tzdb", + "vctrs", + "withr" + ], + "Hash": "390f9315bc0025be03012054103d227c" + }, + 
"withr": { + "Package": "withr", + "Version": "3.0.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "grDevices", + "graphics" + ], + "Hash": "07909200e8bbe90426fbfeb73e1e27aa" + }, + "xfun": { + "Package": "xfun", + "Version": "0.48", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "grDevices", + "stats", + "tools" + ], + "Hash": "89e455b87c84e227eb7f60a1b4e5fe1f" + }, + "yaml": { + "Package": "yaml", + "Version": "2.3.10", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "51dab85c6c98e50a18d7551e9d49f76c" + }, + "zlibbioc": { + "Package": "zlibbioc", + "Version": "1.50.0", + "Source": "Bioconductor", + "Repository": "Bioconductor 3.19", + "Hash": "3db02e3c460e1c852365df117a2b441b" + } + } +} diff --git a/analyses/cell-type-dsrct/renv/.gitignore b/analyses/cell-type-dsrct/renv/.gitignore new file mode 100644 index 000000000..0ec0cbba2 --- /dev/null +++ b/analyses/cell-type-dsrct/renv/.gitignore @@ -0,0 +1,7 @@ +library/ +local/ +cellar/ +lock/ +python/ +sandbox/ +staging/ diff --git a/analyses/cell-type-dsrct/renv/activate.R b/analyses/cell-type-dsrct/renv/activate.R new file mode 100644 index 000000000..0eb51088a --- /dev/null +++ b/analyses/cell-type-dsrct/renv/activate.R @@ -0,0 +1,1305 @@ + +local({ + + # the requested version of renv + version <- "1.0.11" + attr(version, "sha") <- NULL + + # the project directory + project <- Sys.getenv("RENV_PROJECT") + if (!nzchar(project)) + project <- getwd() + + # use start-up diagnostics if enabled + diagnostics <- Sys.getenv("RENV_STARTUP_DIAGNOSTICS", unset = "FALSE") + if (diagnostics) { + start <- Sys.time() + profile <- tempfile("renv-startup-", fileext = ".Rprof") + utils::Rprof(profile) + on.exit({ + utils::Rprof(NULL) + elapsed <- signif(difftime(Sys.time(), start, units = "auto"), digits = 2L) + writeLines(sprintf("- renv took %s to run the autoloader.", format(elapsed))) + writeLines(sprintf("- Profile: %s", profile)) + 
print(utils::summaryRprof(profile)) + }, add = TRUE) + } + + # figure out whether the autoloader is enabled + enabled <- local({ + + # first, check config option + override <- getOption("renv.config.autoloader.enabled") + if (!is.null(override)) + return(override) + + # if we're being run in a context where R_LIBS is already set, + # don't load -- presumably we're being run as a sub-process and + # the parent process has already set up library paths for us + rcmd <- Sys.getenv("R_CMD", unset = NA) + rlibs <- Sys.getenv("R_LIBS", unset = NA) + if (!is.na(rlibs) && !is.na(rcmd)) + return(FALSE) + + # next, check environment variables + # TODO: prefer using the configuration one in the future + envvars <- c( + "RENV_CONFIG_AUTOLOADER_ENABLED", + "RENV_AUTOLOADER_ENABLED", + "RENV_ACTIVATE_PROJECT" + ) + + for (envvar in envvars) { + envval <- Sys.getenv(envvar, unset = NA) + if (!is.na(envval)) + return(tolower(envval) %in% c("true", "t", "1")) + } + + # enable by default + TRUE + + }) + + # bail if we're not enabled + if (!enabled) { + + # if we're not enabled, we might still need to manually load + # the user profile here + profile <- Sys.getenv("R_PROFILE_USER", unset = "~/.Rprofile") + if (file.exists(profile)) { + cfg <- Sys.getenv("RENV_CONFIG_USER_PROFILE", unset = "TRUE") + if (tolower(cfg) %in% c("true", "t", "1")) + sys.source(profile, envir = globalenv()) + } + + return(FALSE) + + } + + # avoid recursion + if (identical(getOption("renv.autoloader.running"), TRUE)) { + warning("ignoring recursive attempt to run renv autoloader") + return(invisible(TRUE)) + } + + # signal that we're loading renv during R startup + options(renv.autoloader.running = TRUE) + on.exit(options(renv.autoloader.running = NULL), add = TRUE) + + # signal that we've consented to use renv + options(renv.consent = TRUE) + + # load the 'utils' package eagerly -- this ensures that renv shims, which + # mask 'utils' packages, will come first on the search path + library(utils, lib.loc = 
.Library) + + # unload renv if it's already been loaded + if ("renv" %in% loadedNamespaces()) + unloadNamespace("renv") + + # load bootstrap tools + ansify <- function(text) { + if (renv_ansify_enabled()) + renv_ansify_enhanced(text) + else + renv_ansify_default(text) + } + + renv_ansify_enabled <- function() { + + override <- Sys.getenv("RENV_ANSIFY_ENABLED", unset = NA) + if (!is.na(override)) + return(as.logical(override)) + + pane <- Sys.getenv("RSTUDIO_CHILD_PROCESS_PANE", unset = NA) + if (identical(pane, "build")) + return(FALSE) + + testthat <- Sys.getenv("TESTTHAT", unset = "false") + if (tolower(testthat) %in% "true") + return(FALSE) + + iderun <- Sys.getenv("R_CLI_HAS_HYPERLINK_IDE_RUN", unset = "false") + if (tolower(iderun) %in% "false") + return(FALSE) + + TRUE + + } + + renv_ansify_default <- function(text) { + text + } + + renv_ansify_enhanced <- function(text) { + + # R help links + pattern <- "`\\?(renv::(?:[^`])+)`" + replacement <- "`\033]8;;ide:help:\\1\a?\\1\033]8;;\a`" + text <- gsub(pattern, replacement, text, perl = TRUE) + + # runnable code + pattern <- "`(renv::(?:[^`])+)`" + replacement <- "`\033]8;;ide:run:\\1\a\\1\033]8;;\a`" + text <- gsub(pattern, replacement, text, perl = TRUE) + + # return ansified text + text + + } + + renv_ansify_init <- function() { + + envir <- renv_envir_self() + if (renv_ansify_enabled()) + assign("ansify", renv_ansify_enhanced, envir = envir) + else + assign("ansify", renv_ansify_default, envir = envir) + + } + + `%||%` <- function(x, y) { + if (is.null(x)) y else x + } + + catf <- function(fmt, ..., appendLF = TRUE) { + + quiet <- getOption("renv.bootstrap.quiet", default = FALSE) + if (quiet) + return(invisible()) + + msg <- sprintf(fmt, ...) + cat(msg, file = stdout(), sep = if (appendLF) "\n" else "") + + invisible(msg) + + } + + header <- function(label, + ..., + prefix = "#", + suffix = "-", + n = min(getOption("width"), 78)) + { + label <- sprintf(label, ...) 
+ n <- max(n - nchar(label) - nchar(prefix) - 2L, 8L) + if (n <= 0) + return(paste(prefix, label)) + + tail <- paste(rep.int(suffix, n), collapse = "") + paste0(prefix, " ", label, " ", tail) + + } + + heredoc <- function(text, leave = 0) { + + # remove leading, trailing whitespace + trimmed <- gsub("^\\s*\\n|\\n\\s*$", "", text) + + # split into lines + lines <- strsplit(trimmed, "\n", fixed = TRUE)[[1L]] + + # compute common indent + indent <- regexpr("[^[:space:]]", lines) + common <- min(setdiff(indent, -1L)) - leave + text <- paste(substring(lines, common), collapse = "\n") + + # substitute in ANSI links for executable renv code + ansify(text) + + } + + startswith <- function(string, prefix) { + substring(string, 1, nchar(prefix)) == prefix + } + + bootstrap <- function(version, library) { + + friendly <- renv_bootstrap_version_friendly(version) + section <- header(sprintf("Bootstrapping renv %s", friendly)) + catf(section) + + # attempt to download renv + catf("- Downloading renv ... ", appendLF = FALSE) + withCallingHandlers( + tarball <- renv_bootstrap_download(version), + error = function(err) { + catf("FAILED") + stop("failed to download:\n", conditionMessage(err)) + } + ) + catf("OK") + on.exit(unlink(tarball), add = TRUE) + + # now attempt to install + catf("- Installing renv ... 
", appendLF = FALSE) + withCallingHandlers( + status <- renv_bootstrap_install(version, tarball, library), + error = function(err) { + catf("FAILED") + stop("failed to install:\n", conditionMessage(err)) + } + ) + catf("OK") + + # add empty line to break up bootstrapping from normal output + catf("") + + return(invisible()) + } + + renv_bootstrap_tests_running <- function() { + getOption("renv.tests.running", default = FALSE) + } + + renv_bootstrap_repos <- function() { + + # get CRAN repository + cran <- getOption("renv.repos.cran", "https://cloud.r-project.org") + + # check for repos override + repos <- Sys.getenv("RENV_CONFIG_REPOS_OVERRIDE", unset = NA) + if (!is.na(repos)) { + + # check for RSPM; if set, use a fallback repository for renv + rspm <- Sys.getenv("RSPM", unset = NA) + if (identical(rspm, repos)) + repos <- c(RSPM = rspm, CRAN = cran) + + return(repos) + + } + + # check for lockfile repositories + repos <- tryCatch(renv_bootstrap_repos_lockfile(), error = identity) + if (!inherits(repos, "error") && length(repos)) + return(repos) + + # retrieve current repos + repos <- getOption("repos") + + # ensure @CRAN@ entries are resolved + repos[repos == "@CRAN@"] <- cran + + # add in renv.bootstrap.repos if set + default <- c(FALLBACK = "https://cloud.r-project.org") + extra <- getOption("renv.bootstrap.repos", default = default) + repos <- c(repos, extra) + + # remove duplicates that might've snuck in + dupes <- duplicated(repos) | duplicated(names(repos)) + repos[!dupes] + + } + + renv_bootstrap_repos_lockfile <- function() { + + lockpath <- Sys.getenv("RENV_PATHS_LOCKFILE", unset = "renv.lock") + if (!file.exists(lockpath)) + return(NULL) + + lockfile <- tryCatch(renv_json_read(lockpath), error = identity) + if (inherits(lockfile, "error")) { + warning(lockfile) + return(NULL) + } + + repos <- lockfile$R$Repositories + if (length(repos) == 0) + return(NULL) + + keys <- vapply(repos, `[[`, "Name", FUN.VALUE = character(1)) + vals <- vapply(repos, `[[`, 
"URL", FUN.VALUE = character(1)) + names(vals) <- keys + + return(vals) + + } + + renv_bootstrap_download <- function(version) { + + sha <- attr(version, "sha", exact = TRUE) + + methods <- if (!is.null(sha)) { + + # attempting to bootstrap a development version of renv + c( + function() renv_bootstrap_download_tarball(sha), + function() renv_bootstrap_download_github(sha) + ) + + } else { + + # attempting to bootstrap a release version of renv + c( + function() renv_bootstrap_download_tarball(version), + function() renv_bootstrap_download_cran_latest(version), + function() renv_bootstrap_download_cran_archive(version) + ) + + } + + for (method in methods) { + path <- tryCatch(method(), error = identity) + if (is.character(path) && file.exists(path)) + return(path) + } + + stop("All download methods failed") + + } + + renv_bootstrap_download_impl <- function(url, destfile) { + + mode <- "wb" + + # https://bugs.r-project.org/bugzilla/show_bug.cgi?id=17715 + fixup <- + Sys.info()[["sysname"]] == "Windows" && + substring(url, 1L, 5L) == "file:" + + if (fixup) + mode <- "w+b" + + args <- list( + url = url, + destfile = destfile, + mode = mode, + quiet = TRUE + ) + + if ("headers" %in% names(formals(utils::download.file))) { + headers <- renv_bootstrap_download_custom_headers(url) + if (length(headers) && is.character(headers)) + args$headers <- headers + } + + do.call(utils::download.file, args) + + } + + renv_bootstrap_download_custom_headers <- function(url) { + + headers <- getOption("renv.download.headers") + if (is.null(headers)) + return(character()) + + if (!is.function(headers)) + stopf("'renv.download.headers' is not a function") + + headers <- headers(url) + if (length(headers) == 0L) + return(character()) + + if (is.list(headers)) + headers <- unlist(headers, recursive = FALSE, use.names = TRUE) + + ok <- + is.character(headers) && + is.character(names(headers)) && + all(nzchar(names(headers))) + + if (!ok) + stop("invocation of 'renv.download.headers' did 
not return a named character vector") + + headers + + } + + renv_bootstrap_download_cran_latest <- function(version) { + + spec <- renv_bootstrap_download_cran_latest_find(version) + type <- spec$type + repos <- spec$repos + + baseurl <- utils::contrib.url(repos = repos, type = type) + ext <- if (identical(type, "source")) + ".tar.gz" + else if (Sys.info()[["sysname"]] == "Windows") + ".zip" + else + ".tgz" + name <- sprintf("renv_%s%s", version, ext) + url <- paste(baseurl, name, sep = "/") + + destfile <- file.path(tempdir(), name) + status <- tryCatch( + renv_bootstrap_download_impl(url, destfile), + condition = identity + ) + + if (inherits(status, "condition")) + return(FALSE) + + # report success and return + destfile + + } + + renv_bootstrap_download_cran_latest_find <- function(version) { + + # check whether binaries are supported on this system + binary <- + getOption("renv.bootstrap.binary", default = TRUE) && + !identical(.Platform$pkgType, "source") && + !identical(getOption("pkgType"), "source") && + Sys.info()[["sysname"]] %in% c("Darwin", "Windows") + + types <- c(if (binary) "binary", "source") + + # iterate over types + repositories + for (type in types) { + for (repos in renv_bootstrap_repos()) { + + # build arguments for utils::available.packages() call + args <- list(type = type, repos = repos) + + # add custom headers if available -- note that + # utils::available.packages() will pass this to download.file() + if ("headers" %in% names(formals(utils::download.file))) { + headers <- renv_bootstrap_download_custom_headers(repos) + if (length(headers) && is.character(headers)) + args$headers <- headers + } + + # retrieve package database + db <- tryCatch( + as.data.frame( + do.call(utils::available.packages, args), + stringsAsFactors = FALSE + ), + error = identity + ) + + if (inherits(db, "error")) + next + + # check for compatible entry + entry <- db[db$Package %in% "renv" & db$Version %in% version, ] + if (nrow(entry) == 0) + next + + # found 
it; return spec to caller + spec <- list(entry = entry, type = type, repos = repos) + return(spec) + + } + } + + # if we got here, we failed to find renv + fmt <- "renv %s is not available from your declared package repositories" + stop(sprintf(fmt, version)) + + } + + renv_bootstrap_download_cran_archive <- function(version) { + + name <- sprintf("renv_%s.tar.gz", version) + repos <- renv_bootstrap_repos() + urls <- file.path(repos, "src/contrib/Archive/renv", name) + destfile <- file.path(tempdir(), name) + + for (url in urls) { + + status <- tryCatch( + renv_bootstrap_download_impl(url, destfile), + condition = identity + ) + + if (identical(status, 0L)) + return(destfile) + + } + + return(FALSE) + + } + + renv_bootstrap_download_tarball <- function(version) { + + # if the user has provided the path to a tarball via + # an environment variable, then use it + tarball <- Sys.getenv("RENV_BOOTSTRAP_TARBALL", unset = NA) + if (is.na(tarball)) + return() + + # allow directories + if (dir.exists(tarball)) { + name <- sprintf("renv_%s.tar.gz", version) + tarball <- file.path(tarball, name) + } + + # bail if it doesn't exist + if (!file.exists(tarball)) { + + # let the user know we weren't able to honour their request + fmt <- "- RENV_BOOTSTRAP_TARBALL is set (%s) but does not exist." 
+ msg <- sprintf(fmt, tarball) + warning(msg) + + # bail + return() + + } + + catf("- Using local tarball '%s'.", tarball) + tarball + + } + + renv_bootstrap_github_token <- function() { + for (envvar in c("GITHUB_TOKEN", "GITHUB_PAT", "GH_TOKEN")) { + envval <- Sys.getenv(envvar, unset = NA) + if (!is.na(envval)) + return(envval) + } + } + + renv_bootstrap_download_github <- function(version) { + + enabled <- Sys.getenv("RENV_BOOTSTRAP_FROM_GITHUB", unset = "TRUE") + if (!identical(enabled, "TRUE")) + return(FALSE) + + # prepare download options + token <- renv_bootstrap_github_token() + if (nzchar(Sys.which("curl")) && nzchar(token)) { + fmt <- "--location --fail --header \"Authorization: token %s\"" + extra <- sprintf(fmt, token) + saved <- options("download.file.method", "download.file.extra") + options(download.file.method = "curl", download.file.extra = extra) + on.exit(do.call(base::options, saved), add = TRUE) + } else if (nzchar(Sys.which("wget")) && nzchar(token)) { + fmt <- "--header=\"Authorization: token %s\"" + extra <- sprintf(fmt, token) + saved <- options("download.file.method", "download.file.extra") + options(download.file.method = "wget", download.file.extra = extra) + on.exit(do.call(base::options, saved), add = TRUE) + } + + url <- file.path("https://api.github.com/repos/rstudio/renv/tarball", version) + name <- sprintf("renv_%s.tar.gz", version) + destfile <- file.path(tempdir(), name) + + status <- tryCatch( + renv_bootstrap_download_impl(url, destfile), + condition = identity + ) + + if (!identical(status, 0L)) + return(FALSE) + + renv_bootstrap_download_augment(destfile) + + return(destfile) + + } + + # Add Sha to DESCRIPTION. This is stop gap until #890, after which we + # can use renv::install() to fully capture metadata. 
+ renv_bootstrap_download_augment <- function(destfile) { + sha <- renv_bootstrap_git_extract_sha1_tar(destfile) + if (is.null(sha)) { + return() + } + + # Untar + tempdir <- tempfile("renv-github-") + on.exit(unlink(tempdir, recursive = TRUE), add = TRUE) + untar(destfile, exdir = tempdir) + pkgdir <- dir(tempdir, full.names = TRUE)[[1]] + + # Modify description + desc_path <- file.path(pkgdir, "DESCRIPTION") + desc_lines <- readLines(desc_path) + remotes_fields <- c( + "RemoteType: github", + "RemoteHost: api.github.com", + "RemoteRepo: renv", + "RemoteUsername: rstudio", + "RemotePkgRef: rstudio/renv", + paste("RemoteRef: ", sha), + paste("RemoteSha: ", sha) + ) + writeLines(c(desc_lines[desc_lines != ""], remotes_fields), con = desc_path) + + # Re-tar + local({ + old <- setwd(tempdir) + on.exit(setwd(old), add = TRUE) + + tar(destfile, compression = "gzip") + }) + invisible() + } + + # Extract the commit hash from a git archive. Git archives include the SHA1 + # hash as the comment field of the tarball pax extended header + # (see https://www.kernel.org/pub/software/scm/git/docs/git-archive.html) + # For GitHub archives this should be the first header after the default one + # (512 byte) header. 
+ renv_bootstrap_git_extract_sha1_tar <- function(bundle) { + + # open the bundle for reading + # We use gzcon for everything because (from ?gzcon) + # > Reading from a connection which does not supply a 'gzip' magic + # > header is equivalent to reading from the original connection + conn <- gzcon(file(bundle, open = "rb", raw = TRUE)) + on.exit(close(conn)) + + # The default pax header is 512 bytes long and the first pax extended header + # with the comment should be 51 bytes long + # `52 comment=` (11 chars) + 40 byte SHA1 hash + len <- 0x200 + 0x33 + res <- rawToChar(readBin(conn, "raw", n = len)[0x201:len]) + + if (grepl("^52 comment=", res)) { + sub("52 comment=", "", res) + } else { + NULL + } + } + + renv_bootstrap_install <- function(version, tarball, library) { + + # attempt to install it into project library + dir.create(library, showWarnings = FALSE, recursive = TRUE) + output <- renv_bootstrap_install_impl(library, tarball) + + # check for successful install + status <- attr(output, "status") + if (is.null(status) || identical(status, 0L)) + return(status) + + # an error occurred; report it + header <- "installation of renv failed" + lines <- paste(rep.int("=", nchar(header)), collapse = "") + text <- paste(c(header, lines, output), collapse = "\n") + stop(text) + + } + + renv_bootstrap_install_impl <- function(library, tarball) { + + # invoke using system2 so we can capture and report output + bin <- R.home("bin") + exe <- if (Sys.info()[["sysname"]] == "Windows") "R.exe" else "R" + R <- file.path(bin, exe) + + args <- c( + "--vanilla", "CMD", "INSTALL", "--no-multiarch", + "-l", shQuote(path.expand(library)), + shQuote(path.expand(tarball)) + ) + + system2(R, args, stdout = TRUE, stderr = TRUE) + + } + + renv_bootstrap_platform_prefix <- function() { + + # construct version prefix + version <- paste(R.version$major, R.version$minor, sep = ".") + prefix <- paste("R", numeric_version(version)[1, 1:2], sep = "-") + + # include SVN revision for 
development versions of R + # (to avoid sharing platform-specific artefacts with released versions of R) + devel <- + identical(R.version[["status"]], "Under development (unstable)") || + identical(R.version[["nickname"]], "Unsuffered Consequences") + + if (devel) + prefix <- paste(prefix, R.version[["svn rev"]], sep = "-r") + + # build list of path components + components <- c(prefix, R.version$platform) + + # include prefix if provided by user + prefix <- renv_bootstrap_platform_prefix_impl() + if (!is.na(prefix) && nzchar(prefix)) + components <- c(prefix, components) + + # build prefix + paste(components, collapse = "/") + + } + + renv_bootstrap_platform_prefix_impl <- function() { + + # if an explicit prefix has been supplied, use it + prefix <- Sys.getenv("RENV_PATHS_PREFIX", unset = NA) + if (!is.na(prefix)) + return(prefix) + + # if the user has requested an automatic prefix, generate it + auto <- Sys.getenv("RENV_PATHS_PREFIX_AUTO", unset = NA) + if (is.na(auto) && getRversion() >= "4.4.0") + auto <- "TRUE" + + if (auto %in% c("TRUE", "True", "true", "1")) + return(renv_bootstrap_platform_prefix_auto()) + + # empty string on failure + "" + + } + + renv_bootstrap_platform_prefix_auto <- function() { + + prefix <- tryCatch(renv_bootstrap_platform_os(), error = identity) + if (inherits(prefix, "error") || prefix %in% "unknown") { + + msg <- paste( + "failed to infer current operating system", + "please file a bug report at https://github.com/rstudio/renv/issues", + sep = "; " + ) + + warning(msg) + + } + + prefix + + } + + renv_bootstrap_platform_os <- function() { + + sysinfo <- Sys.info() + sysname <- sysinfo[["sysname"]] + + # handle Windows + macOS up front + if (sysname == "Windows") + return("windows") + else if (sysname == "Darwin") + return("macos") + + # check for os-release files + for (file in c("/etc/os-release", "/usr/lib/os-release")) + if (file.exists(file)) + return(renv_bootstrap_platform_os_via_os_release(file, sysinfo)) + + # check for 
redhat-release files + if (file.exists("/etc/redhat-release")) + return(renv_bootstrap_platform_os_via_redhat_release()) + + "unknown" + + } + + renv_bootstrap_platform_os_via_os_release <- function(file, sysinfo) { + + # read /etc/os-release + release <- utils::read.table( + file = file, + sep = "=", + quote = c("\"", "'"), + col.names = c("Key", "Value"), + comment.char = "#", + stringsAsFactors = FALSE + ) + + vars <- as.list(release$Value) + names(vars) <- release$Key + + # get os name + os <- tolower(sysinfo[["sysname"]]) + + # read id + id <- "unknown" + for (field in c("ID", "ID_LIKE")) { + if (field %in% names(vars) && nzchar(vars[[field]])) { + id <- vars[[field]] + break + } + } + + # read version + version <- "unknown" + for (field in c("UBUNTU_CODENAME", "VERSION_CODENAME", "VERSION_ID", "BUILD_ID")) { + if (field %in% names(vars) && nzchar(vars[[field]])) { + version <- vars[[field]] + break + } + } + + # join together + paste(c(os, id, version), collapse = "-") + + } + + renv_bootstrap_platform_os_via_redhat_release <- function() { + + # read /etc/redhat-release + contents <- readLines("/etc/redhat-release", warn = FALSE) + + # infer id + id <- if (grepl("centos", contents, ignore.case = TRUE)) + "centos" + else if (grepl("redhat", contents, ignore.case = TRUE)) + "redhat" + else + "unknown" + + # try to find a version component (very hacky) + version <- "unknown" + + parts <- strsplit(contents, "[[:space:]]")[[1L]] + for (part in parts) { + + nv <- tryCatch(numeric_version(part), error = identity) + if (inherits(nv, "error")) + next + + version <- nv[1, 1] + break + + } + + paste(c("linux", id, version), collapse = "-") + + } + + renv_bootstrap_library_root_name <- function(project) { + + # use project name as-is if requested + asis <- Sys.getenv("RENV_PATHS_LIBRARY_ROOT_ASIS", unset = "FALSE") + if (asis) + return(basename(project)) + + # otherwise, disambiguate based on project's path + id <- substring(renv_bootstrap_hash_text(project), 1L, 8L) + 
paste(basename(project), id, sep = "-") + + } + + renv_bootstrap_library_root <- function(project) { + + prefix <- renv_bootstrap_profile_prefix() + + path <- Sys.getenv("RENV_PATHS_LIBRARY", unset = NA) + if (!is.na(path)) + return(paste(c(path, prefix), collapse = "/")) + + path <- renv_bootstrap_library_root_impl(project) + if (!is.null(path)) { + name <- renv_bootstrap_library_root_name(project) + return(paste(c(path, prefix, name), collapse = "/")) + } + + renv_bootstrap_paths_renv("library", project = project) + + } + + renv_bootstrap_library_root_impl <- function(project) { + + root <- Sys.getenv("RENV_PATHS_LIBRARY_ROOT", unset = NA) + if (!is.na(root)) + return(root) + + type <- renv_bootstrap_project_type(project) + if (identical(type, "package")) { + userdir <- renv_bootstrap_user_dir() + return(file.path(userdir, "library")) + } + + } + + renv_bootstrap_validate_version <- function(version, description = NULL) { + + # resolve description file + # + # avoid passing lib.loc to `packageDescription()` below, since R will + # use the loaded version of the package by default anyhow. 
note that + # this function should only be called after 'renv' is loaded + # https://github.com/rstudio/renv/issues/1625 + description <- description %||% packageDescription("renv") + + # check whether requested version 'version' matches loaded version of renv + sha <- attr(version, "sha", exact = TRUE) + valid <- if (!is.null(sha)) + renv_bootstrap_validate_version_dev(sha, description) + else + renv_bootstrap_validate_version_release(version, description) + + if (valid) + return(TRUE) + + # the loaded version of renv doesn't match the requested version; + # give the user instructions on how to proceed + dev <- identical(description[["RemoteType"]], "github") + remote <- if (dev) + paste("rstudio/renv", description[["RemoteSha"]], sep = "@") + else + paste("renv", description[["Version"]], sep = "@") + + # display both loaded version + sha if available + friendly <- renv_bootstrap_version_friendly( + version = description[["Version"]], + sha = if (dev) description[["RemoteSha"]] + ) + + fmt <- heredoc(" + renv %1$s was loaded from project library, but this project is configured to use renv %2$s. + - Use `renv::record(\"%3$s\")` to record renv %1$s in the lockfile. + - Use `renv::restore(packages = \"renv\")` to install renv %2$s into the project library. 
+ ") + catf(fmt, friendly, renv_bootstrap_version_friendly(version), remote) + + FALSE + + } + + renv_bootstrap_validate_version_dev <- function(version, description) { + expected <- description[["RemoteSha"]] + is.character(expected) && startswith(expected, version) + } + + renv_bootstrap_validate_version_release <- function(version, description) { + expected <- description[["Version"]] + is.character(expected) && identical(expected, version) + } + + renv_bootstrap_hash_text <- function(text) { + + hashfile <- tempfile("renv-hash-") + on.exit(unlink(hashfile), add = TRUE) + + writeLines(text, con = hashfile) + tools::md5sum(hashfile) + + } + + renv_bootstrap_load <- function(project, libpath, version) { + + # try to load renv from the project library + if (!requireNamespace("renv", lib.loc = libpath, quietly = TRUE)) + return(FALSE) + + # warn if the version of renv loaded does not match + renv_bootstrap_validate_version(version) + + # execute renv load hooks, if any + hooks <- getHook("renv::autoload") + for (hook in hooks) + if (is.function(hook)) + tryCatch(hook(), error = warnify) + + # load the project + renv::load(project) + + TRUE + + } + + renv_bootstrap_profile_load <- function(project) { + + # if RENV_PROFILE is already set, just use that + profile <- Sys.getenv("RENV_PROFILE", unset = NA) + if (!is.na(profile) && nzchar(profile)) + return(profile) + + # check for a profile file (nothing to do if it doesn't exist) + path <- renv_bootstrap_paths_renv("profile", profile = FALSE, project = project) + if (!file.exists(path)) + return(NULL) + + # read the profile, and set it if it exists + contents <- readLines(path, warn = FALSE) + if (length(contents) == 0L) + return(NULL) + + # set RENV_PROFILE + profile <- contents[[1L]] + if (!profile %in% c("", "default")) + Sys.setenv(RENV_PROFILE = profile) + + profile + + } + + renv_bootstrap_profile_prefix <- function() { + profile <- renv_bootstrap_profile_get() + if (!is.null(profile)) + 
return(file.path("profiles", profile, "renv")) + } + + renv_bootstrap_profile_get <- function() { + profile <- Sys.getenv("RENV_PROFILE", unset = "") + renv_bootstrap_profile_normalize(profile) + } + + renv_bootstrap_profile_set <- function(profile) { + profile <- renv_bootstrap_profile_normalize(profile) + if (is.null(profile)) + Sys.unsetenv("RENV_PROFILE") + else + Sys.setenv(RENV_PROFILE = profile) + } + + renv_bootstrap_profile_normalize <- function(profile) { + + if (is.null(profile) || profile %in% c("", "default")) + return(NULL) + + profile + + } + + renv_bootstrap_path_absolute <- function(path) { + + substr(path, 1L, 1L) %in% c("~", "/", "\\") || ( + substr(path, 1L, 1L) %in% c(letters, LETTERS) && + substr(path, 2L, 3L) %in% c(":/", ":\\") + ) + + } + + renv_bootstrap_paths_renv <- function(..., profile = TRUE, project = NULL) { + renv <- Sys.getenv("RENV_PATHS_RENV", unset = "renv") + root <- if (renv_bootstrap_path_absolute(renv)) NULL else project + prefix <- if (profile) renv_bootstrap_profile_prefix() + components <- c(root, renv, prefix, ...) 
+ paste(components, collapse = "/") + } + + renv_bootstrap_project_type <- function(path) { + + descpath <- file.path(path, "DESCRIPTION") + if (!file.exists(descpath)) + return("unknown") + + desc <- tryCatch( + read.dcf(descpath, all = TRUE), + error = identity + ) + + if (inherits(desc, "error")) + return("unknown") + + type <- desc$Type + if (!is.null(type)) + return(tolower(type)) + + package <- desc$Package + if (!is.null(package)) + return("package") + + "unknown" + + } + + renv_bootstrap_user_dir <- function() { + dir <- renv_bootstrap_user_dir_impl() + path.expand(chartr("\\", "/", dir)) + } + + renv_bootstrap_user_dir_impl <- function() { + + # use local override if set + override <- getOption("renv.userdir.override") + if (!is.null(override)) + return(override) + + # use R_user_dir if available + tools <- asNamespace("tools") + if (is.function(tools$R_user_dir)) + return(tools$R_user_dir("renv", "cache")) + + # try using our own backfill for older versions of R + envvars <- c("R_USER_CACHE_DIR", "XDG_CACHE_HOME") + for (envvar in envvars) { + root <- Sys.getenv(envvar, unset = NA) + if (!is.na(root)) + return(file.path(root, "R/renv")) + } + + # use platform-specific default fallbacks + if (Sys.info()[["sysname"]] == "Windows") + file.path(Sys.getenv("LOCALAPPDATA"), "R/cache/R/renv") + else if (Sys.info()[["sysname"]] == "Darwin") + "~/Library/Caches/org.R-project.R/R/renv" + else + "~/.cache/R/renv" + + } + + renv_bootstrap_version_friendly <- function(version, shafmt = NULL, sha = NULL) { + sha <- sha %||% attr(version, "sha", exact = TRUE) + parts <- c(version, sprintf(shafmt %||% " [sha: %s]", substring(sha, 1L, 7L))) + paste(parts, collapse = "") + } + + renv_bootstrap_exec <- function(project, libpath, version) { + if (!renv_bootstrap_load(project, libpath, version)) + renv_bootstrap_run(version, libpath) + } + + renv_bootstrap_run <- function(version, libpath) { + + # perform bootstrap + bootstrap(version, libpath) + + # exit early if we're just 
testing bootstrap + if (!is.na(Sys.getenv("RENV_BOOTSTRAP_INSTALL_ONLY", unset = NA))) + return(TRUE) + + # try again to load + if (requireNamespace("renv", lib.loc = libpath, quietly = TRUE)) { + return(renv::load(project = getwd())) + } + + # failed to download or load renv; warn the user + msg <- c( + "Failed to find an renv installation: the project will not be loaded.", + "Use `renv::activate()` to re-initialize the project." + ) + + warning(paste(msg, collapse = "\n"), call. = FALSE) + + } + + renv_json_read <- function(file = NULL, text = NULL) { + + jlerr <- NULL + + # if jsonlite is loaded, use that instead + if ("jsonlite" %in% loadedNamespaces()) { + + json <- tryCatch(renv_json_read_jsonlite(file, text), error = identity) + if (!inherits(json, "error")) + return(json) + + jlerr <- json + + } + + # otherwise, fall back to the default JSON reader + json <- tryCatch(renv_json_read_default(file, text), error = identity) + if (!inherits(json, "error")) + return(json) + + # report an error + if (!is.null(jlerr)) + stop(jlerr) + else + stop(json) + + } + + renv_json_read_jsonlite <- function(file = NULL, text = NULL) { + text <- paste(text %||% readLines(file, warn = FALSE), collapse = "\n") + jsonlite::fromJSON(txt = text, simplifyVector = FALSE) + } + + renv_json_read_default <- function(file = NULL, text = NULL) { + + # find strings in the JSON + text <- paste(text %||% readLines(file, warn = FALSE), collapse = "\n") + pattern <- '["](?:(?:\\\\.)|(?:[^"\\\\]))*?["]' + locs <- gregexpr(pattern, text, perl = TRUE)[[1]] + + # if any are found, replace them with placeholders + replaced <- text + strings <- character() + replacements <- character() + + if (!identical(c(locs), -1L)) { + + # get the string values + starts <- locs + ends <- locs + attr(locs, "match.length") - 1L + strings <- substring(text, starts, ends) + + # only keep those requiring escaping + strings <- grep("[[\\]{}:]", strings, perl = TRUE, value = TRUE) + + # compute replacements + 
replacements <- sprintf('"\032%i\032"', seq_along(strings)) + + # replace the strings + mapply(function(string, replacement) { + replaced <<- sub(string, replacement, replaced, fixed = TRUE) + }, strings, replacements) + + } + + # transform the JSON into something the R parser understands + transformed <- replaced + transformed <- gsub("{}", "`names<-`(list(), character())", transformed, fixed = TRUE) + transformed <- gsub("[[{]", "list(", transformed, perl = TRUE) + transformed <- gsub("[]}]", ")", transformed, perl = TRUE) + transformed <- gsub(":", "=", transformed, fixed = TRUE) + text <- paste(transformed, collapse = "\n") + + # parse it + json <- parse(text = text, keep.source = FALSE, srcfile = NULL)[[1L]] + + # construct map between source strings, replaced strings + map <- as.character(parse(text = strings)) + names(map) <- as.character(parse(text = replacements)) + + # convert to list + map <- as.list(map) + + # remap strings in object + remapped <- renv_json_read_remap(json, map) + + # evaluate + eval(remapped, envir = baseenv()) + + } + + renv_json_read_remap <- function(json, map) { + + # fix names + if (!is.null(names(json))) { + lhs <- match(names(json), names(map), nomatch = 0L) + rhs <- match(names(map), names(json), nomatch = 0L) + names(json)[rhs] <- map[lhs] + } + + # fix values + if (is.character(json)) + return(map[[json]] %||% json) + + # handle true, false, null + if (is.name(json)) { + text <- as.character(json) + if (text == "true") + return(TRUE) + else if (text == "false") + return(FALSE) + else if (text == "null") + return(NULL) + } + + # recurse + if (is.recursive(json)) { + for (i in seq_along(json)) { + json[i] <- list(renv_json_read_remap(json[[i]], map)) + } + } + + json + + } + + # load the renv profile, if any + renv_bootstrap_profile_load(project) + + # construct path to library root + root <- renv_bootstrap_library_root(project) + + # construct library prefix for platform + prefix <- renv_bootstrap_platform_prefix() + + # 
construct full libpath + libpath <- file.path(root, prefix) + + # run bootstrap code + renv_bootstrap_exec(project, libpath, version) + + invisible() + +}) diff --git a/analyses/cell-type-dsrct/renv/settings.json b/analyses/cell-type-dsrct/renv/settings.json new file mode 100644 index 000000000..ffdbb3200 --- /dev/null +++ b/analyses/cell-type-dsrct/renv/settings.json @@ -0,0 +1,19 @@ +{ + "bioconductor.version": null, + "external.libraries": [], + "ignored.packages": [], + "package.dependency.fields": [ + "Imports", + "Depends", + "LinkingTo" + ], + "ppm.enabled": null, + "ppm.ignored.urls": [], + "r.version": null, + "snapshot.type": "implicit", + "use.cache": true, + "vcs.ignore.cellar": true, + "vcs.ignore.library": true, + "vcs.ignore.local": true, + "vcs.manage.ignores": true +} diff --git a/analyses/cell-type-wilms-tumor-14/Dockerfile b/analyses/cell-type-wilms-tumor-14/Dockerfile index 80010ba65..36b0900c1 100644 --- a/analyses/cell-type-wilms-tumor-14/Dockerfile +++ b/analyses/cell-type-wilms-tumor-14/Dockerfile @@ -1,10 +1,35 @@ -# A template docker file for creating a new analysis -FROM ubuntu:22.04 + +FROM bioconductor/r-ver:3.19 # Labels following the Open Containers Initiative (OCI) recommendations # For more information, see https://specs.opencontainers.org/image-spec/annotations/?v=v1.0.1 +LABEL org.opencontainers.image.title="openscpca/cell-type-wilms-tumor-14" +LABEL org.opencontainers.image.description="Docker image for the OpenScPCA analysis module 'cell-type-wilms-tumor-14'" LABEL org.opencontainers.image.authors="OpenScPCA scpca@ccdatalab.org" -LABEL org.opencontainers.image.source="https://github.com/AlexsLemonade/OpenScPCA-analysis/tree/main/templates/analysis-module" +LABEL org.opencontainers.image.source="https://github.com/AlexsLemonade/OpenScPCA-analysis/tree/main/analyses/cell-type-wilms-tumor-14" # Set an environment variable to allow checking if we are in an OpenScPCA container ENV OPENSCPCA_DOCKER=TRUE + +# Install renv +RUN Rscript 
-e "install.packages('renv')" + +# Disable the renv cache to install packages directly into the R library +ENV RENV_CONFIG_CACHE_ENABLED=FALSE + +# Copy the renv.lock file from the host environment to the image +COPY renv.lock renv.lock + +# Temporarily install Rhtslib separately +RUN Rscript -e 'BiocManager::install("Rhtslib")' + +# restore from renv.lock file and clean up to reduce image size +RUN Rscript -e 'renv::restore()' \ + && rm -rf ~/.cache/R/renv \ + && rm -rf /tmp/downloaded_packages \ + && rm -rf /tmp/Rtmp* + +# Complete installation of zellkonverter conda env +ENV BASILISK_EXTERNAL_DIR=/usr/local/renv/basilisk +RUN Rscript -e "proc <- basilisk::basiliskStart(env = zellkonverter::zellkonverterAnnDataEnv(), testload = 'anndata'); \ + basilisk::basiliskStop(proc)" diff --git a/analyses/cell-type-wilms-tumor-14/renv.lock b/analyses/cell-type-wilms-tumor-14/renv.lock index 1705449df..4aa13b3e2 100644 --- a/analyses/cell-type-wilms-tumor-14/renv.lock +++ b/analyses/cell-type-wilms-tumor-14/renv.lock @@ -159,6 +159,22 @@ ], "Hash": "395472c65cd9d606a1a345687102f299" }, + "DelayedMatrixStats": { + "Package": "DelayedMatrixStats", + "Version": "1.26.0", + "Source": "Bioconductor", + "Repository": "Bioconductor 3.19", + "Requirements": [ + "DelayedArray", + "IRanges", + "Matrix", + "MatrixGenerics", + "S4Vectors", + "methods", + "sparseMatrixStats" + ], + "Hash": "5d9536664ccddb0eaa68a90afe4ee76e" + }, "Deriv": { "Package": "Deriv", "Version": "4.1.6", @@ -248,6 +264,29 @@ ], "Hash": "a3c822ef3c124828e25e7a9611beeb50" }, + "HDF5Array": { + "Package": "HDF5Array", + "Version": "1.32.1", + "Source": "Bioconductor", + "Repository": "Bioconductor 3.19", + "Requirements": [ + "BiocGenerics", + "DelayedArray", + "IRanges", + "Matrix", + "R", + "Rhdf5lib", + "S4Arrays", + "S4Vectors", + "methods", + "rhdf5", + "rhdf5filters", + "stats", + "tools", + "utils" + ], + "Hash": "420012f82591a2a20156ef65d4aa210a" + }, "HiddenMarkov": { "Package": "HiddenMarkov", "Version": 
"1.8-13", @@ -531,6 +570,16 @@ ], "Hash": "c232938949fcd8126034419cc529333a" }, + "Rhdf5lib": { + "Package": "Rhdf5lib", + "Version": "1.26.0", + "Source": "Bioconductor", + "Repository": "Bioconductor 3.19", + "Requirements": [ + "R" + ], + "Hash": "c92ba8b9a2c5c9ff600a1062a3b7b727" + }, "RhpcBLASctl": { "Package": "RhpcBLASctl", "Version": "0.23-42", @@ -944,6 +993,21 @@ ], "Hash": "39d6ecdea862d961c3dfe4d4d7c57920" }, + "beachmat": { + "Package": "beachmat", + "Version": "2.20.0", + "Source": "Bioconductor", + "Repository": "Bioconductor 3.19", + "Requirements": [ + "BiocGenerics", + "DelayedArray", + "Matrix", + "Rcpp", + "SparseArray", + "methods" + ], + "Hash": "10e94b1bce9070632a40c6b873f8b2d4" + }, "bit": { "Package": "bit", "Version": "4.5.0", @@ -1753,6 +1817,32 @@ ], "Hash": "a57f0f5dbcfd0d77ad4ff33032f5dc79" }, + "glmGamPoi": { + "Package": "glmGamPoi", + "Version": "1.16.0", + "Source": "Bioconductor", + "Repository": "Bioconductor 3.19", + "Requirements": [ + "BiocGenerics", + "DelayedArray", + "DelayedMatrixStats", + "HDF5Array", + "MatrixGenerics", + "Rcpp", + "RcppArmadillo", + "SingleCellExperiment", + "SummarizedExperiment", + "beachmat", + "matrixStats", + "methods", + "rlang", + "splines", + "stats", + "utils", + "vctrs" + ], + "Hash": "21e305cf5faebb13bee698a5a1c4bced" + }, "globals": { "Package": "globals", "Version": "0.16.3", @@ -2910,6 +3000,29 @@ ], "Hash": "e1a5d04397edc1580c5e0ed1dbdccf76" }, + "rhdf5": { + "Package": "rhdf5", + "Version": "2.48.0", + "Source": "Bioconductor", + "Repository": "Bioconductor 3.19", + "Requirements": [ + "R", + "Rhdf5lib", + "methods", + "rhdf5filters" + ], + "Hash": "74d8c5aeb96d090ce8efc9ffd16afa2b" + }, + "rhdf5filters": { + "Package": "rhdf5filters", + "Version": "1.16.0", + "Source": "Bioconductor", + "Repository": "Bioconductor 3.19", + "Requirements": [ + "Rhdf5lib" + ], + "Hash": "99e15369f8fb17dc188377234de13fc6" + }, "rjags": { "Package": "rjags", "Version": "4-16", @@ -3231,6 +3344,20 @@ ], 
"Hash": "ffe1f9e95a4375530747b268f82b5086" }, + "sparseMatrixStats": { + "Package": "sparseMatrixStats", + "Version": "1.16.0", + "Source": "Bioconductor", + "Repository": "Bioconductor 3.19", + "Requirements": [ + "Matrix", + "MatrixGenerics", + "Rcpp", + "matrixStats", + "methods" + ], + "Hash": "7e500a5a527460ca0406473bdcade286" + }, "spatstat.data": { "Package": "spatstat.data", "Version": "3.1-2", diff --git a/analyses/cell-type-wilms-tumor-14/scripts/00_preprocess_reference.R b/analyses/cell-type-wilms-tumor-14/scripts/00_preprocess_reference.R index fc9908925..379987072 100644 --- a/analyses/cell-type-wilms-tumor-14/scripts/00_preprocess_reference.R +++ b/analyses/cell-type-wilms-tumor-14/scripts/00_preprocess_reference.R @@ -32,6 +32,7 @@ library(Seurat) library(ggpubr) library(zellkonverter) library(SingleCellExperiment) +library(glmGamPoi) prepare_fetal_atlas <- function(in_fetal_atlas = in_fetal_atlas, out_fetal_atlas = out_fetal_atlas, diff --git a/analyses/cell-type-wilms-tumor-14/scripts/utils/00_preprocessing_rds_functions.R b/analyses/cell-type-wilms-tumor-14/scripts/utils/00_preprocessing_rds_functions.R index 184da9f0e..17c02930c 100644 --- a/analyses/cell-type-wilms-tumor-14/scripts/utils/00_preprocessing_rds_functions.R +++ b/analyses/cell-type-wilms-tumor-14/scripts/utils/00_preprocessing_rds_functions.R @@ -1,6 +1,8 @@ library(dplyr) library(Seurat) library(ggpubr) +library(glmGamPoi) + pre_seuratobj <- function(obj, nfeatures = 500, run_harmony = TRUE, reduction = "harmony", ndims = 50, skip_logNorm = TRUE){ ######## Normalize, scale, feature selection