Skip to content

Commit

Permalink
Merge pull request #58 from AllenInstitute/update_mappings
Browse files Browse the repository at this point in the history
fix hierarchical and Seurat mappings
  • Loading branch information
UCDNJJ authored Nov 18, 2024
2 parents 1acbd66 + 5a9f2c1 commit 030f078
Show file tree
Hide file tree
Showing 27 changed files with 768 additions and 78 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: scrattch.mapping
Title: Generalized mapping of annotations from shiny taxonomy to query
data.
Version: 0.55.5
Version: 0.55.6
Authors@R:
person(given = "Nelson",
family = "Johansen",
Expand Down
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@

export(corrMap)
export(getMappingResults)
export(get_hierarchical_extended_results)
export(hierarchicalMapMyCells)
export(map_by_cor)
export(map_dend)
export(mappingClass)
export(mappingMode)
Expand All @@ -15,8 +17,10 @@ exportClasses(mappingClass)
exportMethods(getMappingResults)
import(MatrixGenerics)
import(WGCNA)
import(anndata)
import(doMC)
import(foreach)
import(jsonlite)
import(mfishtools)
import(randomForest)
import(scrattch.hicat)
98 changes: 94 additions & 4 deletions R/hierarchicalMapMyCells.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,10 @@ hierarchicalMapMyCells = function(AIT_anndata,
}

if (file.exists(extended_result_path)) {
stop(paste("ERROR. Extended result path already exists:", extended_result_path))
old_result_path <- gsub(".json","_OLD.json",extended_result_path)
file.rename(extended_result_path,old_result_path)
warning(paste0("WARNING. Extended result path already exists: ", extended_result_path,
". Overwriting this file after moving existing file to ",gsub(".json","_OLD.json",extended_result_path)))
}

taxonomy_anndata_path = file.path(AIT_anndata$uns$taxonomyDir, paste0(AIT_anndata$uns$title, ".h5ad"))
Expand Down Expand Up @@ -80,9 +83,13 @@ hierarchicalMapMyCells = function(AIT_anndata,
mapmycells_results_json$results[[hierarcy_level]]$avg_correlation
}
}

## Extract additional hierarchical results from top 5 runner-ups
runnerUps = get_hierarchical_extended_results(extended_result_path)
runnerUps = runnerUps[,unique(colnames(runnerUps))] # If there are duplicates, take the first

## Return annotations and detailed model results
return(mappingResults)
return(list("result"=mappingResults, "detail"=runnerUps))
},
error = function(e) {
errorMessage <- conditionMessage(e)
Expand Down Expand Up @@ -209,7 +216,7 @@ get_query_data_path = function(query_data, temp_folder) {
#' @param n_processors Number of independent worker processes to spin up.
#' @param normalization Normalization of the h5ad files; must be either 'raw' or 'log2CPM'.
#'
#' @return
#' @return resuls
#'
#' @keywords internal
get_mapmycells_results = function(query_data_output_path, extended_result_path,
Expand Down Expand Up @@ -242,4 +249,87 @@ list_function_params = function() {
command <- "python -m cell_type_mapper.cli.from_specified_markers --help"
output <- system(command, intern = TRUE)
cat(output, sep = "\n")
}
}



#' This function returns names and bootstrap probabilities from all top mapped cell sets
#'
#' @param extended_result_path Full file path and name where the original mapping results will be saved.
#'
#' @import jsonlite
#'
#' @return A table of the top cell set names and bootstrap probabilities from top results and runner-up results for each level of the hierarchy included in the extended_result_path file. By default, this is the top 5 most likely cell sets for either (1) multiple hierachy levels in hierarchical mapping or (2) a single hierarchy level for correlation mapping.
#'
#' @export
get_hierarchical_extended_results <- function(extended_result_path){

## Extract mapping results
mapmycells_results_json = fromJSON(extended_result_path)
cell_id = as.character(as.matrix(mapmycells_results_json$results$cell_id))

## Build mapping results dataframe
results_all=NULL
for (hierarcy_level in names(mapmycells_results_json$results)) {
if (hierarcy_level != "cell_id") {
# Pull in the information
results = list()
results[["assignment"]] =
mapmycells_results_json$results[[hierarcy_level]]$runner_up_assignment
results[["probability"]] =
mapmycells_results_json$results[[hierarcy_level]]$runner_up_probability
results[["correlation"]] =
mapmycells_results_json$results[[hierarcy_level]]$runner_up_correlation

# Determine how many runner up slots are needed
maxLen <- 0
for (i in 1:length(results[["assignment"]])){
maxLen <- max(maxLen,length(results[["assignment"]][[i]]))
}

# Create matrices for relevant info
assignment <- matrix(nrow=length(results[["assignment"]]),ncol=maxLen)
rownames(assignment) <- cell_id
probability <- correlation <- assignment
colnames(assignment) <- paste0(hierarcy_level,"_assignment_runner_up_",1:maxLen)
colnames(correlation) <- paste0(hierarcy_level,"_avg_correlation_runner_up_",1:maxLen)
colnames(probability) <- paste0(hierarcy_level,"_bootstrap_probability_runner_up_",1:maxLen)

for (i in 1:length(results[["assignment"]])){
len = length(results[["assignment"]][[i]])
if(len>0){
assignment[i,1:len] = results[["assignment"]][[i]]
correlation[i,1:len] = results[["correlation"]][[i]]
probability[i,1:len] = results[["probability"]][[i]]
}
}
assignment[is.na(assignment)] = ""
correlation[is.na(correlation)] = 0
probability[is.na(probability)] = 0

# Merge top results and runner-up results into a single data.frame
results_current <- data.frame(
XXXX = as.vector(mapmycells_results_json$results[[hierarcy_level]]$assignment),
assignment,
YYYY = as.vector(mapmycells_results_json$results[[hierarcy_level]]$bootstrapping_probability),
probability,
ZZZZ = as.vector(mapmycells_results_json$results[[hierarcy_level]]$avg_correlation),
correlation
)
colnames(results_current) <- gsub("XXXX",paste0(hierarcy_level,"_assignment_winner"),colnames(results_current))
colnames(results_current) <- gsub("YYYY",paste0(hierarcy_level,"_bootstrapping_probability_winner"),colnames(results_current))
colnames(results_current) <- gsub("ZZZZ",paste0(hierarcy_level,"_avg_correlation_winner"),colnames(results_current))
}

# Combine current results into previous results data frame
if (length(results_all)==0) {
results_all = results_current
} else {
results_all = cbind(results_all,results_current)
}
}

# Output results for the whole hierarchy
return(results_all)
}

28 changes: 25 additions & 3 deletions R/mappingClass.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ setClass(
#' This function instantiates a mappingClass S4 class object.
#'
#' @param annotations A reference taxonomy anndata object.
#' @param detailed_results The number of cells to retain per cluster (default = 100).
#' @param detailed_results a method-specific set of additional mapping output
#'
#' @examples
#' resultAnno <- mappingClass(
Expand Down Expand Up @@ -48,7 +48,7 @@ mappingClass <- function(annotations, detailed_results) {
#'
#' @export
setGeneric("getMappingResults",
function(object, scores = FALSE) standardGeneric("getMappingResults")
function(object, scores = TRUE) standardGeneric("getMappingResults")
)

#' Get cell type annotations
Expand All @@ -61,7 +61,29 @@ setGeneric("getMappingResults",
setMethod(
"getMappingResults",
signature(object="mappingClass", scores="logical"),
definition = function(object, scores = FALSE) {
definition = function(object, scores = TRUE) {
mapping.anno = object@annotations
if (!scores) {
score.cols = sapply(mapping.anno, is.numeric)
mapping.anno = mapping.anno[, !score.cols]
}
return(mapping.anno)
}
)

#' Get cell type annotations
#'
#' Extract cell type annotations from mappingClass S4 class
#'
#' @return mapping results as a data.frame with labels in map.Method and scores in score.Method
#'
#' @export
setMethod(
"getMappingResults",
signature(object="mappingClass", scores="missing"),
definition = function(object) {
# If scores not provided, set as TRUE by default. Unclear why we need a separate solution for this.
scores = TRUE
mapping.anno = object@annotations
if (!scores) {
score.cols = sapply(mapping.anno, is.numeric)
Expand Down
27 changes: 12 additions & 15 deletions R/seuratMap.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,25 +14,22 @@ seuratMap = function(AIT.anndata, query.data, dims=30, k.weight=5){
expr = {

## Build Query Seruat object
query.seurat = CreateSeuratObject(query.data[AIT.anndata$var_names[AIT.anndata$var$highly_variable_genes],])
query.seurat = SetAssayData(query.seurat, slot = "data", new.data = query.data[AIT.anndata$var_names[AIT.anndata$var$highly_variable_genes],], assay = "RNA")
use.genes = intersect(rownames(query.data),AIT.anndata$var_names[AIT.anndata$var$highly_variable_genes])
query.seurat = suppressWarnings(CreateSeuratObject(query.data[use.genes,]))
query.seurat = suppressWarnings(SetAssayData(query.seurat, slot = "data", new.data = query.data[use.genes,], assay = "RNA"))
VariableFeatures(query.seurat) <- use.genes

## Build Ref Seurat object
ref.seurat = suppressWarnings(CreateSeuratObject(t(AIT.anndata$X[,AIT.anndata$var$highly_variable_genes]), meta.data=as.data.frame(AIT.anndata$obs)));
ref.seurat = SetAssayData(ref.seurat, slot = "data", new.data = t(AIT.anndata$X[,AIT.anndata$var$highly_variable_genes]), assay = "RNA")

## Create a data list for label transfer
seurat.list <- list(ref.seurat, query.seurat)
names(seurat.list) <- c("Reference", "Query")

## Compute variable features for each object
for (i in 1:length(x = seurat.list)) VariableFeatures(seurat.list[[i]]) <- AIT.anndata$var_names[AIT.anndata$var$highly_variable_genes]
ref.data = as.matrix(BiocGenerics::t(AIT.anndata$X[,use.genes]))
ref.seurat = suppressWarnings(CreateSeuratObject(ref.data, meta.data=as.data.frame(AIT.anndata$obs)));
ref.seurat = suppressWarnings(SetAssayData(ref.seurat, slot = "data", new.data = ref.data, assay = "RNA"))
VariableFeatures(ref.seurat) <- use.genes

## Seurat label transfer (celltype)
Target.anchors <- FindTransferAnchors(reference = seurat.list[["Reference"]], query = seurat.list[["Query"]],
dims = 1:dims, verbose=FALSE, npcs=dims)
predictions <- TransferData(anchorset = Target.anchors, refdata = seurat.list[["Reference"]]$cluster_label,
dims = 1:dims, verbose=FALSE, k.weight=k.weight)
Target.anchors <- suppressWarnings(FindTransferAnchors(reference = ref.seurat, query = query.seurat,
dims = 1:dims, verbose=FALSE, npcs=dims))
predictions <- suppressWarnings(TransferData(anchorset = Target.anchors, refdata = ref.seurat$cluster_label,
dims = 1:dims, verbose=FALSE, k.weight=k.weight))
## Create results data.frame
mappingTarget = data.frame(map.Seurat=as.character(predictions$predicted.id),
score.Seurat=predictions$prediction.score.max)
Expand Down
31 changes: 24 additions & 7 deletions R/taxonomyMapping.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#'
#' @param AIT.anndata A reference taxonomy object.
#' @param query.data A logCPM normalized matrix to be annotated.
#' @param label.cols Column names of annotations to map against. Note that this only works for metadata that represent clusters or groups of clusters (e.g., subclass, supertype, neighborhood, class)
#' @param label.cols Column names of annotations to map against. Note that this only works for metadata that represent clusters or groups of clusters (e.g., subclass, supertype, neighborhood, class) and will default to whatever is included in AIT.anndata$uns$hierarchy
#' @param corr.map Should correlation mapping be performed?
#' @param tree.map Should tree mapping be performed?
#' @param seurat.map Should seurat mapping be performed?
Expand All @@ -14,11 +14,15 @@
#' @export
taxonomy_mapping = function(AIT.anndata, query.data,
corr.map=TRUE, tree.map=TRUE, hierarchical.map=TRUE, seurat.map=TRUE,
label.cols = c("cluster_label","subclass_label", "class_label")){
label.cols = AIT.anndata$uns$hierarchy){ # NOTE THE NEW DEFAULT
#label.cols = c("cluster_label","subclass_label", "class_label")){

suppressWarnings({ # wrapping the whole function in suppressWarnings to avoid having this printed a zillion times: 'useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.'

print(paste("==============================","Mapping","======================="))
print(date())
mappingResults=list()
if(sum(class(label.cols)=="list")<1) label.cols = as.character(hierarchy) # Convert from list to character for this function

## Sanity check on user input and taxonomy/reference annotations
if(!all(label.cols %in% colnames(AIT.anndata$uns$clusterInfo))){
Expand Down Expand Up @@ -50,15 +54,25 @@ taxonomy_mapping = function(AIT.anndata, query.data,

#############
## ----- Tree mapping -------------------------------------------------------------------------------
if(tree.map == TRUE & !is.null(AIT.anndata$uns$dend)){ mappingTree = treeMap(AIT.anndata, query.data); mappingResults[["Tree"]] = mappingTree[["result"]] } else{ mappingTree = NULL }
if(tree.map == TRUE & !is.null(AIT.anndata$uns$dend)){
mappingTree = treeMap(AIT.anndata, query.data);
mappingResults[["Tree"]] = mappingTree[["result"]]
} else {
mappingTree = NULL
}

#############
## ----- Seurat mapping ------------------------------------------------------------------------------
if(seurat.map == TRUE){ mappingResults[["Seurat"]] = seuratMap(AIT.anndata, query.data) } else{ mappingResults[["Seurat"]] = NULL }

#############
## ----- Hierarchical mapping ------------------------------------------------------------------------------
if(hierarchical.map == TRUE){ mappingResults[["hierarchical"]] = hierarchicalMapMyCells(AIT.anndata, query.data) } else{ mappingResults[["hierarchical"]] = NULL }
if(hierarchical.map == TRUE){
mappingHierarchical <- hierarchicalMapMyCells(AIT.anndata, query.data)
mappingResults[["hierarchical"]] <- mappingHierarchical[["result"]]
} else {
mappingHierarchical = NULL
}

#############
## Combine mapping results
Expand All @@ -80,10 +94,13 @@ taxonomy_mapping = function(AIT.anndata, query.data,

## Build mapping class object
resultAnno <- mappingClass(annotations = mappingAnno,
detailed_results = list("corr" = NA,
"tree" = mappingTree[["detail"]],
"seurat" = NA))
detailed_results = list("corr" = NA,
"tree" = mappingTree[["detail"]],
"seurat" = NA,
"hierarchical" = mappingHierarchical[["detail"]]))

## Return annotations and detailed model results
return(resultAnno)

}) # End suppressWarnings
}
28 changes: 7 additions & 21 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,34 +8,20 @@ You can find a detail description of all scrattch.mapping functions here: ![Docu

Update notes are here: ![Versions](https://github.com/AllenInstitute/scrattch-mapping/blob/dev_njj/VERSIONS.md)

## Docker

We have setup a docker environemnt for scrattch.taxonomy and scrattch.mapping that contains all the required dependencies and the current version of all scrattch packages. This docker is accessible through docker hub via: `njjai/scrattch_mapping:0.6.6`.

#### HPC usage:

##### Non-interactive
`singularity exec --cleanenv docker://njjai/scrattch_mapping:0.6.6 Rscript YOUR_CODE.R`

##### Interactive
`singularity shell --cleanenv docker://njjai/scrattch_mapping:0.6.6`


## Installation

While we advice using the provided docker, you can also install scrattch.mapping directly from github as follows:
### Using docker (recommended)
We have setup a docker environment for scrattch.taxonomy, scrattch.mapping, and scrattch.patchseq that contains all the required dependencies and the current version of all scrattch packages. **See [the readme](https://github.com/AllenInstitute/scrattch/blob/master/README.md#using-docker) for [the parent scrattch package](https://github.com/AllenInstitute/scrattch) for the most up-to-date docker information.**

*Note: slight edits to installation will be needed while repo is private. Also note that `doMC` may need to be installed manually from the download at https://r-forge.r-project.org/R/?group_id=947 if you use Windows.*
### Directly from GitHub (strongly discouraged)

```
# Quickly, but without the vignettes:
devtools::install_github("AllenInstitute/scrattch-mapping")
While we advise using the provided docker, you can also install scrattch.mapping directly from GitHub as follows:

# More slowly, but with the vignettes:
devtools::install_github("AllenInstitute/scrattch-mapping",build_vignettes=TRUE, force=TRUE)
```
devtools::install_github("AllenInstitute/scrattch.mapping")
```

Note that this strategy might not work outside the docker due to complicated dependencies. Vignettes are provided below.
This strategy **might not work** due to complicated dependencies. Also note that `doMC` may need to be installed manually from [HERE](https://r-forge.r-project.org/R/?group_id=947) if you use Windows. Vignettes are provided below.

## Usage examples

Expand Down
29 changes: 29 additions & 0 deletions VERSIONS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,32 @@
## scrattch.mapping v0.55.6

Updates to streamline and fix the examples

### Major changes

### Minor changes
* Updated examples
* Related minor bug fixes and function edits
* Updated help files

--

## scrattch.mapping v0.55.5

Major updates correcting issues with hierarchical and Seurat mapping

### Major changes
* Update `seuratMap.R` to correct Seurat mapping, by rolling back to Seurat v4.4 and largely rewriting the code
* Allowing hierarchical mapping to work on modes other than 'standard'
* Returning detailed information for hierarchical mapping (e.g., top 5 matches and bootstrap probabilities) in main results

### Minor changes
* Additional minor bug fixes
* Updated help files

--


## scrattch.mapping v0.52.2

Major change to how we return mapping results.
Expand Down
Loading

0 comments on commit 030f078

Please sign in to comment.