From 75c1ffcdb3352f9ea29d28441e02687f6942eaee Mon Sep 17 00:00:00 2001 From: Ally Hawkins Date: Fri, 22 Nov 2024 15:16:32 -0600 Subject: [PATCH 1/8] add an Rproj file --- .../cell-type-consensus/cell-type-consensus.Rproj | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 analyses/cell-type-consensus/cell-type-consensus.Rproj diff --git a/analyses/cell-type-consensus/cell-type-consensus.Rproj b/analyses/cell-type-consensus/cell-type-consensus.Rproj new file mode 100644 index 000000000..8e3c2ebc9 --- /dev/null +++ b/analyses/cell-type-consensus/cell-type-consensus.Rproj @@ -0,0 +1,13 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX From b4a681efea0f518c38fb53b48cb1527029fcc904 Mon Sep 17 00:00:00 2001 From: Ally Hawkins Date: Fri, 22 Nov 2024 15:28:09 -0600 Subject: [PATCH 2/8] update lock file --- analyses/cell-type-consensus/renv.lock | 602 +++++++++++++++++++++++++ 1 file changed, 602 insertions(+) diff --git a/analyses/cell-type-consensus/renv.lock b/analyses/cell-type-consensus/renv.lock index c0c649b29..4542a7000 100644 --- a/analyses/cell-type-consensus/renv.lock +++ b/analyses/cell-type-consensus/renv.lock @@ -2,6 +2,26 @@ "R": { "Version": "4.4.0", "Repositories": [ + { + "Name": "BioCsoft", + "URL": "https://bioconductor.org/packages/3.19/bioc" + }, + { + "Name": "BioCann", + "URL": "https://bioconductor.org/packages/3.19/data/annotation" + }, + { + "Name": "BioCexp", + "URL": "https://bioconductor.org/packages/3.19/data/experiment" + }, + { + "Name": "BioCworkflows", + "URL": "https://bioconductor.org/packages/3.19/workflows" + }, + { + "Name": "BioCbooks", + "URL": "https://bioconductor.org/packages/3.19/books" + }, { "Name": "CRAN", "URL": "https://p3m.dev/cran/latest" @@ -12,6 +32,384 @@ "Version": "3.19" }, "Packages": { + "Biobase": { + "Package": "Biobase", + 
"Version": "2.66.0", + "Source": "Bioconductor", + "Repository": "Bioconductor 3.20", + "Requirements": [ + "BiocGenerics", + "R", + "methods", + "utils" + ], + "Hash": "f6e716bdfed8acfd2d4137be7d4fa8f9" + }, + "BiocGenerics": { + "Package": "BiocGenerics", + "Version": "0.52.0", + "Source": "Bioconductor", + "Repository": "Bioconductor 3.20", + "Requirements": [ + "R", + "graphics", + "methods", + "stats", + "utils" + ], + "Hash": "3a1a587cfadcfcbf849dfc605cbbb965" + }, + "BiocManager": { + "Package": "BiocManager", + "Version": "1.30.25", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "utils" + ], + "Hash": "3aec5928ca10897d7a0a1205aae64627" + }, + "BiocVersion": { + "Package": "BiocVersion", + "Version": "3.20.0", + "Source": "Bioconductor", + "Repository": "Bioconductor 3.20", + "Requirements": [ + "R" + ], + "Hash": "3c70eb3b78929c0ee452350cea8432a5" + }, + "R6": { + "Package": "R6", + "Version": "2.5.1", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R" + ], + "Hash": "470851b6d5d0ac559e9d01bb352b4021" + }, + "askpass": { + "Package": "askpass", + "Version": "1.2.1", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "sys" + ], + "Hash": "c39f4155b3ceb1a9a2799d700fbd4b6a" + }, + "bit": { + "Package": "bit", + "Version": "4.5.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "5dc7b2677d65d0e874fc4aaf0e879987" + }, + "bit64": { + "Package": "bit64", + "Version": "4.5.2", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "bit", + "methods", + "stats", + "utils" + ], + "Hash": "e84984bf5f12a18628d9a02322128dfd" + }, + "cli": { + "Package": "cli", + "Version": "3.6.3", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "utils" + ], + "Hash": "b21916dd77a27642b447374a5d30ecf3" + }, + "clipr": { + "Package": "clipr", + "Version": "0.8.0", + "Source": "Repository", + "Repository": "RSPM", + 
"Requirements": [ + "utils" + ], + "Hash": "3f038e5ac7f41d4ac41ce658c85e3042" + }, + "cpp11": { + "Package": "cpp11", + "Version": "0.5.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "91570bba75d0c9d3f1040c835cee8fba" + }, + "crayon": { + "Package": "crayon", + "Version": "1.5.3", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "grDevices", + "methods", + "utils" + ], + "Hash": "859d96e65ef198fd43e82b9628d593ef" + }, + "curl": { + "Package": "curl", + "Version": "6.0.1", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R" + ], + "Hash": "e8ba62486230951fcd2b881c5be23f96" + }, + "dplyr": { + "Package": "dplyr", + "Version": "1.1.4", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "R6", + "cli", + "generics", + "glue", + "lifecycle", + "magrittr", + "methods", + "pillar", + "rlang", + "tibble", + "tidyselect", + "utils", + "vctrs" + ], + "Hash": "fedd9d00c2944ff00a0e2696ccf048ec" + }, + "fansi": { + "Package": "fansi", + "Version": "1.0.6", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "grDevices", + "utils" + ], + "Hash": "962174cf2aeb5b9eea581522286a911f" + }, + "generics": { + "Package": "generics", + "Version": "0.1.3", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "methods" + ], + "Hash": "15e9634c0fcd294799e9b2e929ed1b86" + }, + "glue": { + "Package": "glue", + "Version": "1.8.0", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "methods" + ], + "Hash": "5899f1eaa825580172bb56c08266f37c" + }, + "hms": { + "Package": "hms", + "Version": "1.1.3", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "lifecycle", + "methods", + "pkgconfig", + "rlang", + "vctrs" + ], + "Hash": "b59377caa7ed00fa41808342002138f9" + }, + "httr2": { + "Package": "httr2", + "Version": "1.0.6", + "Source": "Repository", + "Repository": "RSPM", + 
"Requirements": [ + "R", + "R6", + "cli", + "curl", + "glue", + "lifecycle", + "magrittr", + "openssl", + "rappdirs", + "rlang", + "vctrs", + "withr" + ], + "Hash": "3ef5d07ec78803475a94367d71b40c41" + }, + "jsonlite": { + "Package": "jsonlite", + "Version": "1.8.9", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "methods" + ], + "Hash": "4e993b65c2c3ffbffce7bb3e2c6f832b" + }, + "lifecycle": { + "Package": "lifecycle", + "Version": "1.0.4", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "cli", + "glue", + "rlang" + ], + "Hash": "b8552d117e1b808b09a832f589b79035" + }, + "magrittr": { + "Package": "magrittr", + "Version": "2.0.3", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R" + ], + "Hash": "7ce2733a9826b3aeb1775d56fd305472" + }, + "openssl": { + "Package": "openssl", + "Version": "2.2.2", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "askpass" + ], + "Hash": "d413e0fef796c9401a4419485f709ca1" + }, + "pillar": { + "Package": "pillar", + "Version": "1.9.0", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "cli", + "fansi", + "glue", + "lifecycle", + "rlang", + "utf8", + "utils", + "vctrs" + ], + "Hash": "15da5a8412f317beeee6175fbc76f4bb" + }, + "pkgconfig": { + "Package": "pkgconfig", + "Version": "2.0.3", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "utils" + ], + "Hash": "01f28d4278f15c76cddbea05899c5d6f" + }, + "prettyunits": { + "Package": "prettyunits", + "Version": "1.2.0", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R" + ], + "Hash": "6b01fc98b1e86c4f705ce9dcfd2f57c7" + }, + "progress": { + "Package": "progress", + "Version": "1.2.3", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "R6", + "crayon", + "hms", + "prettyunits" + ], + "Hash": "f4625e061cb2865f111b47ff163a5ca6" + }, + "purrr": { + "Package": "purrr", + "Version": "1.0.2", + 
"Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "cli", + "lifecycle", + "magrittr", + "rlang", + "vctrs" + ], + "Hash": "1cba04a4e9414bdefc9dcaa99649a8dc" + }, + "rappdirs": { + "Package": "rappdirs", + "Version": "0.3.3", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R" + ], + "Hash": "5e3c5dc0b071b21fa128676560dbe94d" + }, + "readr": { + "Package": "readr", + "Version": "2.1.5", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "R6", + "cli", + "clipr", + "cpp11", + "crayon", + "hms", + "lifecycle", + "methods", + "rlang", + "tibble", + "tzdb", + "utils", + "vroom" + ], + "Hash": "9de96463d2117f6ac49980577939dfb3" + }, "renv": { "Package": "renv", "Version": "1.0.11", @@ -21,6 +419,210 @@ "utils" ], "Hash": "47623f66b4e80b3b0587bc5d7b309888" + }, + "rlang": { + "Package": "rlang", + "Version": "1.1.4", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "utils" + ], + "Hash": "3eec01f8b1dee337674b2e34ab1f9bc1" + }, + "rols": { + "Package": "rols", + "Version": "3.2.0", + "Source": "Bioconductor", + "Repository": "Bioconductor 3.20", + "Requirements": [ + "Biobase", + "BiocGenerics", + "httr2", + "jsonlite", + "methods", + "utils" + ], + "Hash": "e3a92b4264c68209a23397a338d7d0b0" + }, + "rprojroot": { + "Package": "rprojroot", + "Version": "2.0.4", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R" + ], + "Hash": "4c8415e0ec1e29f3f4f6fc108bef0144" + }, + "stringi": { + "Package": "stringi", + "Version": "1.8.4", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "stats", + "tools", + "utils" + ], + "Hash": "39e1144fd75428983dc3f63aa53dfa91" + }, + "stringr": { + "Package": "stringr", + "Version": "1.5.1", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "cli", + "glue", + "lifecycle", + "magrittr", + "rlang", + "stringi", + "vctrs" + ], + "Hash": 
"960e2ae9e09656611e0b8214ad543207" + }, + "sys": { + "Package": "sys", + "Version": "3.4.3", + "Source": "Repository", + "Repository": "RSPM", + "Hash": "de342ebfebdbf40477d0758d05426646" + }, + "tibble": { + "Package": "tibble", + "Version": "3.2.1", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "fansi", + "lifecycle", + "magrittr", + "methods", + "pillar", + "pkgconfig", + "rlang", + "utils", + "vctrs" + ], + "Hash": "a84e2cc86d07289b3b6f5069df7a004c" + }, + "tidyr": { + "Package": "tidyr", + "Version": "1.3.1", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "cli", + "cpp11", + "dplyr", + "glue", + "lifecycle", + "magrittr", + "purrr", + "rlang", + "stringr", + "tibble", + "tidyselect", + "utils", + "vctrs" + ], + "Hash": "915fb7ce036c22a6a33b5a8adb712eb1" + }, + "tidyselect": { + "Package": "tidyselect", + "Version": "1.2.1", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "cli", + "glue", + "lifecycle", + "rlang", + "vctrs", + "withr" + ], + "Hash": "829f27b9c4919c16b593794a6344d6c0" + }, + "tzdb": { + "Package": "tzdb", + "Version": "0.4.0", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "cpp11" + ], + "Hash": "f561504ec2897f4d46f0c7657e488ae1" + }, + "utf8": { + "Package": "utf8", + "Version": "1.2.4", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R" + ], + "Hash": "62b65c52671e6665f803ff02954446e9" + }, + "vctrs": { + "Package": "vctrs", + "Version": "0.6.5", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "cli", + "glue", + "lifecycle", + "rlang" + ], + "Hash": "c03fa420630029418f7e6da3667aac4a" + }, + "vroom": { + "Package": "vroom", + "Version": "1.6.5", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "bit64", + "cli", + "cpp11", + "crayon", + "glue", + "hms", + "lifecycle", + "methods", + "progress", + "rlang", + "stats", + "tibble", + 
"tidyselect", + "tzdb", + "vctrs", + "withr" + ], + "Hash": "390f9315bc0025be03012054103d227c" + }, + "withr": { + "Package": "withr", + "Version": "3.0.2", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "grDevices", + "graphics" + ], + "Hash": "cc2d62c76458d425210d1eb1478b30b4" } } } From fabf02bee1ff56c073696fc1867cc3b4e036fac3 Mon Sep 17 00:00:00 2001 From: Ally Hawkins Date: Fri, 22 Nov 2024 15:28:18 -0600 Subject: [PATCH 3/8] script for matching ontology IDs --- .../scripts/prepare-cell-type-ontologies.R | 85 +++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 analyses/cell-type-consensus/scripts/prepare-cell-type-ontologies.R diff --git a/analyses/cell-type-consensus/scripts/prepare-cell-type-ontologies.R b/analyses/cell-type-consensus/scripts/prepare-cell-type-ontologies.R new file mode 100644 index 000000000..63e37f8ad --- /dev/null +++ b/analyses/cell-type-consensus/scripts/prepare-cell-type-ontologies.R @@ -0,0 +1,85 @@ +#!/usr/bin/env Rscript + +# This script is used to match cell types present in PanglaoDB to cell ontology IDs +# The output is a TSV file that contains `ontology_id`, `human_readable_value`, and `panglao_cell_type` + +# If the output file already exists any cell types with assigned ontology IDs will not be modified +# Only cell types without ontology IDs will be used when matching to avoid any NA replacements + +# File paths ------------------------------------------------------------------- +module_base <- rprojroot::find_root(rprojroot::is_renv_project) + +# read in original ref file +ref_file <- file.path(module_base, "references", "PanglaoDB_markers_2020-03-27.tsv") +ref_df <- readr::read_tsv(ref_file) + +# cell ontology ref file +ontology_output_file <- file.path(module_base, "references", "panglao-cell-type-ontologies.tsv") + +# Prep ontology terms ---------------------------------------------------------- + +# get uberon ontology terms and ids +ol <- rols::Ontologies() 
+cell_ontology <- ol[["cl"]] +terms <- rols::Terms(cell_ontology) +labels <- rols::termLabel(terms) + +# data frame of id and human readable value +label_df <- data.frame( + ontology_id = names(labels), + human_readable_value = labels +) + +# Get existing ontology assignments -------------------------------------------- + +# check if ontology ID assignments already exist +if(file.exists(ontology_output_file)){ + + # remove any cell types without an assigned ontology + existing_ontology_df <- readr::read_tsv(ontology_output_file) |> + dplyr::filter(!is.na(ontology_id)) + + # get a list of cell types with existing ontology IDs + existing_ontology_cell_types <- existing_ontology_df$panglao_cell_type + +} else { + # if no output file just make sure these variables exist + existing_ontology_df <- NULL + existing_ontology_cell_types <- NULL +} + +# Match ontology terms --------------------------------------------------------- + +# make human readable values for all cell types present in Panglao reference file +cell_type_df <- ref_df |> + dplyr::select(panglao_cell_type = `cell type`) |> + # remove any NAs + tidyr::drop_na(panglao_cell_type) |> + # add a column for joining + # generally cell type terms are in lower case and singular + dplyr::mutate(human_readable_value = tolower(panglao_cell_type) |> + # make everything singular + # everything is either cells or plural version of cell type with an added s + stringr::str_replace("s$", "") |> + # make sure B and T stay capitalized for B and T cell + stringr::str_replace("^b ", "B ") |> + stringr::str_replace("^t ", "T ")) |> + unique() + +# join ontology terms with cell types in reference file +cell_type_terms_df <- cell_type_df |> + # remove any cell types that already have an assigned ontology ID + dplyr::filter(!(panglao_cell_type %in% existing_ontology_cell_types)) |> + # join remaining ones with ontology terms + dplyr::left_join(label_df, by = "human_readable_value") |> + # make sure ontology id and human readable 
columns are first + dplyr::relocate(ontology_id, human_readable_value) |> + dplyr::mutate(human_readable_value = ifelse(is.na(ontology_id), + NA_character_, + human_readable_value)) |> + # add back existing ontologies + dplyr::bind_rows(existing_ontology_df) |> + dplyr::arrange(human_readable_value) + +# export to ontology tsv +readr::write_tsv(cell_type_terms_df, ontology_output_file) From 6fddbf466aef3b015a5fc104acd9fe269d4b439b Mon Sep 17 00:00:00 2001 From: Ally Hawkins Date: Fri, 22 Nov 2024 15:28:31 -0600 Subject: [PATCH 4/8] matched ontology IDs --- .../panglao-cell-type-ontologies.tsv | 179 ++++++++++++++++++ 1 file changed, 179 insertions(+) create mode 100644 analyses/cell-type-consensus/references/panglao-cell-type-ontologies.tsv diff --git a/analyses/cell-type-consensus/references/panglao-cell-type-ontologies.tsv b/analyses/cell-type-consensus/references/panglao-cell-type-ontologies.tsv new file mode 100644 index 000000000..a95e13c29 --- /dev/null +++ b/analyses/cell-type-consensus/references/panglao-cell-type-ontologies.tsv @@ -0,0 +1,179 @@ +ontology_id human_readable_value panglao_cell_type +CL:0000236 B cell B cells +CL:0000084 T cell T cells +CL:0002038 T follicular helper cell T follicular helper cells +CL:0000622 acinar cell Acinar cells +CL:0000136 adipocyte Adipocytes +CL:0000109 adrenergic neuron Adrenergic neurons +CL:0000583 alveolar macrophage Alveolar macrophages +CL:0000127 astrocyte Astrocytes +CL:0000646 basal cell Basal cells +CL:0000767 basophil Basophils +CL:1000488 cholangiocyte Cholangiocytes +CL:0000108 cholinergic neuron Cholinergic neurons +CL:0000138 chondrocyte Chondrocytes +CL:0000166 chromaffin cell Chromaffin cells +CL:0000064 ciliated cell Ciliated cells +CL:2000002 decidual cell Decidual cells +CL:0000451 dendritic cell Dendritic cells +CL:0000700 dopaminergic neuron Dopaminergic neurons +CL:0002322 embryonic stem cell Embryonic stem cells +CL:0000115 endothelial cell Endothelial cells +CL:0007011 enteric neuron Enteric 
neurons +CL:0000584 enterocyte Enterocytes +CL:0000164 enteroendocrine cell Enteroendocrine cells +CL:0000771 eosinophil Eosinophils +CL:0000065 ependymal cell Ependymal cells +CL:0000352 epiblast cell Epiblast cells +CL:0000066 epithelial cell Epithelial cells +CL:0000765 erythroblast Erythroblasts +CL:0000057 fibroblast Fibroblasts +CL:0000586 germ cell Germ cells +CL:1001509 glycinergic neuron Glycinergic neurons +CL:0000160 goblet cell Goblet cells +CL:0000501 granulosa cell Granulosa cells +CL:0002418 hemangioblast Hemangioblasts +CL:0000037 hematopoietic stem cell Hematopoietic stem cells +CL:0000632 hepatic stellate cell Hepatic stellate cells +CL:0005026 hepatoblast Hepatoblasts +CL:0000182 hepatocyte Hepatocytes +CL:0000099 interneuron Interneurons +CL:0005006 ionocyte Ionocytes +CL:0000312 keratinocyte Keratinocytes +CL:0000175 luteal cell Luteal cells +CL:0000235 macrophage Macrophages +CL:0000097 mast cell Mast cells +CL:0000556 megakaryocyte Megakaryocytes +CL:0000148 melanocyte Melanocytes +CL:0000650 mesangial cell Mesangial cells +CL:0000077 mesothelial cell Mesothelial cells +CL:0000576 monocyte Monocytes +CL:0000100 motor neuron Motor neurons +CL:0000056 myoblast Myoblasts +CL:0000185 myoepithelial cell Myoepithelial cells +CL:0000165 neuroendocrine cell Neuroendocrine cells +CL:0000540 neuron Neurons +CL:0000775 neutrophil Neutrophils +CL:0008025 noradrenergic neuron Noradrenergic neurons +CL:0002167 olfactory epithelial cell Olfactory epithelial cells +CL:0000128 oligodendrocyte Oligodendrocytes +CL:0000062 osteoblast Osteoblasts +CL:0000092 osteoclast Osteoclasts +CL:0000137 osteocyte Osteocytes +CL:0002410 pancreatic stellate cell Pancreatic stellate cells +CL:0000510 paneth cell Paneth cells +CL:0000162 parietal cell Parietal cells +CL:0000669 pericyte Pericytes +CL:0002481 peritubular myoid cell Peritubular myoid cells +CL:0000210 photoreceptor cell Photoreceptor cells +CL:0000652 pinealocyte Pinealocytes +CL:0000786 plasma cell Plasma cells 
+CL:0000784 plasmacytoid dendritic cell Plasmacytoid dendritic cells +CL:0000233 platelet Platelets +CL:0002248 pluripotent stem cell Pluripotent stem cells +CL:0000653 podocyte Podocytes +CL:0000558 reticulocyte Reticulocytes +CL:0000740 retinal ganglion cell Retinal ganglion cells +CL:0002672 retinal progenitor cell Retinal progenitor cells +CL:0000850 serotonergic neuron Serotonergic neurons +CL:0000192 smooth muscle cell Smooth muscle cells +CL:0000017 spermatocyte Spermatocytes +CL:0000499 stromal cell Stromal cells +CL:0002085 tanycyte Tanycytes +CL:0000209 taste receptor cell Taste receptor cells +CL:0000893 thymocyte Thymocytes +CL:4023169 trigeminal neuron Trigeminal neurons +CL:0000351 trophoblast cell Trophoblast cells +CL:0000731 urothelial cell Urothelial cells +NA NA Adipocyte progenitor cells +NA NA Airway epithelial cells +NA NA Airway goblet cells +NA NA Airway smooth muscle cells +NA NA Alpha cells +NA NA Anterior pituitary gland cells +NA NA B cells memory +NA NA B cells naive +NA NA Bergmann glia +NA NA Beta cells +NA NA Cajal-Retzius cells +NA NA Cardiac stem and precursor cells +NA NA Cardiomyocytes +NA NA Choroid plexus cells +NA NA Clara cells +NA NA Crypt cells +NA NA Delta cells +NA NA Distal tubule cells +NA NA Ductal cells +NA NA Endothelial cells (aorta) +NA NA Endothelial cells (blood brain barrier) +NA NA Enteric glia cells +NA NA Enterochromaffin cells +NA NA Epsilon cells +NA NA Erythroid-like and erythroid precursor cells +NA NA Follicular cells +NA NA Foveolar cells +NA NA GABAergic neurons +NA NA Gamma (PP) cells +NA NA Gamma delta T cells +NA NA Gastric chief cells +NA NA Glomus cells +NA NA Glutaminergic neurons +NA NA His bundle cells +NA NA Immature neurons +NA NA Intercalated cells +NA NA Juxtaglomerular cells +NA NA Kidney progenitor cells +NA NA Kupffer cells +NA NA Langerhans cells +NA NA Leydig cells +NA NA Loop of Henle cells +NA NA Luminal epithelial cells +NA NA Mammary epithelial cells +NA NA Meningeal cells +NA NA 
Merkel cells +NA NA Microfold cells +NA NA Microglia +NA NA Müller cells +NA NA Myeloid-derived suppressor cells +NA NA Myocytes +NA NA Myofibroblasts +NA NA Natural killer T cells +NA NA Neural stem/precursor cells +NA NA Neuroblasts +NA NA NK cells +NA NA Nuocytes +NA NA Oligodendrocyte progenitor cells +NA NA Osteoclast precursor cells +NA NA Oxyphil cells +NA NA Pancreatic progenitor cells +NA NA Parathyroid chief cells +NA NA Peri-islet Schwann cells +NA NA Principal cells +NA NA Proximal tubule cells +NA NA Pulmonary alveolar type I cells +NA NA Pulmonary alveolar type II cells +NA NA Pulmonary vascular smooth muscle cells +NA NA Purkinje fiber cells +NA NA Purkinje neurons +NA NA Pyramidal cells +NA NA Radial glia cells +NA NA Red pulp macrophages +NA NA Salivary mucous cells +NA NA Satellite cells +NA NA Satellite glial cells +NA NA Schwann cells +NA NA Sebocytes +NA NA Sertoli cells +NA NA Spermatozoa +NA NA T cells naive +NA NA T cytotoxic cells +NA NA T helper cells +NA NA T memory cells +NA NA T regulatory cells +NA NA Transient cells +NA NA Trichocytes +NA NA Trophoblast progenitor cells +NA NA Trophoblast stem cells +NA NA Tuft cells +NA NA Undefined placental cells +NA NA Vascular smooth muscle cells From f75eee7be2757482201d28f44e011dbe726d35cc Mon Sep 17 00:00:00 2001 From: Ally Hawkins Date: Mon, 25 Nov 2024 16:43:57 -0600 Subject: [PATCH 5/8] ignore panglao ref --- analyses/cell-type-consensus/.gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/analyses/cell-type-consensus/.gitignore b/analyses/cell-type-consensus/.gitignore index 418755f52..8b10d83e4 100644 --- a/analyses/cell-type-consensus/.gitignore +++ b/analyses/cell-type-consensus/.gitignore @@ -5,3 +5,6 @@ # Ignore the scratch directory (but keep it present) /scratch/* !/scratch/.gitkeep + +# Ignore the panglao ref file +/references/PanglaoDB_markers_2020-03-27.tsv From fb65c8b6f7f4f9a14b58588057521f1163c98318 Mon Sep 17 00:00:00 2001 From: Ally Hawkins Date: Mon, 25 Nov 2024 
16:44:11 -0600 Subject: [PATCH 6/8] script to download panglao ref --- .../scripts/download-panglao-ref.sh | 19 +++++++++++++++++++ .../scripts/prepare-cell-type-ontologies.R | 1 + 2 files changed, 20 insertions(+) create mode 100644 analyses/cell-type-consensus/scripts/download-panglao-ref.sh diff --git a/analyses/cell-type-consensus/scripts/download-panglao-ref.sh b/analyses/cell-type-consensus/scripts/download-panglao-ref.sh new file mode 100644 index 000000000..3021f1b76 --- /dev/null +++ b/analyses/cell-type-consensus/scripts/download-panglao-ref.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +# This script is used to download the reference file for PanglaoDB from scpca-nf +# The downloaded file will be saved to `references/panglao-cell-type-ontologies.tsv` + +set -euo pipefail + +# navigate to where script lives +cd $(dirname "$0") +scripts_dir=$(pwd) + +# define path to ref file and url +ref_file="${scripts_dir}/../references/PanglaoDB_markers_2020-03-27.tsv" +ref_url="https://raw.githubusercontent.com/AlexsLemonade/scpca-nf/refs/heads/main/references/PanglaoDB_markers_2020-03-27.tsv" + +# if ref file doesn't exist download from scpca-nf repository +if [[ ! 
-f $ref_file ]]; then + curl -o $ref_file $ref_url +fi diff --git a/analyses/cell-type-consensus/scripts/prepare-cell-type-ontologies.R b/analyses/cell-type-consensus/scripts/prepare-cell-type-ontologies.R index 63e37f8ad..c1e692c10 100644 --- a/analyses/cell-type-consensus/scripts/prepare-cell-type-ontologies.R +++ b/analyses/cell-type-consensus/scripts/prepare-cell-type-ontologies.R @@ -2,6 +2,7 @@ # This script is used to match cell types present in PanglaoDB to cell ontology IDs # The output is a TSV file that contains `ontology_id`, `human_readable_value`, and `panglao_cell_type` +# running this script requires running the `download-panglao-ref.sh` script first # If the output file already exists any cell types with assigned ontology IDs will not be modified # Only cell types without ontology IDs will be used when matching to avoid any NA replacements From c9e305138cabcb2db2688d053dd4a51b31b833ce Mon Sep 17 00:00:00 2001 From: Ally Hawkins Date: Mon, 25 Nov 2024 17:00:02 -0600 Subject: [PATCH 7/8] add numbers to scripts --- .../{download-panglao-ref.sh => 00-download-panglao-ref.sh} | 0 ...e-cell-type-ontologies.R => 01-prepare-cell-type-ontologies.R} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename analyses/cell-type-consensus/scripts/{download-panglao-ref.sh => 00-download-panglao-ref.sh} (100%) rename analyses/cell-type-consensus/scripts/{prepare-cell-type-ontologies.R => 01-prepare-cell-type-ontologies.R} (100%) diff --git a/analyses/cell-type-consensus/scripts/download-panglao-ref.sh b/analyses/cell-type-consensus/scripts/00-download-panglao-ref.sh similarity index 100% rename from analyses/cell-type-consensus/scripts/download-panglao-ref.sh rename to analyses/cell-type-consensus/scripts/00-download-panglao-ref.sh diff --git a/analyses/cell-type-consensus/scripts/prepare-cell-type-ontologies.R b/analyses/cell-type-consensus/scripts/01-prepare-cell-type-ontologies.R similarity index 100% rename from 
analyses/cell-type-consensus/scripts/prepare-cell-type-ontologies.R rename to analyses/cell-type-consensus/scripts/01-prepare-cell-type-ontologies.R From f26e66599d804cd5fe83ee8c486dbebe8aa6b1fb Mon Sep 17 00:00:00 2001 From: Ally Hawkins Date: Mon, 25 Nov 2024 17:00:13 -0600 Subject: [PATCH 8/8] add some initial readmes --- .../cell-type-consensus/references/README.md | 18 ++++++++++++++++++ analyses/cell-type-consensus/scripts/README.md | 11 +++++++++++ 2 files changed, 29 insertions(+) create mode 100644 analyses/cell-type-consensus/references/README.md create mode 100644 analyses/cell-type-consensus/scripts/README.md diff --git a/analyses/cell-type-consensus/references/README.md b/analyses/cell-type-consensus/references/README.md new file mode 100644 index 000000000..e45cc0bc8 --- /dev/null +++ b/analyses/cell-type-consensus/references/README.md @@ -0,0 +1,18 @@ +# References + +This folder contains all reference files used for generating consensus cell types. + +1. `panglao-cell-type-ontologies.tsv`: This file contains a table with all possible cell types in the reference used when running `CellAssign`. +The table includes the following columns: + +| | | +| --- | --- | +| `ontology_id` | [cell type (CL) ontology identifier term](https://www.ebi.ac.uk/ols4/ontologies/cl) | +| `human_readable_value` | Label associated with the cell type ontology term | +| `panglao_cell_type` | Original name for the cell type as set by `PanglaoDB` | + +To generate this file follow these steps: + +- Download the original reference file with `00-download-panglao-ref.sh`. +- Programmatically assign ontology labels with `01-prepare-cell-type-ontologies.R`. +- Assign any remaining ontology values manually by finding the most representative [cell type ontology (CL) identifier term](https://www.ebi.ac.uk/ols4/ontologies/cl).
diff --git a/analyses/cell-type-consensus/scripts/README.md b/analyses/cell-type-consensus/scripts/README.md new file mode 100644 index 000000000..594e00a7d --- /dev/null +++ b/analyses/cell-type-consensus/scripts/README.md @@ -0,0 +1,11 @@ +# Scripts + +This folder contains all scripts used for generating consensus cell types. + +1. `00-download-panglao-ref.sh`: This script is used to download the `PanglaoDB` reference file from `AlexsLemonade/scpca-nf`. +This reference file was originally obtained from `PanglaoDB` and contains a table with all marker genes for all cell types that were used to build the references used when running `CellAssign`. +The file will be stored in `references/PanglaoDB_markers_2020-03-27.tsv`. + +2. `01-prepare-cell-type-ontologies.R`: This script is used to assign [cell type ontologies](https://www.ebi.ac.uk/ols4/ontologies/cl) to cell types in the `PanglaoDB` reference file. +Any cell types whose human readable label matches the value in the `cell type` column of the reference file (downloaded using the `00-download-panglao-ref.sh` script) are programmatically assigned. +Ontology terms and labels along with the `cell type` label from the reference file are saved to a new file, `references/panglao-cell-type-ontologies.tsv`.