From c13f25f0d7b4087292511d7a85f47e34aba28ed1 Mon Sep 17 00:00:00 2001 From: Dimitri RODARIE Date: Thu, 8 Feb 2024 08:48:12 +0100 Subject: [PATCH] Drop literature values from regions that are not in hierarchy or not in the annotation volume (#61) * Drop literature values from regions that are not in hierarchy or not in the annotation volume, Fix #60. Regions dropped will be displayed with a warning. Update README.rst --- README.rst | 31 +++++---- atlas_densities/app/cell_densities.py | 21 +++++- atlas_densities/densities/fitting.py | 4 ++ .../densities/measurement_to_density.py | 67 ++++++++++++++++--- tests/app/test_cell_densities.py | 2 + .../densities/test_measurement_to_density.py | 36 ++++++++++ 6 files changed, 139 insertions(+), 22 deletions(-) diff --git a/README.rst b/README.rst index 8b8cc3b..8479266 100644 --- a/README.rst +++ b/README.rst @@ -10,8 +10,8 @@ The outcome of this project is a list of volumetric files that provides cell typ for each voxel of the mouse brain volume. The BBP Cell Atlas is the first model required to reconstruct BBP circuits of the mouse brain. -The tools implementation is based on the methods of `Eroe et al. (2018)`_, `Rodarie et al. (2021)`_, -and `Roussel et al. (2021)`_. +The tools implementation is based on the methods of `Eroe et al. (2018)`_, `Rodarie et al. (2022)`_, +and `Roussel et al. (2022)`_. The source code was originally written by Csaba Eroe, Dimitri Rodarie, Hugo Dictus, Lu Huanxiang, Wojciech Wajerowicz, Jonathan Lurie, and Yann Roussel. @@ -53,7 +53,7 @@ Note: Depending on the size and resolution of the atlas, it can happen that some Reference atlases ----------------- -Most the pipeline steps rely on the following AIBS reference datasets (see `Rodarie et al. (2021)`_ for more +Most the pipeline steps rely on the following AIBS reference datasets (see `Rodarie et al. (2022)`_ for more details on the different versions of these datasets): * A Nissl volume @@ -161,7 +161,7 @@ ISH datasets for inhibitory/excitatory neurons In `Eroe et al. (2018)`_ (i.e., BBP Cell Atlas version 1), the excitatory neurons are distinguished from the inhibitory neurons using the Nrn1 and GAD67 (or GAD1) genetic marker. -In `Rodarie et al. (2021)`_ (i.e., BBP Cell Atlas version 2), the authors used parvalbumin (Pvalb), +In `Rodarie et al. (2022)`_ (i.e., BBP Cell Atlas version 2), the authors used parvalbumin (Pvalb), somatostatin (SST), vasoactive intestinal peptide (VIP) and gabaergic (GAD1) markers (see also `fit_average_densities_ccfv2_config.yaml`_). @@ -207,7 +207,7 @@ The files `glia.nrrd`, `oligodendrocyte.nrrd`, `microglia.nrrd`, `astrocyte.nrrd Extract literature neuron type densities estimates -------------------------------------------------- -In `Rodarie et al. (2021)`_, the authors collected density estimates from the literature for +In `Rodarie et al. (2022)`_, the authors collected density estimates from the literature for inhibitory neurons. Some estimates are in a format that can not be directly used by the pipeline (e.g., counts instead of densities). This part of the pipeline integrates the literature values into csv files, that will be used later on for the fitting. @@ -216,7 +216,7 @@ Format literature review files ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We compile here the cell density estimates related to measurements of `Kim et al. (2017)`_ density -file (`mmc3.xlsx`_) and `Rodarie et al. (2021)`_ literature +file (`mmc3.xlsx`_) and `Rodarie et al. (2022)`_ literature review file (`gaba_papers.xlsx`_) into a single CSV file. Regions known to be purely excitatory or inhibitory (in terms of neuron composition) are also listed in a separate CSV file. @@ -237,6 +237,13 @@ Convert literature measurements into average densities Compute and save average cell densities based on literature measurements and Cell Atlas data (e.g., region volumes). +WARNING: +Different versions of the annotation atlas or the hierarchy file might have different sets brain +regions (see `Rodarie et al. (2022)`_ for more details). The region names used by the literature +measurements might therefore have no match in these datasets. +Regions from the measurements that are not in the hierarchy or do not appear in the annotations will +be ignored. A warning message will display these regions, allowing us to review them. + .. code-block:: bash atlas-densities cell-densities measurements-to-average-densities \ @@ -252,7 +259,7 @@ Fit transfer functions from mean region intensity to neuron density ------------------------------------------------------------------- We fit here transfer functions that describe the relation between mean ISH expression in regions of -the mouse brain and literature regional density estimates (see `Rodarie et al. (2021)`_ for more +the mouse brain and literature regional density estimates (see `Rodarie et al. (2022)`_ for more details). This step leverages AIBS ISH marker datasets (in their expression form, see also `fit_average_densities_ccfv2_config.yaml`_) and the previously computed literature density values. @@ -281,7 +288,7 @@ Compute inhibitory/excitatory neuron densities ---------------------------------------------- The neuron subtypes are here distinguished from each other using either the pipeline from -`Eroe et al. (2018)`_ (BBP Cell Atlas version 1) or `Rodarie et al. (2021)`_ (BBP Cell Atlas version +`Eroe et al. (2018)`_ (BBP Cell Atlas version 1) or `Rodarie et al. (2022)`_ (BBP Cell Atlas version 2). BBP Cell Atlas version 1 @@ -321,8 +328,8 @@ Compute ME-types densities from a probability map ------------------------------------------------- Morphological and Electrical type densities of inhibitory neurons in the isocortex can be estimated -using Roussel et al.'s pipeline. This pipeline produces a mapping from inhibitory neuron molecular -types (here PV, SST, VIP and GAD67) to ME-types defined in `Markram et al. (2015)`_. +using `Roussel et al. (2022)`_'s pipeline. This pipeline produces a mapping from inhibitory neuron +molecular types (here PV, SST, VIP and GAD67) to ME-types defined in `Markram et al. (2015)`_. The following command creates neuron density nrrd files for the me-types listed in a probability mapping csv file (see also `mtypes_probability_map_config.yaml`_). @@ -431,8 +438,8 @@ Copyright © 2022 Blue Brain Project/EPFL .. _`Eroe et al. (2018)`: https://www.frontiersin.org/articles/10.3389/fninf.2018.00084/full .. _`Kim et al. (2017)`: https://www.sciencedirect.com/science/article/pii/S0092867417310693 .. _`Markram et al. (2015)`: https://www.cell.com/cell/fulltext/S0092-8674(15)01191-5 -.. _`Rodarie et al. (2021)`: https://www.biorxiv.org/content/10.1101/2021.11.20.469384v2 -.. _`Roussel et al. (2021)`: https://www.biorxiv.org/content/10.1101/2021.11.24.469815v1 +.. _`Rodarie et al. (2022)`: https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1010739 +.. _`Roussel et al. (2022)`: https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1010058 .. _`BBP Cell Atlas`: https://portal.bluebrain.epfl.ch/resources/models/cell-atlas/ .. _cgal-pybind: https://github.com/BlueBrain/cgal-pybind .. _`DeepAtlas`: https://github.com/BlueBrain/Deep-Atlas diff --git a/atlas_densities/app/cell_densities.py b/atlas_densities/app/cell_densities.py index 019145e..f60a87b 100644 --- a/atlas_densities/app/cell_densities.py +++ b/atlas_densities/app/cell_densities.py @@ -621,6 +621,12 @@ def compile_measurements( @app.command() @common_atlas_options +@click.option( + "--region-name", + type=str, + default="root", + help="Name of the root region in the hierarchy", +) @click.option( "--cell-density-path", type=EXISTING_FILE_PATH, @@ -654,6 +660,7 @@ def compile_measurements( def measurements_to_average_densities( annotation_path, hierarchy_path, + region_name, cell_density_path, neuron_density_path, measurements_path, @@ -666,6 +673,10 @@ def measurements_to_average_densities( `neuron_density_path`) are used to compute average cell densities in every AIBS region where sufficient information is available. + Measurements from regions which are not in the provided brain region hierarchy or not in the + provided annotation volume will be ignored. A warning with all ignored lines from the + measurements file will be displayed. + The different cell types (e.g., PV+, SST+, VIP+ or overall inhibitory neurons) and brain regions under consideration are prescribed by the input measurements. @@ -709,6 +720,7 @@ def measurements_to_average_densities( region_map = RegionMap.load_json(hierarchy_path) L.info("Loading measurements ...") measurements_df = pd.read_csv(measurements_path) + L.info("Measurement to average density: started") average_cell_densities_df = measurement_to_average_density( region_map, @@ -718,6 +730,7 @@ def measurements_to_average_densities( overall_cell_density.raw, neuron_density.raw, measurements_df, + region_name, ) remove_non_density_measurements(average_cell_densities_df) @@ -735,7 +748,7 @@ def measurements_to_average_densities( "--region-name", type=str, default="root", - help="Name of the region in the hierarchy", + help="Name of the root region in the hierarchy", ) @click.option( "--neuron-density-path", @@ -822,6 +835,10 @@ def fit_average_densities( `neuron_density_path` is used to compute the average density of inhibitory neurons (a.k.a gad67+) in every homogenous region of type "inhibitory". + Regions from the literature values and homogenous regions which are not in the provided brain + region hierarchy or not in the provided annotation volume will be ignored. A warning with all + ignored lines from the measurements file will be displayed. + Our linear fitting of density values relies on the assumption that the average cell density (number of cells per mm^3) of a cell type T in a brain region R depends linearly on the average intensity of a gene marker of T. The conversion factor is a constant which depends only @@ -932,7 +949,7 @@ def fit_average_densities( "--region-name", type=str, default="root", - help="Name of the region in the hierarchy", + help="Name of the root region in the hierarchy", ) @click.option( "--neuron-density-path", diff --git a/atlas_densities/densities/fitting.py b/atlas_densities/densities/fitting.py index 1fa8dc6..14428ad 100644 --- a/atlas_densities/densities/fitting.py +++ b/atlas_densities/densities/fitting.py @@ -29,6 +29,7 @@ from tqdm import tqdm from atlas_densities.densities import utils +from atlas_densities.densities.measurement_to_density import remove_unknown_regions from atlas_densities.exceptions import AtlasDensitiesError, AtlasDensitiesWarning if TYPE_CHECKING: # pragma: no cover @@ -625,6 +626,9 @@ def linear_fitting( # pylint: disable=too-many-arguments _check_homogenous_regions_sanity(homogenous_regions) hierarchy_info = utils.get_hierarchy_info(region_map, root=region_name) + remove_unknown_regions(average_densities, region_map, annotation, hierarchy_info) + remove_unknown_regions(homogenous_regions, region_map, annotation, hierarchy_info) + L.info("Creating a data frame from known densities ...") densities = create_dataframe_from_known_densities( hierarchy_info["brain_region"].to_list(), average_densities diff --git a/atlas_densities/densities/measurement_to_density.py b/atlas_densities/densities/measurement_to_density.py index 49b8777..65170e4 100644 --- a/atlas_densities/densities/measurement_to_density.py +++ b/atlas_densities/densities/measurement_to_density.py @@ -11,6 +11,7 @@ Densities are expressed in number of cells per mm^3. """ +import warnings from typing import Set, Tuple, Union import numpy as np @@ -20,6 +21,7 @@ from voxcell import RegionMap # type: ignore from atlas_densities.densities.utils import compute_region_volumes, get_hierarchy_info +from atlas_densities.exceptions import AtlasDensitiesWarning def get_parent_region(region_name: str, region_map: RegionMap) -> Union[str, None]: @@ -255,7 +257,59 @@ def cell_count_per_slice_to_density( measurements[mask_50um] = cell_counts_per_slice -def measurement_to_average_density( +def remove_unknown_regions( + measurements: "pd.DataFrame", + region_map: RegionMap, + annotation: AnnotationT, + hierarchy_info: "pd.DataFrame", +): + """ + Drop lines from the measurements dataframe which brain regions are not in the AIBS brain region + hierarchy or not in the annotation volume. + The data frame `measurements` is modified in place. + + Args: + measurements: dataframe whose columns are described in + :func:`atlas_densities.app.densities.compile_measurements`. + region_map: RegionMap object to navigate the brain regions hierarchy. + annotation: int array of shape (W, H, D) holding the annotation of the whole AIBS + mouse brain. (The integers W, H and D are the dimensions of the array). + hierarchy_info: data frame returned by + :func:`atlas_densities.densities.utils.get_hierarchy_info`. + """ + pd.set_option("display.max_colwidth", None) + indices_ids = measurements.index[ + ~measurements["brain_region"].isin(hierarchy_info["brain_region"]) + ] + if len(indices_ids) > 0: + warnings.warn( + "The following lines in the measurements dataframe have no equivalent in the " + "brain region hierarchy: \n" + f"{measurements.loc[indices_ids, 'brain_region'].to_string()}", + AtlasDensitiesWarning, + ) + measurements.drop(indices_ids, inplace=True) + + u_regions = np.unique(annotation) + u_regions = np.delete(u_regions, 0) # don't take 0, i.e: outside of the brain + u_regions = [ + region_map.get(u_region, "name", with_ascendants=True) + for u_region in u_regions + if region_map.find(u_region, "id") + ] + u_regions = np.unique([elem for row in u_regions for elem in row]) # flatten + + indices_ann = measurements.index[~measurements["brain_region"].isin(u_regions)] + if len(indices_ann) > 0: + warnings.warn( + "The following lines in the measurements dataframe have no equivalent in the " + f"annotation volume: \n{measurements.loc[indices_ann, 'brain_region'].to_string()}", + AtlasDensitiesWarning, + ) + measurements.drop(indices_ann, inplace=True) + + +def measurement_to_average_density( # pylint: disable=too-many-arguments region_map: RegionMap, annotation: AnnotationT, voxel_dimensions: Tuple[float, float, float], @@ -263,6 +317,7 @@ def measurement_to_average_density( cell_density: FloatArray, neuron_density: FloatArray, measurements: "pd.DataFrame", + root_region: str = "Basic cell groups and regions", ) -> "pd.DataFrame": """ Compute average cell densities in AIBS brain regions based on experimental `measurements`. @@ -274,9 +329,6 @@ def measurement_to_average_density( (or if several cell density computations are possible from measurements of different articles), the output cell density of the region is the average of the possible cell densities. - The region names in `measurements` which are not compliant with the AIBS nomenclature (1.json) - are ignored. - Args: region_map: RegionMap object to navigate the brain regions hierarchy. annotation: int array of shape (W, H, D) holding the annotation of the whole AIBS @@ -291,6 +343,7 @@ def measurement_to_average_density( in that voxel expressed in number of neurons per mm^3. measurements: dataframe whose columns are described in :func:`atlas_densities.app.densities.compile_measurements`. + root_region: name of the root region in the brain region hierarchy. Returns: dataframe of the same format as `measurements` but where all measurements of type @@ -298,10 +351,8 @@ def measurement_to_average_density( type "cell density". Densities are expressed in number of cells per mm^3. """ - # Filter out non-AIBS compliant region names - hierarchy_info = get_hierarchy_info(region_map) - indices = measurements.index[~measurements["brain_region"].isin(hierarchy_info["brain_region"])] - measurements = measurements.drop(indices) + hierarchy_info = get_hierarchy_info(region_map, root_region) + remove_unknown_regions(measurements, region_map, annotation, hierarchy_info) # Replace NaN standard deviations by measurement values nan_mask = measurements["standard_deviation"].isna() diff --git a/tests/app/test_cell_densities.py b/tests/app/test_cell_densities.py index ddc894d..df2d5dc 100644 --- a/tests/app/test_cell_densities.py +++ b/tests/app/test_cell_densities.py @@ -271,6 +271,8 @@ def _get_measurements_to_average_densities_result(runner, hierarchy_path, measur hierarchy_path, "--annotation-path", "annotation.nrrd", + "--region-name", + "Basic cell groups and regions", "--cell-density-path", "cell_density.nrrd", "--neuron-density-path", diff --git a/tests/densities/test_measurement_to_density.py b/tests/densities/test_measurement_to_density.py index 2c6bdb8..440d918 100644 --- a/tests/densities/test_measurement_to_density.py +++ b/tests/densities/test_measurement_to_density.py @@ -20,6 +20,11 @@ def region_map(): return RegionMap.from_dict(get_hierarchy()) +@pytest.fixture +def annotations(): + return np.array([[[0, 10710, 10710, 10711, 10711, 0]]], dtype=int) + + @pytest.fixture def cell_densities(): densities = np.array([5.0 / 9.0, 4.0 / 8.0, 1.0 / 3.0, 1.0 / 3.0, 1.0 / 3.0]) @@ -55,6 +60,37 @@ def volumes(voxel_volume=2): ) +def test_remove_unknown_regions(region_map, annotations): + measurements = pd.DataFrame( + { + "brain_region": [ + "Lobule Ii", + "Lobule II, granular layer", + "Lobule II, molecular layer", + ], + "measurement": [0.722, 28118.0, 31047], + "standard_deviation": [0.722, 6753.9, 5312], + "measurement_type": ["volume", "cell count", "cell count"], + "measurement_unit": ["mm^3", "number of cells", "number of cells"], + "source_title": ["Article 1", "Article 2", "Article 1"], + } + ) + tested.remove_unknown_regions(measurements, region_map, annotations, get_hierarchy_info()) + expected = pd.DataFrame( + { + "brain_region": [ + "Lobule II, molecular layer", + ], + "measurement": [31047.0], + "standard_deviation": [5312.0], + "measurement_type": ["cell count"], + "measurement_unit": ["number of cells"], + "source_title": ["Article 1"], + } + ) + pdt.assert_frame_equal(measurements.reset_index(drop=True), expected) + + def test_cell_count_to_density(region_map, volumes): measurements = pd.DataFrame( {