From a3bd72f28d42b39150f0da1a1bf1033994265f9f Mon Sep 17 00:00:00 2001 From: Joyce Yan <5653616+joyceyan@users.noreply.github.com> Date: Mon, 4 Nov 2024 15:46:02 -0800 Subject: [PATCH 1/3] feat: Do not use backed mode when reading in anndata for CXG conversion (#7377) --- DEV_ENV_WITHOUT_DOCKER.md | 22 +++++++++++++++++++++ backend/layers/processing/h5ad_data_file.py | 2 +- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/DEV_ENV_WITHOUT_DOCKER.md b/DEV_ENV_WITHOUT_DOCKER.md index 50f87e4d141dc..83eb05fc3ebbe 100644 --- a/DEV_ENV_WITHOUT_DOCKER.md +++ b/DEV_ENV_WITHOUT_DOCKER.md @@ -51,3 +51,25 @@ Run functional tests for WMG api against the `dev` environment. **NOTE**: `dev` environment is a remote environment. These functional tests run locally against a backend in a remote environment called `dev`. 1. `AWS_PROFILE=single-cell-dev DEPLOYMENT_STAGE=dev pytest -v tests/functional/backend/wmg/test_wmg_api.py` + +### Set up vips + +You may run into issues with finding `_libvips` if you're running a Jupyter notebook locally that calls `pyvips`, such as when running CXG conversion locally. The error may look like this: + +``` +ModuleNotFoundError Traceback (most recent call last) +File ~/miniconda3/envs/py11/lib/python3.11/site-packages/pyvips/__init__.py:19 + 18 try: +---> 19 import _libvips + 21 logger.debug('Loaded binary module _libvips') + +ModuleNotFoundError: No module named '_libvips' +``` + +To resolve this, you'll need to install `vips` with `brew install vips`, because this is a dependency that `pyvips` has. If you're using conda, you'll have to also tell your conda environment where homebrew installed `vips`. 
You can do this with the following commands, replacing `<ENV_NAME>` with the name of your conda environment: + +``` +mkdir -p ~/miniconda3/envs/<ENV_NAME>/etc/conda/activate.d +touch ~/miniconda3/envs/<ENV_NAME>/etc/conda/activate.d/env_vars.sh +echo 'export DYLD_LIBRARY_PATH=/opt/homebrew/lib:$DYLD_LIBRARY_PATH' >> ~/miniconda3/envs/<ENV_NAME>/etc/conda/activate.d/env_vars.sh +``` diff --git a/backend/layers/processing/h5ad_data_file.py b/backend/layers/processing/h5ad_data_file.py index 58e7381526462..5720b31a4a558 100644 --- a/backend/layers/processing/h5ad_data_file.py +++ b/backend/layers/processing/h5ad_data_file.py @@ -183,7 +183,7 @@ def validate_anndata(self): def extract_anndata_elements_from_file(self): logging.info(f"Reading in AnnData dataset: {path.basename(self.input_filename)}") - self.anndata = anndata.read_h5ad(self.input_filename, backed="r") + self.anndata = anndata.read_h5ad(self.input_filename) logging.info("Completed reading in AnnData dataset!") self.obs = self.transform_dataframe_index_into_column(self.anndata.obs, "obs", self.obs_index_column_name) From 8d24ee0e08cc362c3bd2a126b62da03378693463 Mon Sep 17 00:00:00 2001 From: Ronen Date: Tue, 5 Nov 2024 13:55:40 -0500 Subject: [PATCH 2/3] chore: move bottom banner to landing footer (#7378) --- frontend/src/components/LandingFooter/index.tsx | 3 +++ frontend/src/components/Layout/index.tsx | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/frontend/src/components/LandingFooter/index.tsx b/frontend/src/components/LandingFooter/index.tsx index ef2345c9bd3a7..6759eada07b4f 100644 --- a/frontend/src/components/LandingFooter/index.tsx +++ b/frontend/src/components/LandingFooter/index.tsx @@ -6,6 +6,8 @@ import Wordmark from "src/common/images/cellxgene-discover-wordmark.svg"; import CZILogo from "src/components/common/staticPages/czi-logo-white.png"; import styles from "./index.module.scss"; +import BottomBanner from "../BottomBanner"; +import { BANNER_FEEDBACK_SURVEY_LINK } from "src/common/constants/airtableLinks"; const LandingFooter = (): JSX.Element => { return ( @@ -80,6 +82,7 @@ const 
LandingFooter = (): JSX.Element => { + ); }; diff --git a/frontend/src/components/Layout/index.tsx b/frontend/src/components/Layout/index.tsx index 6fdc283210d56..052b0b8f7fd08 100644 --- a/frontend/src/components/Layout/index.tsx +++ b/frontend/src/components/Layout/index.tsx @@ -5,8 +5,6 @@ import Header from "../Header"; import LandingFooter from "../LandingFooter"; import LandingHeader from "../MobileFriendlyHeader"; import { Wrapper } from "./style"; -import BottomBanner from "../BottomBanner"; -import { BANNER_FEEDBACK_SURVEY_LINK } from "src/common/constants/airtableLinks"; interface Props { children: ReactNode; @@ -36,7 +34,6 @@ const Layout = ({ children }: Props) => { /> {children} - ); } else if (pathname === ROUTES.CELL_GUIDE) { From c4360399d1da35da5305fae82ed5a79a2c84caae Mon Sep 17 00:00:00 2001 From: Mim Hastie Date: Tue, 5 Nov 2024 12:11:51 -0800 Subject: [PATCH 3/3] chore: HuBMAP v7 (#7361) --- .../canonical_markers.py | 13 ++++++++--- backend/cellguide/pipeline/constants.py | 2 +- backend/common/doi.py | 23 +++++++++++++++---- .../cellguide_pipeline/requirements.txt | 1 + tests/unit/backend/common/test_doi.py | 5 ++++ 5 files changed, 36 insertions(+), 8 deletions(-) diff --git a/backend/cellguide/pipeline/canonical_marker_genes/canonical_markers.py b/backend/cellguide/pipeline/canonical_marker_genes/canonical_markers.py index 374ceabfb68fb..120f03968747e 100644 --- a/backend/cellguide/pipeline/canonical_marker_genes/canonical_markers.py +++ b/backend/cellguide/pipeline/canonical_marker_genes/canonical_markers.py @@ -131,8 +131,13 @@ def fetch_doi_info(ref): doi = clean_doi(ref.doi) if doi: if doi not in doi_to_citation: - title = self.crossref_provider.get_title_and_citation_from_doi(doi) - doi_to_citation[doi] = title + # Catch and log invalid DOIs. 
+ try: + title = self.crossref_provider.get_title_and_citation_from_doi(doi) + doi_to_citation[doi] = title + except Exception: + logger.error(f"Error fetching title and citation for DOI {doi}") + return None # None values are filtered out. else: title = doi_to_citation[doi] return doi, title @@ -278,7 +283,9 @@ def _process_asct_table__parallel(self, tissue: str) -> list[dict[str, str]]: tissue_id = self._get_tissue_id([AnatomicalStructure(**entry) for entry in row["anatomical_structures"]]) gene_symbols, gene_names = self._get_gene_info([GeneBiomarker(**entry) for entry in row["biomarkers_gene"]]) - refs, titles = self._get_references([Reference(**entry) for entry in row["references"]], doi_to_citation) + # Protect against invalid references (i.e. references without a DOI). + references = [Reference(**entry) for entry in row["references"] if entry and "doi" in entry] + refs, titles = self._get_references(references, doi_to_citation) for cell_type in cell_types: for index in range(len(gene_symbols)): diff --git a/backend/cellguide/pipeline/constants.py b/backend/cellguide/pipeline/constants.py index 3d1b143a66a80..f271971e7f0a6 100644 --- a/backend/cellguide/pipeline/constants.py +++ b/backend/cellguide/pipeline/constants.py @@ -1,6 +1,6 @@ import os -ASCTB_MASTER_SHEET_URL = "https://ccf-ontology.hubmapconsortium.org/v2.3.0/ccf-asctb-all.json" +ASCTB_MASTER_SHEET_URL = "https://cdn.humanatlas.io/hra-asctb-json-releases/hra-asctb-all.v2.1.json" HOMO_SAPIENS_ORGANISM_ONTOLOGY_TERM_ID = "NCBITaxon:9606" diff --git a/backend/common/doi.py b/backend/common/doi.py index 989ebbd260c8a..e1ea416550ab5 100644 --- a/backend/common/doi.py +++ b/backend/common/doi.py @@ -55,7 +55,12 @@ def portal_get_normalized_doi_url(doi_node: dict, errors: list) -> Optional[str] def clean_doi(doi: str) -> str: """ - Cleans the DOI string. + Cleans the DOI string. 
Formats handled: + - DOI 10.1182/ bloodadvances.2017015073 + - DOI:10.1167/iovs.15-18117 + - DOI: 10.1002/biot.201200199 + - DOI: 10.1111/j.1440-1827.1995.tb03518.x. + - https://doi.org/10.1101/2021.01.02.425073 Parameters ---------- @@ -71,9 +76,19 @@ def clean_doi(doi: str) -> str: if doi == "No DOI": return "" + # Remove trailing periods from the DOI. This handles the + # "10.1111/j.1440-1827.1995.tb03518.x."-type cases. if doi != "" and doi[-1] == ".": doi = doi[:-1] - if " " in doi: - doi = doi.split(" ")[1] # this handles cases where the DOI string is "DOI: {doi}" - doi = doi.strip() + + # Remove any invalid tokens from the DOI. Invalid tokens include: + # "DOI", "DOI:", "DOI: ", and "https://doi.org/". + regex = re.compile(r"\bDOI[: ]?\s*|https://doi.org/", re.IGNORECASE) + doi = regex.sub("", doi) + + # Remove all whitespace from the DOI. This handles the + # "10.1182/ bloodadvances.2017015073"-type cases, as well as any other + # leading or trailing whitespace. + doi = re.sub(r"\s+", "", doi.strip()) + return doi diff --git a/python_dependencies/cellguide_pipeline/requirements.txt b/python_dependencies/cellguide_pipeline/requirements.txt index 6633df09a550e..fb1e903dfcaf0 100644 --- a/python_dependencies/cellguide_pipeline/requirements.txt +++ b/python_dependencies/cellguide_pipeline/requirements.txt @@ -1,4 +1,5 @@ anndata==0.8.0 +awscli==1.29.34 boto3==1.28.7 cellxgene-census>=1.10.0 cellxgene-ontology-guide~=1.0.0 diff --git a/tests/unit/backend/common/test_doi.py b/tests/unit/backend/common/test_doi.py index d98400e601657..6d19035d47004 100644 --- a/tests/unit/backend/common/test_doi.py +++ b/tests/unit/backend/common/test_doi.py @@ -10,6 +10,11 @@ def test__clean_doi(self): ("DOI: 10.1016/j.cell.2019.11.025.", "10.1016/j.cell.2019.11.025"), (" DOI: 10.1016/j.cell.2019.11.025 ", "10.1016/j.cell.2019.11.025"), ("10.1016/j.cell.2019.11.025. 
", "10.1016/j.cell.2019.11.025"), + ("DOI 10.1182/ bloodadvances.2017015073", "10.1182/bloodadvances.2017015073"), + ("DOI:10.1167/iovs.15-18117", "10.1167/iovs.15-18117"), + ("DOI: 10.1002/biot.201200199", "10.1002/biot.201200199"), + ("DOI: 10.1111/j.1440-1827.1995.tb03518.x.", "10.1111/j.1440-1827.1995.tb03518.x"), + ("https://doi.org/10.1101/2021.01.02.425073", "10.1101/2021.01.02.425073"), ("", ""), ]