From a3bd72f28d42b39150f0da1a1bf1033994265f9f Mon Sep 17 00:00:00 2001
From: Joyce Yan <5653616+joyceyan@users.noreply.github.com>
Date: Mon, 4 Nov 2024 15:46:02 -0800
Subject: [PATCH 1/3] feat: Do not use backed mode when reading in anndata for
 CXG conversion (#7377)

---
 DEV_ENV_WITHOUT_DOCKER.md                   | 22 +++++++++++++++++++++
 backend/layers/processing/h5ad_data_file.py |  2 +-
 2 files changed, 23 insertions(+), 1 deletion(-)
diff --git a/DEV_ENV_WITHOUT_DOCKER.md b/DEV_ENV_WITHOUT_DOCKER.md
index 50f87e4d141dc..83eb05fc3ebbe 100644
--- a/DEV_ENV_WITHOUT_DOCKER.md
+++ b/DEV_ENV_WITHOUT_DOCKER.md
@@ -51,3 +51,25 @@ Run functional tests for WMG api against the `dev` environment.
 **NOTE**: `dev` environment is a remote environment. These functional tests run locally against a backend in a remote environment called `dev`.
 
 1. `AWS_PROFILE=single-cell-dev DEPLOYMENT_STAGE=dev pytest -v tests/functional/backend/wmg/test_wmg_api.py`
+
+### Set up vips
+
+You may see a `ModuleNotFoundError` for `_libvips` when running a Jupyter notebook locally that calls `pyvips`, for example when running CXG conversion locally. The error may look like this:
+
+```
+ModuleNotFoundError                       Traceback (most recent call last)
+File ~/miniconda3/envs/py11/lib/python3.11/site-packages/pyvips/__init__.py:19
+     18 try:
+---> 19     import _libvips
+     21     logger.debug('Loaded binary module _libvips')
+
+ModuleNotFoundError: No module named '_libvips'
+```
+
+To resolve this, install `vips` with `brew install vips`, since `pyvips` depends on it. If you're using conda, you also need to tell your conda environment where Homebrew installed `vips` (the example below assumes the library directory is `/opt/homebrew/lib`; run `brew --prefix` to check your Homebrew prefix). You can do this with:
+
+```
+mkdir -p ~/miniconda3/envs/<CONDA_ENV_NAME>/etc/conda/activate.d
+touch ~/miniconda3/envs/<CONDA_ENV_NAME>/etc/conda/activate.d/env_vars.sh
+echo 'export DYLD_LIBRARY_PATH=/opt/homebrew/lib:$DYLD_LIBRARY_PATH' >> ~/miniconda3/envs/<CONDA_ENV_NAME>/etc/conda/activate.d/env_vars.sh
+```
diff --git a/backend/layers/processing/h5ad_data_file.py b/backend/layers/processing/h5ad_data_file.py
index 58e7381526462..5720b31a4a558 100644
--- a/backend/layers/processing/h5ad_data_file.py
+++ b/backend/layers/processing/h5ad_data_file.py
@@ -183,7 +183,7 @@ def validate_anndata(self):
 
     def extract_anndata_elements_from_file(self):
         logging.info(f"Reading in AnnData dataset: {path.basename(self.input_filename)}")
-        self.anndata = anndata.read_h5ad(self.input_filename, backed="r")
+        self.anndata = anndata.read_h5ad(self.input_filename)
         logging.info("Completed reading in AnnData dataset!")
 
         self.obs = self.transform_dataframe_index_into_column(self.anndata.obs, "obs", self.obs_index_column_name)

From 8d24ee0e08cc362c3bd2a126b62da03378693463 Mon Sep 17 00:00:00 2001
From: Ronen <kaloster@users.noreply.github.com>
Date: Tue, 5 Nov 2024 13:55:40 -0500
Subject: [PATCH 2/3] chore: move bottom banner to landing footer (#7378)

---
 frontend/src/components/LandingFooter/index.tsx | 3 +++
 frontend/src/components/Layout/index.tsx        | 3 ---
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/frontend/src/components/LandingFooter/index.tsx b/frontend/src/components/LandingFooter/index.tsx
index ef2345c9bd3a7..6759eada07b4f 100644
--- a/frontend/src/components/LandingFooter/index.tsx
+++ b/frontend/src/components/LandingFooter/index.tsx
@@ -6,6 +6,8 @@ import Wordmark from "src/common/images/cellxgene-discover-wordmark.svg";
 import CZILogo from "src/components/common/staticPages/czi-logo-white.png";
 
 import styles from "./index.module.scss";
+import BottomBanner from "../BottomBanner";
+import { BANNER_FEEDBACK_SURVEY_LINK } from "src/common/constants/airtableLinks";
 
 const LandingFooter = (): JSX.Element => {
   return (
@@ -80,6 +82,7 @@ const LandingFooter = (): JSX.Element => {
           </div>
         </div>
       </footer>
+      <BottomBanner surveyLink={BANNER_FEEDBACK_SURVEY_LINK} />
     </>
   );
 };
diff --git a/frontend/src/components/Layout/index.tsx b/frontend/src/components/Layout/index.tsx
index 6fdc283210d56..052b0b8f7fd08 100644
--- a/frontend/src/components/Layout/index.tsx
+++ b/frontend/src/components/Layout/index.tsx
@@ -5,8 +5,6 @@ import Header from "../Header";
 import LandingFooter from "../LandingFooter";
 import LandingHeader from "../MobileFriendlyHeader";
 import { Wrapper } from "./style";
-import BottomBanner from "../BottomBanner";
-import { BANNER_FEEDBACK_SURVEY_LINK } from "src/common/constants/airtableLinks";
 
 interface Props {
   children: ReactNode;
@@ -36,7 +34,6 @@ const Layout = ({ children }: Props) => {
         />
         {children}
         <LandingFooter />
-        <BottomBanner surveyLink={BANNER_FEEDBACK_SURVEY_LINK} />
       </>
     );
   } else if (pathname === ROUTES.CELL_GUIDE) {

From c4360399d1da35da5305fae82ed5a79a2c84caae Mon Sep 17 00:00:00 2001
From: Mim Hastie <mim@clevercanary.com>
Date: Tue, 5 Nov 2024 12:11:51 -0800
Subject: [PATCH 3/3] chore: HuBMAP v7 (#7361)

---
 .../canonical_markers.py                      | 13 ++++++++---
 backend/cellguide/pipeline/constants.py       |  2 +-
 backend/common/doi.py                         | 23 +++++++++++++++----
 .../cellguide_pipeline/requirements.txt       |  1 +
 tests/unit/backend/common/test_doi.py         |  5 ++++
 5 files changed, 36 insertions(+), 8 deletions(-)

diff --git a/backend/cellguide/pipeline/canonical_marker_genes/canonical_markers.py b/backend/cellguide/pipeline/canonical_marker_genes/canonical_markers.py
index 374ceabfb68fb..120f03968747e 100644
--- a/backend/cellguide/pipeline/canonical_marker_genes/canonical_markers.py
+++ b/backend/cellguide/pipeline/canonical_marker_genes/canonical_markers.py
@@ -131,8 +131,13 @@ def fetch_doi_info(ref):
             doi = clean_doi(ref.doi)
             if doi:
                 if doi not in doi_to_citation:
-                    title = self.crossref_provider.get_title_and_citation_from_doi(doi)
-                    doi_to_citation[doi] = title
+                    # Catch and log invalid DOIs.
+                    try:
+                        title = self.crossref_provider.get_title_and_citation_from_doi(doi)
+                        doi_to_citation[doi] = title
+                    except Exception:
+                        logger.error(f"Error fetching title and citation for DOI {doi}")
+                        return None  # None values are filtered out.
                 else:
                     title = doi_to_citation[doi]
                 return doi, title
@@ -278,7 +283,9 @@ def _process_asct_table__parallel(self, tissue: str) -> list[dict[str, str]]:
 
             tissue_id = self._get_tissue_id([AnatomicalStructure(**entry) for entry in row["anatomical_structures"]])
             gene_symbols, gene_names = self._get_gene_info([GeneBiomarker(**entry) for entry in row["biomarkers_gene"]])
-            refs, titles = self._get_references([Reference(**entry) for entry in row["references"]], doi_to_citation)
+            # Protect against invalid references (i.e. references without a DOI).
+            references = [Reference(**entry) for entry in row["references"] if entry and "doi" in entry]
+            refs, titles = self._get_references(references, doi_to_citation)
 
             for cell_type in cell_types:
                 for index in range(len(gene_symbols)):
diff --git a/backend/cellguide/pipeline/constants.py b/backend/cellguide/pipeline/constants.py
index 3d1b143a66a80..f271971e7f0a6 100644
--- a/backend/cellguide/pipeline/constants.py
+++ b/backend/cellguide/pipeline/constants.py
@@ -1,6 +1,6 @@
 import os
 
-ASCTB_MASTER_SHEET_URL = "https://ccf-ontology.hubmapconsortium.org/v2.3.0/ccf-asctb-all.json"
+ASCTB_MASTER_SHEET_URL = "https://cdn.humanatlas.io/hra-asctb-json-releases/hra-asctb-all.v2.1.json"
 
 HOMO_SAPIENS_ORGANISM_ONTOLOGY_TERM_ID = "NCBITaxon:9606"
 
diff --git a/backend/common/doi.py b/backend/common/doi.py
index 989ebbd260c8a..e1ea416550ab5 100644
--- a/backend/common/doi.py
+++ b/backend/common/doi.py
@@ -55,7 +55,12 @@ def portal_get_normalized_doi_url(doi_node: dict, errors: list) -> Optional[str]
 
 def clean_doi(doi: str) -> str:
     """
-    Cleans the DOI string.
+    Cleans the DOI string. Formats handled:
+    - DOI 10.1182/ bloodadvances.2017015073
+    - DOI:10.1167/iovs.15-18117
+    - DOI: 10.1002/biot.201200199
+    - DOI: 10.1111/j.1440-1827.1995.tb03518.x.
+    - https://doi.org/10.1101/2021.01.02.425073
 
     Parameters
     ----------
@@ -71,9 +76,19 @@ def clean_doi(doi: str) -> str:
     if doi == "No DOI":
         return ""
 
+    # Remove trailing periods from the DOI. This handles the
+    # "10.1111/j.1440-1827.1995.tb03518.x."-type cases.
     if doi != "" and doi[-1] == ".":
         doi = doi[:-1]
-    if " " in doi:
-        doi = doi.split(" ")[1]  # this handles cases where the DOI string is "DOI: {doi}"
-    doi = doi.strip()
+
+    # Remove any invalid tokens from the DOI. Invalid tokens include:
+    # "DOI", "DOI:", "DOI: ", and "https://doi.org/".
+    regex = re.compile(r"\bDOI[: ]?\s*|https://doi.org/", re.IGNORECASE)
+    doi = regex.sub("", doi)
+
+    # Remove all whitespace from the DOI. This handles the
+    # "10.1182/ bloodadvances.2017015073"-type cases, as well as any other
+    # leading or trailing whitespace.
+    doi = re.sub(r"\s+", "", doi.strip())
+
     return doi
diff --git a/python_dependencies/cellguide_pipeline/requirements.txt b/python_dependencies/cellguide_pipeline/requirements.txt
index 6633df09a550e..fb1e903dfcaf0 100644
--- a/python_dependencies/cellguide_pipeline/requirements.txt
+++ b/python_dependencies/cellguide_pipeline/requirements.txt
@@ -1,4 +1,5 @@
 anndata==0.8.0
+awscli==1.29.34
 boto3==1.28.7
 cellxgene-census>=1.10.0
 cellxgene-ontology-guide~=1.0.0
diff --git a/tests/unit/backend/common/test_doi.py b/tests/unit/backend/common/test_doi.py
index d98400e601657..6d19035d47004 100644
--- a/tests/unit/backend/common/test_doi.py
+++ b/tests/unit/backend/common/test_doi.py
@@ -10,6 +10,11 @@ def test__clean_doi(self):
             ("DOI: 10.1016/j.cell.2019.11.025.", "10.1016/j.cell.2019.11.025"),
             (" DOI: 10.1016/j.cell.2019.11.025 ", "10.1016/j.cell.2019.11.025"),
             ("10.1016/j.cell.2019.11.025. ", "10.1016/j.cell.2019.11.025"),
+            ("DOI 10.1182/ bloodadvances.2017015073", "10.1182/bloodadvances.2017015073"),
+            ("DOI:10.1167/iovs.15-18117", "10.1167/iovs.15-18117"),
+            ("DOI: 10.1002/biot.201200199", "10.1002/biot.201200199"),
+            ("DOI: 10.1111/j.1440-1827.1995.tb03518.x.", "10.1111/j.1440-1827.1995.tb03518.x"),
+            ("https://doi.org/10.1101/2021.01.02.425073", "10.1101/2021.01.02.425073"),
             ("", ""),
         ]