Skip to content

Commit

Permalink
feat: add oncologic disease parameter from mondo terms
Browse files (browse the repository at this point in the history)
  • Loading branch information
jsstevenson committed Oct 6, 2024
1 parent b43a289 commit 71ab44a
Show file tree
Hide file tree
Showing 7 changed files with 27 additions and 5 deletions.
2 changes: 1 addition & 1 deletion src/disease/etl/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def record_order(record: dict) -> tuple:
merged_properties["xrefs"] = list({r["concept_id"] for r in records[1:]})

set_fields = ["aliases", "associated_with"]
scalar_fields = ["label", "pediatric_disease"]
scalar_fields = ["label", "pediatric_disease", "oncologic"]
for record in records:
for field in set_fields:
if field in record:
Expand Down
5 changes: 5 additions & 0 deletions src/disease/etl/mondo.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,8 @@ def _transform_data(self) -> None:
diseases = self._construct_dependency_set(dag, disease_root)
peds_neoplasm_root = "MONDO:0006517"
pediatric_diseases = self._construct_dependency_set(dag, peds_neoplasm_root)
cancer_root = "MONDO:0045024"
cancers = self._construct_dependency_set(dag, cancer_root)

reader = fastobo.iter(str(self._data_file.absolute()))
for item in tqdm(reader, ncols=80, disable=self._silent):
Expand All @@ -198,4 +200,7 @@ def _transform_data(self) -> None:
if concept_id.upper() in pediatric_diseases:
params["pediatric_disease"] = True

if concept_id.upper() in cancers:
params["oncologic"] = True

self._load_disease(params)
1 change: 1 addition & 0 deletions src/disease/etl/oncotree.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def _add_disease(self, disease_node: dict) -> None:
"label": disease_node["name"],
"xrefs": [],
"associated_with": [],
"oncologic": True,
}
refs = disease_node.get("externalReferences", [])
for prefix, codes in refs.items():
Expand Down
14 changes: 11 additions & 3 deletions src/disease/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,9 @@ def _add_disease(
:return: completed normalized response object ready to return to user
"""
disease_obj = domain_models.Disease(
id=f"normalize.disease.{record['concept_id']}", label=record["label"]
id=f"normalize.disease.{record['concept_id']}",
label=record["label"],
extensions=[],
)

source_ids = record.get("xrefs", []) + record.get("associated_with", [])
Expand All @@ -341,12 +343,18 @@ def _add_disease(
disease_obj.alternativeLabels = record["aliases"]

if "pediatric_disease" in record and record["pediatric_disease"] is not None:
disease_obj.extensions = [
disease_obj.extensions.append(
entity_models.Extension(
name="pediatric_disease",
value=record["pediatric_disease"],
)
]
)
if "oncologic" in record and record["oncologic"] is not None:
disease_obj.extensions.append(
entity_models.Extension(
name="oncologic_disease", value=record["oncologic"]
)
)

response["match_type"] = match_type
response["disease"] = disease_obj
Expand Down
1 change: 1 addition & 0 deletions src/disease/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ class Disease(BaseModel):
xrefs: list[StrictStr] = []
associated_with: list[StrictStr] = []
pediatric_disease: bool | None = None
oncologic: bool | None = None

model_config = ConfigDict(
json_schema_extra={
Expand Down
5 changes: 5 additions & 0 deletions tests/unit/test_mondo.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def neuroblastoma():
"mesh:D009447",
],
pediatric_disease=None,
oncologic=True,
)


Expand All @@ -60,6 +61,7 @@ def richter_syndrome():
"umls:C0349631",
],
pediatric_disease=None,
oncologic=True,
)


Expand All @@ -79,6 +81,7 @@ def pediatric_liposarcoma():
xrefs=["DOID:5695", "ncit:C8091"],
associated_with=["umls:C0279984"],
pediatric_disease=True,
oncologic=True,
)


Expand All @@ -94,6 +97,7 @@ def cystic_teratoma_adult():
pediatric_disease=None,
xrefs=["ncit:C9012", "DOID:7079"],
associated_with=["umls:C1368888"],
oncologic=True,
)


Expand All @@ -120,6 +124,7 @@ def nsclc():
"umls:C0007131",
"efo:0003060",
],
oncologic=True,
)


Expand Down
4 changes: 3 additions & 1 deletion tests/unit/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from datetime import datetime

import pytest
from ga4gh.core import domain_models
from ga4gh.core import domain_models, entity_models

from disease.query import InvalidParameterException, QueryHandler
from disease.schemas import MatchType, SourceName
Expand Down Expand Up @@ -81,6 +81,7 @@ def neuroblastoma():
"neural Crest tumor, malignant",
"neuroblastoma, malignant",
],
extensions=[entity_models.Extension(name="oncologic_disease", value=True)],
)


Expand All @@ -92,6 +93,7 @@ def skin_myo():
id="normalize.disease.ncit:C167370",
label="Skin Myoepithelioma",
alternativeLabels=["Cutaneous Myoepithelioma"],
extensions=[entity_models.Extension(name="oncologic_disease", value=True)],
)


Expand Down

0 comments on commit 71ab44a

Please sign in to comment.