Skip to content

Commit

Permalink
fix: handle brand name lookup KeyError (#404)
Browse files Browse the repository at this point in the history
  • Loading branch information
jsstevenson authored Jan 4, 2024
1 parent 52fdb61 commit 54a1ef2
Show file tree
Hide file tree
Showing 8 changed files with 118 additions and 24 deletions.
10 changes: 9 additions & 1 deletion src/therapy/etl/hemonc.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,15 @@ def _get_rels(self, therapies: Dict, brand_names: Dict, conditions: Dict) -> Dic
_logger.warning(f"Unrecognized `Maps To` source: {src_raw}")

elif rel_type == "Has brand name":
record["trade_names"].append(brand_names[row[1]])
try:
record["trade_names"].append(brand_names[row[1]])
except KeyError:
_logger.warning(
"Unrecognized brand name ID (%s) for HemOnc concept %s",
row[1],
row[0],
)
continue

elif rel_type == "Was FDA approved yr":
try:
Expand Down
1 change: 1 addition & 0 deletions tests/data/fixtures/merged_fixtures.json
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@
],
"has_indication": [
"[\"hemonc:671\", \"Testicular cancer\", \"ncit:C7251\", {\"regulatory_body\": \"FDA\"}]",
"[\"hemonc:645\", \"Ovarian cancer\", \"ncit:C7431\", {\"regulatory_body\": \"FDA\"}]",
"[\"hemonc:569\", \"Bladder cancer\", \"ncit:C9334\", {\"regulatory_body\": \"FDA\"}]",
"[\"mesh:D018273\", \"Carcinoma, Islet Cell\", \"ncit:C3770\", {\"chembl_max_phase_for_ind\": \"chembl_phase_2\"}]",
"[\"mesh:D000077195\", \"Squamous Cell Carcinoma of Head and Neck\", \"ncit:C4044\", {\"chembl_max_phase_for_ind\": \"chembl_phase_3\"}]",
Expand Down
31 changes: 29 additions & 2 deletions tests/data/fixtures/query_fixtures.json
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,27 @@
],
"type": "Disease"
},
{
"id": "hemonc:645",
"label": "Ovarian cancer",
"extensions": [
{
"type": "Extension",
"name": "regulatory_body",
"value": "FDA"
}
],
"mappings": [
{
"coding": {
"system": "ncit",
"code": "C7431"
},
"relation": "relatedMatch"
}
],
"type": "Disease"
},
{
"id": "hemonc:569",
"label": "Bladder cancer",
Expand Down Expand Up @@ -3500,7 +3521,7 @@
"TX",
"TREAT",
"Therapeutic Method",
"Therapeutic",
"Therapeutic",
"Treatment",
"Therapeutic Technique",
"Therapy",
Expand Down Expand Up @@ -3532,7 +3553,7 @@
"treatment",
"TREAT",
"Therapy",
"Therapeutic",
"Therapeutic",
"Therapeutic Method",
"Therapeutic Interventions",
"Treatment",
Expand Down Expand Up @@ -3851,6 +3872,12 @@
"supplemental_info": {
"regulatory_body": "FDA"
}
},
{
"disease_id": "hemonc:645",
"disease_label": "Ovarian cancer",
"normalized_disease_id": "ncit:C7431",
"supplemental_info": { "regulatory_body": "FDA" }
}
]
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Testicular cancer,HemOnc,Condition,671,2019-05-27,2099-12-31,
1978,HemOnc,Year,738,2019-05-27,2099-12-31,
1995,HemOnc,Year,755,2019-05-27,2099-12-31,
2008,HemOnc,Year,768,2019-05-27,2099-12-31,
2018,HemOnc,Year,778,2019-05-27,2099-12-31,
Cytofos,HemOnc,Brand Name,3100,2019-05-27,2099-12-31,
Ethyol,HemOnc,Brand Name,3101,2019-05-27,2099-12-31,
Bendamax,HemOnc,Brand Name,5159,2019-05-27,2099-12-31,
Expand Down Expand Up @@ -91,6 +92,7 @@ Tecnoplatin,HemOnc,Brand Name,13531,2019-05-27,2099-12-31,
Tisplal,HemOnc,Brand Name,13532,2019-05-27,2099-12-31,
Unistin,HemOnc,Brand Name,13533,2019-05-27,2099-12-31,
Firmagon,HemOnc,Brand Name,15844,2019-05-27,2099-12-31,
Nivestim,HemOnc,Brand Name,18918,2019-05-27,2021-09-06,D
Intravenous,HemOnc,Route,44957,2019-05-27,2099-12-31,
Irritant,HemOnc,Component Class,44997,2019-05-27,2099-12-31,
WHO Essential Cancer Medicine,HemOnc,Component Class,45050,2019-05-27,2099-12-31,
Expand All @@ -99,14 +101,18 @@ Chemotherapy protective agent,HemOnc,Component Class,45069,2019-05-27,2099-12-31
Vesicant,HemOnc,Component Class,45092,2019-05-27,2099-12-31,
Subcutaneous,HemOnc,Route,45153,2019-05-27,2099-12-31,
Nitrogen mustard,HemOnc,Component Class,45230,2019-05-27,2099-12-31,
Biosimilar,HemOnc,Component Class,45268,2019-05-27,2099-12-31,
Intracavitary,HemOnc,Route,45273,2019-05-27,2099-12-31,
Light-chain (AL) amyloidosis medication,HemOnc,Component Class,45303,2019-05-27,2099-12-31,
Platinum agent,HemOnc,Component Class,45388,2019-05-27,2099-12-31,
Citosin,HemOnc,Brand Name,45480,2019-07-22,2099-12-31,
GnRH antagonist,HemOnc,Component Class,45632,2019-05-27,2099-12-31,
Granulocyte colony-stimulating factor,HemOnc,Component Class,45734,2019-05-27,2099-12-31,
Indolent lymphoma,HemOnc,Condition,46094,2019-05-27,2099-12-31,
Androgen receptor inhibitor,HemOnc,Component Class,46196,2019-05-27,2099-12-31,
Belrapzo,HemOnc,Brand Name,51576,2022-06-26,2099-12-31,
Filgrastim-aafi,HemOnc,Component,66258,2021-12-08,2099-12-31,
Nivestym,HemOnc,Brand Name,69510,2021-12-08,2099-12-31,
Cysplatyna,HemOnc,Brand Name,76972,2019-07-22,2099-12-31,
Peyrone's Chloride,HemOnc,Brand Name,93467,2019-07-22,2099-12-31,
Peyrone's Salt,HemOnc,Brand Name,93468,2019-07-22,2099-12-31,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
concept_code_1,concept_code_2,vocabulary_id_1,vocabulary_id_2,relationship_id
26,3100,HemOnc,HemOnc,Has brand name
26,3101,HemOnc,HemOnc,Has brand name
26,645,HemOnc,HemOnc,Has FDA indication
26,44957,HemOnc,HemOnc,Has route
26,45069,HemOnc,HemOnc,Is a
26,755,HemOnc,HemOnc,Was FDA approved yr
26,4126,HemOnc,RxNorm,Maps to
26,J0207,HemOnc,HCPCS,Maps to
26,45069,HemOnc,HemOnc,Has major class
26,J0207,HemOnc,HCPCS,Maps to
26,76310-017,HemOnc,NDC,Maps to
26,645,HemOnc,HemOnc,Has FDA indication
26,4126,HemOnc,RxNorm,Maps to
65,51576,HemOnc,HemOnc,Has brand name
65,5159,HemOnc,HemOnc,Has brand name
65,5160,HemOnc,HemOnc,Has brand name
Expand All @@ -24,30 +24,30 @@ concept_code_1,concept_code_2,vocabulary_id_1,vocabulary_id_2,relationship_id
65,5170,HemOnc,HemOnc,Has brand name
65,5171,HemOnc,HemOnc,Has brand name
65,5172,HemOnc,HemOnc,Has brand name
65,581,HemOnc,HemOnc,Has FDA indication
65,46094,HemOnc,HemOnc,Has FDA indication
65,44957,HemOnc,HemOnc,Has route
65,45092,HemOnc,HemOnc,Is a
65,45230,HemOnc,HemOnc,Is a
65,45058,HemOnc,HemOnc,Is a
65,45303,HemOnc,HemOnc,Is a
65,45050,HemOnc,HemOnc,Is a
65,768,HemOnc,HemOnc,Was FDA approved yr
65,134547,HemOnc,RxNorm,Maps to
65,45058,HemOnc,HemOnc,Has major class
65,45230,HemOnc,HemOnc,Has minor class
65,C9042,HemOnc,HCPCS,Maps to
65,C9243,HemOnc,HCPCS,Maps to
65,J9033,HemOnc,HCPCS,Maps to
65,J9034,HemOnc,HCPCS,Maps to
65,J9036,HemOnc,HCPCS,Maps to
65,J9056,HemOnc,HCPCS,Maps to
65,J9058,HemOnc,HCPCS,Maps to
65,J9059,HemOnc,HCPCS,Maps to
65,45058,HemOnc,HemOnc,Has major class
65,45230,HemOnc,HemOnc,Has minor class
65,J9036,HemOnc,HCPCS,Maps to
65,42367-521,HemOnc,NDC,Maps to
65,63459-348,HemOnc,NDC,Maps to
65,63459-390,HemOnc,NDC,Maps to
65,63459-391,HemOnc,NDC,Maps to
65,581,HemOnc,HemOnc,Has FDA indication
65,46094,HemOnc,HemOnc,Has FDA indication
65,134547,HemOnc,RxNorm,Maps to
105,13471,HemOnc,HemOnc,Has brand name
105,13472,HemOnc,HemOnc,Has brand name
105,13473,HemOnc,HemOnc,Has brand name
Expand Down Expand Up @@ -117,6 +117,9 @@ concept_code_1,concept_code_2,vocabulary_id_1,vocabulary_id_2,relationship_id
105,13531,HemOnc,HemOnc,Has brand name
105,13532,HemOnc,HemOnc,Has brand name
105,13533,HemOnc,HemOnc,Has brand name
105,569,HemOnc,HemOnc,Has FDA indication
105,645,HemOnc,HemOnc,Has FDA indication
105,671,HemOnc,HemOnc,Has FDA indication
105,44957,HemOnc,HemOnc,May have route
105,45273,HemOnc,HemOnc,May have route
105,45092,HemOnc,HemOnc,Is a
Expand All @@ -125,12 +128,11 @@ concept_code_1,concept_code_2,vocabulary_id_1,vocabulary_id_2,relationship_id
105,45058,HemOnc,HemOnc,Is a
105,45050,HemOnc,HemOnc,Is a
105,738,HemOnc,HemOnc,Was FDA approved yr
105,2555,HemOnc,RxNorm,Maps to
105,45058,HemOnc,HemOnc,Has major class
105,45388,HemOnc,HemOnc,Has minor class
105,C9418,HemOnc,HCPCS,Maps to
105,J9060,HemOnc,HCPCS,Maps to
105,J9062,HemOnc,HCPCS,Maps to
105,45058,HemOnc,HemOnc,Has major class
105,45388,HemOnc,HemOnc,Has minor class
105,0143-9504,HemOnc,NDC,Maps to
105,0143-9505,HemOnc,NDC,Maps to
105,0703-5747,HemOnc,NDC,Maps to
Expand All @@ -145,17 +147,29 @@ concept_code_1,concept_code_2,vocabulary_id_1,vocabulary_id_2,relationship_id
105,68083-162,HemOnc,NDC,Maps to
105,68083-163,HemOnc,NDC,Maps to
105,70860-206,HemOnc,NDC,Maps to
105,569,HemOnc,HemOnc,Has FDA indication
105,671,HemOnc,HemOnc,Has FDA indication
105,2555,HemOnc,RxNorm,Maps to
151,15844,HemOnc,HemOnc,Has brand name
151,120237,HemOnc,HemOnc,Has brand name
151,658,HemOnc,HemOnc,Has FDA indication
151,45153,HemOnc,HemOnc,Has route
151,45632,HemOnc,HemOnc,Is a
151,768,HemOnc,HemOnc,Was FDA approved yr
151,475230,HemOnc,RxNorm,Maps to
151,J9155,HemOnc,HCPCS,Maps to
151,46196,HemOnc,HemOnc,Has major class
151,45632,HemOnc,HemOnc,Has minor class
151,J9155,HemOnc,HCPCS,Maps to
151,55566-8303,HemOnc,NDC,Maps to
151,55566-8403,HemOnc,NDC,Maps to
151,658,HemOnc,HemOnc,Has FDA indication
151,475230,HemOnc,RxNorm,Maps to
66258,18918,HemOnc,HemOnc,Has brand name
66258,69510,HemOnc,HemOnc,Has brand name
66258,45153,HemOnc,HemOnc,Has route
66258,45734,HemOnc,HemOnc,Is a
66258,45268,HemOnc,HemOnc,Is a
66258,45050,HemOnc,HemOnc,Is a
66258,778,HemOnc,HemOnc,Was FDA approved yr
66258,Q5110,HemOnc,HCPCS,Maps to
66258,0069-0291,HemOnc,NDC,Maps to
66258,0069-0292,HemOnc,NDC,Maps to
66258,0069-0293,HemOnc,NDC,Maps to
66258,0069-0294,HemOnc,NDC,Maps to
66258,68442,HemOnc,RxNorm,Maps to
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,4 @@ ASP-3550,151,HemOnc
FE-200486,151,HemOnc
ASP 3550,151,HemOnc
FE 200486,151,HemOnc
Filgrastim-aafi,66258,HemOnc
2 changes: 1 addition & 1 deletion tests/scripts/build_hemonc_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from therapy.database import create_db
from therapy.etl import HemOnc

TEST_IDS = ["65", "105", "151", "26"]
TEST_IDS = ["65", "105", "151", "26", "66258"]

ho = HemOnc(create_db()) # don't need to write any data
ho._extract_data(False)
Expand Down
43 changes: 40 additions & 3 deletions tests/unit/test_hemonc.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@ def cisplatin():
"normalized_disease_id": "ncit:C7251",
"supplemental_info": {"regulatory_body": "FDA"},
},
{
"disease_id": "hemonc:645",
"disease_label": "Ovarian cancer",
"normalized_disease_id": "ncit:C7431",
"supplemental_info": {"regulatory_body": "FDA"},
},
],
}
)
Expand Down Expand Up @@ -146,7 +152,25 @@ def degarelix():
)


def test_concept_id_match(hemonc, compare_response, cisplatin, bendamustine, degarelix):
@pytest.fixture(scope="module")
def filgrastim():
    """Build the expected HemOnc record for Filgrastim-aafi.

    This fixture backs the tests for handling of a deprecated brand name:
    the expected record lists "Nivestym" as its only trade name.
    """
    # Collect the expected field values first, then expand them into the model.
    expected_fields = {
        "label": "Filgrastim-aafi",
        "concept_id": "hemonc:66258",
        "aliases": [],
        "xrefs": ["rxcui:68442"],
        "trade_names": ["Nivestym"],
        "approval_ratings": ["hemonc_approved"],
        "approval_year": ["2018"],
    }
    return Therapy(**expected_fields)


def test_concept_id_match(
hemonc, compare_response, cisplatin, bendamustine, degarelix, filgrastim
):
"""Test that concept ID queries resolve to correct record."""
response = hemonc.search("hemonc:105")
compare_response(response, MatchType.CONCEPT_ID, cisplatin)
Expand All @@ -157,8 +181,13 @@ def test_concept_id_match(hemonc, compare_response, cisplatin, bendamustine, deg
response = hemonc.search("hemonc:151")
compare_response(response, MatchType.CONCEPT_ID, degarelix)

response = hemonc.search("hemonc:66258")
compare_response(response, MatchType.CONCEPT_ID, filgrastim)

def test_label_match(hemonc, compare_response, cisplatin, bendamustine, degarelix):

def test_label_match(
hemonc, compare_response, cisplatin, bendamustine, degarelix, filgrastim
):
"""Test that label queries resolve to correct record."""
response = hemonc.search("cisplatin")
compare_response(response, MatchType.LABEL, cisplatin)
Expand All @@ -169,6 +198,9 @@ def test_label_match(hemonc, compare_response, cisplatin, bendamustine, degareli
response = hemonc.search("DEGARELIX")
compare_response(response, MatchType.LABEL, degarelix)

response = hemonc.search("Filgrastim-aafi")
compare_response(response, MatchType.LABEL, filgrastim)


def test_alias_match(hemonc, compare_response, cisplatin, bendamustine, degarelix):
"""Test that alias queries resolve to correct record."""
Expand Down Expand Up @@ -207,7 +239,9 @@ def test_trade_name(hemonc, compare_response, bendamustine, degarelix):
assert response.match_type == MatchType.NO_MATCH


def test_xref_match(hemonc, compare_response, cisplatin, bendamustine, degarelix):
def test_xref_match(
hemonc, compare_response, cisplatin, bendamustine, degarelix, filgrastim
):
"""Test that xref query resolves to correct record."""
response = hemonc.search("rxcui:2555")
compare_response(response, MatchType.XREF, cisplatin)
Expand All @@ -218,6 +252,9 @@ def test_xref_match(hemonc, compare_response, cisplatin, bendamustine, degarelix
response = hemonc.search("rxcui:475230")
compare_response(response, MatchType.XREF, degarelix)

response = hemonc.search("rxcui:68442")
compare_response(response, MatchType.XREF, filgrastim)


def test_metadata(hemonc):
"""Test that source metadata returns correctly."""
Expand Down

0 comments on commit 54a1ef2

Please sign in to comment.