From e49c184f7a2abbd85c31849dabfec5f0cbca475e Mon Sep 17 00:00:00 2001 From: Mekhla Kapoor <54870020+mekhlakapoor@users.noreply.github.com> Date: Tue, 17 Dec 2024 13:47:52 -0800 Subject: [PATCH] Fix: Handle Viralmaterial Breaks (#307) * handles material name nonetype, titer int * parsing titer (WIP) * parses titer to int * completed coverage --- .../sharepoint/las2020/mapping.py | 60 +++++++++++++++++-- src/aind_metadata_service/tars/mapping.py | 3 +- .../las2020/mapped/mapped_list_item2.json | 4 +- .../las2020/mapped/mapped_list_item3.json | 4 +- .../sharepoint/las2020/raw/list_item2.json | 2 +- .../sharepoint/las2020/raw/list_item3.json | 2 +- tests/sharepoint/las2020/test_mapping.py | 2 +- 7 files changed, 64 insertions(+), 13 deletions(-) diff --git a/src/aind_metadata_service/sharepoint/las2020/mapping.py b/src/aind_metadata_service/sharepoint/las2020/mapping.py index cba66fa2..f373d105 100644 --- a/src/aind_metadata_service/sharepoint/las2020/mapping.py +++ b/src/aind_metadata_service/sharepoint/las2020/mapping.py @@ -78,7 +78,7 @@ class InjectableMaterial: prep_lot_id: Optional[str] = None genome_copy: Optional[str] = None virus_volume: Optional[Decimal] = None - titer: Optional[Decimal] = None + titer: Optional[str] = None @dataclass @@ -115,6 +115,9 @@ class MappedLASList: r"^([a-zA-Z0-9\s\-\(\)]+?)\s+(\d+(\.\d+)?)?\s*([a-zA-Z%\/]+)?" ) DOSE_PAREN_REGEX = re.compile(r"\((\d+(\.\d+)?)\s*([a-zA-Z%\/]+)\)") + SCIENTIFIC_NOTATION_REGEX = re.compile(r"^[-+]?\d+(?:\.\d+)?[eE][-+]?\d+$") + VALUE_WITH_UNIT_REGEX = re.compile(r"^([\d\.eE+-]+)\s*(\S+)$") + INTEGER_REGEX = re.compile(r"^[+-]?\d+$") def __init__(self, las: LASList): """Class constructor""" @@ -172,6 +175,52 @@ def _parse_dose_sub_to_nonviral_material( name=dose_sub, ) + def _is_scientific_notation(self, value_str: str) -> bool: + """Checks whether titer field is in scientific notation.""" + return bool(re.search(self.SCIENTIFIC_NOTATION_REGEX, value_str)) + + def _is_value_with_unit(self, value_str: str) -> bool: + """Checks whether titer field is in titer with unit format.""" + return bool(re.search(self.VALUE_WITH_UNIT_REGEX, value_str)) + + def _parse_titer_str(self, titer_str: str) -> Optional[float]: + """Parse string representation of titer into float.""" + if re.match(self.INTEGER_REGEX, titer_str): + return int(float(titer_str)) + return None + + def _parse_titer(self, titer_str: Optional[str]) -> Optional[tuple]: + """Parses titer field to integer.""" + unit = "gc/mL" # default unit + if titer_str is None: + return None, unit + + titer_str = titer_str.strip() + numeric_value = self._parse_titer_str(titer_str) + if numeric_value is not None: + return numeric_value, unit + + titer_str = titer_str.strip() + # If the string matches scientific notation + if self._is_scientific_notation(titer_str): + titer = float( + re.match(self.SCIENTIFIC_NOTATION_REGEX, titer_str).group(0) + ) + return int(titer), unit # Always return an integer + + # Check if the string has a value with unit + elif self._is_value_with_unit(titer_str): + match = re.match(self.VALUE_WITH_UNIT_REGEX, titer_str) + titer = match.group(1) + unit = match.group(2) + # Convert only the numeric value part + numeric_value = self._parse_titer_str(titer) + if numeric_value is not None: + return numeric_value, unit + + # If none of the above, return None with default unit + return None, unit + @property def aind_accommodation_comment(self) -> Optional[str]: """Maps accommodation_comment to aind model""" @@ -2126,16 +2175,17 @@ class RetroOrbitalInjectionInfo: ), ) - @staticmethod - def map_viral_materials(injectable_materials: List[InjectableMaterial]): + def map_viral_materials( + self, injectable_materials: List[InjectableMaterial] + ): """Maps injectable material to viral material""" # TODO: map injectable material info in case tars gets no response viral_materials = [] for material in injectable_materials: - # Use prep_lot_id in name for tars query + titer, unit = self._parse_titer(getattr(material, "titer", None)) viral_materials.append( ViralMaterial.model_construct( - name=material.prep_lot_id, titer=material.titer + name=material.prep_lot_id, titer=titer, titer_unit=unit ) ) return viral_materials diff --git a/src/aind_metadata_service/tars/mapping.py b/src/aind_metadata_service/tars/mapping.py index 9fd93f19..b5f29ea1 100644 --- a/src/aind_metadata_service/tars/mapping.py +++ b/src/aind_metadata_service/tars/mapping.py @@ -225,6 +225,7 @@ def get_virus_strains(response: ModelResponse) -> List: virus_strains = [ getattr(material, "name").strip() for material in procedure.injection_materials + if getattr(material, "name", None) ] viruses.extend(virus_strains) return viruses @@ -252,7 +253,7 @@ def integrate_injection_materials( ): if isinstance( injection_material, ViralMaterial - ) and hasattr(injection_material, "name"): + ) and getattr(injection_material, "name", None): virus_strain = injection_material.name.strip() tars_response = tars_mapping.get(virus_strain) if ( diff --git a/tests/resources/sharepoint/las2020/mapped/mapped_list_item2.json b/tests/resources/sharepoint/las2020/mapped/mapped_list_item2.json index 2fed8e59..bff44901 100644 --- a/tests/resources/sharepoint/las2020/mapped/mapped_list_item2.json +++ b/tests/resources/sharepoint/las2020/mapped/mapped_list_item2.json @@ -17,7 +17,7 @@ "material_type": "Virus", "name": "GT340C", "tars_identifiers": null, - "titer": "50", + "titer": 50, "titer_unit": "gc/mL" }, { @@ -25,7 +25,7 @@ "material_type": "Virus", "name": null, "tars_identifiers": null, - "titer": "50 gc/mL", + "titer": 50, "titer_unit": "gc/mL" }, { diff --git a/tests/resources/sharepoint/las2020/mapped/mapped_list_item3.json b/tests/resources/sharepoint/las2020/mapped/mapped_list_item3.json index 58241619..4b24447f 100644 --- a/tests/resources/sharepoint/las2020/mapped/mapped_list_item3.json +++ b/tests/resources/sharepoint/las2020/mapped/mapped_list_item3.json @@ -17,7 +17,7 @@ "material_type": "Virus", "name": "GT340C", "tars_identifiers": null, - "titer": "50", + "titer": 700000000000, "titer_unit": "gc/mL" }, { @@ -25,7 +25,7 @@ "material_type": "Virus", "name": null, "tars_identifiers": null, - "titer": "50 gc/mL", + "titer": 50, "titer_unit": "gc/mL" }, { diff --git a/tests/resources/sharepoint/las2020/raw/list_item2.json b/tests/resources/sharepoint/las2020/raw/list_item2.json index 90175522..8dd9c1c6 100644 --- a/tests/resources/sharepoint/las2020/raw/list_item2.json +++ b/tests/resources/sharepoint/las2020/raw/list_item2.json @@ -168,7 +168,7 @@ "roVolV5d": null, "roTite1": "50", "roTite1b": "50 gc/mL", - "roTite1c": null, + "roTite1c": "abc", "roTite1d": null, "roTite2": null, "roTite2b": null, diff --git a/tests/resources/sharepoint/las2020/raw/list_item3.json b/tests/resources/sharepoint/las2020/raw/list_item3.json index fc139345..43868bf8 100644 --- a/tests/resources/sharepoint/las2020/raw/list_item3.json +++ b/tests/resources/sharepoint/las2020/raw/list_item3.json @@ -166,7 +166,7 @@ "roVolV5b": null, "roVolV5c": null, "roVolV5d": null, - "roTite1": "50", + "roTite1": "7E11", "roTite1b": "50 gc/mL", "roTite1c": null, "roTite1d": null, diff --git a/tests/sharepoint/las2020/test_mapping.py b/tests/sharepoint/las2020/test_mapping.py index f2b75437..66fd1ed6 100644 --- a/tests/sharepoint/las2020/test_mapping.py +++ b/tests/sharepoint/las2020/test_mapping.py @@ -61,7 +61,7 @@ def test_parser(self): for list_item in self.list_items: raw_data = list_item[0] expected_mapped_data = list_item[1] - raw_file_name = list_item[1] + raw_file_name = list_item[2] logging.debug(f"Processing file: {raw_file_name}") las_model = LASList.model_validate(raw_data) mapper = MappedLASList(las=las_model)