Skip to content

Commit

Permalink
Fix: Handle Viralmaterial Breaks (#307)
Browse files Browse the repository at this point in the history
* handles material name nonetype, titer int

* parsing titer (WIP)

* parses titer to int

* completed coverage
  • Loading branch information
mekhlakapoor authored Dec 17, 2024
1 parent a44d82a commit e49c184
Show file tree
Hide file tree
Showing 7 changed files with 64 additions and 13 deletions.
60 changes: 55 additions & 5 deletions src/aind_metadata_service/sharepoint/las2020/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ class InjectableMaterial:
prep_lot_id: Optional[str] = None
genome_copy: Optional[str] = None
virus_volume: Optional[Decimal] = None
titer: Optional[Decimal] = None
titer: Optional[str] = None


@dataclass
Expand Down Expand Up @@ -115,6 +115,9 @@ class MappedLASList:
r"^([a-zA-Z0-9\s\-\(\)]+?)\s+(\d+(\.\d+)?)?\s*([a-zA-Z%\/]+)?"
)
DOSE_PAREN_REGEX = re.compile(r"\((\d+(\.\d+)?)\s*([a-zA-Z%\/]+)\)")
SCIENTIFIC_NOTATION_REGEX = re.compile(r"^[-+]?\d+(?:\.\d+)?[eE][-+]?\d+$")
VALUE_WITH_UNIT_REGEX = re.compile(r"^([\d\.eE+-]+)\s*(\S+)$")
INTEGER_REGEX = re.compile(r"^[+-]?\d+$")

def __init__(self, las: LASList):
"""Class constructor"""
Expand Down Expand Up @@ -172,6 +175,52 @@ def _parse_dose_sub_to_nonviral_material(
name=dose_sub,
)

def _is_scientific_notation(self, value_str: str) -> bool:
    """Return True when value_str matches SCIENTIFIC_NOTATION_REGEX.

    The pattern is anchored at both ends, so the whole string has to be a
    mantissa followed by an exponent, e.g. "7E11" or "1.5e-3".
    """
    found = self.SCIENTIFIC_NOTATION_REGEX.search(value_str)
    return found is not None

def _is_value_with_unit(self, value_str: str) -> bool:
    """Return True when value_str matches VALUE_WITH_UNIT_REGEX.

    That pattern expects a leading run of numeric characters, optional
    whitespace, and then a non-whitespace suffix, e.g. "50 gc/mL".
    """
    found = self.VALUE_WITH_UNIT_REGEX.search(value_str)
    return found is not None

def _parse_titer_str(self, titer_str: str) -> Optional[int]:
    """Parse a plain integer string into an int.

    Parameters
    ----------
    titer_str : str
        Candidate text; only strings matching INTEGER_REGEX (an optional
        sign followed by digits) are converted.

    Returns
    -------
    Optional[int]
        The parsed integer, or None when the text is not a plain integer.
    """
    if self.INTEGER_REGEX.match(titer_str):
        # Convert with int() directly: going through float() first would
        # silently round any value above 2**53.
        return int(titer_str)
    return None

def _parse_titer(self, titer_str: Optional[str]) -> tuple:
    """Parse a raw titer field into an integer value and its unit.

    Accepted forms are a plain integer ("50"), scientific notation
    ("7E11"), and either of those followed by a unit ("50 gc/mL",
    "7E11 gc/mL").

    Parameters
    ----------
    titer_str : Optional[str]
        Raw titer text; surrounding whitespace is ignored.

    Returns
    -------
    tuple
        Always a ``(value, unit)`` pair. ``value`` is an int, or None when
        the text is missing or cannot be parsed. ``unit`` is the unit found
        in the text when a value with unit was parsed, otherwise the
        default "gc/mL".
    """
    default_unit = "gc/mL"
    if titer_str is None:
        return None, default_unit

    def to_int(text: str) -> Optional[int]:
        """Convert an integer or scientific-notation string to int."""
        if self._is_scientific_notation(text):
            try:
                return int(float(text))
            except OverflowError:
                # Exponent too large for a float, e.g. "1e400".
                return None
        return self._parse_titer_str(text)

    titer_str = titer_str.strip()
    numeric_value = to_int(titer_str)
    if numeric_value is not None:
        return numeric_value, default_unit
    if self._is_scientific_notation(titer_str):
        # A bare number that overflowed: stop here so the value-with-unit
        # pattern cannot backtrack and split its digits into a bogus unit.
        return None, default_unit

    match = self.VALUE_WITH_UNIT_REGEX.match(titer_str)
    if match:
        # Only trust the captured unit once the numeric part has parsed;
        # otherwise fall through and report the default unit.
        numeric_value = to_int(match.group(1))
        if numeric_value is not None:
            return numeric_value, match.group(2)

    return None, default_unit

@property
def aind_accommodation_comment(self) -> Optional[str]:
"""Maps accommodation_comment to aind model"""
Expand Down Expand Up @@ -2126,16 +2175,17 @@ class RetroOrbitalInjectionInfo:
),
)

@staticmethod
def map_viral_materials(injectable_materials: List[InjectableMaterial]):
def map_viral_materials(
self, injectable_materials: List[InjectableMaterial]
):
"""Maps injectable material to viral material"""
# TODO: map injectable material info in case tars gets no response
viral_materials = []
for material in injectable_materials:
# Use prep_lot_id in name for tars query
titer, unit = self._parse_titer(getattr(material, "titer", None))
viral_materials.append(
ViralMaterial.model_construct(
name=material.prep_lot_id, titer=material.titer
name=material.prep_lot_id, titer=titer, titer_unit=unit
)
)
return viral_materials
Expand Down
3 changes: 2 additions & 1 deletion src/aind_metadata_service/tars/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ def get_virus_strains(response: ModelResponse) -> List:
virus_strains = [
getattr(material, "name").strip()
for material in procedure.injection_materials
if getattr(material, "name", None)
]
viruses.extend(virus_strains)
return viruses
Expand Down Expand Up @@ -252,7 +253,7 @@ def integrate_injection_materials(
):
if isinstance(
injection_material, ViralMaterial
) and hasattr(injection_material, "name"):
) and getattr(injection_material, "name", None):
virus_strain = injection_material.name.strip()
tars_response = tars_mapping.get(virus_strain)
if (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,15 @@
"material_type": "Virus",
"name": "GT340C",
"tars_identifiers": null,
"titer": "50",
"titer": 50,
"titer_unit": "gc/mL"
},
{
"addgene_id": null,
"material_type": "Virus",
"name": null,
"tars_identifiers": null,
"titer": "50 gc/mL",
"titer": 50,
"titer_unit": "gc/mL"
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,15 @@
"material_type": "Virus",
"name": "GT340C",
"tars_identifiers": null,
"titer": "50",
"titer": 700000000000,
"titer_unit": "gc/mL"
},
{
"addgene_id": null,
"material_type": "Virus",
"name": null,
"tars_identifiers": null,
"titer": "50 gc/mL",
"titer": 50,
"titer_unit": "gc/mL"
},
{
Expand Down
2 changes: 1 addition & 1 deletion tests/resources/sharepoint/las2020/raw/list_item2.json
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@
"roVolV5d": null,
"roTite1": "50",
"roTite1b": "50 gc/mL",
"roTite1c": null,
"roTite1c": "abc",
"roTite1d": null,
"roTite2": null,
"roTite2b": null,
Expand Down
2 changes: 1 addition & 1 deletion tests/resources/sharepoint/las2020/raw/list_item3.json
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@
"roVolV5b": null,
"roVolV5c": null,
"roVolV5d": null,
"roTite1": "50",
"roTite1": "7E11",
"roTite1b": "50 gc/mL",
"roTite1c": null,
"roTite1d": null,
Expand Down
2 changes: 1 addition & 1 deletion tests/sharepoint/las2020/test_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def test_parser(self):
for list_item in self.list_items:
raw_data = list_item[0]
expected_mapped_data = list_item[1]
raw_file_name = list_item[1]
raw_file_name = list_item[2]
logging.debug(f"Processing file: {raw_file_name}")
las_model = LASList.model_validate(raw_data)
mapper = MappedLASList(las=las_model)
Expand Down

0 comments on commit e49c184

Please sign in to comment.