# Patch summary (commentary only; everything before the first "diff --git"
# header is not part of any hunk).
#
# File 1: src/ga4gh/core/_internal/identifiers.py, inside identify_all().
#   Adds a string short-circuit placed after the `None` check and the
#   `output_obj = input_obj` assignment, and before the pydantic custom-type
#   handling: when input_obj is a str that starts with "ga4gh:" but does not
#   start with "ga4gh:SQ", the function returns the text after the last "."
#   in that string (input_obj.split(".")[-1]). If such a string contains no
#   ".", that expression yields the whole string unchanged.
#
# File 2: tests/test_vrs2.py.
#   Adds test_enref2(), which builds an Allele from a literal dict, calls
#   vrs_enref() with an empty object_store, and asserts that:
#     - model_dump() of the original and the enreffed object are equal once
#       "location" is removed from both;
#     - a_enreffed.location equals the string
#       'ga4gh:SL.m4B7OEJ6J3q6gPakM8mSEqKZeQkj1KYC';
#     - model_dump(exclude_none=True) of the enreffed object equals the
#       expected dict with "location" replaced by that string.
#
# NOTE(review): this copy of the patch was whitespace-collapsed onto a single
# line. Line breaks below follow the hunk headers (10 and 40 post-image
# lines), but the exact indentation of each line is reconstructed, not
# recovered -- verify against the original commit before applying.
diff --git a/src/ga4gh/core/_internal/identifiers.py b/src/ga4gh/core/_internal/identifiers.py
index 76bbd7e5..3df16352 100644
--- a/src/ga4gh/core/_internal/identifiers.py
+++ b/src/ga4gh/core/_internal/identifiers.py
@@ -210,6 +210,10 @@ def identify_all(
     if input_obj is None:
         return None
     output_obj = input_obj
+    if isinstance(input_obj, str):
+        if input_obj.startswith("ga4gh:") and not input_obj.startswith("ga4gh:SQ"):
+            return input_obj.split(".")[-1]
+
     if is_pydantic_custom_type(input_obj):
         val = export_pydantic_model(input_obj)
         if isinstance(val, str) and is_curie_type(val) and is_ga4gh_identifier(val):
diff --git a/tests/test_vrs2.py b/tests/test_vrs2.py
index 899ddfb3..0e67ef4d 100644
--- a/tests/test_vrs2.py
+++ b/tests/test_vrs2.py
@@ -200,6 +200,40 @@ def test_enref():
     assert dereffed.location.model_dump(exclude_none=True) == allele_383650.location.model_dump(exclude_none=True)
     assert dereffed.model_dump() == allele_383650.model_dump()
 
+def test_enref2():
+    object_store = {}
+    a = {
+        "type": "Allele",
+        "location": {
+            "type": "SequenceLocation",
+            "sequenceReference": {
+                "type": "SequenceReference",
+                "refgetAccession": "SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl"
+            },
+            "start": 44908821,
+            "end": 44908822
+        },
+        "state": {
+            "type": "LiteralSequenceExpression",
+            "sequence": "T"
+        }
+    }
+    vo_a = models.Allele(**a)
+    a_enreffed = vrs_enref(vo_a, object_store=object_store)
+    orig_no_loc = vo_a.model_dump().copy()
+    orig_no_loc.pop("location")
+    actual_no_loc = a_enreffed.model_dump().copy()
+    actual_no_loc.pop("location")
+    assert orig_no_loc == actual_no_loc, "Original and enreffed match except for enreffed field"
+    assert a_enreffed.location == 'ga4gh:SL.m4B7OEJ6J3q6gPakM8mSEqKZeQkj1KYC'
+    assert a_enreffed.model_dump(exclude_none=True) == {
+        'type': 'Allele',
+        'location': 'ga4gh:SL.m4B7OEJ6J3q6gPakM8mSEqKZeQkj1KYC',
+        'state': {
+            'type': 'LiteralSequenceExpression',
+            'sequence': 'T'
+        }
+    }
 
 def test_class_refatt_map():
     class_refatt_map_expected = {