-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add cat vrs pydantic models (#4)
close #2
- Loading branch information
Showing
7 changed files
with
390 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
[submodule "submodules/cat_vrs"] | ||
path = submodules/cat_vrs | ||
url = https://github.com/ga4gh/cat-vrs | ||
branch = 1.x |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,5 @@ | ||
"""Package for Cat-VRS Python implementation""" | ||
|
||
from . import profile_models as cat_vrs_models | ||
|
||
__all__ = ["cat_vrs_models"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
"""Define Pydantic models for GA4GH categorical variation objects. | ||
See the `CatVar page <https://www.ga4gh.org/product/categorical-variation-catvar/>`_ on | ||
the GA4GH website for more information. | ||
""" | ||
|
||
from enum import Enum | ||
from typing import Literal | ||
|
||
from ga4gh.core.entity_models import IRI, Coding, DomainEntity | ||
from ga4gh.vrs.models import CopyChange, Location, Range, Variation | ||
from pydantic import BaseModel, Field, RootModel, field_validator | ||
|
||
|
||
class Relation(str, Enum):
    """Enumerate the defined relationships between members of the categorical variant
    and the defining context.

    * ``sequence_liftover``: variants or locations that represent a congruent concept
      on a differing assembly of a human genome (e.g. "GRCh37" and "GRCh38") or gene
      (e.g. Locus Reference Genomic) sequence.
    * ``transcript_projection``: variants or locations that occur on transcripts
      projected from the defined genomic concept.
    * ``codon_translation``: variants or locations that translate from the codon(s)
      represented by the defined concept.
    """

    SEQUENCE_LIFTOVER = "sequence_liftover"
    TRANSCRIPT_PROJECTION = "transcript_projection"
    CODON_TRANSLATION = "codon_translation"
|
||
|
||
class DefiningContextConstraint(BaseModel):
    """Constraint on the defining context: the location or location-state, congruent
    with other reference sequences, about which categorical variation is being
    described.
    """

    # Discriminator tag; fixed to this model's name
    type: Literal["DefiningContextConstraint"] = Field(
        default="DefiningContextConstraint",
        description="MUST be 'DefiningContextConstraint'",
    )
    # Required: a Variation, a Location, or an IRI (per the annotation)
    definingContext: Variation | Location | IRI  # noqa: N815
    # Optional list of relations; defaults to ``None`` when omitted
    relations: list[Relation] | None = Field(
        default=None,
        description="Defined relationships between members of the categorical variant and the defining context. ``sequence_liftover`` refers to variants or locations that represent a congruent concept on a differing assembly of a human genome (e.g. 'GRCh37' and 'GRCh38') or gene (e.g. Locus Reference Genomic) sequence. ``transcript_projection`` refers to variants or locations that occur on transcripts projected from the defined genomic concept. ``codon_translation`` refers to variants or locations that translate from the codon(s) represented by the defined concept.",
    )
|
||
|
||
class CopyCountConstraint(BaseModel):
    """Constraint on the absolute number of copies in a system."""

    # Discriminator tag; fixed to this model's name
    type: Literal["CopyCountConstraint"] = Field(
        default="CopyCountConstraint",
        description="MUST be 'CopyCountConstraint'",
    )
    # Required: either a single integer or a VRS ``Range``
    copies: int | Range
|
||
|
||
class CopyChangeConstraint(BaseModel):
    """Constraint representing a copy number change."""

    # Discriminator tag; fixed to this model's name
    type: Literal["CopyChangeConstraint"] = Field(
        default="CopyChangeConstraint",
        description="MUST be 'CopyChangeConstraint'",
    )
    copyChange: Coding  # noqa: N815

    @field_validator("copyChange")
    @classmethod
    def validate_copy_change(cls, v: Coding) -> Coding:
        """Ensure the coding's code is a member of the ``CopyChange`` enum.

        :param v: copyChange value
        :raises ValueError: If ``copyChange.code`` is not a valid CopyChange
        :return: The unchanged copyChange value
        """
        # ``code`` is a wrapper whose underlying value is exposed as ``root``
        code_value = v.code.root
        try:
            # Enum lookup by value raises ValueError for unknown codes
            CopyChange(code_value)
        except ValueError as e:
            allowed = [cc.value for cc in CopyChange]
            err_msg = f"copyChange, {code_value}, not one of {allowed}"
            raise ValueError(err_msg) from e
        return v
|
||
|
||
class Constraint(RootModel):
    """Root-model wrapper over the supported constraint types.

    Constraints are used to construct an intensional semantics of categorical variant
    types. The wrapped constraint is stored on ``root`` and is selected by its ``type``
    discriminator field.
    """

    root: DefiningContextConstraint | CopyCountConstraint | CopyChangeConstraint = (
        Field(default=..., discriminator="type")
    )
|
||
|
||
class CategoricalVariant(DomainEntity):
    """Represent a categorically-defined domain for variation, in which individual
    contextual variation instances may be members of the domain.
    """

    # Type tag; fixed to this model's name
    type: Literal["CategoricalVariant"] = Field(
        default="CategoricalVariant",
        description="MUST be 'CategoricalVariant'",
    )
    # Optional; each entry is a Variation or an IRI (per the annotation)
    members: list[Variation | IRI] | None = Field(
        default=None,
        description="A non-exhaustive list of VRS variation contexts that satisfy the constraints of this categorical variant.",
    )
    # Optional list of constraints; defaults to ``None`` when omitted
    constraints: list[Constraint] | None = None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,154 @@ | ||
"""Define Pydantic models for GA4GH categorical variation objects. | ||
See the `CatVar page <https://www.ga4gh.org/product/categorical-variation-catvar/>`_ on | ||
the GA4GH website for more information. | ||
""" | ||
|
||
from enum import Enum | ||
|
||
from ga4gh.cat_vrs.core_models import ( | ||
CategoricalVariant, | ||
Constraint, | ||
CopyChangeConstraint, | ||
CopyCountConstraint, | ||
DefiningContextConstraint, | ||
Relation, | ||
) | ||
from pydantic import BaseModel, Field, field_validator | ||
|
||
|
||
class CatVrsType(str, Enum):
    """Enumerate the CatVRS type names as string-valued members."""

    PROTEIN_SEQ_CONS = "ProteinSequenceConsequence"
    CANONICAL_ALLELE = "CanonicalAllele"
    CATEGORICAL_CNV = "CategoricalCnv"
    DESCRIBED_VAR = "DescribedVariation"
    NUMBER_COUNT = "NumberCount"
    NUMBER_CHANGE = "NumberChange"
    QUANTITY_VARIANCE = "QuantityVariance"
|
||
|
||
class ProteinSequenceConsequenceProperties(BaseModel):
    """Cat-VRS Constraints found in Protein Sequence Consequences."""

    # Required and non-empty (``min_length=1``)
    constraints: list[Constraint] = Field(..., min_length=1)

    @field_validator("constraints")
    @classmethod
    def validate_constraints(cls, v: list[Constraint]) -> list[Constraint]:
        """Validate constraints property

        :param v: Constraints property to validate
        :raises ValueError: If none of the ``relations`` contains
            ``Relation.CODON_TRANSLATION.value`` exactly once.
        :return: Constraints property
        """
        for constraint in v:
            # ``Constraint`` is a RootModel wrapper: the concrete constraint lives on
            # ``root``, so ``relations`` must be read from there, not the wrapper.
            inner = constraint.root

            # Only ``DefiningContextConstraint`` declares ``relations``; the copy
            # constraints have no such field and cannot satisfy this rule.
            if not isinstance(inner, DefiningContextConstraint):
                continue

            # ``relations`` is optional, so treat ``None`` as an empty list
            if (inner.relations or []).count(Relation.CODON_TRANSLATION) == 1:
                return v

        err_msg = f"At least one `relations` in `constraints` must contain `{Relation.CODON_TRANSLATION.value}` exactly once."
        raise ValueError(err_msg)
|
||
|
||
class ProteinSequenceConsequence(
    ProteinSequenceConsequenceProperties, CategoricalVariant
):
    """A change that occurs in a protein sequence as a result of genomic changes.

    Due to the degenerate nature of the genetic code, there are often several genomic
    changes that can cause a protein sequence consequence. Like a
    :ref:`CanonicalAllele`, the protein sequence consequence is defined by an
    `Allele <https://vrs.ga4gh.org/en/2.x/concepts/MolecularVariation/Allele.html#>`_
    that is representative of a collection of congruent Protein Alleles that share the
    same altered codon(s).
    """
|
||
|
||
class CanonicalAlleleProperties(BaseModel):
    """Cat-VRS Constraints found in Canonical Alleles."""

    # Required and non-empty (``min_length=1``)
    constraints: list[Constraint] = Field(..., min_length=1)

    @field_validator("constraints")
    @classmethod
    def validate_constraints(cls, v: list[Constraint]) -> list[Constraint]:
        """Validate constraints property

        :param v: Constraints property to validate
        :raises ValueError: If none of the ``relations`` contains both
            ``Relation.SEQUENCE_LIFTOVER`` and ``Relation.TRANSCRIPT_PROJECTION``
            exactly once.
        :return: Constraints property
        """
        for constraint in v:
            # ``Constraint`` is a RootModel wrapper: the concrete constraint lives on
            # ``root``, so ``relations`` must be read from there, not the wrapper.
            inner = constraint.root

            # Only ``DefiningContextConstraint`` declares ``relations``; the copy
            # constraints have no such field and cannot satisfy this rule.
            if not isinstance(inner, DefiningContextConstraint):
                continue

            # ``relations`` is optional, so treat ``None`` as an empty list
            relations = inner.relations or []
            if (
                relations.count(Relation.SEQUENCE_LIFTOVER) == 1
                and relations.count(Relation.TRANSCRIPT_PROJECTION) == 1
            ):
                return v

        # Use ``.value`` so the message shows the relation strings on every Python
        # version (formatting a mixed-in enum member directly is version-dependent).
        err_msg = f"At least one `relations` in `constraints` must contain {Relation.SEQUENCE_LIFTOVER.value} and {Relation.TRANSCRIPT_PROJECTION.value} exactly once."
        raise ValueError(err_msg)
|
||
|
||
class CanonicalAllele(CanonicalAlleleProperties, CategoricalVariant):
    """A canonical allele, defined by an
    `Allele <https://vrs.ga4gh.org/en/2.x/concepts/MolecularVariation/Allele.html#>`_
    that is representative of a collection of congruent Alleles.

    Each of the congruent Alleles depicts the same nucleic acid change on different
    underlying reference sequences. Congruent representations of an Allele often exist
    across different genome assemblies and associated cDNA transcript representations.
    """
|
||
|
||
class CategoricalCnvProperties(BaseModel):
    """Cat-VRS Constraints found in CategoricalCnvs."""

    # Required and non-empty (``min_length=1``)
    constraints: list[Constraint] = Field(..., min_length=1)

    @field_validator("constraints")
    @classmethod
    def validate_constraints(cls, v: list[Constraint]) -> list[Constraint]:
        """Validate constraints property

        :param v: Constraints property to validate
        :raises ValueError: If no ``DefiningContextConstraint`` with
            ``Relation.SEQUENCE_LIFTOVER`` in ``relations`` is found in ``constraints``
            or if neither ``CopyCountConstraint`` nor ``CopyChangeConstraint`` is found
            in ``constraints``.
        :return: Constraints property
        """
        # ``Constraint`` is a RootModel wrapper, so type checks must be made against
        # the wrapped ``root`` value; the wrapper itself never matches these classes.
        inner_constraints = [constraint.root for constraint in v]

        defining_context_found = any(
            isinstance(inner, DefiningContextConstraint)
            # ``relations`` is optional; ``None`` or an empty list cannot match
            and bool(inner.relations)
            and Relation.SEQUENCE_LIFTOVER in inner.relations
            for inner in inner_constraints
        )
        copy_found = any(
            isinstance(inner, (CopyChangeConstraint, CopyCountConstraint))
            for inner in inner_constraints
        )

        # The defining-context check is reported first, as in the original ordering
        if not defining_context_found:
            err_msg = f"At least one item in `constraints` must be a `DefiningContextConstraint` and contain `{Relation.SEQUENCE_LIFTOVER.value}` in `relations`."
            raise ValueError(err_msg)

        if not copy_found:
            err_msg = "At least one item in `constraints` must be a `CopyCountConstraint` or a `CopyChangeConstraint`."
            raise ValueError(err_msg)

        return v
|
||
|
||
class CategoricalCnv(CategoricalCnvProperties, CategoricalVariant):
    """Represent the constraints for matching knowledge about CNVs."""
Oops, something went wrong.