Skip to content

Commit

Permalink
feat: add cat vrs pydantic models (#4)
Browse files Browse the repository at this point in the history
close #2
  • Loading branch information
korikuzma authored Oct 31, 2024
1 parent abbf791 commit 6b35c8e
Show file tree
Hide file tree
Showing 7 changed files with 390 additions and 1 deletion.
4 changes: 4 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[submodule "submodules/cat_vrs"]
path = submodules/cat_vrs
url = https://github.com/ga4gh/cat-vrs
branch = 1.x
6 changes: 5 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,10 @@ keywords = [
]
requires-python = ">=3.10"
dynamic = ["version"]
dependencies = []
dependencies = [
"ga4gh.vrs~=2.0.0a12",
"pydantic==2.*",
]

[project.optional-dependencies]
dev = [
Expand Down Expand Up @@ -134,6 +137,7 @@ ignore = [
# S101 - assert
# B011 - assert-false
"tests/*" = ["ANN001", "ANN2", "ANN102", "S101", "B011"]
"src/ga4gh/cat_vrs/*models.py" = ["ANN102"]

[tool.setuptools.packages.find]
where = ["src"]
Expand Down
4 changes: 4 additions & 0 deletions src/ga4gh/cat_vrs/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,5 @@
"""Package for Cat-VRS Python implementation"""

# Expose the ``profile_models`` module at package level under the
# ``cat_vrs_models`` alias; it is the only name listed in ``__all__``.
from . import profile_models as cat_vrs_models

__all__ = ["cat_vrs_models"]
99 changes: 99 additions & 0 deletions src/ga4gh/cat_vrs/core_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
"""Define Pydantic models for GA4GH categorical variation objects.
See the `CatVar page <https://www.ga4gh.org/product/categorical-variation-catvar/>`_ on
the GA4GH website for more information.
"""

from enum import Enum
from typing import Literal

from ga4gh.core.entity_models import IRI, Coding, DomainEntity
from ga4gh.vrs.models import CopyChange, Location, Range, Variation
from pydantic import BaseModel, Field, RootModel, field_validator


class Relation(str, Enum):
    """Relationships between members of a categorical variant and its defining context.

    Members:

    * ``sequence_liftover`` - variants or locations representing a congruent
      concept on a differing assembly of a human genome (e.g. "GRCh37" and
      "GRCh38") or gene (e.g. Locus Reference Genomic) sequence.
    * ``transcript_projection`` - variants or locations occurring on transcripts
      projected from the defined genomic concept.
    * ``codon_translation`` - variants or locations that translate from the
      codon(s) represented by the defined concept.
    """

    # Congruent concept on another assembly or gene sequence.
    SEQUENCE_LIFTOVER = "sequence_liftover"
    # Located on a transcript projected from the genomic concept.
    TRANSCRIPT_PROJECTION = "transcript_projection"
    # Translated from the codon(s) of the defined concept.
    CODON_TRANSLATION = "codon_translation"


class DefiningContextConstraint(BaseModel):
    """Constraint naming the location or location-state, congruent with other
    reference sequences, about which categorical variation is being described.
    """

    # Discriminator value used by ``Constraint`` to select this model.
    type: Literal["DefiningContextConstraint"] = Field(
        default="DefiningContextConstraint",
        description="MUST be 'DefiningContextConstraint'",
    )
    # Required: the defining context, given inline or as an IRI reference.
    definingContext: Variation | Location | IRI  # noqa: N815
    # Optional: how members relate to the defining context.
    relations: list[Relation] | None = Field(
        default=None,
        description="Defined relationships between members of the categorical variant and the defining context. ``sequence_liftover`` refers to variants or locations that represent a congruent concept on a differing assembly of a human genome (e.g. 'GRCh37' and 'GRCh38') or gene (e.g. Locus Reference Genomic) sequence. ``transcript_projection`` refers to variants or locations that occur on transcripts projected from the defined genomic concept. ``codon_translation`` refers to variants or locations that translate from the codon(s) represented by the defined concept.",
    )


class CopyCountConstraint(BaseModel):
    """Constraint on the absolute number of copies in a system."""

    # Discriminator value used by ``Constraint`` to select this model.
    type: Literal["CopyCountConstraint"] = Field(
        default="CopyCountConstraint",
        description="MUST be 'CopyCountConstraint'",
    )
    # Required: either an exact integer count or a VRS ``Range``.
    copies: int | Range


class CopyChangeConstraint(BaseModel):
    """Constraint representing a copy number change."""

    # Discriminator value used by ``Constraint`` to select this model.
    type: Literal["CopyChangeConstraint"] = Field(
        default="CopyChangeConstraint",
        description="MUST be 'CopyChangeConstraint'",
    )
    # Required: coding whose ``code`` must be a ``CopyChange`` value.
    copyChange: Coding  # noqa: N815

    @field_validator("copyChange")
    @classmethod
    def validate_copy_change(cls, v: Coding) -> Coding:
        """Check that ``copyChange.code`` names a member of ``CopyChange``.

        :param v: copyChange value
        :raises ValueError: If ``copyChange.code`` is not a valid CopyChange
        :return: The unchanged copyChange value
        """
        raw_code = v.code.root
        try:
            # Enum lookup by value; raises ValueError for unknown codes.
            CopyChange(raw_code)
        except ValueError as exc:
            allowed = [member.value for member in CopyChange]
            err_msg = f"copyChange, {raw_code}, not one of {allowed}"
            raise ValueError(err_msg) from exc
        return v


class Constraint(RootModel):
    """Root-model wrapper over the concrete constraint types.

    Constraints are used to construct an intensional semantics of categorical
    variant types. The wrapped constraint is stored on ``root`` and is selected
    by the value of its ``type`` field.
    """

    root: (
        DefiningContextConstraint | CopyCountConstraint | CopyChangeConstraint
    ) = Field(..., discriminator="type")


class CategoricalVariant(DomainEntity):
    """A categorically-defined domain for variation, of which individual
    contextual variation instances may be members.
    """

    type: Literal["CategoricalVariant"] = Field(
        default="CategoricalVariant",
        description="MUST be 'CategoricalVariant'",
    )
    # Optional: example members, given inline or as IRI references.
    members: list[Variation | IRI] | None = Field(
        default=None,
        description="A non-exhaustive list of VRS variation contexts that satisfy the constraints of this categorical variant.",
    )
    # Optional: constraints defining the category.
    constraints: list[Constraint] | None = None
154 changes: 154 additions & 0 deletions src/ga4gh/cat_vrs/profile_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
"""Define Pydantic models for GA4GH categorical variation objects.
See the `CatVar page <https://www.ga4gh.org/product/categorical-variation-catvar/>`_ on
the GA4GH website for more information.
"""

from enum import Enum

from ga4gh.cat_vrs.core_models import (
CategoricalVariant,
Constraint,
CopyChangeConstraint,
CopyCountConstraint,
DefiningContextConstraint,
Relation,
)
from pydantic import BaseModel, Field, field_validator


class CatVrsType(str, Enum):
    """String-valued names of the Cat-VRS types."""

    PROTEIN_SEQ_CONS = "ProteinSequenceConsequence"
    CANONICAL_ALLELE = "CanonicalAllele"
    CATEGORICAL_CNV = "CategoricalCnv"
    DESCRIBED_VAR = "DescribedVariation"
    NUMBER_COUNT = "NumberCount"
    NUMBER_CHANGE = "NumberChange"
    QUANTITY_VARIANCE = "QuantityVariance"


class ProteinSequenceConsequenceProperties(BaseModel):
    """Cat-VRS Constraints found in Protein Sequence Consequences."""

    # Required and non-empty (``min_length=1``).
    constraints: list[Constraint] = Field(..., min_length=1)

    @field_validator("constraints")
    @classmethod
    def validate_constraints(cls, v: list[Constraint]) -> list[Constraint]:
        """Validate constraints property

        :param v: Constraints property to validate
        :raises ValueError: If none of the ``relations`` contains
            ``Relation.CODON_TRANSLATION.value`` exactly once.
        :return: Constraints property
        """
        for constraint in v:
            # ``Constraint`` is a RootModel: the concrete constraint lives on
            # ``.root``. Fall back to the object itself if it is not wrapped.
            inner = getattr(constraint, "root", constraint)
            # Copy constraints declare no ``relations`` field, and on a
            # ``DefiningContextConstraint`` the field may be ``None``.
            relations = getattr(inner, "relations", None) or []
            if relations.count(Relation.CODON_TRANSLATION) == 1:
                return v

        err_msg = f"At least one `relations` in `constraints` must contain `{Relation.CODON_TRANSLATION.value}` exactly once."
        raise ValueError(err_msg)


class ProteinSequenceConsequence(
    ProteinSequenceConsequenceProperties, CategoricalVariant
):
    """A change in a protein sequence that results from genomic changes.

    Because the genetic code is degenerate, several different genomic changes
    can often cause the same protein sequence consequence.

    Like a :ref:`CanonicalAllele`, the protein sequence consequence is defined
    by an
    `Allele <https://vrs.ga4gh.org/en/2.x/concepts/MolecularVariation/Allele.html#>`_
    that is representative of a collection of congruent Protein Alleles that
    share the same altered codon(s).
    """


class CanonicalAlleleProperties(BaseModel):
    """Cat-VRS Constraints found in Canonical Alleles."""

    # Required and non-empty (``min_length=1``).
    constraints: list[Constraint] = Field(..., min_length=1)

    @field_validator("constraints")
    @classmethod
    def validate_constraints(cls, v: list[Constraint]) -> list[Constraint]:
        """Validate constraints property

        :param v: Constraints property to validate
        :raises ValueError: If none of the ``relations`` contains both
            ``Relation.SEQUENCE_LIFTOVER`` and ``Relation.TRANSCRIPT_PROJECTION``
            exactly once.
        :return: Constraints property
        """
        for constraint in v:
            # ``Constraint`` is a RootModel: the concrete constraint lives on
            # ``.root``. Fall back to the object itself if it is not wrapped.
            inner = getattr(constraint, "root", constraint)
            # Copy constraints declare no ``relations`` field, and on a
            # ``DefiningContextConstraint`` the field may be ``None``.
            relations = getattr(inner, "relations", None) or []
            if (
                relations.count(Relation.SEQUENCE_LIFTOVER) == 1
                and relations.count(Relation.TRANSCRIPT_PROJECTION) == 1
            ):
                return v

        # Use ``.value`` so the message does not depend on how the running
        # Python version formats a ``str``-mixin enum member in an f-string.
        err_msg = f"At least one `relations` in `constraints` must contain {Relation.SEQUENCE_LIFTOVER.value} and {Relation.TRANSCRIPT_PROJECTION.value} exactly once."
        raise ValueError(err_msg)


class CanonicalAllele(CanonicalAlleleProperties, CategoricalVariant):
    """A canonical allele, defined by an
    `Allele <https://vrs.ga4gh.org/en/2.x/concepts/MolecularVariation/Allele.html#>`_
    that is representative of a collection of congruent Alleles.

    Each congruent Allele depicts the same nucleic acid change on a different
    underlying reference sequence. Congruent representations of an Allele often
    exist across different genome assemblies and associated cDNA transcript
    representations.
    """


class CategoricalCnvProperties(BaseModel):
    """Cat-VRS Constraints found in CategoricalCnvs."""

    # Required and non-empty (``min_length=1``).
    constraints: list[Constraint] = Field(..., min_length=1)

    @field_validator("constraints")
    @classmethod
    def validate_constraints(cls, v: list[Constraint]) -> list[Constraint]:
        """Validate constraints property

        :param v: Constraints property to validate
        :raises ValueError: If no ``DefiningContextConstraint`` with
            ``Relation.SEQUENCE_LIFTOVER`` in ``relations`` is found in
            ``constraints`` or if neither ``CopyCountConstraint`` nor
            ``CopyChangeConstraint`` is found in ``constraints``.
        :return: Constraints property
        """
        defining_context_found = False
        copy_found = False

        for constraint in v:
            # ``Constraint`` is a RootModel, so type checks must be made
            # against the wrapped model on ``.root``, not the wrapper itself.
            inner = getattr(constraint, "root", constraint)

            if isinstance(inner, DefiningContextConstraint):
                # ``relations`` is optional and may be ``None``.
                if inner.relations and Relation.SEQUENCE_LIFTOVER in inner.relations:
                    defining_context_found = True
            elif isinstance(inner, CopyChangeConstraint | CopyCountConstraint):
                copy_found = True

        # The defining-context requirement is reported first when both fail.
        if not defining_context_found:
            err_msg = f"At least one item in `constraints` must be a `DefiningContextConstraint` and contain `{Relation.SEQUENCE_LIFTOVER.value}` in `relations`."
            raise ValueError(err_msg)

        if not copy_found:
            err_msg = "At least one item in `constraints` must be a `CopyCountConstraint` or a `CopyChangeConstraint`."
            raise ValueError(err_msg)

        return v


class CategoricalCnv(CategoricalCnvProperties, CategoricalVariant):
    """Constraints for matching knowledge about copy number variants (CNVs)."""
1 change: 1 addition & 0 deletions submodules/cat_vrs
Submodule cat_vrs added at 1458c8
Loading

0 comments on commit 6b35c8e

Please sign in to comment.