From 130178dd3b6c654d650fbec1231acff6e048220d Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Tue, 22 Oct 2024 19:48:30 -0400 Subject: [PATCH] wip: clean up + rst inheritance --- examples/Allele.json | 16 +++----- examples/Allele.rst | 5 ++- examples/pydantic_to_json_schema.py | 41 +++++-------------- examples/pydantic_to_rst.py | 28 ++++++++----- examples/utils.py | 60 +++++++++++++++++++++++++++ src/ga4gh/core/entity_models.py | 63 +++++++++++++++-------------- src/ga4gh/vrs/models.py | 53 ++++++++++++++---------- 7 files changed, 158 insertions(+), 108 deletions(-) diff --git a/examples/Allele.json b/examples/Allele.json index 9e5741aa..2ecad280 100644 --- a/examples/Allele.json +++ b/examples/Allele.json @@ -55,23 +55,17 @@ "$ref": "/ga4gh/schema/gks-common/1.x/core-im/json/IRI" }, { - "$ref": "/ga4gh/schema/vrs/2.x/json/SequenceLocation" + "$ref": "/ga4gh/schema/vrs/2.x/json/Location" } ] }, "state": { - "description": "An expression of the sequence state", - "oneOf": [ - { - "$ref": "/ga4gh/schema/vrs/2.x/json/LiteralSequenceExpression" - }, + "allOf": [ { - "$ref": "/ga4gh/schema/vrs/2.x/json/ReferenceLengthExpression" - }, - { - "$ref": "/ga4gh/schema/vrs/2.x/json/LengthExpression" + "$ref": "/ga4gh/schema/vrs/2.x/json/SequenceExpression" } - ] + ], + "description": "An expression of the sequence state" } }, "required": [ diff --git a/examples/Allele.rst b/examples/Allele.rst index 9f62b520..95974ba0 100644 --- a/examples/Allele.rst +++ b/examples/Allele.rst @@ -4,6 +4,7 @@ The state of a molecule at a :ref:`Location`. **Information Model** +Some Allele attributes are inherited from :ref:`Variation`. .. list-table:: :class: clean-wrap @@ -48,10 +49,10 @@ The state of a molecule at a :ref:`Location`. - 0..m - None * - location - - :ref:`IRI` | :ref:`SequenceLocation` + - :ref:`IRI` | :ref:`Location` - 0..1 - The location of the Allele * - state - - :ref:`LiteralSequenceExpression` | :ref:`ReferenceLengthExpression` | :ref:`LengthExpression` + - :ref:`SequenceExpression` - 0..1 - An expression of the sequence state \ No newline at end of file diff --git a/examples/pydantic_to_json_schema.py b/examples/pydantic_to_json_schema.py index f63595b8..4c739b01 100644 --- a/examples/pydantic_to_json_schema.py +++ b/examples/pydantic_to_json_schema.py @@ -1,8 +1,7 @@ from enum import Enum -from typing import List, Literal, Optional import json -from pydantic import Field, BaseModel, RootModel +from pydantic import BaseModel, RootModel from pydantic.json_schema import GenerateJsonSchema from ga4gh.core import entity_models, domain_models @@ -17,9 +16,9 @@ } -def create_model_module_map(*modules) -> dict[str, str]: - """Creates a mapping from model names to their modules.""" - model_module_map = {} +def map_model_to_ref(*modules) -> dict[str, str]: + """Creates a mapping from model names to their JSON schema references.""" + model_to_module = {} for module in modules: for attr_name in dir(module): model = getattr(module, attr_name) @@ -28,33 +27,11 @@ def create_model_module_map(*modules) -> dict[str, str]: and issubclass(model, (BaseModel, RootModel, Enum)) and model.__module__ == module.__name__ ): - model_module_map[attr_name] = MODULE_TO_REF[model.__module__] - return model_module_map + model_to_module[attr_name] = MODULE_TO_REF[model.__module__] + return model_to_module -MODEL_REF_MAP = create_model_module_map(domain_models, entity_models, models) - - -class Allele(models.Allele, extra="forbid"): - """The state of a molecule at a :ref:`Location`.""" - - class Config: - @staticmethod - def json_schema_extra(cls): - for prop in {"location", "state"}: - cls["properties"][prop]["oneOf"] = cls["properties"][prop]["anyOf"] - del cls["properties"][prop]["anyOf"] - - expressions: Optional[List[models.Expression]] = Field(None, ordered=False) - maturity: Literal["draft"] = Field("draft", frozen=True) - alternativeLabels: Optional[List[str]] = Field( - None, description="Alternative name(s) for the Entity.", ordered=False - ) - extensions: Optional[List[entity_models.Extension]] = Field( - None, - description="A list of extensions to the Entity, that allow for capture of information not directly supported by elements defined in the model.", - ordered=False, - ) +MODEL_TO_REF = map_model_to_ref(domain_models, entity_models, models) class GksGenerateJsonSchema(GenerateJsonSchema): @@ -81,7 +58,7 @@ def traverse_and_modify(self, schema): if "$ref" in schema: class_name = schema["$ref"].split("/")[-1] - schema["$ref"] = f"{MODEL_REF_MAP[class_name]}/{class_name}" + schema["$ref"] = f"{MODEL_TO_REF[class_name]}/{class_name}" if "description" in schema and isinstance(schema["description"], str): schema["description"] = scrub_rst_markup(schema["description"]) @@ -134,7 +111,7 @@ def generate(self, schema, mode="validation"): if __name__ == "__main__": with open("examples/Allele.json", "w") as wf: json.dump( - Allele.model_json_schema(schema_generator=GksGenerateJsonSchema), + models.Allele.model_json_schema(schema_generator=GksGenerateJsonSchema), wf, indent=2, ) diff --git a/examples/pydantic_to_rst.py b/examples/pydantic_to_rst.py index 18d86e90..fe6e2c7c 100644 --- a/examples/pydantic_to_rst.py +++ b/examples/pydantic_to_rst.py @@ -5,8 +5,9 @@ from typing import Annotated, Any, List from typing import get_args, get_origin -from utils import EXCLUDE_PROPS -from pydantic_to_json_schema import Allele +from utils import EXCLUDE_PROPS, INSTANCE_TO_ABC + +from ga4gh.vrs import models PYTHON_TO_JSON_TYPES = { @@ -61,8 +62,11 @@ def get_limits( def generate(model: BaseModel) -> str: - # TODO: Inheritance - inheritance = "" + model_name = model.__name__ + if model_name in INSTANCE_TO_ABC: + inheritance = f"Some {model_name} attributes are inherited from :ref:`{INSTANCE_TO_ABC[model_name]}`.\n" + else: + inheritance = "" rst_data = [ "**Computational Definition**", @@ -95,11 +99,15 @@ def generate(model: BaseModel) -> str: field_is_list = False field_type = f":ref:`{field_info.annotation.__name__}`" else: - field_annotation = tuple( - anno - for anno in get_args(field_info.annotation) - if anno is not type(None) - ) + field_anno_args = get_args(field_info.annotation) + if field_anno_args: + field_annotation = tuple( + anno + for anno in get_args(field_info.annotation) + if anno is not type(None) + ) + else: + field_annotation = [field_info.annotation] field_is_list = get_origin(field_annotation[0]) in {list, List} @@ -131,4 +139,4 @@ def generate(model: BaseModel) -> str: if __name__ == "__main__": with open("examples/Allele.rst", "w") as wf: - wf.write(generate(Allele)) + wf.write(generate(models.Allele)) diff --git a/examples/utils.py b/examples/utils.py index 77836eb4..11e36b6a 100644 --- a/examples/utils.py +++ b/examples/utils.py @@ -1,4 +1,10 @@ import re +from typing import get_args + +from pydantic import RootModel + +from ga4gh.core import entity_models, domain_models +from ga4gh.vrs import models REF_RE = re.compile(r":ref:`(.*?)(\s?<.*>)?`") @@ -11,3 +17,57 @@ def scrub_rst_markup(string): string = LINK_RE.sub(r"[\g<1>](\g<2>)", string) string = string.replace("\n", " ") return string + + +def map_abc_to_instances(*modules) -> dict[str, str]: + """Creates a mapping from ABC model names to their instance model names.""" + abc_to_instances = {} + excluded_types = { + "list", + "str", + "Optional", + "int", + "float", + "dict", + "bool", + "set", + "tuple", + } + + for module in modules: + for attr_name in dir(module): + model = getattr(module, attr_name) + if ( + isinstance(model, type) + and issubclass(model, RootModel) + and model.__module__ == module.__name__ + ): + root_anno = model.model_fields["root"].annotation + root_annos = get_args(root_anno) or (root_anno,) + root_anno_cls_names = [ + cls.__name__ + for cls in root_annos + if cls.__name__ not in excluded_types + ] + if root_anno_cls_names: + abc_to_instances[model.__name__] = root_anno_cls_names + + return abc_to_instances + + +def get_abc(key) -> str: + """Get original ABC class name + + :param key: Class name + :return: Original ABC class name + """ + while key in INSTANCE_TO_ABC: + key = INSTANCE_TO_ABC[key] + return key + + +ABC_TO_INSTANCES = map_abc_to_instances(models, entity_models, domain_models) +INSTANCE_TO_ABC = { + value: key for key, value_list in ABC_TO_INSTANCES.items() for value in value_list +} +INSTANCE_TO_ABC = {key: get_abc(key) for key in INSTANCE_TO_ABC} diff --git a/src/ga4gh/core/entity_models.py b/src/ga4gh/core/entity_models.py index 406c2d65..1b36a98f 100644 --- a/src/ga4gh/core/entity_models.py +++ b/src/ga4gh/core/entity_models.py @@ -1,6 +1,7 @@ """GKS Common Library Data Type and Entity models""" from __future__ import annotations +from abc import ABC import datetime import logging from typing import Any, Dict, Annotated, Literal, Optional, Union, List @@ -149,7 +150,7 @@ class Extension(BaseModel): ######################################### -class Entity(BaseModel): +class Entity(BaseModel, ABC): """Anything that exists, has existed, or will exist. Abstract base class to be extended by other classes. Do NOT instantiate directly. @@ -168,11 +169,11 @@ class Entity(BaseModel): None, description='A free-text description of the Entity.' ) - alternativeLabels: Optional[List[str]] = Field(None, description="Alternative name(s) for the Entity.") - extensions: Optional[List[Extension]] = Field(None, description="A list of extensions to the Entity, that allow for capture of information not directly supported by elements defined in the model.") + alternativeLabels: Optional[List[str]] = Field(None, description="Alternative name(s) for the Entity.", ordered=False) + extensions: Optional[List[Extension]] = Field(None, description="A list of extensions to the Entity, that allow for capture of information not directly supported by elements defined in the model.", ordered=False) -class DomainEntity(Entity): +class DomainEntity(Entity, ABC): """An Entity that is specific to a particular biomedical domain such as disease, therapeutics, or genes. Domain Entities are considered as 'concept-level' entities, as opposed to particular instances. e.g. 'Lung Cancer', not 'patient123's lung @@ -182,7 +183,7 @@ class DomainEntity(Entity): Abstract base class to be extended by other classes. Do NOT instantiate directly. """ - mappings: Optional[List[ConceptMapping]] = Field(None, description="A list of mappings to concepts in terminologies or code systems. Each mapping should include a coding and a relation.") + mappings: Optional[List[ConceptMapping]] = Field(None, description="A list of mappings to concepts in terminologies or code systems. Each mapping should include a coding and a relation.", ordered=False) class Agent(Entity): @@ -191,12 +192,12 @@ class Agent(Entity): or for another agent's activity. """ - type: Literal["Agent"] = Field(CoreImType.AGENT.value, description=f"MUST be '{CoreImType.AGENT.value}'.") + type: Literal["Agent"] = Field(CoreImType.AGENT.value, description=f"MUST be '{CoreImType.AGENT.value}'.", extends="type") name: Optional[str] = Field(None, description="The given name of the Agent.") subtype: Optional[AgentSubtype] = Field(None, description="A specific type of agent the Agent object represents. Must be one of {person, organization, software}.") -class ActivityBase(Entity): +class ActivityBase(Entity, ABC): """Internal base class that holds shared fields for Activity model. Abstract base class to be extended by other classes. Do NOT instantiate directly. @@ -227,7 +228,7 @@ class Activity(ActivityBase): time. Activities may use, generate, modify, move, or destroy one or more entities. """ - performedBy: Optional[List[Agent] ]= Field(None, description="An Agent who contributed to executing the Activity.") + performedBy: Optional[List[Agent] ]= Field(None, description="An Agent who contributed to executing the Activity.", ordered=False) class Contribution(ActivityBase): @@ -236,21 +237,21 @@ class Contribution(ActivityBase): DataSet, Publication, etc.) """ - type: Literal["Contribution"] = Field(CoreImType.CONTRIBUTION.value, description=f"MUST be '{CoreImType.CONTRIBUTION.value}'.") - contributor: Optional[List[Agent]] = Field(None, description="The agent that made the contribution.", min_length=1, max_length=1) + type: Literal["Contribution"] = Field(CoreImType.CONTRIBUTION.value, description=f"MUST be '{CoreImType.CONTRIBUTION.value}'.", extends="type") + contributor: Optional[List[Agent]] = Field(None, description="The agent that made the contribution.", min_length=1, max_length=1, extends="performedBy") activityType: Optional[Coding] = Field(None, description="The specific type of activity performed or role played by an agent in making the contribution (e.g. for a publication, agents may contribute as a primary author, editor, figure designer, data generator, etc. . Values of this property may be framed as activities or as contribution roles (e.g. using terms from the Contribution Role Ontology (CRO)).") -class InformationEntityBase(Entity): +class InformationEntityBase(Entity, ABC): """Internal base class that holds shared fields for InformationEntity model. Abstract base class to be extended by other classes. Do NOT instantiate directly. """ - type: Literal["InformationEntity"] = Field(CoreImType.INFORMATION_ENTITY.value, description=f"MUST be {CoreImType.INFORMATION_ENTITY.value}.") - specifiedBy: Optional[Union[Method, IRI]] = Field(None, description="A specification that describes all or part of the process that led to creation of the Information Entity") - contributions: Optional[List[Contribution] ]= Field(None, description="Specific actions taken by an Agent toward the creation, modification, validation, or deprecation of an Information Entity.") - reportedIn: Optional[List[Union[Document, IRI]]] = Field(None, description="A document in which the the Information Entity is reported.") + type: Literal["InformationEntity"] = Field(CoreImType.INFORMATION_ENTITY.value, description=f"MUST be {CoreImType.INFORMATION_ENTITY.value}.", extends="type") + specifiedBy: Optional[Union[Method, IRI]] = Field(None, description="A specification that describes all or part of the process that led to creation of the Information Entity", ordered=False) + contributions: Optional[List[Contribution] ]= Field(None, description="Specific actions taken by an Agent toward the creation, modification, validation, or deprecation of an Information Entity.", ordered=False) + reportedIn: Optional[List[Union[Document, IRI]]] = Field(None, description="A document in which the the Information Entity is reported.", ordered=False) dateAuthored: Optional[str] = Field(None, description="Indicates when the information content expressed in the Information Entity was generated.") recordMetadata: Optional[RecordMetadata] = Field(None, description="Provenance metadata about a specific concrete record of information as encoded/serialized in a particular data set or object (as opposed to provenance about the abstract information content the encoding carries).") @@ -261,7 +262,7 @@ class InformationEntity(InformationEntityBase): artifacts like books, web pages, data tables, or photographs. """ - derivedFrom: Optional[List[InformationEntity]] = Field(None, description="Another Information Entity from which this Information Entity is derived, in whole or in part.") + derivedFrom: Optional[List[InformationEntity]] = Field(None, description="Another Information Entity from which this Information Entity is derived, in whole or in part.", ordered=False) class Document(InformationEntity): @@ -269,13 +270,13 @@ class Document(InformationEntity): form, intended to be read and understood together as a whole. """ - type: Literal["Document"] = Field(CoreImType.DOCUMENT.value, description=f"Must be '{CoreImType.DOCUMENT.value}'") + type: Literal["Document"] = Field(CoreImType.DOCUMENT.value, description=f"Must be '{CoreImType.DOCUMENT.value}'", extends="type") subtype: Optional[Coding] = Field( None, description="A specific type of document that a Document instance represents (e.g. 'publication', 'patent', 'pathology report')" ) title: Optional[str] = Field(None, description="The official title given to the document by its authors.") urls: Optional[List[Annotated[str, StringConstraints(pattern=r"^(https?|s?ftp)://")]]] = Field( - None, description="One or more URLs from which the content of the Document can be retrieved." + None, description="One or more URLs from which the content of the Document can be retrieved.", ordered=False ) doi: Optional[Annotated[str, StringConstraints(pattern=r"^10\.(\d+)(\.\d+)*\/[\w\-\.]+")]] = Field( None, @@ -290,7 +291,7 @@ class Document(InformationEntity): class Method(InformationEntity): """A set of instructions that specify how to achieve some objective.""" - type: Literal["Method"] = Field(CoreImType.METHOD.value, description=f"MUST be '{CoreImType.METHOD.value}'.") + type: Literal["Method"] = Field(CoreImType.METHOD.value, description=f"MUST be '{CoreImType.METHOD.value}'.", extends="type") subtype: Optional[Coding] = Field( None, description="A specific type of method that a Method instance represents (e.g. 'Variant Interpretation Guideline', or 'Experimental Protocol').", @@ -307,9 +308,9 @@ class RecordMetadata(BaseModel): recordIdentifier: Optional[str] = Field(None, description="The identifier of the data record or object described in this RecordMetadata object.") recordVersion: Optional[str] = Field(None, description="The version number of the record-level artifact the object describes.") - derivedFrom: Optional[str] = Field(None, description="Another data record from which the record described here was derived, through a data ingest and/or transformation process. Value should be a string representing the identifier of the source record.") + derivedFrom: Optional[str] = Field(None, description="Another data record from which the record described here was derived, through a data ingest and/or transformation process. Value should be a string representing the identifier of the source record.", ordered=False) dateRecordCreated: Optional[str] = Field(None, description="The date the record was initially created.") - contributions: Optional[List[Contribution]] = Field(None, description="Describes specific contributions made by an human or software agent to the creation, modification, or administrative management of a data record or object.") + contributions: Optional[List[Contribution]] = Field(None, description="Describes specific contributions made by an human or software agent to the creation, modification, or administrative management of a data record or object.", ordered=False) class DataSet(InformationEntity): @@ -317,7 +318,7 @@ class DataSet(InformationEntity): common format or structure, to enable their computational manipulation as a unit. """ - type: Literal["DataSet"] = Field(CoreImType.DATA_SET.value, description=f"MUST be '{CoreImType.DATA_SET.value}'.") + type: Literal["DataSet"] = Field(CoreImType.DATA_SET.value, description=f"MUST be '{CoreImType.DATA_SET.value}'.", extends="type") subtype: Optional[Coding] = Field(None, description="A specific type of data set the DataSet instance represents (e.g. a 'clinical data set', a 'sequencing data set', a 'gene expression data set', a 'genome annotation data set')") releaseDate: Optional[str] = Field(None, description="Indicates when a version of a Data Set was formally released.") version: Optional[str] = Field(None, description="The version of the Data Set, as assigned by its creator.") @@ -331,14 +332,14 @@ class EvidenceLine(InformationEntity): the target proposition. """ - type: Literal["EvidenceLine"] = Field(CoreImType.EVIDENCE_LINE.value, description=f"Must be '{CoreImType.EVIDENCE_LINE.value}'") - hasEvidenceItems: Optional[List[InformationEntity]] = Field(None, description="An individual piece of information that was evaluated as evidence in building the argument represented by an Evidence Line.") + type: Literal["EvidenceLine"] = Field(CoreImType.EVIDENCE_LINE.value, description=f"Must be '{CoreImType.EVIDENCE_LINE.value}'", extends="type") + hasEvidenceItems: Optional[List[InformationEntity]] = Field(None, description="An individual piece of information that was evaluated as evidence in building the argument represented by an Evidence Line.", ordered=False) directionOfEvidenceProvided: Optional[Direction] = Field(None, description="The direction of support that the Evidence Line is determined to provide toward its target Proposition (supports, disputes, neutral)") strengthOfEvidenceProvided: Optional[Union[Coding, IRI]] = Field(None, description="The strength of support that an Evidence Line is determined to provide for or against its target Proposition, evaluated relative to the direction indicated by the directionOfEvidenceProvided value.") scoreOfEvidenceProvided: Optional[float] = Field(None, description="A quantitative score indicating the strength of support that an Evidence Line is determined to provide for or against its target Proposition, evaluated relative to the direction indicated by the directionOfEvidenceProvided value.") -class StatementBase(InformationEntity): +class StatementBase(InformationEntity, ABC): """Internal base class that holds shared fields for Statement model. Abstract base class to be extended by other classes. Do NOT instantiate directly. @@ -350,7 +351,7 @@ class StatementBase(InformationEntity): score: Optional[float] = Field(None, description="A quantitative score that indicates the strength of a Proposition's assessment in the direction indicated (i.e. how strongly supported or disputed the Proposition is believed to be). Depending on its implementation, a score may reflect how *confident* that agent is that the Proposition is true or false, or the *strength of evidence* they believe supports or disputes it.") statementText: Optional[str] = Field(None, description="A natural-language expression of what a Statement asserts to be true.") classification: Optional[Union[Coding, IRI]] = Field(None, description="A single term or phrase summarizing the outcome of direction and strength assessments of a Statement's proposition, in terms of a classification of its subject.") - hasEvidenceLines: Optional[List[EvidenceLine]] = Field(None, description="An evidence-based argument that supports or disputes the validity of the proposition that a Statement assesses or puts forth as true. The strength and direction of this argument (whether it supports or disputes the proposition, and how strongly) is based on an interpretation of one or more pieces of information as evidence (i.e. 'Evidence Items).") + hasEvidenceLines: Optional[List[EvidenceLine]] = Field(None, description="An evidence-based argument that supports or disputes the validity of the proposition that a Statement assesses or puts forth as true. The strength and direction of this argument (whether it supports or disputes the proposition, and how strongly) is based on an interpretation of one or more pieces of information as evidence (i.e. 'Evidence Items).", ordered=False) class Statement(StatementBase): @@ -373,8 +374,8 @@ class StudyGroup(Entity): type: Literal["StudyGroup"] = Field(CoreImType.STUDY_GROUP.value, description=f'Must be "{CoreImType.STUDY_GROUP.value}"') memberCount: Optional[int] = Field(None, description="The total number of individual members in the StudyGroup.") - isSubsetOf: Optional[List[StudyGroup] ]= Field(None, description="A larger StudyGroup of which this StudyGroup represents a subset.") - characteristics: Optional[List[Characteristic]] = Field(None, description="A feature or role shared by all members of the StudyGroup, representing a criterion for membership in the group.") + isSubsetOf: Optional[List[StudyGroup] ]= Field(None, description="A larger StudyGroup of which this StudyGroup represents a subset.", ordered=False) + characteristics: Optional[List[Characteristic]] = Field(None, description="A feature or role shared by all members of the StudyGroup, representing a criterion for membership in the group.", ordered=False) class Characteristic(BaseModel): @@ -387,7 +388,7 @@ class Characteristic(BaseModel): valueOperator: Optional[bool] = Field(None, description="An operation that defines how to logically interpret a set of more than one Characteristic values ('AND', 'OR', 'NOT')") -class StudyResultBase(InformationEntityBase): +class StudyResultBase(InformationEntityBase, ABC): """Internal base class that holds shared fields for StudyResult model. Abstract base class to be extended by other classes. Do NOT instantiate directly. @@ -398,7 +399,7 @@ class StudyResultBase(InformationEntityBase): qualityMeasures: Optional[Dict] = None -class StudyResult(InformationEntityBase): +class StudyResult(InformationEntityBase, ABC): """A collection of data items from a single study that pertain to a particular subject or experimental unit in the study, along with optional provenance information describing how these data items were generated. @@ -408,7 +409,7 @@ class StudyResult(InformationEntityBase): focus: Optional[Union[DomainEntity, Coding, IRI]] = Field(None, description="The specific subject or experimental unit in a Study that data in the StudyResult object is about - e.g. a particular variant in a population allele frequency dataset like ExAC or gnomAD.") sourceDataSet: Optional[List[DataSet]] = Field(None, description="A larger DataSet from which the content of the StudyResult was derived.", max_length=1) - componentResult: Optional[List[StudyResult]] = Field(None, description="Another StudyResult comprised of data items about the same focus as its parent Result, but based on a more narrowly scoped analysis of the foundational data (e.g. an analysis based on data about a subset of the parent Results full study population) .") + componentResult: Optional[List[StudyResult]] = Field(None, description="Another StudyResult comprised of data items about the same focus as its parent Result, but based on a more narrowly scoped analysis of the foundational data (e.g. an analysis based on data about a subset of the parent Results full study population) .", ordered=False) studyGroup: Optional[StudyGroup] = Field(None, description="A description of a specific group or population of subjects interrogated in the ResearchStudy that produced the data captured in the StudyResult.") ancillaryResults: Optional[Dict] = None qualityMeasures: Optional[Dict] = None diff --git a/src/ga4gh/vrs/models.py b/src/ga4gh/vrs/models.py index 8e924917..88de678f 100644 --- a/src/ga4gh/vrs/models.py +++ b/src/ga4gh/vrs/models.py @@ -10,6 +10,9 @@ * `import ga4gh.vrs`, and refer to models using the fully-qualified module name, e.g., `ga4gh.vrs.models.Allele` """ +from __future__ import annotations + +from abc import ABC from typing import List, Literal, Optional, Union, Dict, Annotated from collections import OrderedDict from enum import Enum @@ -211,7 +214,7 @@ def _recurse_ga4gh_serialize(obj): return obj -class _ValueObject(Entity): +class _ValueObject(Entity, ABC): """A contextual value whose equality is based on value, not identity. See https://en.wikipedia.org/wiki/Value_object for more on Value Objects. @@ -236,7 +239,7 @@ def is_ga4gh_identifiable(): return False -class Ga4ghIdentifiableObject(_ValueObject): +class Ga4ghIdentifiableObject(_ValueObject, ABC): """A contextual value object for which a GA4GH computed identifier can be created. All GA4GH Identifiable Objects may have computed digests from the VRS Computed Identifier algorithm. @@ -554,27 +557,34 @@ class ga4gh(Ga4ghIdentifiableObject.ga4gh): ######################################### -class _VariationBase(Ga4ghIdentifiableObject): +class _VariationBase(Ga4ghIdentifiableObject, ABC): """Base class for variation Abstract base class to be extended by other classes. Do NOT instantiate directly. """ - expressions: Optional[List[Expression]] = None + expressions: Optional[List[Expression]] = Field(None, ordered=False) ######################################### # vrs molecular variation ######################################### -class Allele(_VariationBase): - """The state of a molecule at a `Location`.""" +class Allele(_VariationBase, extra="forbid"): + """The state of a molecule at a :ref:`Location`.""" + + class Config: + @staticmethod + def json_schema_extra(cls): + cls["properties"]["location"]["oneOf"] = cls["properties"]["location"]["anyOf"] + del cls["properties"]["location"]["anyOf"] + maturity: Literal["draft"] = Field("draft", frozen=True) type: Literal["Allele"] = Field(VrsType.ALLELE.value, description=f'MUST be "{VrsType.ALLELE.value}"') - location: Union[IRI, SequenceLocation] = Field( + location: Union[IRI, Location] = Field( ..., description='The location of the Allele' ) - state: Union[LiteralSequenceExpression, ReferenceLengthExpression, LengthExpression] = Field( + state: SequenceExpression = Field( ..., description='An expression of the sequence state' ) @@ -735,7 +745,7 @@ class ga4gh(Ga4ghIdentifiableObject.ga4gh): ######################################### -class _CopyNumber(_VariationBase): +class CopyNumber(_VariationBase, ABC): """A measure of the copies of a `Location` within a system (e.g. genome, cell, etc.) Abstract base class to be extended by other classes. Do NOT instantiate directly. @@ -747,7 +757,7 @@ class _CopyNumber(_VariationBase): ) -class CopyNumberCount(_CopyNumber): +class CopyNumberCount(CopyNumber): """The absolute count of discrete copies of a `Location` or `Gene`, within a system (e.g. genome, cell, etc.). """ @@ -766,7 +776,7 @@ class ga4gh(Ga4ghIdentifiableObject.ga4gh): ] -class CopyNumberChange(_CopyNumber): +class CopyNumberChange(CopyNumber): """An assessment of the copy number of a `Location` or a `Gene` within a system (e.g. genome, cell, etc.) relative to a baseline ploidy. """ @@ -826,32 +836,31 @@ class Location(RootModel): ) -class Variation(RootModel): - """A representation of the state of one or more biomolecules.""" +class SystemicVariation(RootModel): + """A Variation of multiple molecules in the context of a system, e.g. a genome, + sample, or homologous chromosomes. + """ - root: Union[Allele, CisPhasedBlock, Adjacency, Terminus, DerivativeMolecule, CopyNumberChange, CopyNumberCount] = Field( + root: Union[CopyNumberChange, CopyNumberCount] = Field( ..., json_schema_extra={ - 'description': 'A representation of the state of one or more biomolecules.' + 'description': 'A Variation of multiple molecules in the context of a system, e.g. a genome, sample, or homologous chromosomes.' }, discriminator='type', ) -class SystemicVariation(RootModel): - """A Variation of multiple molecules in the context of a system, e.g. a genome, - sample, or homologous chromosomes. - """ +class Variation(RootModel): + """A representation of the state of one or more biomolecules.""" - root: Union[CopyNumberChange, CopyNumberCount] = Field( + root: Union[MolecularVariation, SystemicVariation] = Field( ..., json_schema_extra={ - 'description': 'A Variation of multiple molecules in the context of a system, e.g. a genome, sample, or homologous chromosomes.' + 'description': 'A representation of the state of one or more biomolecules.' }, discriminator='type', ) - # At end so classes exist ( reffable_classes,