From 96211e7ae5e8b52bd61aef7670b37b51765c38ad Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Wed, 11 Sep 2024 14:38:36 -0400 Subject: [PATCH 1/9] feat!: update gks-common / vrs models --- src/ga4gh/core/entity_models.py | 271 ++++++++++++++++++++++++++++++-- submodules/vrs | 2 +- 2 files changed, 262 insertions(+), 11 deletions(-) diff --git a/src/ga4gh/core/entity_models.py b/src/ga4gh/core/entity_models.py index f40e9c9d..ed794908 100644 --- a/src/ga4gh/core/entity_models.py +++ b/src/ga4gh/core/entity_models.py @@ -10,16 +10,36 @@ * `import ga4gh.core`, and refer to models using the fully-qualified module name, e.g., `ga4gh.core.entity_models.Coding` """ +from __future__ import annotations + from abc import ABC -from typing import Any, Dict, Annotated, Optional, Union, List +import datetime +import logging +from typing import Any, Dict, Annotated, Literal, Optional, Union, List from enum import Enum -from pydantic import BaseModel, Field, RootModel, StringConstraints, ConfigDict +from pydantic import BaseModel, Field, RootModel, StringConstraints, ConfigDict, field_validator, model_validator from ga4gh.core import GA4GH_IR_REGEXP +class CoreImType(str, Enum): + """Define Core Information Model Types""" + + AGENT = "Agent" + CONTRIBUTION = "Contribution" + DOCUMENT = "Document" + METHOD = "Method" + DATA_ITEM = "DataItem" + DATA_SET = "DataSet" + EVIDENCE_LINE = "EvidenceLine" + INFORMATION_ENTITY = "InformationEntity" + PROPOSITION = "Proposition" + STUDY_GROUP = "StudyGroup" + + + class Relation(str, Enum): """A mapping relation between concepts as defined by the Simple Knowledge Organization System (SKOS). @@ -47,6 +67,23 @@ class Syntax(str, Enum): GNOMAD = "gnomad" SPDI = "spdi" + +class AgentSubtype(str, Enum): + """A specific type of agent the Agent object represents.""" + + PERSON = "person" + ORGANIZATION = "organization" + SOFTWARE = "software" + + +class Direction(str, Enum): + """Define constraints for direction""" + + SUPPORTS = "supports" + NEUTRAL = "neutral" + DISPUTES = "disputes" + + ######################################### # GKS Common Abstract Utility Classes # These do not inherit from Entity and are not typed explicitly @@ -105,7 +142,7 @@ class Coding(BaseModel): ..., description="The terminology/code system that defined the code. May be reported as a free-text name (e.g. 'Sequence Ontology'), but it is preferable to provide a uri/url for the system. When the 'code' is reported as a CURIE, the 'system' should be reported as the uri that the CURIE's prefix expands to (e.g. 'http://purl.obofoundry.org/so.owl/' for the Sequence Ontology)." ) - version: Optional[str] = Field( + systemVersion: Optional[str] = Field( None, description='Version of the terminology or code system that provided the code.' ) @@ -156,18 +193,19 @@ class Expression(BaseModel): # GKS Common Abstract Entity Class Definitions ######################################### +class Entity(BaseModel, ABC): + """Anything that exists, has existed, or will exist. -class Entity(ABC, BaseModel): - """Entity is the root class of the 'gks-common' core information model classes - - those that have identifiers and other general metadata like labels, xrefs, urls, - descriptions, etc. All common classes descend from and inherit its attributes. + Entity is the root class of the 'gks-common' core information model. All common + classes that have ids and other general metadata like label, description, type, or + extensions descend from this class and inherit its attributes. """ id: Optional[str] = Field( None, - description="The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system, or may refer to an 'id' for the shared concept in another system (e.g. a CURIE)." + description="The 'logical' identifier of the Entity in the system of record, e.g. a UUID. This 'id' is unique within a given system, but may or may not be globally unique outside the system. It is used within a system to reference an object from another." ) - type: str + type: str = Field(..., description="The name of the class that is instantiated by a data object representing the Entity.") label: Optional[str] = Field( None, description='A primary label for the entity.' @@ -177,7 +215,7 @@ class Entity(ABC, BaseModel): description='A free-text description of the entity.' ) alternativeLabels: Optional[List[str]] = Field(None, description="Alternative name(s) for the Entity.") - extensions: Optional[List[Extension]] = Field(None, description="A list of extensions to the entity. Extensions are not expected to be natively understood, but may be used for pre-negotiated exchange of message attributes between systems.") + extensions: Optional[List[Extension]] = Field(None, description="A list of extensions to the Entity, that allow for capture of information not directly supported by elements defined in the model.") class DomainEntity(Entity, ABC): @@ -189,3 +227,216 @@ class DomainEntity(Entity, ABC): """ mappings: Optional[List[ConceptMapping]] = Field(None, description="A list of mappings to concepts in terminologies or code systems. Each mapping should include a coding and a relation.") + + +class Agent(Entity): + """An autonomous actor (person, organization, or software agent) that bears some + form of responsibility for an activity taking place, for the existence of an entity, + or for another agent's activity. + """ + + type: Literal["Agent"] = Field(CoreImType.AGENT.value, description=f"MUST be '{CoreImType.AGENT.value}'.") + name: Optional[str] = Field(None, description="The descriptive name of the agent.") + subtype: Optional[AgentSubtype] = Field(None, description="A specific type of agent the Agent object represents.") + + +class Activity(Entity): + """An action or set of actions performed by an agent, that occurs over a period of + time. Activities may use, generate, modify, move, or destroy one or more entities. + """ + + subtype: Optional[Coding] = Field(None, description="A specific type of activity the Activity instance represents.") + date: Optional[str] = Field(None, description="The date that the Activity was completed.") + performedBy: Optional[List[Agent] ]= Field(None, description="An Agent who contributed to executing the Activity.") # noqa: N815 + specifiedBy: Optional[List[Method]] = Field(None, description="A method that was followed in performing an Activity, that describes how it was executed.") # noqa: N815 + + @field_validator("date") + @classmethod + def date_format(cls, v: Optional[str]) -> Optional[str]: + """Check that date is YYYY-MM-DD format""" + if v: + valid_format = "%Y-%m-%d" + + try: + datetime.datetime.strptime(v, valid_format).replace( + tzinfo=datetime.timezone.utc + ).strftime(valid_format) + except ValueError: + logging.warning("`date` SHOULD be formatted as a date string in ISO format 'YYYY-MM-DD'") + return v + + +class Contribution(Activity): + """An action taken by an agent in contributing to the creation, modification, + assessment, or deprecation of a particular entity (e.g. a Statement, EvidenceLine, + DataSet, Publication, etc.) + """ + + type: Literal["Contribution"] = Field(CoreImType.CONTRIBUTION.value, description=f"MUST be {CoreImType.CONTRIBUTION.value}.") + contributor: Optional[List[Agent]] = Field(None, description="The agent that made the contribution.", min_length=1, max_length=1) + activityType: Optional[Coding] = Field(None, description="The specific type of activity performed or role played by an agent in making the contribution (e.g. for a publication, agents may contribute as a primary author, editor, figure designer, data generator, etc. . Values of this property may be framed as activities or as contribution roles (e.g. using terms from the Contribution Role Ontology (CRO)).") + + @model_validator(mode="before") + def handle_extends_prop(cls, values: dict[str, Any]) -> dict[str, Any]: + """Handle extends properties by renaming fields + + :param values: Input values to process + :return: Processed values with extended properties renamed + """ + if "performedBy" in values: + values["contributor"] = values.pop("performedBy") + return values + + +class InformationEntity(Entity, ABC): + """An abstract (non-physical) entity that is about something - representing the + underlying 'information content' conveyed by physical or digital information + artifacts like books, web pages, data tables, or photographs. + """ + + type: Literal["InformationEntity"] = Field(CoreImType.INFORMATION_ENTITY.value, description=f"MUST be {CoreImType.INFORMATION_ENTITY.value}.") + specifiedBy: Optional[Union[Method, IRI]] = Field(None, description="A specification that describes all or part of the process that led to creation of the Information Entity ") # noqa: N815 + contributions: Optional[List[Contribution] ]= Field(None, description="Specific actions taken by an Agent toward the creation, modification, validation, or deprecation of an Information Entity.") + reportedIn: Optional[List[Union[Document, IRI]]] = Field(None, description="A document in which the the Information Entity is reported.") # noqa: N815 + dateAuthored: Optional[str] = Field(None, description="Indicates when the information content expressed in the Information Entity was generated.") # noqa: N815 + derivedFrom: Optional[List[InformationEntity]] = Field(None, description="Another Information Entity from which this Information Entity is derived, in whole or in part.") # noqa: N815 + recordMetadata: Optional[RecordMetadata] = Field(None, description="Provenance metadata about a specific concrete record of information as encoded/serialized in a particular data set or object (as opposed to provenance about the abstract information content the encoding carries).") # noqa: N815 + + +class Document(InformationEntity): + """A collection of information, usually in a text-based or graphic human-readable + form, intended to be read and understood together as a whole. + """ + + type: Literal["Document"] = Field(CoreImType.DOCUMENT.value, description=f"Must be '{CoreImType.DOCUMENT.value}'.") + subtype: Optional[Coding] = Field( + None, description="A specific type of document that a Document instance represents (e.g. 'publication', 'patent', 'pathology report')" + ) + title: Optional[str] = Field(None, description="The official title given to the document by its authors.") + urls: Optional[List[Annotated[str, StringConstraints(pattern=r"^(https?|s?ftp)://")]]] = Field( + None, description="One or more URLs from which the content of the Document can be retrieved." + ) + doi: Optional[Annotated[str, StringConstraints(pattern=r"^10.(\\d+)(\\.\\d+)*\\/[\\w\\-\\.]+")]] = Field( + None, + description="A `Digital Object Identifier _` for the document.", + ) + pmid: Optional[int] = Field( + None, + description="A `PubMed unique identifier `_.", + ) + + +class Method(InformationEntity): + """A set of instructions that specify how to achieve some objective.""" + + type: Literal["Method"] = Field(CoreImType.METHOD.value, description=f"MUST be '{CoreImType.METHOD.value}'.") + subtype: Optional[Coding] = Field( + None, + description="A specific type of method that a Method instance represents (e.g. 'Variant Interpretation Guideline', or 'Experimental Protocol').", + ) + license: Optional[str] = Field(None, description="A specific license that dictates legal permissions for how a method can be used (by whom, where, for what purposes, with what additional requirements, etc.).") + + +class RecordMetadata(BaseModel): + """A reusable structure that encapsulates provenance metadata about a serialized + data record or object in a particular dataset (as opposed to provenance about the + real world entity this + record or object represents). + """ + + recordIdentifier: Optional[str] = Field(None, description="The identifier of the data record or object described in this RecordMetadata object.") # noqa: N815 + recordVersion: Optional[str] = Field(None, description="The version number of the record-level artifact the object describes.") # noqa: N815 + derivedFrom: Optional[str] = Field(None, description="Another data record from which the record described here was derived, through a data ingest and/or transformation process. Value should be a string representing the identifier of the source record.") # noqa: N815 + dateRecordCreated: Optional[str] = Field(None, description="The date the record was initially created.") # noqa: N815 + contributions: Optional[List[Contribution]] = Field(None, description="Describes specific contributions made by an human or software agent to the creation, modification, or administrative management of a data record or object.") + + +class DataSet(InformationEntity): + """A collection of related data items or records that are organized together in a + common format or structure, to enable their computational manipulation as a unit. + """ + + type: Literal["DataSet"] = Field(CoreImType.DATA_SET.value, description=f"MUST be '{CoreImType.DATA_SET.value}'") + subtype: Optional[Coding] = Field(None, description="A specific type of data set the DataSet instance represents (e.g. a 'clinical data set', a 'sequencing data set', a 'gene expression data set', a 'genome annotation data set')") + releaseDate: Optional[str] = Field(None, description="Indicates when a version of a Data Set was formally released.") # noqa: N815 + version: Optional[str] = Field(None, description="The version of the Data Set, as assigned by its creator.") + license: Optional[str] = Field(None, description="A specific license that dictates legal permissions for how a data set can be used (by whom, where, for what purposes, with what additional requirements, etc.)") + +class EvidenceLine(InformationEntity): + """An independent, evidence-based argument that may support or refute the validity + of a specific proposition. The strength and direction of this argument is based on + an interpretation of one or more pieces of information as evidence for or against + the target proposition. + """ + + type: Literal["EvidenceLine"] = Field(CoreImType.EVIDENCE_LINE.value, description=f"MUST be '{CoreImType.EVIDENCE_LINE.value}'") + hasEvidenceItems: List[InformationEntity] = Field(None, description="An individual piece of information that was evaluated as evidence in building the argument represented by an Evidence Line.") # noqa: N815 + directionOfEvidenceProvided: Optional[Direction] = Field(None, description="The direction of support that the Evidence Line is determined to provide toward its target Proposition (supports, disputes, neutral)") # noqa: N815 + strengthOfEvidenceProvided: Optional[Union[Coding, IRI]] = Field(None, description="The strength of support that an Evidence Line is determined to provide for or against its target Proposition, evaluated relative to the direction indicated by the directionOfEvidenceProvided value.") + scoreOfEvidenceProvided: Optional[float] = Field(None, description="A quantitative score indicating the strength of support that an Evidence Line is determined to provide for or against its target Proposition, evaluated relative to the direction indicated by the directionOfEvidenceProvided value.") + + +class Statement(InformationEntity, ABC): + """A claim of purported truth as made by a particular agent, on a particular + occasion. Statements may be used to simply put forth a possible fact (i.e. a + 'proposition') as true, or to provide a more nuanced assessment of the level of + confidence or evidence supporting a particular proposition. + """ + + subject: Dict = Field(..., description="The Entity about which the Statement is made.") + predicate: str = Field(..., description="The relationship declared to hold between the subject and the object of the Statement.") + object: Dict = Field(..., description="An Entity or concept that is related to the subject of a Statement via its predicate.") + direction: Optional[Direction] = Field(None, description="A term indicating whether the Statement supports, disputes, or remains neutral w.r.t. the validity of the Proposition it evaluates.") + strength: Optional[Union[Coding, IRI]]= Field(None, description="A term used to report the strength of a Proposition's assessment in the direction indicated (i.e. how strongly supported or disputed the Proposition is believed to be). Implementers may choose to frame a strength assessment in terms of how *confident* an agent is that the Proposition is true or false, or in terms of the *strength of all evidence* they believe supports or disputes it.") + score: Optional[float] = Field(None, description="A quantitative score that indicates the strength of a Proposition's assessment in the direction indicated (i.e. how strongly supported or disputed the Proposition is believed to be). Depending on its implementation, a score may reflect how *confident* that agent is that the Proposition is true or false, or the *strength of evidence* they believe supports or disputes it.") + statementText: Optional[str] = Field(None, description="A natural-language expression of what a Statement asserts to be true.") # noqa: N815 + classification: Optional[Union[Coding, IRI]] = Field(None, description="A single term or phrase summarizing the outcome of direction and strength assessments of a Statement's proposition, in terms of a classification of its subject.") + hasEvidenceLines: Optional[List[EvidenceLine]] = Field(None, description="An evidence-based argument that supports or disputes the validity of the proposition that a Statement assesses or puts forth as true. The strength and direction of this argument (whether it supports or disputes the proposition, and how strongly) is based on an interpretation of one or more pieces of information as evidence (i.e. 'Evidence Items).") + + +class StudyGroup(Entity): + """A collection of individuals or specimens from the same taxonomic class, selected + for analysis in a scientific study based on their exhibiting one or more common + characteristics (e.g. species, race, age, gender, disease state, income). May be + referred to as a 'cohort' or 'population' in specific research settings. + """ + + type: Literal["StudyGroup"] = Field(CoreImType.STUDY_GROUP.value, description=f"Must be '{CoreImType.STUDY_GROUP.value}'") + memberCount: Optional[int] = Field(None, description="The total number of individual members in the StudyGroup.") # noqa: N815 + isSubsetOf: Optional[List[StudyGroup] ]= Field(None, description="A larger StudyGroup of which this StudyGroup represents a subset.") # noqa: N815 + characteristics: Optional[List[Characteristic]] = Field(None, description="A feature or role shared by all members of the StudyGroup, representing a criterion for membership in the group.") + + +class Characteristic(BaseModel): + """An object holding a name-value pair used to describe a trait or role of an + individual member of a StudyGroup. + """ + + name: str = Field(..., description="The type of the trait or role described by the trait (e.g. 'ethnicity', 'sex', 'age', 'disease status').") + value: str = Field(..., description="The specific value(s) that the indicated traitor role holds in all population members (e.g. 'east asian', 'female', 'adolescent', 'cancer').") + valueOperator: bool = Field(None, description="An operation that defines how to logically interpret a set of more than one Characteristic values ('AND', 'OR', 'NOT')") + + +class StudyResult(InformationEntity, ABC): + """A collection of data items from a single study that pertain to a particular + subject or experimental unit in the study, along with optional provenance + information describing how these data items were generated. + """ + + focus: Optional[Union[DomainEntity, Coding, IRI]] = Field(None, description="The specific subject or experimental unit in a Study that data in the StudyResult object is about - e.g. a particular variant in a population allele frequency dataset like ExAC or gnomAD.") + sourceDataSet: Optional[List[DataSet]] = Field(None, description="A larger DataSet from which the content of the StudyResult was derived.", max_length=1) # noqa: N815 + componentResult: Optional[List[StudyResult]] = Field(None, description="Another StudyResult comprised of data items about the same focus as its parent Result, but based on a more narrowly scoped analysis of the foundational data (e.g. an analysis based on data about a subset of the parent Results full study population) .") # noqa: N815 + studyGroup: Optional[StudyGroup] = Field(None, description="A description of a specific group or population of subjects interrogated in the ResearchStudy that produced the data captured in the StudyResult.") # noqa: N815 + ancillaryResults: Optional[Dict] = None + qualityMeasures: Optional[Dict] = None + + @model_validator(mode="before") + def handle_extends_prop(cls, values: dict[str, Any]) -> dict[str, Any]: + """Handle extends properties by renaming fields + + :param values: Input values to process + :return: Processed values with extended properties renamed + """ + if "derivedFrom" in values: + values["sourceDataSet"] = values.pop("derivedFrom") + return values \ No newline at end of file diff --git a/submodules/vrs b/submodules/vrs index a7d75410..f22966af 160000 --- a/submodules/vrs +++ b/submodules/vrs @@ -1 +1 @@ -Subproject commit a7d75410ff774aee76514f47438fc1a13165ce6d +Subproject commit f22966af39a7be230ed6d988f769856c8f22dd46 From ec8fd404eba534e89a5c9e5287a18da058320057 Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Fri, 27 Sep 2024 14:01:22 -0400 Subject: [PATCH 2/9] pull in latest changes --- src/ga4gh/core/domain_models.py | 13 +-- src/ga4gh/core/entity_models.py | 149 ++++++++++---------------------- src/ga4gh/vrs/models.py | 57 +++++++++--- submodules/vrs | 2 +- 4 files changed, 96 insertions(+), 125 deletions(-) diff --git a/src/ga4gh/core/domain_models.py b/src/ga4gh/core/domain_models.py index 853da78b..e7cd0c4f 100644 --- a/src/ga4gh/core/domain_models.py +++ b/src/ga4gh/core/domain_models.py @@ -1,15 +1,4 @@ -"""GKS Common Library Domain Entity models - -**This module should not be imported directly.** - -Instead, users should use one of the following: - - * `from ga4gh.core import domain_models`, and refer to models with the - abbreviated name, e.g., `domain_models.Gene` (recommended) - - * `import ga4gh.core`, and refer to models using the fully-qualified - module name, e.g., `ga4gh.core.domain_models.Gene` -""" +"""GKS Common Library Domain Entity models""" from enum import Enum from typing import Literal, Union, List diff --git a/src/ga4gh/core/entity_models.py b/src/ga4gh/core/entity_models.py index ed794908..fc6e8c1a 100644 --- a/src/ga4gh/core/entity_models.py +++ b/src/ga4gh/core/entity_models.py @@ -1,15 +1,4 @@ -"""GKS Common Library Entity models - -**This module should not be imported directly.** - -Instead, users should use one of the following: - - * `from ga4gh.core import entity_models`, and refer to models with the - abbreviated name, e.g., `entity_models.Coding` (recommended) - - * `import ga4gh.core`, and refer to models using the fully-qualified - module name, e.g., `ga4gh.core.entity_models.Coding` -""" +"""GKS Common Library Data Type and Entity models""" from __future__ import annotations from abc import ABC @@ -18,7 +7,7 @@ from typing import Any, Dict, Annotated, Literal, Optional, Union, List from enum import Enum -from pydantic import BaseModel, Field, RootModel, StringConstraints, ConfigDict, field_validator, model_validator +from pydantic import BaseModel, Field, RootModel, StringConstraints, ConfigDict, field_validator from ga4gh.core import GA4GH_IR_REGEXP @@ -31,11 +20,9 @@ class CoreImType(str, Enum): CONTRIBUTION = "Contribution" DOCUMENT = "Document" METHOD = "Method" - DATA_ITEM = "DataItem" DATA_SET = "DataSet" EVIDENCE_LINE = "EvidenceLine" INFORMATION_ENTITY = "InformationEntity" - PROPOSITION = "Proposition" STUDY_GROUP = "StudyGroup" @@ -52,22 +39,6 @@ class Relation(str, Enum): RELATED_MATCH = 'relatedMatch' -class Syntax(str, Enum): - """The syntax used to describe the variation. The value should be one of the - supported syntaxes. - """ - - HGVS_C = "hgvs.c" - HGVS_P = "hgvs.p" - HGVS_G = "hgvs.g" - HGVS_M = "hgvs.m" - HGVS_N = "hgvs.n" - HGVS_R = "hgvs.r" - HGVS_ISCN = "iscn" - GNOMAD = "gnomad" - SPDI = "spdi" - - class AgentSubtype(str, Enum): """A specific type of agent the Agent object represents.""" @@ -176,30 +147,12 @@ class Extension(BaseModel): description: Optional[str] = Field(None, description="A description of the meaning or utility of the Extension, to explain the type of information it is meant to hold.") -class Expression(BaseModel): - """Representation of a variation by a specified nomenclature or syntax for a - Variation object. Common examples of expressions for the description of molecular - variation include the HGVS and ISCN nomenclatures. - """ - - model_config = ConfigDict(use_enum_values=True) - - syntax: Syntax = Field(..., description="The syntax used to describe the variation. The value should be one of the supported syntaxes.") - value: str = Field(..., description="The expression of the variation in the specified syntax. The value should be a valid expression in the specified syntax.") - syntax_version: Optional[str] = Field(None, description="The version of the syntax used to describe the variation. This is particularly important for HGVS expressions, as the syntax has evolved over time.") - - ######################################### # GKS Common Abstract Entity Class Definitions ######################################### class Entity(BaseModel, ABC): - """Anything that exists, has existed, or will exist. - - Entity is the root class of the 'gks-common' core information model. All common - classes that have ids and other general metadata like label, description, type, or - extensions descend from this class and inherit its attributes. - """ + """Anything that exists, has existed, or will exist.""" id: Optional[str] = Field( None, @@ -212,7 +165,7 @@ class Entity(BaseModel, ABC): ) description: Optional[str] = Field( None, - description='A free-text description of the entity.' + description='A free-text description of the Entity.' ) alternativeLabels: Optional[List[str]] = Field(None, description="Alternative name(s) for the Entity.") extensions: Optional[List[Extension]] = Field(None, description="A list of extensions to the Entity, that allow for capture of information not directly supported by elements defined in the model.") @@ -240,15 +193,15 @@ class Agent(Entity): subtype: Optional[AgentSubtype] = Field(None, description="A specific type of agent the Agent object represents.") -class Activity(Entity): - """An action or set of actions performed by an agent, that occurs over a period of - time. Activities may use, generate, modify, move, or destroy one or more entities. +class _ActivityBase(Entity): + """Internal base class that holds shared fields for Activity model. + + This class should not be used directly. """ subtype: Optional[Coding] = Field(None, description="A specific type of activity the Activity instance represents.") date: Optional[str] = Field(None, description="The date that the Activity was completed.") - performedBy: Optional[List[Agent] ]= Field(None, description="An Agent who contributed to executing the Activity.") # noqa: N815 - specifiedBy: Optional[List[Method]] = Field(None, description="A method that was followed in performing an Activity, that describes how it was executed.") # noqa: N815 + specifiedBy: Optional[List[Method]] = Field(None, description="A method that was followed in performing an Activity, that describes how it was executed.") @field_validator("date") @classmethod @@ -265,8 +218,15 @@ def date_format(cls, v: Optional[str]) -> Optional[str]: logging.warning("`date` SHOULD be formatted as a date string in ISO format 'YYYY-MM-DD'") return v +class Activity(_ActivityBase): + """An action or set of actions performed by an agent, that occurs over a period of + time. Activities may use, generate, modify, move, or destroy one or more entities. + """ -class Contribution(Activity): + performedBy: Optional[List[Agent] ]= Field(None, description="An Agent who contributed to executing the Activity.") + + +class Contribution(_ActivityBase): """An action taken by an agent in contributing to the creation, modification, assessment, or deprecation of a particular entity (e.g. a Statement, EvidenceLine, DataSet, Publication, etc.) @@ -276,32 +236,28 @@ class Contribution(Activity): contributor: Optional[List[Agent]] = Field(None, description="The agent that made the contribution.", min_length=1, max_length=1) activityType: Optional[Coding] = Field(None, description="The specific type of activity performed or role played by an agent in making the contribution (e.g. for a publication, agents may contribute as a primary author, editor, figure designer, data generator, etc. . Values of this property may be framed as activities or as contribution roles (e.g. using terms from the Contribution Role Ontology (CRO)).") - @model_validator(mode="before") - def handle_extends_prop(cls, values: dict[str, Any]) -> dict[str, Any]: - """Handle extends properties by renaming fields - :param values: Input values to process - :return: Processed values with extended properties renamed - """ - if "performedBy" in values: - values["contributor"] = values.pop("performedBy") - return values +class _InformationEntityBase(Entity): + """Internal base class that holds shared fields for InformationEntity model. + This class should not be used directly. + """ -class InformationEntity(Entity, ABC): + type: Literal["InformationEntity"] = Field(CoreImType.INFORMATION_ENTITY.value, description=f"MUST be {CoreImType.INFORMATION_ENTITY.value}.") + specifiedBy: Optional[Union[Method, IRI]] = Field(None, description="A specification that describes all or part of the process that led to creation of the Information Entity ") + contributions: Optional[List[Contribution] ]= Field(None, description="Specific actions taken by an Agent toward the creation, modification, validation, or deprecation of an Information Entity.") + reportedIn: Optional[List[Union[Document, IRI]]] = Field(None, description="A document in which the the Information Entity is reported.") + dateAuthored: Optional[str] = Field(None, description="Indicates when the information content expressed in the Information Entity was generated.") + recordMetadata: Optional[RecordMetadata] = Field(None, description="Provenance metadata about a specific concrete record of information as encoded/serialized in a particular data set or object (as opposed to provenance about the abstract information content the encoding carries).") + + +class InformationEntity(_InformationEntityBase): """An abstract (non-physical) entity that is about something - representing the underlying 'information content' conveyed by physical or digital information artifacts like books, web pages, data tables, or photographs. """ - type: Literal["InformationEntity"] = Field(CoreImType.INFORMATION_ENTITY.value, description=f"MUST be {CoreImType.INFORMATION_ENTITY.value}.") - specifiedBy: Optional[Union[Method, IRI]] = Field(None, description="A specification that describes all or part of the process that led to creation of the Information Entity ") # noqa: N815 - contributions: Optional[List[Contribution] ]= Field(None, description="Specific actions taken by an Agent toward the creation, modification, validation, or deprecation of an Information Entity.") - reportedIn: Optional[List[Union[Document, IRI]]] = Field(None, description="A document in which the the Information Entity is reported.") # noqa: N815 - dateAuthored: Optional[str] = Field(None, description="Indicates when the information content expressed in the Information Entity was generated.") # noqa: N815 - derivedFrom: Optional[List[InformationEntity]] = Field(None, description="Another Information Entity from which this Information Entity is derived, in whole or in part.") # noqa: N815 - recordMetadata: Optional[RecordMetadata] = Field(None, description="Provenance metadata about a specific concrete record of information as encoded/serialized in a particular data set or object (as opposed to provenance about the abstract information content the encoding carries).") # noqa: N815 - + derivedFrom: Optional[List[InformationEntity]] = Field(None, description="Another Information Entity from which this Information Entity is derived, in whole or in part.") class Document(InformationEntity): """A collection of information, usually in a text-based or graphic human-readable @@ -316,7 +272,7 @@ class Document(InformationEntity): urls: Optional[List[Annotated[str, StringConstraints(pattern=r"^(https?|s?ftp)://")]]] = Field( None, description="One or more URLs from which the content of the Document can be retrieved." ) - doi: Optional[Annotated[str, StringConstraints(pattern=r"^10.(\\d+)(\\.\\d+)*\\/[\\w\\-\\.]+")]] = Field( + doi: Optional[Annotated[str, StringConstraints(pattern=r"^10\.(\d+)(\.\d+)*\/[\w\-\.]+")]] = Field( None, description="A `Digital Object Identifier _` for the document.", ) @@ -344,10 +300,10 @@ class RecordMetadata(BaseModel): record or object represents). """ - recordIdentifier: Optional[str] = Field(None, description="The identifier of the data record or object described in this RecordMetadata object.") # noqa: N815 - recordVersion: Optional[str] = Field(None, description="The version number of the record-level artifact the object describes.") # noqa: N815 - derivedFrom: Optional[str] = Field(None, description="Another data record from which the record described here was derived, through a data ingest and/or transformation process. Value should be a string representing the identifier of the source record.") # noqa: N815 - dateRecordCreated: Optional[str] = Field(None, description="The date the record was initially created.") # noqa: N815 + recordIdentifier: Optional[str] = Field(None, description="The identifier of the data record or object described in this RecordMetadata object.") + recordVersion: Optional[str] = Field(None, description="The version number of the record-level artifact the object describes.") + derivedFrom: Optional[str] = Field(None, description="Another data record from which the record described here was derived, through a data ingest and/or transformation process. Value should be a string representing the identifier of the source record.") + dateRecordCreated: Optional[str] = Field(None, description="The date the record was initially created.") contributions: Optional[List[Contribution]] = Field(None, description="Describes specific contributions made by an human or software agent to the creation, modification, or administrative management of a data record or object.") @@ -358,7 +314,7 @@ class DataSet(InformationEntity): type: Literal["DataSet"] = Field(CoreImType.DATA_SET.value, description=f"MUST be '{CoreImType.DATA_SET.value}'") subtype: Optional[Coding] = Field(None, description="A specific type of data set the DataSet instance represents (e.g. a 'clinical data set', a 'sequencing data set', a 'gene expression data set', a 'genome annotation data set')") - releaseDate: Optional[str] = Field(None, description="Indicates when a version of a Data Set was formally released.") # noqa: N815 + releaseDate: Optional[str] = Field(None, description="Indicates when a version of a Data Set was formally released.") version: Optional[str] = Field(None, description="The version of the Data Set, as assigned by its creator.") license: Optional[str] = Field(None, description="A specific license that dictates legal permissions for how a data set can be used (by whom, where, for what purposes, with what additional requirements, etc.)") @@ -370,8 +326,8 @@ class EvidenceLine(InformationEntity): """ type: Literal["EvidenceLine"] = Field(CoreImType.EVIDENCE_LINE.value, description=f"MUST be '{CoreImType.EVIDENCE_LINE.value}'") - hasEvidenceItems: List[InformationEntity] = Field(None, description="An individual piece of information that was evaluated as evidence in building the argument represented by an Evidence Line.") # noqa: N815 - directionOfEvidenceProvided: Optional[Direction] = Field(None, description="The direction of support that the Evidence Line is determined to provide toward its target Proposition (supports, disputes, neutral)") # noqa: N815 + hasEvidenceItems: Optional[List[InformationEntity]] = Field(None, description="An individual piece of information that was evaluated as evidence in building the argument represented by an Evidence Line.") + directionOfEvidenceProvided: Optional[Direction] = Field(None, description="The direction of support that the Evidence Line is determined to provide toward its target Proposition (supports, disputes, neutral)") strengthOfEvidenceProvided: Optional[Union[Coding, IRI]] = Field(None, description="The strength of support that an Evidence Line is determined to provide for or against its target Proposition, evaluated relative to the direction indicated by the directionOfEvidenceProvided value.") scoreOfEvidenceProvided: Optional[float] = Field(None, description="A quantitative score indicating the strength of support that an Evidence Line is determined to provide for or against its target Proposition, evaluated relative to the direction indicated by the directionOfEvidenceProvided value.") @@ -389,7 +345,7 @@ class Statement(InformationEntity, ABC): direction: Optional[Direction] = Field(None, description="A term indicating whether the Statement supports, disputes, or remains neutral w.r.t. the validity of the Proposition it evaluates.") strength: Optional[Union[Coding, IRI]]= Field(None, description="A term used to report the strength of a Proposition's assessment in the direction indicated (i.e. how strongly supported or disputed the Proposition is believed to be). Implementers may choose to frame a strength assessment in terms of how *confident* an agent is that the Proposition is true or false, or in terms of the *strength of all evidence* they believe supports or disputes it.") score: Optional[float] = Field(None, description="A quantitative score that indicates the strength of a Proposition's assessment in the direction indicated (i.e. how strongly supported or disputed the Proposition is believed to be). Depending on its implementation, a score may reflect how *confident* that agent is that the Proposition is true or false, or the *strength of evidence* they believe supports or disputes it.") - statementText: Optional[str] = Field(None, description="A natural-language expression of what a Statement asserts to be true.") # noqa: N815 + statementText: Optional[str] = Field(None, description="A natural-language expression of what a Statement asserts to be true.") classification: Optional[Union[Coding, IRI]] = Field(None, description="A single term or phrase summarizing the outcome of direction and strength assessments of a Statement's proposition, in terms of a classification of its subject.") hasEvidenceLines: Optional[List[EvidenceLine]] = Field(None, description="An evidence-based argument that supports or disputes the validity of the proposition that a Statement assesses or puts forth as true. The strength and direction of this argument (whether it supports or disputes the proposition, and how strongly) is based on an interpretation of one or more pieces of information as evidence (i.e. 'Evidence Items).") @@ -402,8 +358,8 @@ class StudyGroup(Entity): """ type: Literal["StudyGroup"] = Field(CoreImType.STUDY_GROUP.value, description=f"Must be '{CoreImType.STUDY_GROUP.value}'") - memberCount: Optional[int] = Field(None, description="The total number of individual members in the StudyGroup.") # noqa: N815 - isSubsetOf: Optional[List[StudyGroup] ]= Field(None, description="A larger StudyGroup of which this StudyGroup represents a subset.") # noqa: N815 + memberCount: Optional[int] = Field(None, description="The total number of individual members in the StudyGroup.") + isSubsetOf: Optional[List[StudyGroup] ]= Field(None, description="A larger StudyGroup of which this StudyGroup represents a subset.") characteristics: Optional[List[Characteristic]] = Field(None, description="A feature or role shared by all members of the StudyGroup, representing a criterion for membership in the group.") @@ -414,29 +370,18 @@ class Characteristic(BaseModel): name: str = Field(..., description="The type of the trait or role described by the trait (e.g. 'ethnicity', 'sex', 'age', 'disease status').") value: str = Field(..., description="The specific value(s) that the indicated traitor role holds in all population members (e.g. 'east asian', 'female', 'adolescent', 'cancer').") - valueOperator: bool = Field(None, description="An operation that defines how to logically interpret a set of more than one Characteristic values ('AND', 'OR', 'NOT')") + valueOperator: Optional[bool] = Field(None, description="An operation that defines how to logically interpret a set of more than one Characteristic values ('AND', 'OR', 'NOT')") -class StudyResult(InformationEntity, ABC): +class StudyResult(_InformationEntityBase, ABC): """A collection of data items from a single study that pertain to a particular subject or experimental unit in the study, along with optional provenance information describing how these data items were generated. """ focus: Optional[Union[DomainEntity, Coding, IRI]] = Field(None, description="The specific subject or experimental unit in a Study that data in the StudyResult object is about - e.g. a particular variant in a population allele frequency dataset like ExAC or gnomAD.") - sourceDataSet: Optional[List[DataSet]] = Field(None, description="A larger DataSet from which the content of the StudyResult was derived.", max_length=1) # noqa: N815 - componentResult: Optional[List[StudyResult]] = Field(None, description="Another StudyResult comprised of data items about the same focus as its parent Result, but based on a more narrowly scoped analysis of the foundational data (e.g. an analysis based on data about a subset of the parent Results full study population) .") # noqa: N815 - studyGroup: Optional[StudyGroup] = Field(None, description="A description of a specific group or population of subjects interrogated in the ResearchStudy that produced the data captured in the StudyResult.") # noqa: N815 + sourceDataSet: Optional[List[DataSet]] = Field(None, description="A larger DataSet from which the content of the StudyResult was derived.", max_length=1) + componentResult: Optional[List[StudyResult]] = Field(None, description="Another StudyResult comprised of data items about the same focus as its parent Result, but based on a more narrowly scoped analysis of the foundational data (e.g. an analysis based on data about a subset of the parent Results full study population) .") + studyGroup: Optional[StudyGroup] = Field(None, description="A description of a specific group or population of subjects interrogated in the ResearchStudy that produced the data captured in the StudyResult.") ancillaryResults: Optional[Dict] = None qualityMeasures: Optional[Dict] = None - - @model_validator(mode="before") - def handle_extends_prop(cls, values: dict[str, Any]) -> dict[str, Any]: - """Handle extends properties by renaming fields - - :param values: Input values to process - :return: Processed values with extended properties renamed - """ - if "derivedFrom" in values: - values["sourceDataSet"] = values.pop("derivedFrom") - return values \ No newline at end of file diff --git a/src/ga4gh/vrs/models.py b/src/ga4gh/vrs/models.py index bfe6279f..a6a594a1 100644 --- a/src/ga4gh/vrs/models.py +++ b/src/ga4gh/vrs/models.py @@ -32,7 +32,7 @@ from ga4gh.core.pydantic import ( getattr_in ) -from ga4gh.core.entity_models import IRI, Expression, Entity +from ga4gh.core.entity_models import IRI, Entity def flatten(vals): @@ -151,6 +151,13 @@ class VrsType(str, Enum): CN_CHANGE = "CopyNumberChange" +class Orientation(str, Enum): + """The orientation of the molecular variation component.""" + + FORWARD = "forward" + REVERSE_COMPLEMENT = "reverse_complement" + + class ResidueAlphabet(str, Enum): """The interpretation of the character codes referred to by the refget accession, where "aa" specifies an amino acid character set, and "na" specifies a nucleic acid @@ -174,6 +181,22 @@ class CopyChange(str, Enum): EFO_0030072 = 'EFO:0030072' +class Syntax(str, Enum): + """The syntax used to describe the variation. The value should be one of the + supported syntaxes. + """ + + HGVS_C = "hgvs.c" + HGVS_P = "hgvs.p" + HGVS_G = "hgvs.g" + HGVS_M = "hgvs.m" + HGVS_N = "hgvs.n" + HGVS_R = "hgvs.r" + HGVS_ISCN = "iscn" + GNOMAD = "gnomad" + SPDI = "spdi" + + def _recurse_ga4gh_serialize(obj): if isinstance(obj, _Ga4ghIdentifiableObject): return obj.get_or_create_digest() @@ -314,6 +337,18 @@ def get_or_create_digest(self, recompute=False) -> str: class ga4gh(_ValueObject.ga4gh): prefix: str +class Expression(BaseModel): + """Representation of a variation by a specified nomenclature or syntax for a + Variation object. Common examples of expressions for the description of molecular + variation include the HGVS and ISCN nomenclatures. + """ + + model_config = ConfigDict(use_enum_values=True) + + syntax: Syntax = Field(..., description="The syntax used to describe the variation. The value should be one of the supported syntaxes.") + value: str = Field(..., description="The expression of the variation in the specified syntax. The value should be a valid expression in the specified syntax.") + syntax_version: Optional[str] = Field(None, description="The version of the syntax used to describe the variation. This is particularly important for HGVS expressions, as the syntax has evolved over time.") + ######################################### # vrs numerics, comparators, and ranges @@ -646,17 +681,19 @@ class TraversalBlock(_ValueObject): """A component used to describe the orientation of a molecular variation within a DerivativeMolecule.""" + model_config = ConfigDict(use_enum_values=True) + type: Literal["TraversalBlock"] = Field( VrsType.TRAVERSAL_BLOCK.value, description=f'MUST be "{VrsType.TRAVERSAL_BLOCK.value}"' ) - orientation: Literal["forward", "reverse_complement"] = Field( - ..., + orientation: Optional[Orientation] = Field( + None, description='The orientation of the traversal block, either forward or reverse_complement.' ) - component: Union[IRI, Adjacency, Allele, Terminus, CisPhasedBlock] = Field( - ..., - description="The component that make up the derivative molecule." + component: Optional[Union[Allele, CisPhasedBlock, Adjacency, Terminus]] = Field( + None, + description="The unoriented molecular variation component." ) class ga4gh(_ValueObject.ga4gh): @@ -664,7 +701,7 @@ class ga4gh(_ValueObject.ga4gh): 'component', 'orientation', 'type' - ] + ] class DerivativeMolecule(_VariationBase): """The "Derivative Molecule" data class is a structure for describing a derivate @@ -672,12 +709,12 @@ class DerivativeMolecule(_VariationBase): """ type: Literal["DerivativeMolecule"] = Field(VrsType.DERIVATIVE_MOL.value, description=f'MUST be "{VrsType.DERIVATIVE_MOL.value}"') - components: List[TraversalBlock] = Field( + components: List[Union[IRI, TraversalBlock]] = Field( ..., - description="The traversal block components that make up the derivative molecule.", + description="The molecular components that constitute the derivative molecule.", min_length=2 ) - circular: Optional[bool] = Field(None, description="A flag indicating if the derivative molecule is circular (true) or linear (false).") + circular: Optional[bool] = Field(None, description="A boolean indicating whether the molecule represented by the sequence is circular (true) or linear (false).") class ga4gh(_Ga4ghIdentifiableObject.ga4gh): prefix = "DM" diff --git a/submodules/vrs b/submodules/vrs index f22966af..cf968d24 160000 --- a/submodules/vrs +++ b/submodules/vrs @@ -1 +1 @@ -Subproject commit f22966af39a7be230ed6d988f769856c8f22dd46 +Subproject commit cf968d24a37cefca5a2f363f0e2f36741cd12ad5 From b5f4505e7b6c742a9a743f0931bbf3355437047b Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Mon, 30 Sep 2024 21:19:54 -0400 Subject: [PATCH 3/9] create base classes for stmt + study result --- src/ga4gh/core/entity_models.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/src/ga4gh/core/entity_models.py b/src/ga4gh/core/entity_models.py index fc6e8c1a..057f2eec 100644 --- a/src/ga4gh/core/entity_models.py +++ b/src/ga4gh/core/entity_models.py @@ -332,7 +332,19 @@ class EvidenceLine(InformationEntity): scoreOfEvidenceProvided: Optional[float] = Field(None, description="A quantitative score indicating the strength of support that an Evidence Line is determined to provide for or against its target Proposition, evaluated relative to the direction indicated by the directionOfEvidenceProvided value.") -class Statement(InformationEntity, ABC): +class StatementBase(InformationEntity, ABC): + """Internal base class that holds shared fields for Statement model.""" + + direction: Optional[Direction] = Field(None, description="A term indicating whether the Statement supports, disputes, or remains neutral w.r.t. the validity of the Proposition it evaluates.") + strength: Optional[Union[Coding, IRI]]= Field(None, description="A term used to report the strength of a Proposition's assessment in the direction indicated (i.e. how strongly supported or disputed the Proposition is believed to be). Implementers may choose to frame a strength assessment in terms of how *confident* an agent is that the Proposition is true or false, or in terms of the *strength of all evidence* they believe supports or disputes it.") + score: Optional[float] = Field(None, description="A quantitative score that indicates the strength of a Proposition's assessment in the direction indicated (i.e. how strongly supported or disputed the Proposition is believed to be). Depending on its implementation, a score may reflect how *confident* that agent is that the Proposition is true or false, or the *strength of evidence* they believe supports or disputes it.") + statementText: Optional[str] = Field(None, description="A natural-language expression of what a Statement asserts to be true.") + classification: Optional[Union[Coding, IRI]] = Field(None, description="A single term or phrase summarizing the outcome of direction and strength assessments of a Statement's proposition, in terms of a classification of its subject.") + hasEvidenceLines: Optional[List[EvidenceLine]] = Field(None, description="An evidence-based argument that supports or disputes the validity of the proposition that a Statement assesses or puts forth as true. The strength and direction of this argument (whether it supports or disputes the proposition, and how strongly) is based on an interpretation of one or more pieces of information as evidence (i.e. 'Evidence Items).") + + + +class Statement(StatementBase): """A claim of purported truth as made by a particular agent, on a particular occasion. Statements may be used to simply put forth a possible fact (i.e. a 'proposition') as true, or to provide a more nuanced assessment of the level of @@ -342,12 +354,6 @@ class Statement(InformationEntity, ABC): subject: Dict = Field(..., description="The Entity about which the Statement is made.") predicate: str = Field(..., description="The relationship declared to hold between the subject and the object of the Statement.") object: Dict = Field(..., description="An Entity or concept that is related to the subject of a Statement via its predicate.") - direction: Optional[Direction] = Field(None, description="A term indicating whether the Statement supports, disputes, or remains neutral w.r.t. the validity of the Proposition it evaluates.") - strength: Optional[Union[Coding, IRI]]= Field(None, description="A term used to report the strength of a Proposition's assessment in the direction indicated (i.e. how strongly supported or disputed the Proposition is believed to be). Implementers may choose to frame a strength assessment in terms of how *confident* an agent is that the Proposition is true or false, or in terms of the *strength of all evidence* they believe supports or disputes it.") - score: Optional[float] = Field(None, description="A quantitative score that indicates the strength of a Proposition's assessment in the direction indicated (i.e. how strongly supported or disputed the Proposition is believed to be). Depending on its implementation, a score may reflect how *confident* that agent is that the Proposition is true or false, or the *strength of evidence* they believe supports or disputes it.") - statementText: Optional[str] = Field(None, description="A natural-language expression of what a Statement asserts to be true.") - classification: Optional[Union[Coding, IRI]] = Field(None, description="A single term or phrase summarizing the outcome of direction and strength assessments of a Statement's proposition, in terms of a classification of its subject.") - hasEvidenceLines: Optional[List[EvidenceLine]] = Field(None, description="An evidence-based argument that supports or disputes the validity of the proposition that a Statement assesses or puts forth as true. The strength and direction of this argument (whether it supports or disputes the proposition, and how strongly) is based on an interpretation of one or more pieces of information as evidence (i.e. 'Evidence Items).") class StudyGroup(Entity): @@ -373,6 +379,14 @@ class Characteristic(BaseModel): valueOperator: Optional[bool] = Field(None, description="An operation that defines how to logically interpret a set of more than one Characteristic values ('AND', 'OR', 'NOT')") +class StudyResultBase(_InformationEntityBase, ABC): + """Internal base class that holds shared fields for StudyResult model.""" + + sourceDataSet: Optional[List[DataSet]] = Field(None, description="A larger DataSet from which the content of the StudyResult was derived.", max_length=1) + ancillaryResults: Optional[Dict] = None + qualityMeasures: Optional[Dict] = None + + class StudyResult(_InformationEntityBase, ABC): """A collection of data items from a single study that pertain to a particular subject or experimental unit in the study, along with optional provenance From 5ebca83d9c2436f25b3afd913187158d6797d2c9 Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Mon, 30 Sep 2024 21:21:50 -0400 Subject: [PATCH 4/9] rm unnecessary abc + dont make class private --- src/ga4gh/core/entity_models.py | 44 +++++++++++++++++++++------------ src/ga4gh/vrs/models.py | 43 +++++++++++++++++++------------- 2 files changed, 54 insertions(+), 33 deletions(-) diff --git a/src/ga4gh/core/entity_models.py b/src/ga4gh/core/entity_models.py index 057f2eec..c28f5a06 100644 --- a/src/ga4gh/core/entity_models.py +++ b/src/ga4gh/core/entity_models.py @@ -1,7 +1,6 @@ """GKS Common Library Data Type and Entity models""" from __future__ import annotations -from abc import ABC import datetime import logging from typing import Any, Dict, Annotated, Literal, Optional, Union, List @@ -151,8 +150,11 @@ class Extension(BaseModel): # GKS Common Abstract Entity Class Definitions ######################################### -class Entity(BaseModel, ABC): - """Anything that exists, has existed, or will exist.""" +class Entity(BaseModel): + """Anything that exists, has existed, or will exist. + + Abstract base class to be extended by other classes. Do NOT instantiate directly. + """ id: Optional[str] = Field( None, @@ -171,12 +173,14 @@ class Entity(BaseModel, ABC): extensions: Optional[List[Extension]] = Field(None, description="A list of extensions to the Entity, that allow for capture of information not directly supported by elements defined in the model.") -class DomainEntity(Entity, ABC): +class DomainEntity(Entity): """An Entity that is specific to a particular biomedical domain such as disease, therapeutics, or genes. Domain Entities are considered as 'concept-level' entities, as opposed to particular instances. e.g. 'Lung Cancer', not 'patient123's lung cancer'. Or 'Erlotinib', not the particular doses given to a patient on a specific occasion. + + Abstract base class to be extended by other classes. Do NOT instantiate directly. """ mappings: Optional[List[ConceptMapping]] = Field(None, description="A list of mappings to concepts in terminologies or code systems. Each mapping should include a coding and a relation.") @@ -193,10 +197,10 @@ class Agent(Entity): subtype: Optional[AgentSubtype] = Field(None, description="A specific type of agent the Agent object represents.") -class _ActivityBase(Entity): +class ActivityBase(Entity): """Internal base class that holds shared fields for Activity model. - This class should not be used directly. + Abstract base class to be extended by other classes. Do NOT instantiate directly. """ subtype: Optional[Coding] = Field(None, description="A specific type of activity the Activity instance represents.") @@ -218,7 +222,7 @@ def date_format(cls, v: Optional[str]) -> Optional[str]: logging.warning("`date` SHOULD be formatted as a date string in ISO format 'YYYY-MM-DD'") return v -class Activity(_ActivityBase): +class Activity(ActivityBase): """An action or set of actions performed by an agent, that occurs over a period of time. Activities may use, generate, modify, move, or destroy one or more entities. """ @@ -226,7 +230,7 @@ class Activity(_ActivityBase): performedBy: Optional[List[Agent] ]= Field(None, description="An Agent who contributed to executing the Activity.") -class Contribution(_ActivityBase): +class Contribution(ActivityBase): """An action taken by an agent in contributing to the creation, modification, assessment, or deprecation of a particular entity (e.g. a Statement, EvidenceLine, DataSet, Publication, etc.) @@ -237,10 +241,10 @@ class Contribution(_ActivityBase): activityType: Optional[Coding] = Field(None, description="The specific type of activity performed or role played by an agent in making the contribution (e.g. for a publication, agents may contribute as a primary author, editor, figure designer, data generator, etc. . Values of this property may be framed as activities or as contribution roles (e.g. using terms from the Contribution Role Ontology (CRO)).") -class _InformationEntityBase(Entity): +class InformationEntityBase(Entity): """Internal base class that holds shared fields for InformationEntity model. - This class should not be used directly. + Abstract base class to be extended by other classes. Do NOT instantiate directly. """ type: Literal["InformationEntity"] = Field(CoreImType.INFORMATION_ENTITY.value, description=f"MUST be {CoreImType.INFORMATION_ENTITY.value}.") @@ -251,7 +255,7 @@ class _InformationEntityBase(Entity): recordMetadata: Optional[RecordMetadata] = Field(None, description="Provenance metadata about a specific concrete record of information as encoded/serialized in a particular data set or object (as opposed to provenance about the abstract information content the encoding carries).") -class InformationEntity(_InformationEntityBase): +class InformationEntity(InformationEntityBase): """An abstract (non-physical) entity that is about something - representing the underlying 'information content' conveyed by physical or digital information artifacts like books, web pages, data tables, or photographs. @@ -332,8 +336,11 @@ class EvidenceLine(InformationEntity): scoreOfEvidenceProvided: Optional[float] = Field(None, description="A quantitative score indicating the strength of support that an Evidence Line is determined to provide for or against its target Proposition, evaluated relative to the direction indicated by the directionOfEvidenceProvided value.") -class StatementBase(InformationEntity, ABC): - """Internal base class that holds shared fields for Statement model.""" +class StatementBase(InformationEntity): + """Internal base class that holds shared fields for Statement model. + + Abstract base class to be extended by other classes. Do NOT instantiate directly. + """ direction: Optional[Direction] = Field(None, description="A term indicating whether the Statement supports, disputes, or remains neutral w.r.t. the validity of the Proposition it evaluates.") strength: Optional[Union[Coding, IRI]]= Field(None, description="A term used to report the strength of a Proposition's assessment in the direction indicated (i.e. how strongly supported or disputed the Proposition is believed to be). Implementers may choose to frame a strength assessment in terms of how *confident* an agent is that the Proposition is true or false, or in terms of the *strength of all evidence* they believe supports or disputes it.") @@ -379,18 +386,23 @@ class Characteristic(BaseModel): valueOperator: Optional[bool] = Field(None, description="An operation that defines how to logically interpret a set of more than one Characteristic values ('AND', 'OR', 'NOT')") -class StudyResultBase(_InformationEntityBase, ABC): - """Internal base class that holds shared fields for StudyResult model.""" +class StudyResultBase(InformationEntityBase): + """Internal base class that holds shared fields for StudyResult model. + + Abstract base class to be extended by other classes. Do NOT instantiate directly. + """ sourceDataSet: Optional[List[DataSet]] = Field(None, description="A larger DataSet from which the content of the StudyResult was derived.", max_length=1) ancillaryResults: Optional[Dict] = None qualityMeasures: Optional[Dict] = None -class StudyResult(_InformationEntityBase, ABC): +class StudyResult(InformationEntityBase): """A collection of data items from a single study that pertain to a particular subject or experimental unit in the study, along with optional provenance information describing how these data items were generated. + + Abstract base class to be extended by other classes. Do NOT instantiate directly. """ focus: Optional[Union[DomainEntity, Coding, IRI]] = Field(None, description="The specific subject or experimental unit in a Study that data in the StudyResult object is about - e.g. a particular variant in a population allele frequency dataset like ExAC or gnomAD.") diff --git a/src/ga4gh/vrs/models.py b/src/ga4gh/vrs/models.py index a6a594a1..c662acce 100644 --- a/src/ga4gh/vrs/models.py +++ b/src/ga4gh/vrs/models.py @@ -10,7 +10,6 @@ * `import ga4gh.vrs`, and refer to models using the fully-qualified module name, e.g., `ga4gh.vrs.models.Allele` """ -from abc import ABC from typing import List, Literal, Optional, Union, Dict, Annotated from collections import OrderedDict from enum import Enum @@ -198,7 +197,7 @@ class Syntax(str, Enum): def _recurse_ga4gh_serialize(obj): - if isinstance(obj, _Ga4ghIdentifiableObject): + if isinstance(obj, Ga4ghIdentifiableObject): return obj.get_or_create_digest() elif isinstance(obj, _ValueObject): return obj.ga4gh_serialize() @@ -212,9 +211,11 @@ def _recurse_ga4gh_serialize(obj): return obj -class _ValueObject(Entity, ABC): +class _ValueObject(Entity): """A contextual value whose equality is based on value, not identity. See https://en.wikipedia.org/wiki/Value_object for more on Value Objects. + + Abstract base class to be extended by other classes. Do NOT instantiate directly. """ def __hash__(self): @@ -235,10 +236,12 @@ def is_ga4gh_identifiable(): return False -class _Ga4ghIdentifiableObject(_ValueObject, ABC): +class Ga4ghIdentifiableObject(_ValueObject): """A contextual value object for which a GA4GH computed identifier can be created. All GA4GH Identifiable Objects may have computed digests from the VRS Computed Identifier algorithm. + + Abstract base class to be extended by other classes. Do NOT instantiate directly. """ type: str @@ -480,7 +483,7 @@ class ga4gh(_ValueObject.ga4gh): ] -class SequenceLocation(_Ga4ghIdentifiableObject): +class SequenceLocation(Ga4ghIdentifiableObject): """A `Location` defined by an interval on a referenced `Sequence`.""" type: Literal["SequenceLocation"] = Field(VrsType.SEQ_LOC.value, description=f'MUST be "{VrsType.SEQ_LOC.value}"') @@ -536,7 +539,7 @@ def get_refget_accession(self): else: return None - class ga4gh(_Ga4ghIdentifiableObject.ga4gh): + class ga4gh(Ga4ghIdentifiableObject.ga4gh): prefix = 'SL' priorPrefix = {PrevVrsVersion.V1_3.value: 'VSL'} keys = [ @@ -551,8 +554,11 @@ class ga4gh(_Ga4ghIdentifiableObject.ga4gh): ######################################### -class _VariationBase(_Ga4ghIdentifiableObject, ABC): - """Base class for variation""" +class _VariationBase(Ga4ghIdentifiableObject): + """Base class for variation + + Abstract base class to be extended by other classes. Do NOT instantiate directly. + """ expressions: Optional[List[Expression]] = None @@ -594,7 +600,7 @@ def ga4gh_serialize_as_version(self, as_version: PrevVrsVersion): return f'{{"location":"{location_digest}","state":{{"sequence":"{sequence}","type":"LiteralSequenceExpression"}},"type":"Allele"}}' - class ga4gh(_Ga4ghIdentifiableObject.ga4gh): + class ga4gh(Ga4ghIdentifiableObject.ga4gh): prefix = 'VA' priorPrefix = {PrevVrsVersion.V1_3.value: 'VA'} keys = [ @@ -620,7 +626,7 @@ def ga4gh_serialize(self) -> Dict: out["members"] = sorted(out["members"]) return out - class ga4gh(_Ga4ghIdentifiableObject.ga4gh): + class ga4gh(Ga4ghIdentifiableObject.ga4gh): prefix = 'CPB' keys = [ 'members', @@ -652,7 +658,7 @@ class Adjacency(_VariationBase): ) homology: Optional[bool] = Field(None, description="A flag indicating if coordinate ambiguity in the adjoined sequences is from sequence homology (true) or other uncertainty (false).") - class ga4gh(_Ga4ghIdentifiableObject.ga4gh): + class ga4gh(Ga4ghIdentifiableObject.ga4gh): prefix = 'AJ' keys = [ 'adjoinedSequences', @@ -670,7 +676,7 @@ class Terminus(_VariationBase): type: Literal["Terminus"] = Field(VrsType.TERMINUS.value, description=f'MUST be "{VrsType.TERMINUS.value}"') location: Union[IRI, SequenceLocation] = Field(..., description="The location of the terminus.") - class ga4gh(_Ga4ghIdentifiableObject.ga4gh): + class ga4gh(Ga4ghIdentifiableObject.ga4gh): prefix = "TM" keys = [ "location", @@ -716,7 +722,7 @@ class DerivativeMolecule(_VariationBase): ) circular: Optional[bool] = Field(None, description="A boolean indicating whether the molecule represented by the sequence is circular (true) or linear (false).") - class ga4gh(_Ga4ghIdentifiableObject.ga4gh): + class ga4gh(Ga4ghIdentifiableObject.ga4gh): prefix = "DM" keys = [ "components", @@ -729,8 +735,11 @@ class ga4gh(_Ga4ghIdentifiableObject.ga4gh): ######################################### -class _CopyNumber(_VariationBase, ABC): - """A measure of the copies of a `Location` within a system (e.g. genome, cell, etc.)""" +class _CopyNumber(_VariationBase): + """A measure of the copies of a `Location` within a system (e.g. genome, cell, etc.) + + Abstract base class to be extended by other classes. Do NOT instantiate directly. + """ location: Union[IRI, SequenceLocation] = Field( ..., @@ -748,7 +757,7 @@ class CopyNumberCount(_CopyNumber): ..., description='The integral number of copies of the subject in a system' ) - class ga4gh(_Ga4ghIdentifiableObject.ga4gh): + class ga4gh(Ga4ghIdentifiableObject.ga4gh): prefix = 'CN' keys = [ 'copies', @@ -770,7 +779,7 @@ class CopyNumberChange(_CopyNumber): description='MUST be one of "EFO:0030069" (complete genomic loss), "EFO:0020073" (high-level loss), "EFO:0030068" (low-level loss), "EFO:0030067" (loss), "EFO:0030064" (regional base ploidy), "EFO:0030070" (gain), "EFO:0030071" (low-level gain), "EFO:0030072" (high-level gain).', ) - class ga4gh(_Ga4ghIdentifiableObject.ga4gh): + class ga4gh(Ga4ghIdentifiableObject.ga4gh): prefix = 'CX' keys = [ 'copyChange', From 7d79121168e3bd83787b1e8a075edc3cd5e966d4 Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Thu, 10 Oct 2024 07:58:10 -0400 Subject: [PATCH 5/9] add schema tests + fix extension required props --- src/ga4gh/core/entity_models.py | 2 +- tests/test_schemas.py | 140 ++++++++++++++++++++++++++++++++ 2 files changed, 141 insertions(+), 1 deletion(-) create mode 100644 tests/test_schemas.py diff --git a/src/ga4gh/core/entity_models.py b/src/ga4gh/core/entity_models.py index c28f5a06..6969a24f 100644 --- a/src/ga4gh/core/entity_models.py +++ b/src/ga4gh/core/entity_models.py @@ -141,7 +141,7 @@ class Extension(BaseModel): name: str = Field(..., description='A name for the Extension. Should be indicative of its meaning and/or the type of information it value represents.') value: Optional[Union[float, str, bool, Dict[str, Any], List[Any]]] = Field( - None, description='The value of the Extension - can be any primitive or structured object' + ..., description='The value of the Extension - can be any primitive or structured object' ) description: Optional[str] = Field(None, description="A description of the meaning or utility of the Extension, to explain the type of information it is meant to hold.") diff --git a/tests/test_schemas.py b/tests/test_schemas.py new file mode 100644 index 00000000..ae71520b --- /dev/null +++ b/tests/test_schemas.py @@ -0,0 +1,140 @@ +"""Test that VRS-Python Pydantic models match VRS and GKS-Common schemas""" + +from enum import Enum +import json +from pathlib import Path + +import pytest +from pydantic import BaseModel + +from ga4gh.core import entity_models, domain_models +from ga4gh.vrs import models as vrs_models + + +class GKSSchema(str, Enum): + """Enum for GKS schema""" + + VRS = "vrs" + CORE_IM = "core-im" + DOMAIN = "domain-entities" + + +class GKSSchemaMapping(BaseModel): + """Model for representing GKS Schema concrete classes, primitives, and schema""" + + base_classes: set = set() + concrete_classes: set = set() + primitives: set = set() + schema: dict = dict() + + +def _update_gks_schema_mapping( + f_path: Path, gks_schema_mapping: GKSSchemaMapping +) -> None: + """Update ``gks_schema_mapping`` properties + + :param f_path: Path to JSON Schema file + :param gks_schema_mapping: GKS schema mapping to update + """ + with f_path.open() as rf: + cls_def = json.load(rf) + + spec_class = cls_def["title"] + gks_schema_mapping.schema[spec_class] = cls_def + + if "properties" in cls_def: + gks_schema_mapping.concrete_classes.add(spec_class) + elif cls_def.get("type") in {"array", "integer", "string"}: + gks_schema_mapping.primitives.add(spec_class) + else: + gks_schema_mapping.base_classes.add(spec_class) + + +GKS_SCHEMA_MAPPING = {gks: GKSSchemaMapping() for gks in GKSSchema} +SUBMODULES_DIR = Path(__file__).parents[1] / "submodules" / "vrs" + + +# Get vrs classes +vrs_mapping = GKS_SCHEMA_MAPPING[GKSSchema.VRS] +for f in (SUBMODULES_DIR / "schema" / "vrs" / "json").glob("*"): + _update_gks_schema_mapping(f, vrs_mapping) + + +# Get core-im + domain classes +for child in (SUBMODULES_DIR / "submodules" / "gks-common" / "schema").iterdir(): + mapping_key = ( + GKSSchema.DOMAIN if str(child).endswith(GKSSchema.DOMAIN) else GKSSchema.CORE_IM + ) + mapping = GKS_SCHEMA_MAPPING[mapping_key] + for f in (child / "json").glob("*"): + _update_gks_schema_mapping(f, mapping) + + +@pytest.mark.parametrize( + "gks_schema,pydantic_models", + [ + (GKSSchema.VRS, vrs_models), + (GKSSchema.CORE_IM, entity_models), + (GKSSchema.DOMAIN, domain_models), + ], +) +def test_schema_models_in_pydantic(gks_schema, pydantic_models): + """Ensure that each schema model has corresponding Pydantic model""" + mapping = GKS_SCHEMA_MAPPING[gks_schema] + for schema_model in ( + mapping.base_classes | mapping.concrete_classes | mapping.primitives + ): + assert getattr(pydantic_models, schema_model, False) + + +@pytest.mark.parametrize( + "gks_schema,pydantic_models", + [ + (GKSSchema.VRS, vrs_models), + (GKSSchema.CORE_IM, entity_models), + (GKSSchema.DOMAIN, domain_models), + ], +) +def test_schema_class_fields(gks_schema, pydantic_models): + """Check that each schema model properties exist and are required in corresponding + Pydantic model, and validate required properties + """ + mapping = GKS_SCHEMA_MAPPING[gks_schema] + for schema_model in mapping.concrete_classes: + schema_fields = set(mapping.schema[schema_model]["properties"]) + pydantic_model = getattr(pydantic_models, schema_model) + assert set(pydantic_model.model_fields) == schema_fields, schema_model + + # Check required fields + required_schema_fields = set(mapping.schema[schema_model]["required"]) + required_pydantic_fields = { + field + for field, field_info in pydantic_model.model_fields.items() + if field_info.is_required() or isinstance(field_info.default, str) + } + assert required_pydantic_fields == required_schema_fields, schema_model + + +def test_ga4gh_keys(): + """Ensure ga4ghDigest keys defined in schema model exist in corresponding Pydantic model""" + vrs_mapping = GKS_SCHEMA_MAPPING[GKSSchema.VRS] + for vrs_class in vrs_mapping.concrete_classes: + if ( + vrs_mapping.schema[vrs_class].get("ga4ghDigest", {}).get("keys", None) + is None + ): + continue + + pydantic_model = getattr(vrs_models, vrs_class) + + try: + pydantic_model_digest_keys = pydantic_model.ga4gh.keys + except AttributeError as e: + raise AttributeError(vrs_class) from e + + assert set(pydantic_model_digest_keys) == set( + vrs_mapping.schema[vrs_class]["ga4ghDigest"]["keys"] + ), vrs_class + assert pydantic_model_digest_keys == sorted( + pydantic_model.ga4gh.keys + ), vrs_class From 0c3d3497c6e18ce8e5bf96bb64461e90410dc2ef Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Fri, 11 Oct 2024 13:52:40 -0400 Subject: [PATCH 6/9] more tests for descriptions --- src/ga4gh/core/domain_models.py | 2 +- src/ga4gh/core/entity_models.py | 33 ++++++++++++++++---------------- src/ga4gh/vrs/models.py | 34 ++++++++++++++++----------------- tests/test_schemas.py | 28 +++++++++++++++++---------- 4 files changed, 53 insertions(+), 44 deletions(-) diff --git a/src/ga4gh/core/domain_models.py b/src/ga4gh/core/domain_models.py index e7cd0c4f..6a8558fc 100644 --- a/src/ga4gh/core/domain_models.py +++ b/src/ga4gh/core/domain_models.py @@ -127,5 +127,5 @@ class Gene(DomainEntity): type: Literal["Gene"] = Field( CommonDomainType.GENE.value, - description=f'MUST be "{CommonDomainType.GENE.value}".' + description=f'MUST be "{CommonDomainType.GENE.value}"' ) diff --git a/src/ga4gh/core/entity_models.py b/src/ga4gh/core/entity_models.py index 6969a24f..406c2d65 100644 --- a/src/ga4gh/core/entity_models.py +++ b/src/ga4gh/core/entity_models.py @@ -11,7 +11,6 @@ from ga4gh.core import GA4GH_IR_REGEXP - class CoreImType(str, Enum): """Define Core Information Model Types""" @@ -25,7 +24,6 @@ class CoreImType(str, Enum): STUDY_GROUP = "StudyGroup" - class Relation(str, Enum): """A mapping relation between concepts as defined by the Simple Knowledge Organization System (SKOS). @@ -59,6 +57,7 @@ class Direction(str, Enum): # These do not inherit from Entity and are not typed explicitly ######################################### + class Code(RootModel): """Indicates that the value is taken from a set of controlled strings defined elsewhere. Technically, a code is restricted to a string which has at least one @@ -98,7 +97,6 @@ def ga4gh_serialize(self): ) - class Coding(BaseModel): """A structured representation of a code for a defined concept in a terminology or code system. @@ -150,6 +148,7 @@ class Extension(BaseModel): # GKS Common Abstract Entity Class Definitions ######################################### + class Entity(BaseModel): """Anything that exists, has existed, or will exist. @@ -163,7 +162,7 @@ class Entity(BaseModel): type: str = Field(..., description="The name of the class that is instantiated by a data object representing the Entity.") label: Optional[str] = Field( None, - description='A primary label for the entity.' + description='A primary name for the entity.' ) description: Optional[str] = Field( None, @@ -193,8 +192,8 @@ class Agent(Entity): """ type: Literal["Agent"] = Field(CoreImType.AGENT.value, description=f"MUST be '{CoreImType.AGENT.value}'.") - name: Optional[str] = Field(None, description="The descriptive name of the agent.") - subtype: Optional[AgentSubtype] = Field(None, description="A specific type of agent the Agent object represents.") + name: Optional[str] = Field(None, description="The given name of the Agent.") + subtype: Optional[AgentSubtype] = Field(None, description="A specific type of agent the Agent object represents. Must be one of {person, organization, software}.") class ActivityBase(Entity): @@ -222,6 +221,7 @@ def date_format(cls, v: Optional[str]) -> Optional[str]: logging.warning("`date` SHOULD be formatted as a date string in ISO format 'YYYY-MM-DD'") return v + class Activity(ActivityBase): """An action or set of actions performed by an agent, that occurs over a period of time. Activities may use, generate, modify, move, or destroy one or more entities. @@ -236,7 +236,7 @@ class Contribution(ActivityBase): DataSet, Publication, etc.) """ - type: Literal["Contribution"] = Field(CoreImType.CONTRIBUTION.value, description=f"MUST be {CoreImType.CONTRIBUTION.value}.") + type: Literal["Contribution"] = Field(CoreImType.CONTRIBUTION.value, description=f"MUST be '{CoreImType.CONTRIBUTION.value}'.") contributor: Optional[List[Agent]] = Field(None, description="The agent that made the contribution.", min_length=1, max_length=1) activityType: Optional[Coding] = Field(None, description="The specific type of activity performed or role played by an agent in making the contribution (e.g. for a publication, agents may contribute as a primary author, editor, figure designer, data generator, etc. . Values of this property may be framed as activities or as contribution roles (e.g. using terms from the Contribution Role Ontology (CRO)).") @@ -248,7 +248,7 @@ class InformationEntityBase(Entity): """ type: Literal["InformationEntity"] = Field(CoreImType.INFORMATION_ENTITY.value, description=f"MUST be {CoreImType.INFORMATION_ENTITY.value}.") - specifiedBy: Optional[Union[Method, IRI]] = Field(None, description="A specification that describes all or part of the process that led to creation of the Information Entity ") + specifiedBy: Optional[Union[Method, IRI]] = Field(None, description="A specification that describes all or part of the process that led to creation of the Information Entity") contributions: Optional[List[Contribution] ]= Field(None, description="Specific actions taken by an Agent toward the creation, modification, validation, or deprecation of an Information Entity.") reportedIn: Optional[List[Union[Document, IRI]]] = Field(None, description="A document in which the the Information Entity is reported.") dateAuthored: Optional[str] = Field(None, description="Indicates when the information content expressed in the Information Entity was generated.") @@ -263,12 +263,13 @@ class InformationEntity(InformationEntityBase): derivedFrom: Optional[List[InformationEntity]] = Field(None, description="Another Information Entity from which this Information Entity is derived, in whole or in part.") + class Document(InformationEntity): """A collection of information, usually in a text-based or graphic human-readable form, intended to be read and understood together as a whole. """ - type: Literal["Document"] = Field(CoreImType.DOCUMENT.value, description=f"Must be '{CoreImType.DOCUMENT.value}'.") + type: Literal["Document"] = Field(CoreImType.DOCUMENT.value, description=f"Must be '{CoreImType.DOCUMENT.value}'") subtype: Optional[Coding] = Field( None, description="A specific type of document that a Document instance represents (e.g. 'publication', 'patent', 'pathology report')" ) @@ -278,11 +279,11 @@ class Document(InformationEntity): ) doi: Optional[Annotated[str, StringConstraints(pattern=r"^10\.(\d+)(\.\d+)*\/[\w\-\.]+")]] = Field( None, - description="A `Digital Object Identifier _` for the document.", + description="A [Digital Object Identifier](https://www.doi.org/the-identifier/what-is-a-doi/) for the document.", ) pmid: Optional[int] = Field( None, - description="A `PubMed unique identifier `_.", + description="A [PubMed unique identifier](https://en.wikipedia.org/wiki/PubMed#PubMed_identifier) for the document.", ) @@ -316,12 +317,13 @@ class DataSet(InformationEntity): common format or structure, to enable their computational manipulation as a unit. """ - type: Literal["DataSet"] = Field(CoreImType.DATA_SET.value, description=f"MUST be '{CoreImType.DATA_SET.value}'") + type: Literal["DataSet"] = Field(CoreImType.DATA_SET.value, description=f"MUST be '{CoreImType.DATA_SET.value}'.") subtype: Optional[Coding] = Field(None, description="A specific type of data set the DataSet instance represents (e.g. a 'clinical data set', a 'sequencing data set', a 'gene expression data set', a 'genome annotation data set')") releaseDate: Optional[str] = Field(None, description="Indicates when a version of a Data Set was formally released.") version: Optional[str] = Field(None, description="The version of the Data Set, as assigned by its creator.") license: Optional[str] = Field(None, description="A specific license that dictates legal permissions for how a data set can be used (by whom, where, for what purposes, with what additional requirements, etc.)") + class EvidenceLine(InformationEntity): """An independent, evidence-based argument that may support or refute the validity of a specific proposition. The strength and direction of this argument is based on @@ -329,7 +331,7 @@ class EvidenceLine(InformationEntity): the target proposition. """ - type: Literal["EvidenceLine"] = Field(CoreImType.EVIDENCE_LINE.value, description=f"MUST be '{CoreImType.EVIDENCE_LINE.value}'") + type: Literal["EvidenceLine"] = Field(CoreImType.EVIDENCE_LINE.value, description=f"Must be '{CoreImType.EVIDENCE_LINE.value}'") hasEvidenceItems: Optional[List[InformationEntity]] = Field(None, description="An individual piece of information that was evaluated as evidence in building the argument represented by an Evidence Line.") directionOfEvidenceProvided: Optional[Direction] = Field(None, description="The direction of support that the Evidence Line is determined to provide toward its target Proposition (supports, disputes, neutral)") strengthOfEvidenceProvided: Optional[Union[Coding, IRI]] = Field(None, description="The strength of support that an Evidence Line is determined to provide for or against its target Proposition, evaluated relative to the direction indicated by the directionOfEvidenceProvided value.") @@ -342,6 +344,7 @@ class StatementBase(InformationEntity): Abstract base class to be extended by other classes. Do NOT instantiate directly. """ + predicate: str = Field(..., description="The relationship declared to hold between the subject and the object of the Statement.") direction: Optional[Direction] = Field(None, description="A term indicating whether the Statement supports, disputes, or remains neutral w.r.t. the validity of the Proposition it evaluates.") strength: Optional[Union[Coding, IRI]]= Field(None, description="A term used to report the strength of a Proposition's assessment in the direction indicated (i.e. how strongly supported or disputed the Proposition is believed to be). Implementers may choose to frame a strength assessment in terms of how *confident* an agent is that the Proposition is true or false, or in terms of the *strength of all evidence* they believe supports or disputes it.") score: Optional[float] = Field(None, description="A quantitative score that indicates the strength of a Proposition's assessment in the direction indicated (i.e. how strongly supported or disputed the Proposition is believed to be). Depending on its implementation, a score may reflect how *confident* that agent is that the Proposition is true or false, or the *strength of evidence* they believe supports or disputes it.") @@ -350,7 +353,6 @@ class StatementBase(InformationEntity): hasEvidenceLines: Optional[List[EvidenceLine]] = Field(None, description="An evidence-based argument that supports or disputes the validity of the proposition that a Statement assesses or puts forth as true. The strength and direction of this argument (whether it supports or disputes the proposition, and how strongly) is based on an interpretation of one or more pieces of information as evidence (i.e. 'Evidence Items).") - class Statement(StatementBase): """A claim of purported truth as made by a particular agent, on a particular occasion. Statements may be used to simply put forth a possible fact (i.e. a @@ -359,7 +361,6 @@ class Statement(StatementBase): """ subject: Dict = Field(..., description="The Entity about which the Statement is made.") - predicate: str = Field(..., description="The relationship declared to hold between the subject and the object of the Statement.") object: Dict = Field(..., description="An Entity or concept that is related to the subject of a Statement via its predicate.") @@ -370,7 +371,7 @@ class StudyGroup(Entity): referred to as a 'cohort' or 'population' in specific research settings. """ - type: Literal["StudyGroup"] = Field(CoreImType.STUDY_GROUP.value, description=f"Must be '{CoreImType.STUDY_GROUP.value}'") + type: Literal["StudyGroup"] = Field(CoreImType.STUDY_GROUP.value, description=f'Must be "{CoreImType.STUDY_GROUP.value}"') memberCount: Optional[int] = Field(None, description="The total number of individual members in the StudyGroup.") isSubsetOf: Optional[List[StudyGroup] ]= Field(None, description="A larger StudyGroup of which this StudyGroup represents a subset.") characteristics: Optional[List[Characteristic]] = Field(None, description="A feature or role shared by all members of the StudyGroup, representing a criterion for membership in the group.") diff --git a/src/ga4gh/vrs/models.py b/src/ga4gh/vrs/models.py index c662acce..8e924917 100644 --- a/src/ga4gh/vrs/models.py +++ b/src/ga4gh/vrs/models.py @@ -411,7 +411,7 @@ class LengthExpression(_ValueObject): type: Literal["LengthExpression"] = Field( VrsType.LEN_EXPR.value, description=f'MUST be "{VrsType.LEN_EXPR.value}"' ) - length: Optional[Union[Range, int]] = None + length: Optional[Union[Range, int]] = Field(None, description="The length of the sequence.") class ga4gh(_ValueObject.ga4gh): keys = [ @@ -427,13 +427,13 @@ class ReferenceLengthExpression(_ValueObject): VrsType.REF_LEN_EXPR.value, description=f'MUST be "{VrsType.REF_LEN_EXPR.value}"' ) length: Union[Range, int] = Field( - ..., description='The number of residues of the expressed sequence.' + ..., description='The number of residues in the expressed sequence.' ) sequence: Optional[SequenceString] = Field( - None, description='the `Sequence` encoded by the Reference Length Expression.' + None, description='the literal Sequence encoded by the Reference Length Expression.' ) repeatSubunitLength: int = Field( - ..., description='The number of residues of the repeat subunit.' + ..., description='The number of residues in the repeat subunit.' ) class ga4gh(_ValueObject.ga4gh): @@ -471,9 +471,9 @@ class SequenceReference(_ValueObject): type: Literal["SequenceReference"] = Field(VrsType.SEQ_REF.value, description=f'MUST be "{VrsType.SEQ_REF.value}"') refgetAccession: Annotated[str, StringConstraints(pattern=r'^SQ.[0-9A-Za-z_\-]{32}$')] = Field( ..., - description='A `GA4GH RefGet ` identifier for the referenced sequence, using the sha512t24u digest.', + description='A [GA4GH RefGet](http://samtools.github.io/hts-specs/refget.html) identifier for the referenced sequence, using the sha512t24u digest.', ) - residueAlphabet: Optional[ResidueAlphabet] = Field(None, description="The interpretation of the character codes referred to by the refget accession, where 'aa' specifies an amino acid character set, and 'na' specifies a nucleic acid character set.") + residueAlphabet: Optional[ResidueAlphabet] = Field(None, description='The interpretation of the character codes referred to by the refget accession, where "aa" specifies an amino acid character set, and "na" specifies a nucleic acid character set.') circular: Optional[bool] = Field(None, description="A boolean indicating whether the molecule represented by the sequence is circular (true) or linear (false).") class ga4gh(_ValueObject.ga4gh): @@ -488,15 +488,15 @@ class SequenceLocation(Ga4ghIdentifiableObject): type: Literal["SequenceLocation"] = Field(VrsType.SEQ_LOC.value, description=f'MUST be "{VrsType.SEQ_LOC.value}"') sequenceReference: Optional[Union[IRI, SequenceReference]] = Field( - None, description='A reference to a `Sequence` on which the location is defined.' + None, description='A reference to a Sequence on which the location is defined.' ) start: Optional[Union[Range, int]] = Field( None, - description='The start coordinate or range of the SequenceLocation. The minimum value of this coordinate or range is 0. MUST represent a coordinate or range less than the value of `end`.', + description='The start coordinate or range of the SequenceLocation. The minimum value of this coordinate or range is 0. MUST represent a coordinate or range less than or equal to the value of `end`.', ) end: Optional[Union[Range, int]] = Field( None, - description='The end coordinate or range of the SequenceLocation. The minimum value of this coordinate or range is 0. MUST represent a coordinate or range greater than the value of `start`.', + description='The end coordinate or range of the SequenceLocation. The minimum value of this coordinate or range is 0. MUST represent a coordinate or range greater than or equal to the value of `start`.', ) sequence: Optional[SequenceString] = Field(None, description="The literal sequence encoded by the `sequenceReference` at these coordinates.") @@ -616,7 +616,7 @@ class CisPhasedBlock(_VariationBase): type: Literal["CisPhasedBlock"] = Field(VrsType.CIS_PHASED_BLOCK.value, description=f'MUST be "{VrsType.CIS_PHASED_BLOCK.value}"') members: List[Union[Allele, IRI]] = Field( ..., - description='A list of `Alleles` that are found in-cis on a shared molecule.', + description='A list of Alleles that are found in-cis on a shared molecule.', min_length=2, ) sequenceReference: Optional[SequenceReference] = Field(None, description="An optional Sequence Reference on which all of the in-cis Alleles are found. When defined, this may be used to implicitly define the `sequenceReference` attribute for each of the CisPhasedBlock member Alleles.") @@ -645,7 +645,7 @@ class Adjacency(_VariationBase): potentially with an intervening linker sequence. """ - type: Literal["Adjacency"] = Field(VrsType.ADJACENCY.value, description=f'MUST be "{VrsType.ADJACENCY.value}"') + type: Literal["Adjacency"] = Field(VrsType.ADJACENCY.value, description=f'MUST be "{VrsType.ADJACENCY.value}".') adjoinedSequences: List[Union[IRI, SequenceLocation]] = Field( ..., description="The terminal sequence or pair of adjoined sequences that defines in the adjacency.", @@ -656,7 +656,7 @@ class Adjacency(_VariationBase): None, description="The sequence found between adjoined sequences." ) - homology: Optional[bool] = Field(None, description="A flag indicating if coordinate ambiguity in the adjoined sequences is from sequence homology (true) or other uncertainty (false).") + homology: Optional[bool] = Field(None, description="A flag indicating if coordinate ambiguity in the adjoined sequences is from sequence homology (true) or other uncertainty, such as instrument ambiguity (false).") class ga4gh(Ga4ghIdentifiableObject.ga4gh): prefix = 'AJ' @@ -673,7 +673,7 @@ class Terminus(_VariationBase): is not allowed and it removes the unnecessary array structure. """ - type: Literal["Terminus"] = Field(VrsType.TERMINUS.value, description=f'MUST be "{VrsType.TERMINUS.value}"') + type: Literal["Terminus"] = Field(VrsType.TERMINUS.value, description=f'MUST be "{VrsType.TERMINUS.value}".') location: Union[IRI, SequenceLocation] = Field(..., description="The location of the terminus.") class ga4gh(Ga4ghIdentifiableObject.ga4gh): @@ -690,11 +690,11 @@ class TraversalBlock(_ValueObject): model_config = ConfigDict(use_enum_values=True) type: Literal["TraversalBlock"] = Field( - VrsType.TRAVERSAL_BLOCK.value, description=f'MUST be "{VrsType.TRAVERSAL_BLOCK.value}"' + VrsType.TRAVERSAL_BLOCK.value, description=f'MUST be "{VrsType.TRAVERSAL_BLOCK.value}".' ) orientation: Optional[Orientation] = Field( None, - description='The orientation of the traversal block, either forward or reverse_complement.' + description='The orientation of the molecular variation component.' ) component: Optional[Union[Allele, CisPhasedBlock, Adjacency, Terminus]] = Field( @@ -714,7 +714,7 @@ class DerivativeMolecule(_VariationBase): molecule composed from multiple sequence components. """ - type: Literal["DerivativeMolecule"] = Field(VrsType.DERIVATIVE_MOL.value, description=f'MUST be "{VrsType.DERIVATIVE_MOL.value}"') + type: Literal["DerivativeMolecule"] = Field(VrsType.DERIVATIVE_MOL.value, description=f'MUST be "{VrsType.DERIVATIVE_MOL.value}".') components: List[Union[IRI, TraversalBlock]] = Field( ..., description="The molecular components that constitute the derivative molecule.", @@ -776,7 +776,7 @@ class CopyNumberChange(_CopyNumber): type: Literal["CopyNumberChange"] = Field(VrsType.CN_CHANGE.value, description=f'MUST be "{VrsType.CN_CHANGE.value}"') copyChange: CopyChange = Field( ..., - description='MUST be one of "EFO:0030069" (complete genomic loss), "EFO:0020073" (high-level loss), "EFO:0030068" (low-level loss), "EFO:0030067" (loss), "EFO:0030064" (regional base ploidy), "EFO:0030070" (gain), "EFO:0030071" (low-level gain), "EFO:0030072" (high-level gain).', + description='MUST be a Coding representing one of "EFO:0030069" (complete genomic loss), "EFO:0020073" (high-level loss), "EFO:0030068" (low-level loss), "EFO:0030067" (loss), "EFO:0030064" (regional base ploidy), "EFO:0030070" (gain), "EFO:0030071" (low-level gain), "EFO:0030072" (high-level gain).', ) class ga4gh(Ga4ghIdentifiableObject.ga4gh): diff --git a/tests/test_schemas.py b/tests/test_schemas.py index ae71520b..b648c7cf 100644 --- a/tests/test_schemas.py +++ b/tests/test_schemas.py @@ -84,7 +84,7 @@ def test_schema_models_in_pydantic(gks_schema, pydantic_models): for schema_model in ( mapping.base_classes | mapping.concrete_classes | mapping.primitives ): - assert getattr(pydantic_models, schema_model, False) + assert getattr(pydantic_models, schema_model, False), schema_model @pytest.mark.parametrize( @@ -101,18 +101,26 @@ def test_schema_class_fields(gks_schema, pydantic_models): """ mapping = GKS_SCHEMA_MAPPING[gks_schema] for schema_model in mapping.concrete_classes: - schema_fields = set(mapping.schema[schema_model]["properties"]) + schema_properties = mapping.schema[schema_model]["properties"] pydantic_model = getattr(pydantic_models, schema_model) - assert set(pydantic_model.model_fields) == schema_fields, schema_model + assert set(pydantic_model.model_fields) == set(schema_properties), schema_model - # Check required fields required_schema_fields = set(mapping.schema[schema_model]["required"]) - required_pydantic_fields = { - field - for field, field_info in pydantic_model.model_fields.items() - if field_info.is_required() or isinstance(field_info.default, str) - } - assert required_pydantic_fields == required_schema_fields, schema_model + + for property, property_def in schema_properties.items(): + pydantic_model_field_info = pydantic_model.model_fields[property] + pydantic_field_required = pydantic_model_field_info.is_required() + + if property in required_schema_fields: + if property != "type": + assert pydantic_field_required, f"{pydantic_model}.{property}" + else: + assert not pydantic_field_required, f"{pydantic_model}.{property}" + + if "description" in property_def: + assert property_def["description"].replace("'", "\"") == pydantic_model_field_info.description.replace("'", "\""), f"{pydantic_model}.{property}" + else: + assert pydantic_model_field_info.description is None, f"{pydantic_model}.{property}" def test_ga4gh_keys(): From 9cab291f9e0dfeeddc9c2d17543835a809f39672 Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Mon, 28 Oct 2024 12:05:10 -0400 Subject: [PATCH 7/9] Revert "rm unnecessary abc + dont make class private" This reverts commit 5ebca83d9c2436f25b3afd913187158d6797d2c9. --- src/ga4gh/core/entity_models.py | 33 +++++++++---------------- src/ga4gh/vrs/models.py | 43 +++++++++++++-------------------- 2 files changed, 29 insertions(+), 47 deletions(-) diff --git a/src/ga4gh/core/entity_models.py b/src/ga4gh/core/entity_models.py index 406c2d65..1631d266 100644 --- a/src/ga4gh/core/entity_models.py +++ b/src/ga4gh/core/entity_models.py @@ -1,6 +1,7 @@ """GKS Common Library Data Type and Entity models""" from __future__ import annotations +from abc import ABC import datetime import logging from typing import Any, Dict, Annotated, Literal, Optional, Union, List @@ -149,7 +150,7 @@ class Extension(BaseModel): ######################################### -class Entity(BaseModel): +class Entity(BaseModel, ABC): """Anything that exists, has existed, or will exist. Abstract base class to be extended by other classes. Do NOT instantiate directly. @@ -172,14 +173,12 @@ class Entity(BaseModel): extensions: Optional[List[Extension]] = Field(None, description="A list of extensions to the Entity, that allow for capture of information not directly supported by elements defined in the model.") -class DomainEntity(Entity): +class DomainEntity(Entity, ABC): """An Entity that is specific to a particular biomedical domain such as disease, therapeutics, or genes. Domain Entities are considered as 'concept-level' entities, as opposed to particular instances. e.g. 'Lung Cancer', not 'patient123's lung cancer'. Or 'Erlotinib', not the particular doses given to a patient on a specific occasion. - - Abstract base class to be extended by other classes. Do NOT instantiate directly. """ mappings: Optional[List[ConceptMapping]] = Field(None, description="A list of mappings to concepts in terminologies or code systems. Each mapping should include a coding and a relation.") @@ -196,10 +195,10 @@ class Agent(Entity): subtype: Optional[AgentSubtype] = Field(None, description="A specific type of agent the Agent object represents. Must be one of {person, organization, software}.") -class ActivityBase(Entity): +class ActivityBase(Entity, ABC): """Internal base class that holds shared fields for Activity model. - Abstract base class to be extended by other classes. Do NOT instantiate directly. + This class should not be used directly. """ subtype: Optional[Coding] = Field(None, description="A specific type of activity the Activity instance represents.") @@ -241,10 +240,10 @@ class Contribution(ActivityBase): activityType: Optional[Coding] = Field(None, description="The specific type of activity performed or role played by an agent in making the contribution (e.g. for a publication, agents may contribute as a primary author, editor, figure designer, data generator, etc. . Values of this property may be framed as activities or as contribution roles (e.g. using terms from the Contribution Role Ontology (CRO)).") -class InformationEntityBase(Entity): +class InformationEntityBase(Entity, ABC): """Internal base class that holds shared fields for InformationEntity model. - Abstract base class to be extended by other classes. Do NOT instantiate directly. + This class should not be used directly. """ type: Literal["InformationEntity"] = Field(CoreImType.INFORMATION_ENTITY.value, description=f"MUST be {CoreImType.INFORMATION_ENTITY.value}.") @@ -338,11 +337,8 @@ class EvidenceLine(InformationEntity): scoreOfEvidenceProvided: Optional[float] = Field(None, description="A quantitative score indicating the strength of support that an Evidence Line is determined to provide for or against its target Proposition, evaluated relative to the direction indicated by the directionOfEvidenceProvided value.") -class StatementBase(InformationEntity): - """Internal base class that holds shared fields for Statement model. - - Abstract base class to be extended by other classes. Do NOT instantiate directly. - """ +class StatementBase(InformationEntity, ABC): + """Internal base class that holds shared fields for Statement model.""" predicate: str = Field(..., description="The relationship declared to hold between the subject and the object of the Statement.") direction: Optional[Direction] = Field(None, description="A term indicating whether the Statement supports, disputes, or remains neutral w.r.t. the validity of the Proposition it evaluates.") @@ -387,23 +383,18 @@ class Characteristic(BaseModel): valueOperator: Optional[bool] = Field(None, description="An operation that defines how to logically interpret a set of more than one Characteristic values ('AND', 'OR', 'NOT')") -class StudyResultBase(InformationEntityBase): - """Internal base class that holds shared fields for StudyResult model. - - Abstract base class to be extended by other classes. Do NOT instantiate directly. - """ +class StudyResultBase(InformationEntityBase, ABC): + """Internal base class that holds shared fields for StudyResult model.""" sourceDataSet: Optional[List[DataSet]] = Field(None, description="A larger DataSet from which the content of the StudyResult was derived.", max_length=1) ancillaryResults: Optional[Dict] = None qualityMeasures: Optional[Dict] = None -class StudyResult(InformationEntityBase): +class StudyResult(InformationEntityBase, ABC): """A collection of data items from a single study that pertain to a particular subject or experimental unit in the study, along with optional provenance information describing how these data items were generated. - - Abstract base class to be extended by other classes. Do NOT instantiate directly. """ focus: Optional[Union[DomainEntity, Coding, IRI]] = Field(None, description="The specific subject or experimental unit in a Study that data in the StudyResult object is about - e.g. a particular variant in a population allele frequency dataset like ExAC or gnomAD.") diff --git a/src/ga4gh/vrs/models.py b/src/ga4gh/vrs/models.py index 8e924917..8cb796dd 100644 --- a/src/ga4gh/vrs/models.py +++ b/src/ga4gh/vrs/models.py @@ -10,6 +10,7 @@ * `import ga4gh.vrs`, and refer to models using the fully-qualified module name, e.g., `ga4gh.vrs.models.Allele` """ +from abc import ABC from typing import List, Literal, Optional, Union, Dict, Annotated from collections import OrderedDict from enum import Enum @@ -197,7 +198,7 @@ class Syntax(str, Enum): def _recurse_ga4gh_serialize(obj): - if isinstance(obj, Ga4ghIdentifiableObject): + if isinstance(obj, _Ga4ghIdentifiableObject): return obj.get_or_create_digest() elif isinstance(obj, _ValueObject): return obj.ga4gh_serialize() @@ -211,11 +212,9 @@ def _recurse_ga4gh_serialize(obj): return obj -class _ValueObject(Entity): +class _ValueObject(Entity, ABC): """A contextual value whose equality is based on value, not identity. See https://en.wikipedia.org/wiki/Value_object for more on Value Objects. - - Abstract base class to be extended by other classes. Do NOT instantiate directly. """ def __hash__(self): @@ -236,12 +235,10 @@ def is_ga4gh_identifiable(): return False -class Ga4ghIdentifiableObject(_ValueObject): +class _Ga4ghIdentifiableObject(_ValueObject, ABC): """A contextual value object for which a GA4GH computed identifier can be created. All GA4GH Identifiable Objects may have computed digests from the VRS Computed Identifier algorithm. - - Abstract base class to be extended by other classes. Do NOT instantiate directly. """ type: str @@ -483,7 +480,7 @@ class ga4gh(_ValueObject.ga4gh): ] -class SequenceLocation(Ga4ghIdentifiableObject): +class SequenceLocation(_Ga4ghIdentifiableObject): """A `Location` defined by an interval on a referenced `Sequence`.""" type: Literal["SequenceLocation"] = Field(VrsType.SEQ_LOC.value, description=f'MUST be "{VrsType.SEQ_LOC.value}"') @@ -539,7 +536,7 @@ def get_refget_accession(self): else: return None - class ga4gh(Ga4ghIdentifiableObject.ga4gh): + class ga4gh(_Ga4ghIdentifiableObject.ga4gh): prefix = 'SL' priorPrefix = {PrevVrsVersion.V1_3.value: 'VSL'} keys = [ @@ -554,11 +551,8 @@ class ga4gh(Ga4ghIdentifiableObject.ga4gh): ######################################### -class _VariationBase(Ga4ghIdentifiableObject): - """Base class for variation - - Abstract base class to be extended by other classes. Do NOT instantiate directly. - """ +class _VariationBase(_Ga4ghIdentifiableObject, ABC): + """Base class for variation""" expressions: Optional[List[Expression]] = None @@ -600,7 +594,7 @@ def ga4gh_serialize_as_version(self, as_version: PrevVrsVersion): return f'{{"location":"{location_digest}","state":{{"sequence":"{sequence}","type":"LiteralSequenceExpression"}},"type":"Allele"}}' - class ga4gh(Ga4ghIdentifiableObject.ga4gh): + class ga4gh(_Ga4ghIdentifiableObject.ga4gh): prefix = 'VA' priorPrefix = {PrevVrsVersion.V1_3.value: 'VA'} keys = [ @@ -626,7 +620,7 @@ def ga4gh_serialize(self) -> Dict: out["members"] = sorted(out["members"]) return out - class ga4gh(Ga4ghIdentifiableObject.ga4gh): + class ga4gh(_Ga4ghIdentifiableObject.ga4gh): prefix = 'CPB' keys = [ 'members', @@ -658,7 +652,7 @@ class Adjacency(_VariationBase): ) homology: Optional[bool] = Field(None, description="A flag indicating if coordinate ambiguity in the adjoined sequences is from sequence homology (true) or other uncertainty, such as instrument ambiguity (false).") - class ga4gh(Ga4ghIdentifiableObject.ga4gh): + class ga4gh(_Ga4ghIdentifiableObject.ga4gh): prefix = 'AJ' keys = [ 'adjoinedSequences', @@ -676,7 +670,7 @@ class Terminus(_VariationBase): type: Literal["Terminus"] = Field(VrsType.TERMINUS.value, description=f'MUST be "{VrsType.TERMINUS.value}".') location: Union[IRI, SequenceLocation] = Field(..., description="The location of the terminus.") - class ga4gh(Ga4ghIdentifiableObject.ga4gh): + class ga4gh(_Ga4ghIdentifiableObject.ga4gh): prefix = "TM" keys = [ "location", @@ -722,7 +716,7 @@ class DerivativeMolecule(_VariationBase): ) circular: Optional[bool] = Field(None, description="A boolean indicating whether the molecule represented by the sequence is circular (true) or linear (false).") - class ga4gh(Ga4ghIdentifiableObject.ga4gh): + class ga4gh(_Ga4ghIdentifiableObject.ga4gh): prefix = "DM" keys = [ "components", @@ -735,11 +729,8 @@ class ga4gh(Ga4ghIdentifiableObject.ga4gh): ######################################### -class _CopyNumber(_VariationBase): - """A measure of the copies of a `Location` within a system (e.g. genome, cell, etc.) - - Abstract base class to be extended by other classes. Do NOT instantiate directly. - """ +class _CopyNumber(_VariationBase, ABC): + """A measure of the copies of a `Location` within a system (e.g. genome, cell, etc.)""" location: Union[IRI, SequenceLocation] = Field( ..., @@ -757,7 +748,7 @@ class CopyNumberCount(_CopyNumber): ..., description='The integral number of copies of the subject in a system' ) - class ga4gh(Ga4ghIdentifiableObject.ga4gh): + class ga4gh(_Ga4ghIdentifiableObject.ga4gh): prefix = 'CN' keys = [ 'copies', @@ -779,7 +770,7 @@ class CopyNumberChange(_CopyNumber): description='MUST be a Coding representing one of "EFO:0030069" (complete genomic loss), "EFO:0020073" (high-level loss), "EFO:0030068" (low-level loss), "EFO:0030067" (loss), "EFO:0030064" (regional base ploidy), "EFO:0030070" (gain), "EFO:0030071" (low-level gain), "EFO:0030072" (high-level gain).', ) - class ga4gh(Ga4ghIdentifiableObject.ga4gh): + class ga4gh(_Ga4ghIdentifiableObject.ga4gh): prefix = 'CX' keys = [ 'copyChange', From 912301069dbf2b780f288859b1a8d3cfecabe32a Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Tue, 29 Oct 2024 09:33:03 -0400 Subject: [PATCH 8/9] remove duplicate test module --- tests/{ => validation}/test_schemas.py | 2 +- tests/validation/test_vrs_schema.py | 65 -------------------------- 2 files changed, 1 insertion(+), 66 deletions(-) rename tests/{ => validation}/test_schemas.py (98%) delete mode 100644 tests/validation/test_vrs_schema.py diff --git a/tests/test_schemas.py b/tests/validation/test_schemas.py similarity index 98% rename from tests/test_schemas.py rename to tests/validation/test_schemas.py index b648c7cf..87c588ac 100644 --- a/tests/test_schemas.py +++ b/tests/validation/test_schemas.py @@ -51,7 +51,7 @@ def _update_gks_schema_mapping( GKS_SCHEMA_MAPPING = {gks: GKSSchemaMapping() for gks in GKSSchema} -SUBMODULES_DIR = Path(__file__).parents[1] / "submodules" / "vrs" +SUBMODULES_DIR = Path(__file__).parents[2] / "submodules" / "vrs" # Get vrs classes diff --git a/tests/validation/test_vrs_schema.py b/tests/validation/test_vrs_schema.py deleted file mode 100644 index a42853a4..00000000 --- a/tests/validation/test_vrs_schema.py +++ /dev/null @@ -1,65 +0,0 @@ -"""test that VRS Python model structures match VRS Schema -""" -import json -from pathlib import Path - -from ga4gh.vrs import models - -ROOT_DIR = Path(__file__).parents[2] -VRS_SCHEMA_DIR = ROOT_DIR / 'submodules' / 'vrs' / 'schema' / 'vrs' / 'json' -VRS_SCHEMA = {} - -VRS_CONCRETE_CLASSES = set() -VRS_PRIMITIVES = set() - -for f in VRS_SCHEMA_DIR.glob("*"): - with open(f, "r") as rf: - cls_def = json.load(rf) - - vrs_class = cls_def["title"] - VRS_SCHEMA[vrs_class] = cls_def - if "properties" in cls_def: - VRS_CONCRETE_CLASSES.add(vrs_class) - elif cls_def.get("type") in {"array", "int", "str"}: - VRS_PRIMITIVES.add(vrs_class) - - -NOT_IMPLEMENTED = ['Adjacency', 'Haplotype'] # Use this to skip testing of not-implemented classes - # TODO: Remove this once 2.0 models at beta - - -def test_schema_models_exist(): - """test that VRS Python covers the models defined by VRS - """ - for vrs_class in VRS_CONCRETE_CLASSES | VRS_PRIMITIVES: - if vrs_class in NOT_IMPLEMENTED: - continue - assert getattr(models, vrs_class, False) - - -def test_schema_class_fields_are_valid(): - """test that VRS Python model fields match the VRS specification - """ - for vrs_class in VRS_CONCRETE_CLASSES: - if vrs_class in NOT_IMPLEMENTED: - continue - schema_fields = set(VRS_SCHEMA[vrs_class]['properties']) - pydantic_model = getattr(models, vrs_class) - assert set(pydantic_model.model_fields) == schema_fields, vrs_class - - -def test_model_keys_are_valid(): - """test that digest keys on Value Objects are valid and sorted - """ - for vrs_class in VRS_CONCRETE_CLASSES: - if vrs_class in NOT_IMPLEMENTED: - continue - if VRS_SCHEMA[vrs_class].get('ga4ghDigest', {}).get('keys', None) is None: - continue - pydantic_model = getattr(models, vrs_class) - try: - pydantic_model_digest_keys = pydantic_model.ga4gh.keys - except AttributeError: - raise AttributeError(vrs_class) - assert set(pydantic_model_digest_keys) == set(VRS_SCHEMA[vrs_class]['ga4ghDigest']['keys']), vrs_class - assert pydantic_model_digest_keys == sorted(pydantic_model.ga4gh.keys), vrs_class From 9e78fcaf1ee6810d7c164cb72848f3367e3608ea Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Tue, 29 Oct 2024 13:56:12 -0400 Subject: [PATCH 9/9] ruff cleanup --- tests/validation/test_schemas.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/validation/test_schemas.py b/tests/validation/test_schemas.py index 87c588ac..ee0b3783 100644 --- a/tests/validation/test_schemas.py +++ b/tests/validation/test_schemas.py @@ -25,7 +25,7 @@ class GKSSchemaMapping(BaseModel): base_classes: set = set() concrete_classes: set = set() primitives: set = set() - schema: dict = dict() + schema: dict = {} def _update_gks_schema_mapping( @@ -71,7 +71,7 @@ def _update_gks_schema_mapping( @pytest.mark.parametrize( - "gks_schema,pydantic_models", + ("gks_schema", "pydantic_models"), [ (GKSSchema.VRS, vrs_models), (GKSSchema.CORE_IM, entity_models), @@ -88,7 +88,7 @@ def test_schema_models_in_pydantic(gks_schema, pydantic_models): @pytest.mark.parametrize( - "gks_schema,pydantic_models", + ("gks_schema", "pydantic_models"), [ (GKSSchema.VRS, vrs_models), (GKSSchema.CORE_IM, entity_models), @@ -107,20 +107,20 @@ def test_schema_class_fields(gks_schema, pydantic_models): required_schema_fields = set(mapping.schema[schema_model]["required"]) - for property, property_def in schema_properties.items(): - pydantic_model_field_info = pydantic_model.model_fields[property] + for prop, property_def in schema_properties.items(): + pydantic_model_field_info = pydantic_model.model_fields[prop] pydantic_field_required = pydantic_model_field_info.is_required() - if property in required_schema_fields: - if property != "type": - assert pydantic_field_required, f"{pydantic_model}.{property}" + if prop in required_schema_fields: + if prop != "type": + assert pydantic_field_required, f"{pydantic_model}.{prop}" else: - assert not pydantic_field_required, f"{pydantic_model}.{property}" + assert not pydantic_field_required, f"{pydantic_model}.{prop}" if "description" in property_def: - assert property_def["description"].replace("'", "\"") == pydantic_model_field_info.description.replace("'", "\""), f"{pydantic_model}.{property}" + assert property_def["description"].replace("'", "\"") == pydantic_model_field_info.description.replace("'", "\""), f"{pydantic_model}.{prop}" else: - assert pydantic_model_field_info.description is None, f"{pydantic_model}.{property}" + assert pydantic_model_field_info.description is None, f"{pydantic_model}.{prop}" def test_ga4gh_keys():