diff --git a/.gitignore b/.gitignore index a8561da..14151f8 100644 --- a/.gitignore +++ b/.gitignore @@ -110,6 +110,9 @@ ENV/ env.bak/ venv.bak/ +# IDEs +.vscode/ + # Spyder project settings .spyderproject .spyproject diff --git a/.requirements.txt b/.requirements.txt index e797df6..43de4b7 100644 --- a/.requirements.txt +++ b/.requirements.txt @@ -1,7 +1,7 @@ pytest -sphinx==3.5.4 +sphinx ~= 7.2 +sphinx-rtd-theme ~= 1.2 pyyaml -sphinx-rtd-theme -ga4gh.gks.metaschema==0.3.0b1 -python-jsonschema-objects>=0.3.13 -jsonschema==3.2.0 \ No newline at end of file +ga4gh.gks.metaschema==0.3.0b6 +jsonschema +referencing \ No newline at end of file diff --git a/schema/Makefile b/schema/Makefile index 4ed39b3..abb6035 100644 --- a/schema/Makefile +++ b/schema/Makefile @@ -1,22 +1,5 @@ -.PHONY: defs FORCE -.SECONDARY: -.SUFFIXES: -.DELETE_ON_ERROR: +all: + (cd catvrs; make all) -JSYAMLS:=catvrs.yaml -JSONS:=${JSYAMLS:.yaml=.json} - -all: ${JSONS} merged.json defs - -%.json: %.yaml - jsy2js.py <$< >$@ - -%.yaml: %-source.yaml - source2jsy.py $< >$@ - -merged.yaml: catvrs-source.yaml - source2mergedjsy.py $< >$@ - -defs: - rm -rf defs - y2t.py catvrs-source.yaml +clean: + (cd catvrs; make clean) \ No newline at end of file diff --git a/schema/catvrs.json b/schema/catvrs.json deleted file mode 100644 index b24645f..0000000 --- a/schema/catvrs.json +++ /dev/null @@ -1,355 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft/2020-12/schema", - "title": "GA4GH-Cat-VRS-Definitions", - "type": "object", - "$defs": { - "CategoricalVariation": { - "description": "A representation of a categorically-defined domain for variation, in which individual contextual variation instances may be members of the domain.", - "oneOf": [ - { - "$ref": "#/$defs/CanonicalAllele" - }, - { - "$ref": "#/$defs/CategoricalCnv" - }, - { - "$ref": "#/$defs/DescribedVariation" - }, - { - "$ref": "#/$defs/ProteinSequenceConsequence" - } - ] - }, - "ProteinSequenceConsequence": { - "maturity": "draft", - "type": 
"object", - "description": "A change that occurs in a protein sequence as a result of genomic changes. Due to the degenerate nature of the genetic code, there are often several genomic changes that can cause a protein sequence consequence. The protein sequence consequence, like a CanonicalAllele, is defined by an `Allele ` that is representative of a collection of congruent Protein Alleles that share the same altered codon(s).", - "properties": { - "id": { - "type": "string", - "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system, or may refer to an 'id' for the shared concept in another system (e.g. a CURIE)." - }, - "label": { - "type": "string", - "description": "A primary label for the entity." - }, - "description": { - "type": "string", - "description": "A free-text description of the entity." - }, - "extensions": { - "type": "array", - "ordered": true, - "items": { - "$ref": "gks.common.json#/$defs/Extension" - } - }, - "mappings": { - "type": "array", - "ordered": false, - "items": { - "$ref": "gks.common.json#/$defs/Mapping" - } - }, - "type": { - "type": "string", - "const": "ProteinSequenceConsequence", - "default": "ProteinSequenceConsequence", - "description": "MUST be \"ProteinSequenceConsequence\"" - }, - "aliases": { - "description": "Aliases are alternate labels for a Domain Entity.", - "ordered": false, - "type": "array", - "items": { - "type": "string" - } - }, - "members": { - "type": "array", - "ordered": false, - "description": "A non-exhaustive list of VRS variation contexts that satisfy the constraints of this categorical variant.", - "items": { - "oneOf": [ - { - "$ref": "gks.common.json#/$defs/IRI" - }, - { - "$ref": "vrs.json#/$defs/Variation" - } - ] - } - }, - "definingContext": { - "oneOf": [ - { - "$ref": "gks.common.json#/$defs/IRI" - }, - { - "$ref": "vrs.json#/$defs/Allele" - } - ], 
- "description": "The [VRS Allele](https://vrs.ga4gh.org/en/2.0/terms_and_model.html#allele) object that is congruent with (projects to the same codons) as alleles on other protein reference sequences." - } - }, - "required": [ - "definingContext", - "type" - ], - "additionalProperties": false - }, - "CanonicalAllele": { - "maturity": "draft", - "description": "A canonical allele is defined by an `Allele ` that is representative of a collection of congruent Alleles, each of which depict the same nucleic acid change on different underlying reference sequences. Congruent representations of an Allele often exist across different genome assemblies and associated cDNA transcript representations.", - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system, or may refer to an 'id' for the shared concept in another system (e.g. a CURIE)." - }, - "label": { - "type": "string", - "description": "A primary label for the entity." - }, - "description": { - "type": "string", - "description": "A free-text description of the entity." 
- }, - "extensions": { - "type": "array", - "ordered": true, - "items": { - "$ref": "gks.common.json#/$defs/Extension" - } - }, - "mappings": { - "type": "array", - "ordered": false, - "items": { - "$ref": "gks.common.json#/$defs/Mapping" - } - }, - "type": { - "type": "string", - "const": "CanonicalAllele", - "default": "CanonicalAllele", - "description": "MUST be \"CanonicalAllele\"" - }, - "aliases": { - "description": "Aliases are alternate labels for a Domain Entity.", - "ordered": false, - "type": "array", - "items": { - "type": "string" - } - }, - "members": { - "type": "array", - "ordered": false, - "description": "A non-exhaustive list of VRS variation contexts that satisfy the constraints of this categorical variant.", - "items": { - "oneOf": [ - { - "$ref": "gks.common.json#/$defs/IRI" - }, - { - "$ref": "vrs.json#/$defs/Variation" - } - ] - } - }, - "definingContext": { - "oneOf": [ - { - "$ref": "gks.common.json#/$defs/IRI" - }, - { - "$ref": "vrs.json#/$defs/Allele" - } - ], - "description": "The [VRS Allele](https://vrs.ga4gh.org/en/2.0/terms_and_model.html#allele) object that is congruent with variants on alternate reference sequences." - } - }, - "required": [ - "definingContext", - "type" - ], - "additionalProperties": false - }, - "CategoricalCnv": { - "maturity": "draft", - "type": "object", - "description": "A categorical variation domain is defined first by a sequence derived from a canonical [Location ](https://vrs.ga4gh.org/en/2.0/terms_and_model.html#Location) , which is representative of a collection of congruent Locations. The change or count of this sequence is also described, either by a numeric value (e.g. \"3 or more copies\") or categorical representation (e.g. \"high-level gain\"). Categorical CNVs may optionally be defined by rules specifying the location match characteristics for member CNVs.", - "properties": { - "id": { - "type": "string", - "description": "The 'logical' identifier of the entity in the system of record, e.g. 
a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system, or may refer to an 'id' for the shared concept in another system (e.g. a CURIE)." - }, - "label": { - "type": "string", - "description": "A primary label for the entity." - }, - "description": { - "type": "string", - "description": "A free-text description of the entity." - }, - "extensions": { - "type": "array", - "ordered": true, - "items": { - "$ref": "gks.common.json#/$defs/Extension" - } - }, - "mappings": { - "type": "array", - "ordered": false, - "items": { - "$ref": "gks.common.json#/$defs/Mapping" - } - }, - "type": { - "type": "string", - "const": "CategoricalCnv", - "default": "CategoricalCnv", - "description": "MUST be \"CategoricalCnv\"" - }, - "aliases": { - "description": "Aliases are alternate labels for a Domain Entity.", - "ordered": false, - "type": "array", - "items": { - "type": "string" - } - }, - "members": { - "type": "array", - "ordered": false, - "description": "A non-exhaustive list of VRS variation contexts that satisfy the constraints of this categorical variant.", - "items": { - "oneOf": [ - { - "$ref": "gks.common.json#/$defs/IRI" - }, - { - "$ref": "vrs.json#/$defs/Variation" - } - ] - } - }, - "location": { - "type": { - "$ref": "vrs.json#/$defs/Location" - }, - "description": "A [VRS Location](https://vrs.ga4gh.org/en/2.0/terms_and_model.html#location) object that represents a sequence derived from that location, and is congruent with locations on alternate reference sequences." - }, - "locationMatchCharacteristic": { - "type": "string", - "enum": [ - "exact", - "partial", - "subinterval", - "superinterval" - ], - "description": "The characteristics of a valid match between a contextual CNV location (the query) and the Categorical CNV location (the domain), when both query and domain are represented on the same reference sequence. An `exact` match requires the location of the query and domain to be identical. 
A `subinterval` match requires the query to be a subinterval of the domain. A `superinterval` match requires the query to be a superinterval of the domain. A `partial` match requires at least 1 residue of overlap between the query and domain." - }, - "copyChange": { - "type": "string", - "enum": [ - "efo:0030069", - "efo:0020073", - "efo:0030068", - "efo:0030067", - "efo:0030064", - "efo:0030070", - "efo:0030071", - "efo:0030072" - ], - "description": "A representation of the change in copies of a sequence in a system. MUST be one of \"efo:0030069\" (complete genomic loss), \"efo:0020073\" (high-level loss), \"efo:0030068\" (low-level loss), \"efo:0030067\" (loss), \"efo:0030064\" (regional base ploidy), \"efo:0030070\" (gain), \"efo:0030071\" (low-level gain), \"efo:0030072\" (high-level gain)." - }, - "copies": { - "oneOf": [ - { - "$ref": "vrs.json#/$defs/Range" - }, - { - "type": "integer" - } - ], - "description": "The integral number of copies of the subject in a system." - } - }, - "required": [ - "location", - "type" - ], - "additionalProperties": false - }, - "DescribedVariation": { - "maturity": "draft", - "type": "object", - "description": "Some categorical variation concepts are supported by custom nomenclatures or text-descriptive representations for which a categorical variation model does not exist. DescribedVariation is a class that adds requirements and contextual semantics to the `label` and `description` fields to indicate how a categorical variation concept should be evaluated for matching variants.", - "properties": { - "id": { - "type": "string", - "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system, or may refer to an 'id' for the shared concept in another system (e.g. a CURIE)." 
- }, - "extensions": { - "type": "array", - "ordered": true, - "items": { - "$ref": "gks.common.json#/$defs/Extension" - } - }, - "mappings": { - "type": "array", - "ordered": false, - "items": { - "$ref": "gks.common.json#/$defs/Mapping" - } - }, - "type": { - "type": "string", - "const": "DescribedVariation", - "default": "DescribedVariation", - "description": "MUST be \"DescribedVariation\"" - }, - "aliases": { - "description": "Aliases are alternate labels for a Domain Entity.", - "ordered": false, - "type": "array", - "items": { - "type": "string" - } - }, - "members": { - "type": "array", - "ordered": false, - "description": "A non-exhaustive list of VRS variation contexts that satisfy the constraints of this categorical variant.", - "items": { - "oneOf": [ - { - "$ref": "gks.common.json#/$defs/IRI" - }, - { - "$ref": "vrs.json#/$defs/Variation" - } - ] - } - }, - "label": { - "type": "string", - "description": "A primary label for the categorical variation. This required property should provide a short and descriptive textual representation of the concept." - }, - "description": { - "type": "string", - "description": "A textual description of the domain of variation that should match the categorical variation entity." - } - }, - "required": [ - "label", - "type" - ], - "additionalProperties": false - } - } -} \ No newline at end of file diff --git a/schema/catvrs.yaml b/schema/catvrs.yaml deleted file mode 100644 index f391455..0000000 --- a/schema/catvrs.yaml +++ /dev/null @@ -1,293 +0,0 @@ -$schema: http://json-schema.org/draft/2020-12/schema -title: GA4GH-Cat-VRS-Definitions -type: object -$defs: - CategoricalVariation: - description: A representation of a categorically-defined domain for variation, - in which individual contextual variation instances may be members of the domain. 
- oneOf: - - $ref: '#/$defs/CanonicalAllele' - - $ref: '#/$defs/CategoricalCnv' - - $ref: '#/$defs/DescribedVariation' - - $ref: '#/$defs/ProteinSequenceConsequence' - ProteinSequenceConsequence: - maturity: draft - type: object - description: A change that occurs in a protein sequence as a result of genomic - changes. Due to the degenerate nature of the genetic code, there are often several - genomic changes that can cause a protein sequence consequence. The protein sequence - consequence, like a CanonicalAllele, is defined by an `Allele ` - that is representative of a collection of congruent Protein Alleles that share - the same altered codon(s). - properties: - id: - type: string - description: The 'logical' identifier of the entity in the system of record, - e.g. a UUID. This 'id' is unique within a given system. The identified - entity may have a different 'id' in a different system, or may refer to - an 'id' for the shared concept in another system (e.g. a CURIE). - label: - type: string - description: A primary label for the entity. - description: - type: string - description: A free-text description of the entity. - extensions: - type: array - ordered: true - items: - $ref: gks.common.json#/$defs/Extension - mappings: - type: array - ordered: false - items: - $ref: gks.common.json#/$defs/Mapping - type: - type: string - const: ProteinSequenceConsequence - default: ProteinSequenceConsequence - description: MUST be "ProteinSequenceConsequence" - aliases: - description: Aliases are alternate labels for a Domain Entity. - ordered: false - type: array - items: - type: string - members: - type: array - ordered: false - description: A non-exhaustive list of VRS variation contexts that satisfy - the constraints of this categorical variant. 
- items: - oneOf: - - $ref: gks.common.json#/$defs/IRI - - $ref: vrs.json#/$defs/Variation - definingContext: - oneOf: - - $ref: gks.common.json#/$defs/IRI - - $ref: vrs.json#/$defs/Allele - description: The [VRS Allele](https://vrs.ga4gh.org/en/2.0/terms_and_model.html#allele) - object that is congruent with (projects to the same codons) as alleles on - other protein reference sequences. - required: - - definingContext - - type - additionalProperties: false - CanonicalAllele: - maturity: draft - description: A canonical allele is defined by an `Allele ` that - is representative of a collection of congruent Alleles, each of which depict - the same nucleic acid change on different underlying reference sequences. Congruent - representations of an Allele often exist across different genome assemblies - and associated cDNA transcript representations. - type: object - properties: - id: - type: string - description: The 'logical' identifier of the entity in the system of record, - e.g. a UUID. This 'id' is unique within a given system. The identified - entity may have a different 'id' in a different system, or may refer to - an 'id' for the shared concept in another system (e.g. a CURIE). - label: - type: string - description: A primary label for the entity. - description: - type: string - description: A free-text description of the entity. - extensions: - type: array - ordered: true - items: - $ref: gks.common.json#/$defs/Extension - mappings: - type: array - ordered: false - items: - $ref: gks.common.json#/$defs/Mapping - type: - type: string - const: CanonicalAllele - default: CanonicalAllele - description: MUST be "CanonicalAllele" - aliases: - description: Aliases are alternate labels for a Domain Entity. - ordered: false - type: array - items: - type: string - members: - type: array - ordered: false - description: A non-exhaustive list of VRS variation contexts that satisfy - the constraints of this categorical variant. 
- items: - oneOf: - - $ref: gks.common.json#/$defs/IRI - - $ref: vrs.json#/$defs/Variation - definingContext: - oneOf: - - $ref: gks.common.json#/$defs/IRI - - $ref: vrs.json#/$defs/Allele - description: The [VRS Allele](https://vrs.ga4gh.org/en/2.0/terms_and_model.html#allele) - object that is congruent with variants on alternate reference sequences. - required: - - definingContext - - type - additionalProperties: false - CategoricalCnv: - maturity: draft - type: object - description: A categorical variation domain is defined first by a sequence derived - from a canonical [Location ](https://vrs.ga4gh.org/en/2.0/terms_and_model.html#Location) - , which is representative of a collection of congruent Locations. The change - or count of this sequence is also described, either by a numeric value (e.g. - "3 or more copies") or categorical representation (e.g. "high-level gain"). Categorical - CNVs may optionally be defined by rules specifying the location match characteristics - for member CNVs. - properties: - id: - type: string - description: The 'logical' identifier of the entity in the system of record, - e.g. a UUID. This 'id' is unique within a given system. The identified - entity may have a different 'id' in a different system, or may refer to - an 'id' for the shared concept in another system (e.g. a CURIE). - label: - type: string - description: A primary label for the entity. - description: - type: string - description: A free-text description of the entity. - extensions: - type: array - ordered: true - items: - $ref: gks.common.json#/$defs/Extension - mappings: - type: array - ordered: false - items: - $ref: gks.common.json#/$defs/Mapping - type: - type: string - const: CategoricalCnv - default: CategoricalCnv - description: MUST be "CategoricalCnv" - aliases: - description: Aliases are alternate labels for a Domain Entity. 
- ordered: false - type: array - items: - type: string - members: - type: array - ordered: false - description: A non-exhaustive list of VRS variation contexts that satisfy - the constraints of this categorical variant. - items: - oneOf: - - $ref: gks.common.json#/$defs/IRI - - $ref: vrs.json#/$defs/Variation - location: - type: - $ref: vrs.json#/$defs/Location - description: A [VRS Location](https://vrs.ga4gh.org/en/2.0/terms_and_model.html#location) - object that represents a sequence derived from that location, and is congruent - with locations on alternate reference sequences. - locationMatchCharacteristic: - type: string - enum: - - exact - - partial - - subinterval - - superinterval - description: The characteristics of a valid match between a contextual CNV - location (the query) and the Categorical CNV location (the domain), when - both query and domain are represented on the same reference sequence. An - `exact` match requires the location of the query and domain to be identical. A - `subinterval` match requires the query to be a subinterval of the domain. - A `superinterval` match requires the query to be a superinterval of the - domain. A `partial` match requires at least 1 residue of overlap between - the query and domain. - copyChange: - type: string - enum: - - efo:0030069 - - efo:0020073 - - efo:0030068 - - efo:0030067 - - efo:0030064 - - efo:0030070 - - efo:0030071 - - efo:0030072 - description: A representation of the change in copies of a sequence in a system. - MUST be one of "efo:0030069" (complete genomic loss), "efo:0020073" (high-level - loss), "efo:0030068" (low-level loss), "efo:0030067" (loss), "efo:0030064" - (regional base ploidy), "efo:0030070" (gain), "efo:0030071" (low-level gain), - "efo:0030072" (high-level gain). - copies: - oneOf: - - $ref: vrs.json#/$defs/Range - - type: integer - description: The integral number of copies of the subject in a system. 
- required: - - location - - type - additionalProperties: false - DescribedVariation: - maturity: draft - type: object - description: Some categorical variation concepts are supported by custom nomenclatures - or text-descriptive representations for which a categorical variation model - does not exist. DescribedVariation is a class that adds requirements and contextual - semantics to the `label` and `description` fields to indicate how a categorical - variation concept should be evaluated for matching variants. - properties: - id: - type: string - description: The 'logical' identifier of the entity in the system of record, - e.g. a UUID. This 'id' is unique within a given system. The identified - entity may have a different 'id' in a different system, or may refer to - an 'id' for the shared concept in another system (e.g. a CURIE). - extensions: - type: array - ordered: true - items: - $ref: gks.common.json#/$defs/Extension - mappings: - type: array - ordered: false - items: - $ref: gks.common.json#/$defs/Mapping - type: - type: string - const: DescribedVariation - default: DescribedVariation - description: MUST be "DescribedVariation" - aliases: - description: Aliases are alternate labels for a Domain Entity. - ordered: false - type: array - items: - type: string - members: - type: array - ordered: false - description: A non-exhaustive list of VRS variation contexts that satisfy - the constraints of this categorical variant. - items: - oneOf: - - $ref: gks.common.json#/$defs/IRI - - $ref: vrs.json#/$defs/Variation - label: - type: string - description: A primary label for the categorical variation. This required - property should provide a short and descriptive textual representation - of the concept. - description: - type: string - description: A textual description of the domain of variation that should - match the categorical variation entity. 
- required: - - label - - type - additionalProperties: false diff --git a/schema/catvrs/.gitignore b/schema/catvrs/.gitignore new file mode 100644 index 0000000..567609b --- /dev/null +++ b/schema/catvrs/.gitignore @@ -0,0 +1 @@ +build/ diff --git a/schema/catvrs/Makefile b/schema/catvrs/Makefile new file mode 100644 index 0000000..30a68ba --- /dev/null +++ b/schema/catvrs/Makefile @@ -0,0 +1,40 @@ +.SECONDARY: +.DELETE_ON_ERROR: + +BUILD_DIR := build +IMPORT_DIR := import + +vpath %tag $(BUILD_DIR) +vpath %.classes $(BUILD_DIR) + +SOURCES := $(wildcard *-source.yaml) +TAGS := $(SOURCES:-source.yaml=.tag) +IMPORTS := $(wildcard $(IMPORT_DIR)/*-source.yaml) + +all: build-ordered ${TAGS} prune-tag + +build-ordered: | $(BUILD_DIR) + +$(BUILD_DIR): + mkdir $(BUILD_DIR) + +%.tag: %.classes %.json-tag %.defs-tag + touch $(BUILD_DIR)/$@ + +%.classes: %-source.yaml + source2classes.py $< >${BUILD_DIR}/$@ + +%.json-tag: %-source.yaml ${IMPORTS} + source2splitjs.py $< + touch $(BUILD_DIR)/$@ + +%.defs-tag: %-source.yaml ${IMPORTS} + y2t.py $< + touch $(BUILD_DIR)/$@ + +prune-tag: ${TAGS} + $(MAKE) -f prune.mk + touch $(BUILD_DIR)/$@ + +clean: + rm $(BUILD_DIR)/* diff --git a/schema/catvrs-source.yaml b/schema/catvrs/catvrs-source.yaml similarity index 97% rename from schema/catvrs-source.yaml rename to schema/catvrs/catvrs-source.yaml index 72b6d46..0fffe45 100644 --- a/schema/catvrs-source.yaml +++ b/schema/catvrs/catvrs-source.yaml @@ -1,15 +1,16 @@ -$schema: http://json-schema.org/draft/2020-12/schema +$schema: "https://json-schema.org/draft/2020-12/schema" +$id: "https://w3id.org/ga4gh/schema/catvrs/1.x/catvrs-source.yaml" title: GA4GH-Cat-VRS-Definitions type: object strict: true imports: - gks.core: gks.common-source.yaml - vrs: vrs-source.yaml + gks.core: ../gks-common/core-source.yaml + vrs: ../vrs/vrs-source.yaml namespaces: - vrs: vrs.json#/$defs/ - gks.core: gks.common.json#/$defs/ + vrs: ../vrs/vrs.yaml#/$defs/ + gks.core: ../gks-common/core.yaml#/$defs/ $defs: # 
VRSATILE Categorical Variation are presented top-down. Everything rolls up to diff --git a/schema/catvrs/def/CanonicalAllele.rst b/schema/catvrs/def/CanonicalAllele.rst new file mode 100644 index 0000000..e352213 --- /dev/null +++ b/schema/catvrs/def/CanonicalAllele.rst @@ -0,0 +1,54 @@ +**Computational Definition** + +A canonical allele is defined by an `Allele ` that is representative of a collection of congruent Alleles, each of which depict the same nucleic acid change on different underlying reference sequences. Congruent representations of an Allele often exist across different genome assemblies and associated cDNA transcript representations. + + **Information Model** + +Some CanonicalAllele attributes are inherited from :ref:`CategoricalVariation`. + + .. list-table:: + :class: clean-wrap + :header-rows: 1 + :align: left + :widths: auto + + * - Field + - Type + - Limits + - Description + * - id + - string + - 0..1 + - The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system, or may refer to an 'id' for the shared concept in another system (e.g. a CURIE). + * - label + - string + - 0..1 + - A primary label for the entity. + * - description + - string + - 0..1 + - A free-text description of the entity. + * - extensions + - `Extension <../gks-common/core.json#/$defs/Extension>`_ + - 0..m + - + * - mappings + - `Mapping <../gks-common/core.json#/$defs/Mapping>`_ + - 0..m + - + * - type + - string + - 1..1 + - MUST be "CanonicalAllele" + * - aliases + - string + - 0..m + - Aliases are alternate labels for a Domain Entity. + * - members + - `Variation <../vrs/vrs.yaml#/$defs/Variation>`_ | `IRI <../gks-common/core.yaml#/$defs/IRI>`_ + - 0..m + - A non-exhaustive list of VRS variation contexts that satisfy the constraints of this categorical variant. 
+ * - definingContext + - `Allele <../vrs/vrs.yaml#/$defs/Allele>`_ | `IRI <../gks-common/core.yaml#/$defs/IRI>`_ + - 1..1 + - The `VRS Allele <https://vrs.ga4gh.org/en/2.0/terms_and_model.html#allele>`_ object that is congruent with variants on alternate reference sequences. diff --git a/schema/catvrs/def/CategoricalCnv.rst b/schema/catvrs/def/CategoricalCnv.rst new file mode 100644 index 0000000..b8bbee1 --- /dev/null +++ b/schema/catvrs/def/CategoricalCnv.rst @@ -0,0 +1,66 @@ +**Computational Definition** + +A categorical variation domain is defined first by a sequence derived from a canonical `Location <https://vrs.ga4gh.org/en/2.0/terms_and_model.html#Location>`_ , which is representative of a collection of congruent Locations. The change or count of this sequence is also described, either by a numeric value (e.g. "3 or more copies") or categorical representation (e.g. "high-level gain"). Categorical CNVs may optionally be defined by rules specifying the location match characteristics for member CNVs. + + **Information Model** + +Some CategoricalCnv attributes are inherited from :ref:`CategoricalVariation`. + + .. list-table:: + :class: clean-wrap + :header-rows: 1 + :align: left + :widths: auto + + * - Field + - Type + - Limits + - Description + * - id + - string + - 0..1 + - The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system, or may refer to an 'id' for the shared concept in another system (e.g. a CURIE). + * - label + - string + - 0..1 + - A primary label for the entity. + * - description + - string + - 0..1 + - A free-text description of the entity. + * - extensions + - `Extension <../gks-common/core.json#/$defs/Extension>`_ + - 0..m + - + * - mappings + - `Mapping <../gks-common/core.json#/$defs/Mapping>`_ + - 0..m + - + * - type + - string + - 1..1 + - MUST be "CategoricalCnv" + * - aliases + - string + - 0..m + - Aliases are alternate labels for a Domain Entity. 
+ * - members + - `Variation <../vrs/vrs.yaml#/$defs/Variation>`_ | `IRI <../gks-common/core.yaml#/$defs/IRI>`_ + - 0..m + - A non-exhaustive list of VRS variation contexts that satisfy the constraints of this categorical variant. + * - location + - `Location <../vrs/vrs.yaml#/$defs/Location>`_ + - 1..1 + - A `VRS Location <https://vrs.ga4gh.org/en/2.0/terms_and_model.html#location>`_ object that represents a sequence derived from that location, and is congruent with locations on alternate reference sequences. + * - locationMatchCharacteristic + - string + - 0..1 + - The characteristics of a valid match between a contextual CNV location (the query) and the Categorical CNV location (the domain), when both query and domain are represented on the same reference sequence. An `exact` match requires the location of the query and domain to be identical. A `subinterval` match requires the query to be a subinterval of the domain. A `superinterval` match requires the query to be a superinterval of the domain. A `partial` match requires at least 1 residue of overlap between the query and domain. + * - copyChange + - string + - 0..1 + - A representation of the change in copies of a sequence in a system. MUST be one of "efo:0030069" (complete genomic loss), "efo:0020073" (high-level loss), "efo:0030068" (low-level loss), "efo:0030067" (loss), "efo:0030064" (regional base ploidy), "efo:0030070" (gain), "efo:0030071" (low-level gain), "efo:0030072" (high-level gain). + * - copies + - integer | `Range <../vrs/vrs.yaml#/$defs/Range>`_ + - 0..1 + - The integral number of copies of the subject in a system. diff --git a/schema/catvrs/def/CategoricalVariation.rst b/schema/catvrs/def/CategoricalVariation.rst new file mode 100644 index 0000000..1577896 --- /dev/null +++ b/schema/catvrs/def/CategoricalVariation.rst @@ -0,0 +1,50 @@ +**Computational Definition** + +A representation of a categorically-defined domain for variation, in which individual contextual variation instances may be members of the domain. 
+ + **Information Model** + +Some CategoricalVariation attributes are inherited from :ref:`gks.core:DomainEntity`. + + .. list-table:: + :class: clean-wrap + :header-rows: 1 + :align: left + :widths: auto + + * - Field + - Type + - Limits + - Description + * - id + - string + - 0..1 + - The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system, or may refer to an 'id' for the shared concept in another system (e.g. a CURIE). + * - label + - string + - 0..1 + - A primary label for the entity. + * - description + - string + - 0..1 + - A free-text description of the entity. + * - extensions + - `Extension <../gks-common/core.json#/$defs/Extension>`_ + - 0..m + - + * - mappings + - `Mapping <../gks-common/core.json#/$defs/Mapping>`_ + - 0..m + - + * - type + - string + - 1..1 + - + * - aliases + - string + - 0..m + - Aliases are alternate labels for a Domain Entity. + * - members + - `Variation <../vrs/vrs.yaml#/$defs/Variation>`_ | `IRI <../gks-common/core.yaml#/$defs/IRI>`_ + - 0..m + - A non-exhaustive list of VRS variation contexts that satisfy the constraints of this categorical variant. diff --git a/schema/catvrs/def/DescribedVariation.rst b/schema/catvrs/def/DescribedVariation.rst new file mode 100644 index 0000000..5a477a3 --- /dev/null +++ b/schema/catvrs/def/DescribedVariation.rst @@ -0,0 +1,50 @@ +**Computational Definition** + +Some categorical variation concepts are supported by custom nomenclatures or text-descriptive representations for which a categorical variation model does not exist. DescribedVariation is a class that adds requirements and contextual semantics to the `label` and `description` fields to indicate how a categorical variation concept should be evaluated for matching variants. + + **Information Model** + +Some DescribedVariation attributes are inherited from :ref:`CategoricalVariation`. + + .. 
list-table:: + :class: clean-wrap + :header-rows: 1 + :align: left + :widths: auto + + * - Field + - Type + - Limits + - Description + * - id + - string + - 0..1 + - The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system, or may refer to an 'id' for the shared concept in another system (e.g. a CURIE). + * - extensions + - `Extension <../gks-common/core.json#/$defs/Extension>`_ + - 0..m + - + * - mappings + - `Mapping <../gks-common/core.json#/$defs/Mapping>`_ + - 0..m + - + * - type + - string + - 1..1 + - MUST be "DescribedVariation" + * - aliases + - string + - 0..m + - Aliases are alternate labels for a Domain Entity. + * - members + - `Variation <../vrs/vrs.yaml#/$defs/Variation>`_ | `IRI <../gks-common/core.yaml#/$defs/IRI>`_ + - 0..m + - A non-exhaustive list of VRS variation contexts that satisfy the constraints of this categorical variant. + * - label + - string + - 1..1 + - A primary label for the categorical variation. This required property should provide a short and descriptive textual representation of the concept. + * - description + - string + - 0..1 + - A textual description of the domain of variation that should match the categorical variation entity. diff --git a/schema/catvrs/def/ProteinSequenceConsequence.rst b/schema/catvrs/def/ProteinSequenceConsequence.rst new file mode 100644 index 0000000..13d03bd --- /dev/null +++ b/schema/catvrs/def/ProteinSequenceConsequence.rst @@ -0,0 +1,54 @@ +**Computational Definition** + +A change that occurs in a protein sequence as a result of genomic changes. Due to the degenerate nature of the genetic code, there are often several genomic changes that can cause a protein sequence consequence. 
The protein sequence consequence, like a :ref:`CanonicalAllele`, is defined by an `Allele <https://vrs.ga4gh.org/en/2.0/terms_and_model.html#allele>`_ that is representative of a collection of congruent Protein Alleles that share the same altered codon(s). + + **Information Model** + +Some ProteinSequenceConsequence attributes are inherited from :ref:`CategoricalVariation`. + + .. list-table:: + :class: clean-wrap + :header-rows: 1 + :align: left + :widths: auto + + * - Field + - Type + - Limits + - Description + * - id + - string + - 0..1 + - The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system, or may refer to an 'id' for the shared concept in another system (e.g. a CURIE). + * - label + - string + - 0..1 + - A primary label for the entity. + * - description + - string + - 0..1 + - A free-text description of the entity. + * - extensions + - `Extension <../gks-common/core.json#/$defs/Extension>`_ + - 0..m + - + * - mappings + - `Mapping <../gks-common/core.json#/$defs/Mapping>`_ + - 0..m + - + * - type + - string + - 1..1 + - MUST be "ProteinSequenceConsequence" + * - aliases + - string + - 0..m + - Aliases are alternate labels for a Domain Entity. + * - members + - `Variation <../vrs/vrs.yaml#/$defs/Variation>`_ | `IRI <../gks-common/core.yaml#/$defs/IRI>`_ + - 0..m + - A non-exhaustive list of VRS variation contexts that satisfy the constraints of this categorical variant. + * - definingContext + - `Allele <../vrs/vrs.yaml#/$defs/Allele>`_ | `IRI <../gks-common/core.yaml#/$defs/IRI>`_ + - 1..1 + - The `VRS Allele <https://vrs.ga4gh.org/en/2.0/terms_and_model.html#allele>`_ object that is congruent with (projects to the same codons as) alleles on other protein reference sequences. 
diff --git a/schema/defs/catvrs/CanonicalAllele.rst b/schema/catvrs/defs/catvrs/CanonicalAllele.rst similarity index 100% rename from schema/defs/catvrs/CanonicalAllele.rst rename to schema/catvrs/defs/catvrs/CanonicalAllele.rst diff --git a/schema/defs/catvrs/CategoricalCnv.rst b/schema/catvrs/defs/catvrs/CategoricalCnv.rst similarity index 100% rename from schema/defs/catvrs/CategoricalCnv.rst rename to schema/catvrs/defs/catvrs/CategoricalCnv.rst diff --git a/schema/defs/catvrs/CategoricalVariation.rst b/schema/catvrs/defs/catvrs/CategoricalVariation.rst similarity index 100% rename from schema/defs/catvrs/CategoricalVariation.rst rename to schema/catvrs/defs/catvrs/CategoricalVariation.rst diff --git a/schema/defs/catvrs/DescribedVariation.rst b/schema/catvrs/defs/catvrs/DescribedVariation.rst similarity index 100% rename from schema/defs/catvrs/DescribedVariation.rst rename to schema/catvrs/defs/catvrs/DescribedVariation.rst diff --git a/schema/defs/catvrs/ProteinSequenceConsequence.rst b/schema/catvrs/defs/catvrs/ProteinSequenceConsequence.rst similarity index 100% rename from schema/defs/catvrs/ProteinSequenceConsequence.rst rename to schema/catvrs/defs/catvrs/ProteinSequenceConsequence.rst diff --git a/schema/catvrs/json/CanonicalAllele b/schema/catvrs/json/CanonicalAllele new file mode 100644 index 0000000..9beb53e --- /dev/null +++ b/schema/catvrs/json/CanonicalAllele @@ -0,0 +1,81 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://w3id.org/ga4gh/schema/catvrs/1.x/json/CanonicalAllele", + "title": "CanonicalAllele", + "type": "object", + "maturity": "draft", + "description": "A canonical allele is defined by an `Allele ` that is representative of a collection of congruent Alleles, each of which depict the same nucleic acid change on different underlying reference sequences. 
Congruent representations of an Allele often exist across different genome assemblies and associated cDNA transcript representations.", + "properties": { + "id": { + "type": "string", + "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system, or may refer to an 'id' for the shared concept in another system (e.g. a CURIE)." + }, + "label": { + "type": "string", + "description": "A primary label for the entity." + }, + "description": { + "type": "string", + "description": "A free-text description of the entity." + }, + "extensions": { + "type": "array", + "ordered": true, + "items": { + "$ref": "/ga4gh/schema/gks-common/1.x/json/Extension" + } + }, + "mappings": { + "type": "array", + "ordered": false, + "items": { + "$ref": "/ga4gh/schema/gks-common/1.x/json/Mapping" + } + }, + "type": { + "type": "string", + "const": "CanonicalAllele", + "default": "CanonicalAllele", + "description": "MUST be \"CanonicalAllele\"" + }, + "aliases": { + "description": "Aliases are alternate labels for a Domain Entity.", + "ordered": false, + "type": "array", + "items": { + "type": "string" + } + }, + "members": { + "type": "array", + "ordered": false, + "description": "A non-exhaustive list of VRS variation contexts that satisfy the constraints of this categorical variant.", + "items": { + "oneOf": [ + { + "$ref": "/ga4gh/schema/gks-common/1.x/json/IRI" + }, + { + "$ref": "/ga4gh/schema/vrs/2.x/json/Variation" + } + ] + } + }, + "definingContext": { + "oneOf": [ + { + "$ref": "/ga4gh/schema/gks-common/1.x/json/IRI" + }, + { + "$ref": "/ga4gh/schema/vrs/2.x/json/Allele" + } + ], + "description": "The [VRS Allele](https://vrs.ga4gh.org/en/2.0/terms_and_model.html#allele) object that is congruent with variants on alternate reference sequences." 
+ } + }, + "required": [ + "definingContext", + "type" + ], + "additionalProperties": false +} \ No newline at end of file diff --git a/schema/catvrs/json/CategoricalCnv b/schema/catvrs/json/CategoricalCnv new file mode 100644 index 0000000..a00494b --- /dev/null +++ b/schema/catvrs/json/CategoricalCnv @@ -0,0 +1,111 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://w3id.org/ga4gh/schema/catvrs/1.x/json/CategoricalCnv", + "title": "CategoricalCnv", + "type": "object", + "maturity": "draft", + "description": "A categorical variation domain is defined first by a sequence derived from a canonical [Location](https://vrs.ga4gh.org/en/2.0/terms_and_model.html#Location), which is representative of a collection of congruent Locations. The change or count of this sequence is also described, either by a numeric value (e.g. \"3 or more copies\") or categorical representation (e.g. \"high-level gain\"). Categorical CNVs may optionally be defined by rules specifying the location match characteristics for member CNVs.", + "properties": { + "id": { + "type": "string", + "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system, or may refer to an 'id' for the shared concept in another system (e.g. a CURIE)." + }, + "label": { + "type": "string", + "description": "A primary label for the entity." + }, + "description": { + "type": "string", + "description": "A free-text description of the entity." 
+ }, + "extensions": { + "type": "array", + "ordered": true, + "items": { + "$ref": "/ga4gh/schema/gks-common/1.x/json/Extension" + } + }, + "mappings": { + "type": "array", + "ordered": false, + "items": { + "$ref": "/ga4gh/schema/gks-common/1.x/json/Mapping" + } + }, + "type": { + "type": "string", + "const": "CategoricalCnv", + "default": "CategoricalCnv", + "description": "MUST be \"CategoricalCnv\"" + }, + "aliases": { + "description": "Aliases are alternate labels for a Domain Entity.", + "ordered": false, + "type": "array", + "items": { + "type": "string" + } + }, + "members": { + "type": "array", + "ordered": false, + "description": "A non-exhaustive list of VRS variation contexts that satisfy the constraints of this categorical variant.", + "items": { + "oneOf": [ + { + "$ref": "/ga4gh/schema/gks-common/1.x/json/IRI" + }, + { + "$ref": "/ga4gh/schema/vrs/2.x/json/Variation" + } + ] + } + }, + "location": { + "type": { + "$ref": "/ga4gh/schema/vrs/2.x/json/Location" + }, + "description": "A [VRS Location](https://vrs.ga4gh.org/en/2.0/terms_and_model.html#location) object that represents a sequence derived from that location, and is congruent with locations on alternate reference sequences." + }, + "locationMatchCharacteristic": { + "type": "string", + "enum": [ + "exact", + "partial", + "subinterval", + "superinterval" + ], + "description": "The characteristics of a valid match between a contextual CNV location (the query) and the Categorical CNV location (the domain), when both query and domain are represented on the same reference sequence. An `exact` match requires the location of the query and domain to be identical. A `subinterval` match requires the query to be a subinterval of the domain. A `superinterval` match requires the query to be a superinterval of the domain. A `partial` match requires at least 1 residue of overlap between the query and domain." 
+ }, + "copyChange": { + "type": "string", + "enum": [ + "efo:0030069", + "efo:0020073", + "efo:0030068", + "efo:0030067", + "efo:0030064", + "efo:0030070", + "efo:0030071", + "efo:0030072" + ], + "description": "A representation of the change in copies of a sequence in a system. MUST be one of \"efo:0030069\" (complete genomic loss), \"efo:0020073\" (high-level loss), \"efo:0030068\" (low-level loss), \"efo:0030067\" (loss), \"efo:0030064\" (regional base ploidy), \"efo:0030070\" (gain), \"efo:0030071\" (low-level gain), \"efo:0030072\" (high-level gain)." + }, + "copies": { + "oneOf": [ + { + "$ref": "/ga4gh/schema/vrs/2.x/json/Range" + }, + { + "type": "integer" + } + ], + "description": "The integral number of copies of the subject in a system." + } + }, + "required": [ + "location", + "type" + ], + "additionalProperties": false +} \ No newline at end of file diff --git a/schema/catvrs/json/CategoricalVariation b/schema/catvrs/json/CategoricalVariation new file mode 100644 index 0000000..6fb028b --- /dev/null +++ b/schema/catvrs/json/CategoricalVariation @@ -0,0 +1,21 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://w3id.org/ga4gh/schema/catvrs/1.x/json/CategoricalVariation", + "title": "CategoricalVariation", + "type": "object", + "description": "A representation of a categorically-defined domain for variation, in which individual contextual variation instances may be members of the domain.", + "oneOf": [ + { + "$ref": "/ga4gh/schema/catvrs/1.x/json/CanonicalAllele" + }, + { + "$ref": "/ga4gh/schema/catvrs/1.x/json/CategoricalCnv" + }, + { + "$ref": "/ga4gh/schema/catvrs/1.x/json/DescribedVariation" + }, + { + "$ref": "/ga4gh/schema/catvrs/1.x/json/ProteinSequenceConsequence" + } + ] +} \ No newline at end of file diff --git a/schema/catvrs/json/DescribedVariation b/schema/catvrs/json/DescribedVariation new file mode 100644 index 0000000..1a458ff --- /dev/null +++ b/schema/catvrs/json/DescribedVariation @@ -0,0 +1,70 @@ +{ + 
"$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://w3id.org/ga4gh/schema/catvrs/1.x/json/DescribedVariation", + "title": "DescribedVariation", + "type": "object", + "maturity": "draft", + "description": "Some categorical variation concepts are supported by custom nomenclatures or text-descriptive representations for which a categorical variation model does not exist. DescribedVariation is a class that adds requirements and contextual semantics to the `label` and `description` fields to indicate how a categorical variation concept should be evaluated for matching variants.", + "properties": { + "id": { + "type": "string", + "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system, or may refer to an 'id' for the shared concept in another system (e.g. a CURIE)." + }, + "extensions": { + "type": "array", + "ordered": true, + "items": { + "$ref": "/ga4gh/schema/gks-common/1.x/json/Extension" + } + }, + "mappings": { + "type": "array", + "ordered": false, + "items": { + "$ref": "/ga4gh/schema/gks-common/1.x/json/Mapping" + } + }, + "type": { + "type": "string", + "const": "DescribedVariation", + "default": "DescribedVariation", + "description": "MUST be \"DescribedVariation\"" + }, + "aliases": { + "description": "Aliases are alternate labels for a Domain Entity.", + "ordered": false, + "type": "array", + "items": { + "type": "string" + } + }, + "members": { + "type": "array", + "ordered": false, + "description": "A non-exhaustive list of VRS variation contexts that satisfy the constraints of this categorical variant.", + "items": { + "oneOf": [ + { + "$ref": "/ga4gh/schema/gks-common/1.x/json/IRI" + }, + { + "$ref": "/ga4gh/schema/vrs/2.x/json/Variation" + } + ] + } + }, + "label": { + "type": "string", + "description": "A primary label for the categorical variation. 
This required property should provide a short and descriptive textual representation of the concept." + }, + "description": { + "type": "string", + "description": "A textual description of the domain of variation that should match the categorical variation entity." + } + }, + "required": [ + "label", + "type" + ], + "additionalProperties": false +} \ No newline at end of file diff --git a/schema/catvrs/json/ProteinSequenceConsequence b/schema/catvrs/json/ProteinSequenceConsequence new file mode 100644 index 0000000..73b7a5b --- /dev/null +++ b/schema/catvrs/json/ProteinSequenceConsequence @@ -0,0 +1,81 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://w3id.org/ga4gh/schema/catvrs/1.x/json/ProteinSequenceConsequence", + "title": "ProteinSequenceConsequence", + "type": "object", + "maturity": "draft", + "description": "A change that occurs in a protein sequence as a result of genomic changes. Due to the degenerate nature of the genetic code, there are often several genomic changes that can cause a protein sequence consequence. The protein sequence consequence, like a CanonicalAllele, is defined by an `Allele ` that is representative of a collection of congruent Protein Alleles that share the same altered codon(s).", + "properties": { + "id": { + "type": "string", + "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system, or may refer to an 'id' for the shared concept in another system (e.g. a CURIE)." + }, + "label": { + "type": "string", + "description": "A primary label for the entity." + }, + "description": { + "type": "string", + "description": "A free-text description of the entity." 
+ }, + "extensions": { + "type": "array", + "ordered": true, + "items": { + "$ref": "/ga4gh/schema/gks-common/1.x/json/Extension" + } + }, + "mappings": { + "type": "array", + "ordered": false, + "items": { + "$ref": "/ga4gh/schema/gks-common/1.x/json/Mapping" + } + }, + "type": { + "type": "string", + "const": "ProteinSequenceConsequence", + "default": "ProteinSequenceConsequence", + "description": "MUST be \"ProteinSequenceConsequence\"" + }, + "aliases": { + "description": "Aliases are alternate labels for a Domain Entity.", + "ordered": false, + "type": "array", + "items": { + "type": "string" + } + }, + "members": { + "type": "array", + "ordered": false, + "description": "A non-exhaustive list of VRS variation contexts that satisfy the constraints of this categorical variant.", + "items": { + "oneOf": [ + { + "$ref": "/ga4gh/schema/gks-common/1.x/json/IRI" + }, + { + "$ref": "/ga4gh/schema/vrs/2.x/json/Variation" + } + ] + } + }, + "definingContext": { + "oneOf": [ + { + "$ref": "/ga4gh/schema/gks-common/1.x/json/IRI" + }, + { + "$ref": "/ga4gh/schema/vrs/2.x/json/Allele" + } + ], + "description": "The [VRS Allele](https://vrs.ga4gh.org/en/2.0/terms_and_model.html#allele) object that is congruent with (projects to the same codons) as alleles on other protein reference sequences." 
+ } + }, + "required": [ + "definingContext", + "type" + ], + "additionalProperties": false +} \ No newline at end of file diff --git a/schema/catvrs/prune.mk b/schema/catvrs/prune.mk new file mode 100644 index 0000000..5d52ce5 --- /dev/null +++ b/schema/catvrs/prune.mk @@ -0,0 +1,11 @@ +BUILD_DIR := build +SOURCES := $(wildcard *-source.yaml) +CLASS_FILTER_FILES = $(SOURCES:%-source.yaml=${BUILD_DIR}/%.classes) +FILTER_CLASSES := $(shell cat ${CLASS_FILTER_FILES}) +FILTER_JSONS = $(FILTER_CLASSES:%=json/%) +FILTER_DEFS = $(FILTER_CLASSES:%=def/%.rst) + +.DEFAULT: prune + +prune: $(filter-out ${FILTER_JSONS} ${FILTER_DEFS},$(wildcard def/* json/*)) + $(if $^,rm $^) \ No newline at end of file diff --git a/schema/gks-common b/schema/gks-common new file mode 120000 index 0000000..a5821f2 --- /dev/null +++ b/schema/gks-common @@ -0,0 +1 @@ +../submodules/vrs/schema/gks-common \ No newline at end of file diff --git a/schema/gks.common-source.yaml b/schema/gks.common-source.yaml deleted file mode 120000 index 6906fa6..0000000 --- a/schema/gks.common-source.yaml +++ /dev/null @@ -1 +0,0 @@ -../submodules/vrs/submodules/gks-common/schema/gks.common-source.yaml \ No newline at end of file diff --git a/schema/gks.common.json b/schema/gks.common.json deleted file mode 120000 index a10d0fe..0000000 --- a/schema/gks.common.json +++ /dev/null @@ -1 +0,0 @@ -../submodules/vrs/submodules/gks-common/schema/gks.common.json \ No newline at end of file diff --git a/schema/gks.common.yaml b/schema/gks.common.yaml deleted file mode 120000 index 6c6c347..0000000 --- a/schema/gks.common.yaml +++ /dev/null @@ -1 +0,0 @@ -../submodules/vrs/submodules/gks-common/schema/gks.common.yaml \ No newline at end of file diff --git a/schema/helpers.py b/schema/helpers.py deleted file mode 100644 index e68209d..0000000 --- a/schema/helpers.py +++ /dev/null @@ -1,28 +0,0 @@ -"""miscellaneous functions for vrs-python - -""" - -import logging - -_logger = logging.getLogger(__name__) - - -def 
pjs_filter(yaml_dict): - """filter out schema elements that are not supported by python - jsonschema objects (yet)""" - - key = 'definitions' - try: - yaml_dict[key] - except KeyError: - key = '$defs' - - for message_name, message_definition in yaml_dict[key].items(): - if 'anyOf' in message_definition: - _logger.warning(f'Removing anyOf attribute from {message_name} to avoid pjs error.') - del message_definition['anyOf'] - if 'allOf' in message_definition: - _logger.warning(f'Removing allOf attribute from {message_name} to avoid pjs error.') - del message_definition['allOf'] - return yaml_dict - diff --git a/schema/merged.json b/schema/merged.json deleted file mode 100644 index 0b0d565..0000000 --- a/schema/merged.json +++ /dev/null @@ -1,1840 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft/2020-12/schema", - "title": "GA4GH-Cat-VRS-Definitions-Merged-Imports", - "type": "object", - "$defs": { - "CategoricalVariation": { - "description": "A representation of a categorically-defined domain for variation, in which individual contextual variation instances may be members of the domain.", - "oneOf": [ - { - "$ref": "#/$defs/CanonicalAllele" - }, - { - "$ref": "#/$defs/CategoricalCnv" - }, - { - "$ref": "#/$defs/DescribedVariation" - }, - { - "$ref": "#/$defs/ProteinSequenceConsequence" - } - ] - }, - "ProteinSequenceConsequence": { - "maturity": "draft", - "type": "object", - "description": "A change that occurs in a protein sequence as a result of genomic changes. Due to the degenerate nature of the genetic code, there are often several genomic changes that can cause a protein sequence consequence. The protein sequence consequence, like a CanonicalAllele, is defined by an `Allele ` that is representative of a collection of congruent Protein Alleles that share the same altered codon(s).", - "properties": { - "id": { - "type": "string", - "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. 
This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system, or may refer to an 'id' for the shared concept in another system (e.g. a CURIE)." - }, - "label": { - "type": "string", - "description": "A primary label for the entity." - }, - "description": { - "type": "string", - "description": "A free-text description of the entity." - }, - "extensions": { - "type": "array", - "ordered": true, - "items": { - "$ref": "#/$defs/Extension" - } - }, - "mappings": { - "type": "array", - "ordered": false, - "items": { - "$ref": "#/$defs/Mapping" - } - }, - "type": { - "type": "string", - "const": "ProteinSequenceConsequence", - "default": "ProteinSequenceConsequence", - "description": "MUST be \"ProteinSequenceConsequence\"" - }, - "aliases": { - "description": "Aliases are alternate labels for a Domain Entity.", - "ordered": false, - "type": "array", - "items": { - "type": "string" - } - }, - "members": { - "type": "array", - "ordered": false, - "description": "A non-exhaustive list of VRS variation contexts that satisfy the constraints of this categorical variant.", - "items": { - "oneOf": [ - { - "$ref": "#/$defs/Adjacency" - }, - { - "$ref": "#/$defs/Allele" - }, - { - "$ref": "#/$defs/CopyNumberChange" - }, - { - "$ref": "#/$defs/CopyNumberCount" - }, - { - "$ref": "#/$defs/Haplotype" - }, - { - "$ref": "#/$defs/IRI" - } - ] - } - }, - "definingContext": { - "oneOf": [ - { - "$ref": "#/$defs/Allele" - }, - { - "$ref": "#/$defs/IRI" - } - ], - "description": "The [VRS Allele](https://vrs.ga4gh.org/en/2.0/terms_and_model.html#allele) object that is congruent with (projects to the same codons) as alleles on other protein reference sequences." 
- } - }, - "required": [ - "definingContext", - "type" - ], - "additionalProperties": false - }, - "CanonicalAllele": { - "maturity": "draft", - "description": "A canonical allele is defined by an `Allele ` that is representative of a collection of congruent Alleles, each of which depict the same nucleic acid change on different underlying reference sequences. Congruent representations of an Allele often exist across different genome assemblies and associated cDNA transcript representations.", - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system, or may refer to an 'id' for the shared concept in another system (e.g. a CURIE)." - }, - "label": { - "type": "string", - "description": "A primary label for the entity." - }, - "description": { - "type": "string", - "description": "A free-text description of the entity." 
- }, - "extensions": { - "type": "array", - "ordered": true, - "items": { - "$ref": "#/$defs/Extension" - } - }, - "mappings": { - "type": "array", - "ordered": false, - "items": { - "$ref": "#/$defs/Mapping" - } - }, - "type": { - "type": "string", - "const": "CanonicalAllele", - "default": "CanonicalAllele", - "description": "MUST be \"CanonicalAllele\"" - }, - "aliases": { - "description": "Aliases are alternate labels for a Domain Entity.", - "ordered": false, - "type": "array", - "items": { - "type": "string" - } - }, - "members": { - "type": "array", - "ordered": false, - "description": "A non-exhaustive list of VRS variation contexts that satisfy the constraints of this categorical variant.", - "items": { - "oneOf": [ - { - "$ref": "#/$defs/Adjacency" - }, - { - "$ref": "#/$defs/Allele" - }, - { - "$ref": "#/$defs/CopyNumberChange" - }, - { - "$ref": "#/$defs/CopyNumberCount" - }, - { - "$ref": "#/$defs/Haplotype" - }, - { - "$ref": "#/$defs/IRI" - } - ] - } - }, - "definingContext": { - "oneOf": [ - { - "$ref": "#/$defs/Allele" - }, - { - "$ref": "#/$defs/IRI" - } - ], - "description": "The [VRS Allele](https://vrs.ga4gh.org/en/2.0/terms_and_model.html#allele) object that is congruent with variants on alternate reference sequences." - } - }, - "required": [ - "definingContext", - "type" - ], - "additionalProperties": false - }, - "CategoricalCnv": { - "maturity": "draft", - "type": "object", - "description": "A categorical variation domain is defined first by a sequence derived from a canonical [Location ](https://vrs.ga4gh.org/en/2.0/terms_and_model.html#Location) , which is representative of a collection of congruent Locations. The change or count of this sequence is also described, either by a numeric value (e.g. \"3 or more copies\") or categorical representation (e.g. \"high-level gain\"). 
Categorical CNVs may optionally be defined by rules specifying the location match characteristics for member CNVs.", - "properties": { - "id": { - "type": "string", - "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system, or may refer to an 'id' for the shared concept in another system (e.g. a CURIE)." - }, - "label": { - "type": "string", - "description": "A primary label for the entity." - }, - "description": { - "type": "string", - "description": "A free-text description of the entity." - }, - "extensions": { - "type": "array", - "ordered": true, - "items": { - "$ref": "#/$defs/Extension" - } - }, - "mappings": { - "type": "array", - "ordered": false, - "items": { - "$ref": "#/$defs/Mapping" - } - }, - "type": { - "type": "string", - "const": "CategoricalCnv", - "default": "CategoricalCnv", - "description": "MUST be \"CategoricalCnv\"" - }, - "aliases": { - "description": "Aliases are alternate labels for a Domain Entity.", - "ordered": false, - "type": "array", - "items": { - "type": "string" - } - }, - "members": { - "type": "array", - "ordered": false, - "description": "A non-exhaustive list of VRS variation contexts that satisfy the constraints of this categorical variant.", - "items": { - "oneOf": [ - { - "$ref": "#/$defs/Adjacency" - }, - { - "$ref": "#/$defs/Allele" - }, - { - "$ref": "#/$defs/CopyNumberChange" - }, - { - "$ref": "#/$defs/CopyNumberCount" - }, - { - "$ref": "#/$defs/Haplotype" - }, - { - "$ref": "#/$defs/IRI" - } - ] - } - }, - "location": { - "type": { - "$ref": "#/$defs/Location" - }, - "description": "A [VRS Location](https://vrs.ga4gh.org/en/2.0/terms_and_model.html#location) object that represents a sequence derived from that location, and is congruent with locations on alternate reference sequences." 
- }, - "locationMatchCharacteristic": { - "type": "string", - "enum": [ - "exact", - "partial", - "subinterval", - "superinterval" - ], - "description": "The characteristics of a valid match between a contextual CNV location (the query) and the Categorical CNV location (the domain), when both query and domain are represented on the same reference sequence. An `exact` match requires the location of the query and domain to be identical. A `subinterval` match requires the query to be a subinterval of the domain. A `superinterval` match requires the query to be a superinterval of the domain. A `partial` match requires at least 1 residue of overlap between the query and domain." - }, - "copyChange": { - "type": "string", - "enum": [ - "efo:0030069", - "efo:0020073", - "efo:0030068", - "efo:0030067", - "efo:0030064", - "efo:0030070", - "efo:0030071", - "efo:0030072" - ], - "description": "A representation of the change in copies of a sequence in a system. MUST be one of \"efo:0030069\" (complete genomic loss), \"efo:0020073\" (high-level loss), \"efo:0030068\" (low-level loss), \"efo:0030067\" (loss), \"efo:0030064\" (regional base ploidy), \"efo:0030070\" (gain), \"efo:0030071\" (low-level gain), \"efo:0030072\" (high-level gain)." - }, - "copies": { - "oneOf": [ - { - "$ref": "#/$defs/Range" - }, - { - "type": "integer" - } - ], - "description": "The integral number of copies of the subject in a system." - } - }, - "required": [ - "location", - "type" - ], - "additionalProperties": false - }, - "DescribedVariation": { - "maturity": "draft", - "type": "object", - "description": "Some categorical variation concepts are supported by custom nomenclatures or text-descriptive representations for which a categorical variation model does not exist. 
DescribedVariation is a class that adds requirements and contextual semantics to the `label` and `description` fields to indicate how a categorical variation concept should be evaluated for matching variants.", - "properties": { - "id": { - "type": "string", - "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system, or may refer to an 'id' for the shared concept in another system (e.g. a CURIE)." - }, - "extensions": { - "type": "array", - "ordered": true, - "items": { - "$ref": "#/$defs/Extension" - } - }, - "mappings": { - "type": "array", - "ordered": false, - "items": { - "$ref": "#/$defs/Mapping" - } - }, - "type": { - "type": "string", - "const": "DescribedVariation", - "default": "DescribedVariation", - "description": "MUST be \"DescribedVariation\"" - }, - "aliases": { - "description": "Aliases are alternate labels for a Domain Entity.", - "ordered": false, - "type": "array", - "items": { - "type": "string" - } - }, - "members": { - "type": "array", - "ordered": false, - "description": "A non-exhaustive list of VRS variation contexts that satisfy the constraints of this categorical variant.", - "items": { - "oneOf": [ - { - "$ref": "#/$defs/Adjacency" - }, - { - "$ref": "#/$defs/Allele" - }, - { - "$ref": "#/$defs/CopyNumberChange" - }, - { - "$ref": "#/$defs/CopyNumberCount" - }, - { - "$ref": "#/$defs/Haplotype" - }, - { - "$ref": "#/$defs/IRI" - } - ] - } - }, - "label": { - "type": "string", - "description": "A primary label for the categorical variation. This required property should provide a short and descriptive textual representation of the concept." - }, - "description": { - "type": "string", - "description": "A textual description of the domain of variation that should match the categorical variation entity." 
- } - }, - "required": [ - "label", - "type" - ], - "additionalProperties": false - }, - "Coding": { - "type": "object", - "maturity": "draft", - "description": "a concept codified by a terminology system.", - "properties": { - "label": { - "type": "string", - "description": "A primary label for the coding." - }, - "system": { - "type": "string", - "description": "Identity of the terminology system." - }, - "version": { - "type": "string", - "description": "Version of the terminology system." - }, - "code": { - "$ref": "#/$defs/Code", - "description": "Symbol in syntax defined by the terminology system." - } - }, - "required": [ - "code", - "system" - ], - "additionalProperties": false - }, - "Mapping": { - "type": "object", - "maturity": "draft", - "description": "A mapping to a concept in a terminology system.", - "properties": { - "id": { - "type": "string", - "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system, or may refer to an 'id' for the shared concept in another system (e.g. a CURIE)." - }, - "label": { - "type": "string", - "description": "A primary label for the entity." - }, - "description": { - "type": "string", - "description": "A free-text description of the entity." 
- }, - "extensions": { - "type": "array", - "ordered": true, - "items": { - "$ref": "#/$defs/Extension" - } - }, - "coding": { - "$ref": "#/$defs/Coding" - }, - "relation": { - "description": "A mapping relation between concepts as defined by the Simple Knowledge Organization System (SKOS).", - "type": "string", - "enum": [ - "closeMatch", - "exactMatch", - "broadMatch", - "narrowMatch", - "relatedMatch" - ] - } - }, - "required": [ - "coding", - "relation" - ], - "additionalProperties": false - }, - "Extension": { - "type": "object", - "maturity": "draft", - "description": "The Extension class provides VODs with a means to extend descriptions with other attributes unique to a content provider. These extensions are not expected to be natively understood under VRSATILE, but may be used for pre-negotiated exchange of message attributes when needed.", - "properties": { - "type": { - "type": "string", - "const": "Extension", - "description": "MUST be \"Extension\"." - }, - "name": { - "type": "string", - "description": "A name for the Extension" - }, - "value": { - "type": [ - "number", - "string", - "boolean", - "object", - "array", - "null" - ], - "description": "Any primitive or structured object" - } - }, - "required": [ - "name" - ], - "additionalProperties": false - }, - "Code": { - "maturity": "draft", - "description": "Indicates that the value is taken from a set of controlled strings defined elsewhere. Technically, a code is restricted to a string which has at least one character and no leading or trailing whitespace, and where there is no whitespace other than single spaces in the contents.", - "type": "string", - "pattern": "\\S+( \\S+)*", - "example": "ENSG00000139618" - }, - "IRI": { - "maturity": "draft", - "description": "An IRI Reference (either an IRI or a relative-reference), according to `RFC3986 section 4.1 ` and `RFC3987 section 2.1 `. 
MAY be a JSON Pointer as an IRI fragment, as described by `RFC6901 section 6 `.", - "type": "string", - "format": "iri-reference" - }, - "Condition": { - "description": "A disease or other medical disorder.", - "oneOf": [ - { - "$ref": "#/$defs/Disease" - }, - { - "$ref": "#/$defs/Phenotype" - }, - { - "$ref": "#/$defs/TraitSet" - } - ] - }, - "TraitSet": { - "type": "object", - "maturity": "draft", - "description": "A set of phenotype and/or disease concepts that together constitute a condition.", - "properties": { - "id": { - "type": "string", - "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system, or may refer to an 'id' for the shared concept in another system (e.g. a CURIE)." - }, - "label": { - "type": "string", - "description": "A primary label for the entity." - }, - "description": { - "type": "string", - "description": "A free-text description of the entity." - }, - "extensions": { - "type": "array", - "ordered": true, - "items": { - "$ref": "#/$defs/Extension" - } - }, - "mappings": { - "type": "array", - "ordered": false, - "items": { - "$ref": "#/$defs/Mapping" - } - }, - "aliases": { - "description": "Aliases are alternate labels for a Domain Entity.", - "ordered": false, - "type": "array", - "items": { - "type": "string" - } - }, - "type": { - "type": "string", - "const": "TraitSet", - "default": "TraitSet", - "description": "MUST be \"TraitSet\"." 
- }, - "traits": { - "type": "array", - "ordered": false, - "items": { - "anyOf": [ - { - "$ref": "#/$defs/Disease" - }, - { - "$ref": "#/$defs/Phenotype" - } - ] - }, - "minItems": 2 - } - }, - "required": [ - "traits", - "type" - ], - "additionalProperties": false - }, - "Disease": { - "type": "object", - "maturity": "draft", - "description": "A particular abnormal condition that negatively affects the structure or function of all or part of an organism and is not immediately due to any external injury.", - "properties": { - "id": { - "type": "string", - "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system, or may refer to an 'id' for the shared concept in another system (e.g. a CURIE)." - }, - "label": { - "type": "string", - "description": "A primary label for the entity." - }, - "description": { - "type": "string", - "description": "A free-text description of the entity." - }, - "extensions": { - "type": "array", - "ordered": true, - "items": { - "$ref": "#/$defs/Extension" - } - }, - "mappings": { - "type": "array", - "ordered": false, - "items": { - "$ref": "#/$defs/Mapping" - } - }, - "aliases": { - "description": "Aliases are alternate labels for a Domain Entity.", - "ordered": false, - "type": "array", - "items": { - "type": "string" - } - }, - "type": { - "type": "string", - "const": "Disease", - "default": "Disease", - "description": "MUST be \"Disease\"." - } - }, - "required": [ - "type" - ], - "additionalProperties": false - }, - "Phenotype": { - "type": "object", - "maturity": "draft", - "description": "An observable characteristic or trait of an organism.", - "properties": { - "id": { - "type": "string", - "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. 
The identified entity may have a different 'id' in a different system, or may refer to an 'id' for the shared concept in another system (e.g. a CURIE)." - }, - "label": { - "type": "string", - "description": "A primary label for the entity." - }, - "description": { - "type": "string", - "description": "A free-text description of the entity." - }, - "extensions": { - "type": "array", - "ordered": true, - "items": { - "$ref": "#/$defs/Extension" - } - }, - "mappings": { - "type": "array", - "ordered": false, - "items": { - "$ref": "#/$defs/Mapping" - } - }, - "aliases": { - "description": "Aliases are alternate labels for a Domain Entity.", - "ordered": false, - "type": "array", - "items": { - "type": "string" - } - }, - "type": { - "type": "string", - "const": "Phenotype", - "default": "Phenotype", - "description": "MUST be \"Phenotype\"." - } - }, - "required": [ - "type" - ], - "additionalProperties": false - }, - "TherapeuticProcedure": { - "description": "An action or administration of therapeutic agents to produce an effect that is intended to alter or stop a pathologic process.", - "oneOf": [ - { - "$ref": "#/$defs/CombinationTherapy" - }, - { - "$ref": "#/$defs/TherapeuticAction" - }, - { - "$ref": "#/$defs/TherapeuticAgent" - }, - { - "$ref": "#/$defs/TherapeuticSubstituteGroup" - } - ] - }, - "TherapeuticAction": { - "type": "object", - "maturity": "draft", - "description": "A therapeutic action taken that is intended to alter or stop a pathologic process.", - "properties": { - "id": { - "type": "string", - "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system, or may refer to an 'id' for the shared concept in another system (e.g. a CURIE)." - }, - "label": { - "type": "string", - "description": "A primary label for the entity." 
- }, - "description": { - "type": "string", - "description": "A free-text description of the entity." - }, - "extensions": { - "type": "array", - "ordered": true, - "items": { - "$ref": "#/$defs/Extension" - } - }, - "mappings": { - "type": "array", - "ordered": false, - "items": { - "$ref": "#/$defs/Mapping" - } - }, - "aliases": { - "description": "Aliases are alternate labels for a Domain Entity.", - "ordered": false, - "type": "array", - "items": { - "type": "string" - } - }, - "type": { - "type": "string", - "const": "TherapeuticAction", - "default": "TherapeuticAction", - "description": "MUST be \"TherapeuticAction\"." - } - }, - "required": [ - "type" - ], - "additionalProperties": false - }, - "TherapeuticAgent": { - "type": "object", - "maturity": "draft", - "description": "An administered therapeutic agent that is intended to alter or stop a pathologic process.", - "properties": { - "id": { - "type": "string", - "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system, or may refer to an 'id' for the shared concept in another system (e.g. a CURIE)." - }, - "label": { - "type": "string", - "description": "A primary label for the entity." - }, - "description": { - "type": "string", - "description": "A free-text description of the entity." - }, - "extensions": { - "type": "array", - "ordered": true, - "items": { - "$ref": "#/$defs/Extension" - } - }, - "mappings": { - "type": "array", - "ordered": false, - "items": { - "$ref": "#/$defs/Mapping" - } - }, - "aliases": { - "description": "Aliases are alternate labels for a Domain Entity.", - "ordered": false, - "type": "array", - "items": { - "type": "string" - } - }, - "type": { - "type": "string", - "const": "TherapeuticAgent", - "default": "TherapeuticAgent", - "description": "MUST be \"TherapeuticAgent\"." 
- } - }, - "required": [ - "type" - ], - "additionalProperties": false - }, - "CombinationTherapy": { - "type": "object", - "maturity": "draft", - "description": "A therapeutic procedure that involves multiple different therapeutic procedures performed in combination.", - "properties": { - "id": { - "type": "string", - "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system, or may refer to an 'id' for the shared concept in another system (e.g. a CURIE)." - }, - "label": { - "type": "string", - "description": "A primary label for the entity." - }, - "description": { - "type": "string", - "description": "A free-text description of the entity." - }, - "extensions": { - "type": "array", - "ordered": true, - "items": { - "$ref": "#/$defs/Extension" - } - }, - "mappings": { - "type": "array", - "ordered": false, - "items": { - "$ref": "#/$defs/Mapping" - } - }, - "aliases": { - "description": "Aliases are alternate labels for a Domain Entity.", - "ordered": false, - "type": "array", - "items": { - "type": "string" - } - }, - "type": { - "type": "string", - "const": "CombinationTherapy", - "default": "CombinationTherapy", - "description": "MUST be \"CombinationTherapy\"." 
- }, - "components": { - "type": "array", - "ordered": false, - "description": "The individual therapeutic procedure components that constitute the combination therapy.", - "items": { - "anyOf": [ - { - "$ref": "#/$defs/TherapeuticSubstituteGroup" - }, - { - "$ref": "#/$defs/TherapeuticAction" - }, - { - "$ref": "#/$defs/TherapeuticAgent" - } - ] - }, - "minItems": 2 - } - }, - "required": [ - "components", - "type" - ], - "additionalProperties": false - }, - "TherapeuticSubstituteGroup": { - "type": "object", - "maturity": "draft", - "description": "A group of therapeutic procedures that may be treated as substitutes for one another.", - "properties": { - "id": { - "type": "string", - "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system, or may refer to an 'id' for the shared concept in another system (e.g. a CURIE)." - }, - "label": { - "type": "string", - "description": "A primary label for the entity." - }, - "description": { - "type": "string", - "description": "A free-text description of the entity." - }, - "extensions": { - "type": "array", - "ordered": true, - "items": { - "$ref": "#/$defs/Extension" - } - }, - "mappings": { - "type": "array", - "ordered": false, - "items": { - "$ref": "#/$defs/Mapping" - } - }, - "aliases": { - "description": "Aliases are alternate labels for a Domain Entity.", - "ordered": false, - "type": "array", - "items": { - "type": "string" - } - }, - "type": { - "type": "string", - "const": "TherapeuticSubstituteGroup", - "default": "TherapeuticSubstituteGroup", - "description": "MUST be \"TherapeuticSubstituteGroup\"." 
- }, - "substitutes": { - "type": "array", - "ordered": false, - "description": "The individual therapeutic procedures that may be treated as substitutes.", - "items": { - "anyOf": [ - { - "$ref": "#/$defs/TherapeuticAction" - }, - { - "$ref": "#/$defs/TherapeuticAgent" - } - ] - }, - "minItems": 2 - } - }, - "required": [ - "substitutes", - "type" - ], - "additionalProperties": false - }, - "Gene": { - "maturity": "draft", - "description": "A basic physical and functional unit of heredity.", - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system, or may refer to an 'id' for the shared concept in another system (e.g. a CURIE)." - }, - "label": { - "type": "string", - "description": "A primary label for the entity." - }, - "description": { - "type": "string", - "description": "A free-text description of the entity." 
- }, - "extensions": { - "type": "array", - "ordered": true, - "items": { - "$ref": "#/$defs/Extension" - } - }, - "mappings": { - "type": "array", - "ordered": false, - "items": { - "$ref": "#/$defs/Mapping" - } - }, - "type": { - "type": "string", - "const": "Gene", - "default": "Gene", - "description": "MUST be \"Gene\"" - }, - "aliases": { - "description": "Aliases are alternate labels for a Domain Entity.", - "ordered": false, - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "type" - ], - "additionalProperties": false - }, - "Variation": { - "description": "A representation of the state of one or more biomolecules.", - "oneOf": [ - { - "$ref": "#/$defs/Adjacency" - }, - { - "$ref": "#/$defs/Allele" - }, - { - "$ref": "#/$defs/CopyNumberChange" - }, - { - "$ref": "#/$defs/CopyNumberCount" - }, - { - "$ref": "#/$defs/Haplotype" - } - ], - "discriminator": { - "propertyName": "type" - } - }, - "Expression": { - "type": "object", - "privateTo": "Variation", - "maturity": "draft", - "description": "Representation of a variation by a specified nomenclature or syntax for a Variation object. Common examples of expressions for the description of molecular variation include the HGVS and ISCN nomenclatures.", - "properties": { - "syntax": { - "type": "string", - "enum": [ - "hgvs.c", - "hgvs.p", - "hgvs.g", - "hgvs.m", - "hgvs.n", - "hgvs.r", - "iscn", - "gnomad", - "spdi" - ] - }, - "value": { - "type": "string" - }, - "syntax_version": { - "type": "string" - } - }, - "required": [ - "syntax", - "value" - ], - "additionalProperties": false - }, - "MolecularVariation": { - "description": "A variation on a contiguous molecule.", - "oneOf": [ - { - "$ref": "#/$defs/Allele" - }, - { - "$ref": "#/$defs/Haplotype" - } - ], - "discriminator": { - "propertyName": "type" - } - }, - "SystemicVariation": { - "description": "A Variation of multiple molecules in the context of a system, e.g. 
a genome, sample, or homologous chromosomes.", - "oneOf": [ - { - "$ref": "#/$defs/CopyNumberChange" - }, - { - "$ref": "#/$defs/CopyNumberCount" - } - ], - "discriminator": { - "propertyName": "type" - } - }, - "Allele": { - "maturity": "draft", - "ga4ghDigest": { - "prefix": "VA", - "keys": [ - "location", - "state", - "type" - ] - }, - "description": "The state of a molecule at a Location.", - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system, or may refer to an 'id' for the shared concept in another system (e.g. a CURIE)." - }, - "label": { - "type": "string", - "description": "A primary label for the entity." - }, - "description": { - "type": "string", - "description": "A free-text description of the entity." - }, - "extensions": { - "type": "array", - "ordered": true, - "items": { - "$ref": "#/$defs/Extension" - } - }, - "type": { - "type": "string", - "const": "Allele", - "default": "Allele", - "description": "MUST be \"Allele\"" - }, - "digest": { - "description": "A sha512t24u digest created using the VRS Computed Identifier algorithm.", - "type": "string", - "pattern": "^[0-9A-Za-z_\\-]{32}$" - }, - "expressions": { - "type": "array", - "ordered": false, - "items": { - "$ref": "#/$defs/Expression" - } - }, - "location": { - "oneOf": [ - { - "$ref": "#/$defs/IRI" - }, - { - "$ref": "#/$defs/SequenceLocation" - } - ], - "description": "The location of the Allele" - }, - "state": { - "description": "An expression of the sequence state", - "oneOf": [ - { - "$ref": "#/$defs/LengthExpression" - }, - { - "$ref": "#/$defs/LiteralSequenceExpression" - }, - { - "$ref": "#/$defs/ReferenceLengthExpression" - } - ] - } - }, - "required": [ - "location", - "state" - ], - "additionalProperties": false - }, - "Haplotype": { - "maturity": "draft", - 
"ga4ghDigest": { - "prefix": "HT", - "keys": [ - "members", - "type" - ] - }, - "description": "An ordered set of co-occurring variants on the same molecule.", - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system, or may refer to an 'id' for the shared concept in another system (e.g. a CURIE)." - }, - "label": { - "type": "string", - "description": "A primary label for the entity." - }, - "description": { - "type": "string", - "description": "A free-text description of the entity." - }, - "extensions": { - "type": "array", - "ordered": true, - "items": { - "$ref": "#/$defs/Extension" - } - }, - "type": { - "type": "string", - "const": "Haplotype", - "default": "Haplotype", - "description": "MUST be \"Haplotype\"" - }, - "digest": { - "description": "A sha512t24u digest created using the VRS Computed Identifier algorithm.", - "type": "string", - "pattern": "^[0-9A-Za-z_\\-]{32}$" - }, - "expressions": { - "type": "array", - "ordered": false, - "items": { - "$ref": "#/$defs/Expression" - } - }, - "members": { - "type": "array", - "ordered": true, - "minItems": 2, - "uniqueItems": false, - "items": { - "oneOf": [ - { - "$ref": "#/$defs/Adjacency" - }, - { - "$ref": "#/$defs/Allele" - }, - { - "$ref": "#/$defs/IRI" - } - ] - }, - "description": "A list of Alleles that comprise a Haplotype. Members must share the same reference sequence as adjacent members. Alleles should not have overlapping or adjacent coordinates with neighboring Alleles. Neighboring alleles should be ordered by ascending coordinates, unless represented on a DNA inversion (following an Adjacency with end-defined adjoinedSequences), in which case they should be ordered in descending coordinates. 
Sequence references MUST be consistent for all members between and including the end of one Adjacency and the beginning of another." - } - }, - "required": [ - "members" - ], - "additionalProperties": false - }, - "CopyNumberCount": { - "maturity": "draft", - "ga4ghDigest": { - "keys": [ - "copies", - "location", - "type" - ], - "prefix": "CN" - }, - "type": "object", - "description": "The absolute count of discrete copies of a Location or Gene, within a system (e.g. genome, cell, etc.).", - "properties": { - "id": { - "type": "string", - "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system, or may refer to an 'id' for the shared concept in another system (e.g. a CURIE)." - }, - "label": { - "type": "string", - "description": "A primary label for the entity." - }, - "description": { - "type": "string", - "description": "A free-text description of the entity." - }, - "extensions": { - "type": "array", - "ordered": true, - "items": { - "$ref": "#/$defs/Extension" - } - }, - "type": { - "type": "string", - "const": "CopyNumberCount", - "default": "CopyNumberCount", - "description": "MUST be \"CopyNumberCount\"" - }, - "digest": { - "description": "A sha512t24u digest created using the VRS Computed Identifier algorithm.", - "type": "string", - "pattern": "^[0-9A-Za-z_\\-]{32}$" - }, - "expressions": { - "type": "array", - "ordered": false, - "items": { - "$ref": "#/$defs/Expression" - } - }, - "location": { - "oneOf": [ - { - "$ref": "#/$defs/IRI" - }, - { - "$ref": "#/$defs/SequenceLocation" - } - ], - "description": "A location for which the number of systemic copies is described." 
- }, - "copies": { - "oneOf": [ - { - "$ref": "#/$defs/Range" - }, - { - "type": "integer" - } - ], - "description": "The integral number of copies of the subject in a system" - } - }, - "required": [ - "copies", - "location" - ], - "additionalProperties": false - }, - "CopyNumberChange": { - "maturity": "draft", - "ga4ghDigest": { - "keys": [ - "copyChange", - "location", - "type" - ], - "prefix": "CX" - }, - "type": "object", - "description": "An assessment of the copy number of a Location or a Gene within a system (e.g. genome, cell, etc.) relative to a baseline ploidy.", - "properties": { - "id": { - "type": "string", - "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system, or may refer to an 'id' for the shared concept in another system (e.g. a CURIE)." - }, - "label": { - "type": "string", - "description": "A primary label for the entity." - }, - "description": { - "type": "string", - "description": "A free-text description of the entity." - }, - "extensions": { - "type": "array", - "ordered": true, - "items": { - "$ref": "#/$defs/Extension" - } - }, - "type": { - "type": "string", - "const": "CopyNumberChange", - "default": "CopyNumberChange", - "description": "MUST be \"CopyNumberChange\"" - }, - "digest": { - "description": "A sha512t24u digest created using the VRS Computed Identifier algorithm.", - "type": "string", - "pattern": "^[0-9A-Za-z_\\-]{32}$" - }, - "expressions": { - "type": "array", - "ordered": false, - "items": { - "$ref": "#/$defs/Expression" - } - }, - "location": { - "oneOf": [ - { - "$ref": "#/$defs/IRI" - }, - { - "$ref": "#/$defs/SequenceLocation" - } - ], - "description": "A location for which the number of systemic copies is described." 
- }, - "copyChange": { - "type": "string", - "enum": [ - "efo:0030069", - "efo:0020073", - "efo:0030068", - "efo:0030067", - "efo:0030064", - "efo:0030070", - "efo:0030071", - "efo:0030072" - ], - "description": "MUST be one of \"efo:0030069\" (complete genomic loss), \"efo:0020073\" (high-level loss), \"efo:0030068\" (low-level loss), \"efo:0030067\" (loss), \"efo:0030064\" (regional base ploidy), \"efo:0030070\" (gain), \"efo:0030071\" (low-level gain), \"efo:0030072\" (high-level gain)." - } - }, - "required": [ - "copyChange", - "location" - ], - "additionalProperties": false - }, - "SequenceLocation": { - "maturity": "draft", - "ga4ghDigest": { - "keys": [ - "end", - "sequenceReference", - "start", - "type" - ], - "prefix": "SL" - }, - "description": "A Location defined by an interval on a referenced Sequence.", - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system, or may refer to an 'id' for the shared concept in another system (e.g. a CURIE)." - }, - "label": { - "type": "string", - "description": "A primary label for the entity." - }, - "description": { - "type": "string", - "description": "A free-text description of the entity." - }, - "extensions": { - "type": "array", - "ordered": true, - "items": { - "$ref": "#/$defs/Extension" - } - }, - "type": { - "type": "string", - "const": "SequenceLocation", - "default": "SequenceLocation", - "description": "MUST be \"SequenceLocation\"" - }, - "digest": { - "description": "A sha512t24u digest created using the VRS Computed Identifier algorithm.", - "type": "string", - "pattern": "^[0-9A-Za-z_\\-]{32}$" - }, - "sequenceReference": { - "oneOf": [ - { - "$ref": "#/$defs/IRI" - }, - { - "$ref": "#/$defs/SequenceReference" - } - ], - "description": "A SequenceReference." 
- }, - "start": { - "oneOf": [ - { - "$ref": "#/$defs/Range" - }, - { - "type": "integer" - } - ], - "description": "The start coordinate or range of the SequenceLocation. The minimum value of this coordinate or range is 0. MUST represent a coordinate or range less than or equal to the value of `end`." - }, - "end": { - "oneOf": [ - { - "$ref": "#/$defs/Range" - }, - { - "type": "integer" - } - ], - "description": "The end coordinate or range of the SequenceLocation. The minimum value of this coordinate or range is 0. MUST represent a coordinate or range greater than or equal to the value of `start`." - } - }, - "required": [], - "additionalProperties": false - }, - "SequenceReference": { - "maturity": "draft", - "ga4ghDigest": { - "assigned": true - }, - "type": "object", - "description": "A sequence of nucleic or amino acid character codes.", - "properties": { - "id": { - "type": "string", - "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system, or may refer to an 'id' for the shared concept in another system (e.g. a CURIE)." - }, - "label": { - "type": "string", - "description": "A primary label for the entity." - }, - "description": { - "type": "string", - "description": "A free-text description of the entity." 
- }, - "extensions": { - "type": "array", - "ordered": true, - "items": { - "$ref": "#/$defs/Extension" - } - }, - "type": { - "type": "string", - "const": "SequenceReference" - }, - "refgetAccession": { - "description": "A `GA4GH RefGet ` identifier for the referenced sequence, using the sha512t24u digest.", - "type": "string", - "pattern": "^SQ.[0-9A-Za-z_\\-]{32}$" - }, - "residueAlphabet": { - "type": "string", - "description": "The interpretation of the character codes referred to by the refget accession, where \"aa\" specifies an amino acid character set, and \"na\" specifies a nucleic acid character set.", - "enum": [ - "aa", - "na" - ] - } - }, - "required": [ - "refgetAccession" - ], - "additionalProperties": false - }, - "SequenceExpression": { - "description": "An expression describing a Sequence.", - "oneOf": [ - { - "$ref": "#/$defs/LiteralSequenceExpression" - }, - { - "$ref": "#/$defs/ReferenceLengthExpression" - } - ], - "discriminator": { - "propertyName": "type" - } - }, - "ReferenceLengthExpression": { - "maturity": "draft", - "ga4ghDigest": { - "keys": [ - "length", - "repeatSubunitLength", - "type" - ] - }, - "description": "An expression of a length of a sequence from a repeating reference.", - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system, or may refer to an 'id' for the shared concept in another system (e.g. a CURIE)." - }, - "label": { - "type": "string", - "description": "A primary label for the entity." - }, - "description": { - "type": "string", - "description": "A free-text description of the entity." 
- }, - "extensions": { - "type": "array", - "ordered": true, - "items": { - "$ref": "#/$defs/Extension" - } - }, - "type": { - "type": "string", - "const": "ReferenceLengthExpression", - "default": "ReferenceLengthExpression", - "description": "MUST be \"ReferenceLengthExpression\"" - }, - "length": { - "oneOf": [ - { - "$ref": "#/$defs/Range" - }, - { - "type": "integer" - } - ], - "description": "The number of residues in the expressed sequence." - }, - "sequence": { - "$ref": "#/$defs/SequenceString", - "description": "the Sequence encoded by the Reference Length Expression." - }, - "repeatSubunitLength": { - "type": "integer", - "description": "The number of residues in the repeat subunit." - } - }, - "required": [ - "length", - "repeatSubunitLength", - "type" - ], - "additionalProperties": false - }, - "LengthExpression": { - "maturity": "draft", - "ga4ghDigest": { - "keys": [ - "length", - "type" - ] - }, - "type": "object", - "description": "A sequence expressed only by its length.", - "properties": { - "id": { - "type": "string", - "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system, or may refer to an 'id' for the shared concept in another system (e.g. a CURIE)." - }, - "label": { - "type": "string", - "description": "A primary label for the entity." - }, - "description": { - "type": "string", - "description": "A free-text description of the entity." 
- }, - "extensions": { - "type": "array", - "ordered": true, - "items": { - "$ref": "#/$defs/Extension" - } - }, - "type": { - "type": "string", - "const": "LengthExpression", - "default": "LengthExpression", - "description": "MUST be \"LengthExpression\"" - }, - "length": { - "oneOf": [ - { - "$ref": "#/$defs/Range" - }, - { - "type": "integer" - } - ] - } - }, - "required": [ - "type" - ], - "additionalProperties": false - }, - "LiteralSequenceExpression": { - "maturity": "draft", - "ga4ghDigest": { - "keys": [ - "sequence", - "type" - ] - }, - "description": "An explicit expression of a Sequence.", - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system, or may refer to an 'id' for the shared concept in another system (e.g. a CURIE)." - }, - "label": { - "type": "string", - "description": "A primary label for the entity." - }, - "description": { - "type": "string", - "description": "A free-text description of the entity." 
- }, - "extensions": { - "type": "array", - "ordered": true, - "items": { - "$ref": "#/$defs/Extension" - } - }, - "type": { - "type": "string", - "const": "LiteralSequenceExpression", - "default": "LiteralSequenceExpression", - "description": "MUST be \"LiteralSequenceExpression\"" - }, - "sequence": { - "$ref": "#/$defs/SequenceString", - "description": "the literal sequence" - } - }, - "required": [ - "sequence", - "type" - ], - "additionalProperties": false - }, - "Range": { - "maturity": "draft", - "description": "An inclusive range of values bounded by one or more integers.", - "type": "array", - "ordered": true, - "items": { - "oneOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ] - }, - "maxItems": 2, - "minItems": 2 - }, - "Residue": { - "maturity": "draft", - "description": "A character representing a specific residue (i.e., molecular species) or groupings of these (\"ambiguity codes\"), using [one-letter IUPAC abbreviations](https://en.wikipedia.org/wiki/International_Union_of_Pure_and_Applied_Chemistry#Amino_acid_and_nucleotide_base_codes) for nucleic acids and amino acids.", - "type": "string", - "pattern": "[A-Z*\\-]" - }, - "SequenceString": { - "maturity": "draft", - "description": "A character string of Residues that represents a biological sequence using the conventional sequence order (5\u2019-to-3\u2019 for nucleic acid sequences, and amino-to-carboxyl for amino acid sequences). 
IUPAC ambiguity codes are permitted in Sequence Strings.", - "type": "string", - "pattern": "^[A-Z*\\-]*$" - }, - "Adjacency": { - "maturity": "draft", - "ga4ghDigest": { - "prefix": "AJ", - "keys": [ - "adjoinedSequences", - "linker", - "type" - ] - }, - "description": "The `Adjacency` class can represent either the termination of a sequence or the adjoining of the end of a sequence with the beginning of an adjacent sequence, potentially with an intervening linker sequence.", - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system, or may refer to an 'id' for the shared concept in another system (e.g. a CURIE)." - }, - "label": { - "type": "string", - "description": "A primary label for the entity." - }, - "description": { - "type": "string", - "description": "A free-text description of the entity." - }, - "extensions": { - "type": "array", - "ordered": true, - "items": { - "$ref": "#/$defs/Extension" - } - }, - "type": { - "type": "string", - "const": "Adjacency", - "default": "Adjacency", - "description": "MUST be \"Adjacency\"." 
- }, - "digest": { - "description": "A sha512t24u digest created using the VRS Computed Identifier algorithm.", - "type": "string", - "pattern": "^[0-9A-Za-z_\\-]{32}$" - }, - "expressions": { - "type": "array", - "ordered": false, - "items": { - "$ref": "#/$defs/Expression" - } - }, - "adjoinedSequences": { - "type": "array", - "uniqueItems": false, - "ordered": true, - "items": { - "oneOf": [ - { - "$ref": "#/$defs/IRI" - }, - { - "$ref": "#/$defs/SequenceLocation" - } - ] - }, - "description": "The terminal sequence or pair of adjoined sequences that defines in the adjacency.", - "minItems": 1, - "maxItems": 2 - }, - "linker": { - "description": "The sequence found between adjoined sequences.", - "oneOf": [ - { - "$ref": "#/$defs/LengthExpression" - }, - { - "$ref": "#/$defs/LiteralSequenceExpression" - }, - { - "$ref": "#/$defs/ReferenceLengthExpression" - } - ] - } - }, - "required": [ - "adjoinedSequences" - ], - "additionalProperties": false - } - } -} \ No newline at end of file diff --git a/schema/merged.yaml b/schema/merged.yaml deleted file mode 100644 index fb3974f..0000000 --- a/schema/merged.yaml +++ /dev/null @@ -1,1420 +0,0 @@ -$schema: http://json-schema.org/draft/2020-12/schema -title: GA4GH-Cat-VRS-Definitions-Merged-Imports -type: object -$defs: - CategoricalVariation: - description: A representation of a categorically-defined domain for variation, - in which individual contextual variation instances may be members of the domain. - oneOf: - - $ref: '#/$defs/CanonicalAllele' - - $ref: '#/$defs/CategoricalCnv' - - $ref: '#/$defs/DescribedVariation' - - $ref: '#/$defs/ProteinSequenceConsequence' - ProteinSequenceConsequence: - maturity: draft - type: object - description: A change that occurs in a protein sequence as a result of genomic - changes. Due to the degenerate nature of the genetic code, there are often several - genomic changes that can cause a protein sequence consequence. 
The protein sequence - consequence, like a CanonicalAllele, is defined by an `Allele ` - that is representative of a collection of congruent Protein Alleles that share - the same altered codon(s). - properties: - id: - type: string - description: The 'logical' identifier of the entity in the system of record, - e.g. a UUID. This 'id' is unique within a given system. The identified - entity may have a different 'id' in a different system, or may refer to - an 'id' for the shared concept in another system (e.g. a CURIE). - label: - type: string - description: A primary label for the entity. - description: - type: string - description: A free-text description of the entity. - extensions: - type: array - ordered: true - items: - $ref: '#/$defs/Extension' - mappings: - type: array - ordered: false - items: - $ref: '#/$defs/Mapping' - type: - type: string - const: ProteinSequenceConsequence - default: ProteinSequenceConsequence - description: MUST be "ProteinSequenceConsequence" - aliases: - description: Aliases are alternate labels for a Domain Entity. - ordered: false - type: array - items: - type: string - members: - type: array - ordered: false - description: A non-exhaustive list of VRS variation contexts that satisfy - the constraints of this categorical variant. - items: - oneOf: - - $ref: '#/$defs/Adjacency' - - $ref: '#/$defs/Allele' - - $ref: '#/$defs/CopyNumberChange' - - $ref: '#/$defs/CopyNumberCount' - - $ref: '#/$defs/Haplotype' - - $ref: '#/$defs/IRI' - definingContext: - oneOf: - - $ref: '#/$defs/Allele' - - $ref: '#/$defs/IRI' - description: The [VRS Allele](https://vrs.ga4gh.org/en/2.0/terms_and_model.html#allele) - object that is congruent with (projects to the same codons) as alleles on - other protein reference sequences. 
- required: - - definingContext - - type - additionalProperties: false - CanonicalAllele: - maturity: draft - description: A canonical allele is defined by an `Allele ` that - is representative of a collection of congruent Alleles, each of which depict - the same nucleic acid change on different underlying reference sequences. Congruent - representations of an Allele often exist across different genome assemblies - and associated cDNA transcript representations. - type: object - properties: - id: - type: string - description: The 'logical' identifier of the entity in the system of record, - e.g. a UUID. This 'id' is unique within a given system. The identified - entity may have a different 'id' in a different system, or may refer to - an 'id' for the shared concept in another system (e.g. a CURIE). - label: - type: string - description: A primary label for the entity. - description: - type: string - description: A free-text description of the entity. - extensions: - type: array - ordered: true - items: - $ref: '#/$defs/Extension' - mappings: - type: array - ordered: false - items: - $ref: '#/$defs/Mapping' - type: - type: string - const: CanonicalAllele - default: CanonicalAllele - description: MUST be "CanonicalAllele" - aliases: - description: Aliases are alternate labels for a Domain Entity. - ordered: false - type: array - items: - type: string - members: - type: array - ordered: false - description: A non-exhaustive list of VRS variation contexts that satisfy - the constraints of this categorical variant. - items: - oneOf: - - $ref: '#/$defs/Adjacency' - - $ref: '#/$defs/Allele' - - $ref: '#/$defs/CopyNumberChange' - - $ref: '#/$defs/CopyNumberCount' - - $ref: '#/$defs/Haplotype' - - $ref: '#/$defs/IRI' - definingContext: - oneOf: - - $ref: '#/$defs/Allele' - - $ref: '#/$defs/IRI' - description: The [VRS Allele](https://vrs.ga4gh.org/en/2.0/terms_and_model.html#allele) - object that is congruent with variants on alternate reference sequences. 
- required: - - definingContext - - type - additionalProperties: false - CategoricalCnv: - maturity: draft - type: object - description: A categorical variation domain is defined first by a sequence derived - from a canonical [Location ](https://vrs.ga4gh.org/en/2.0/terms_and_model.html#Location) - , which is representative of a collection of congruent Locations. The change - or count of this sequence is also described, either by a numeric value (e.g. - "3 or more copies") or categorical representation (e.g. "high-level gain"). Categorical - CNVs may optionally be defined by rules specifying the location match characteristics - for member CNVs. - properties: - id: - type: string - description: The 'logical' identifier of the entity in the system of record, - e.g. a UUID. This 'id' is unique within a given system. The identified - entity may have a different 'id' in a different system, or may refer to - an 'id' for the shared concept in another system (e.g. a CURIE). - label: - type: string - description: A primary label for the entity. - description: - type: string - description: A free-text description of the entity. - extensions: - type: array - ordered: true - items: - $ref: '#/$defs/Extension' - mappings: - type: array - ordered: false - items: - $ref: '#/$defs/Mapping' - type: - type: string - const: CategoricalCnv - default: CategoricalCnv - description: MUST be "CategoricalCnv" - aliases: - description: Aliases are alternate labels for a Domain Entity. - ordered: false - type: array - items: - type: string - members: - type: array - ordered: false - description: A non-exhaustive list of VRS variation contexts that satisfy - the constraints of this categorical variant. 
- items: - oneOf: - - $ref: '#/$defs/Adjacency' - - $ref: '#/$defs/Allele' - - $ref: '#/$defs/CopyNumberChange' - - $ref: '#/$defs/CopyNumberCount' - - $ref: '#/$defs/Haplotype' - - $ref: '#/$defs/IRI' - location: - type: - $ref: '#/$defs/Location' - description: A [VRS Location](https://vrs.ga4gh.org/en/2.0/terms_and_model.html#location) - object that represents a sequence derived from that location, and is congruent - with locations on alternate reference sequences. - locationMatchCharacteristic: - type: string - enum: - - exact - - partial - - subinterval - - superinterval - description: The characteristics of a valid match between a contextual CNV - location (the query) and the Categorical CNV location (the domain), when - both query and domain are represented on the same reference sequence. An - `exact` match requires the location of the query and domain to be identical. A - `subinterval` match requires the query to be a subinterval of the domain. - A `superinterval` match requires the query to be a superinterval of the - domain. A `partial` match requires at least 1 residue of overlap between - the query and domain. - copyChange: - type: string - enum: - - efo:0030069 - - efo:0020073 - - efo:0030068 - - efo:0030067 - - efo:0030064 - - efo:0030070 - - efo:0030071 - - efo:0030072 - description: A representation of the change in copies of a sequence in a system. - MUST be one of "efo:0030069" (complete genomic loss), "efo:0020073" (high-level - loss), "efo:0030068" (low-level loss), "efo:0030067" (loss), "efo:0030064" - (regional base ploidy), "efo:0030070" (gain), "efo:0030071" (low-level gain), - "efo:0030072" (high-level gain). - copies: - oneOf: - - $ref: '#/$defs/Range' - - type: integer - description: The integral number of copies of the subject in a system. 
- required: - - location - - type - additionalProperties: false - DescribedVariation: - maturity: draft - type: object - description: Some categorical variation concepts are supported by custom nomenclatures - or text-descriptive representations for which a categorical variation model - does not exist. DescribedVariation is a class that adds requirements and contextual - semantics to the `label` and `description` fields to indicate how a categorical - variation concept should be evaluated for matching variants. - properties: - id: - type: string - description: The 'logical' identifier of the entity in the system of record, - e.g. a UUID. This 'id' is unique within a given system. The identified - entity may have a different 'id' in a different system, or may refer to - an 'id' for the shared concept in another system (e.g. a CURIE). - extensions: - type: array - ordered: true - items: - $ref: '#/$defs/Extension' - mappings: - type: array - ordered: false - items: - $ref: '#/$defs/Mapping' - type: - type: string - const: DescribedVariation - default: DescribedVariation - description: MUST be "DescribedVariation" - aliases: - description: Aliases are alternate labels for a Domain Entity. - ordered: false - type: array - items: - type: string - members: - type: array - ordered: false - description: A non-exhaustive list of VRS variation contexts that satisfy - the constraints of this categorical variant. - items: - oneOf: - - $ref: '#/$defs/Adjacency' - - $ref: '#/$defs/Allele' - - $ref: '#/$defs/CopyNumberChange' - - $ref: '#/$defs/CopyNumberCount' - - $ref: '#/$defs/Haplotype' - - $ref: '#/$defs/IRI' - label: - type: string - description: A primary label for the categorical variation. This required - property should provide a short and descriptive textual representation - of the concept. - description: - type: string - description: A textual description of the domain of variation that should - match the categorical variation entity. 
- required: - - label - - type - additionalProperties: false - Coding: - type: object - maturity: draft - description: a concept codified by a terminology system. - properties: - label: - type: string - description: A primary label for the coding. - system: - type: string - description: Identity of the terminology system. - version: - type: string - description: Version of the terminology system. - code: - $ref: '#/$defs/Code' - description: Symbol in syntax defined by the terminology system. - required: - - code - - system - additionalProperties: false - Mapping: - type: object - maturity: draft - description: A mapping to a concept in a terminology system. - properties: - id: - type: string - description: The 'logical' identifier of the entity in the system of record, - e.g. a UUID. This 'id' is unique within a given system. The identified - entity may have a different 'id' in a different system, or may refer to - an 'id' for the shared concept in another system (e.g. a CURIE). - label: - type: string - description: A primary label for the entity. - description: - type: string - description: A free-text description of the entity. - extensions: - type: array - ordered: true - items: - $ref: '#/$defs/Extension' - coding: - $ref: '#/$defs/Coding' - relation: - description: A mapping relation between concepts as defined by the Simple - Knowledge Organization System (SKOS). - type: string - enum: - - closeMatch - - exactMatch - - broadMatch - - narrowMatch - - relatedMatch - required: - - coding - - relation - additionalProperties: false - Extension: - type: object - maturity: draft - description: The Extension class provides VODs with a means to extend descriptions - with other attributes unique to a content provider. These extensions are not - expected to be natively understood under VRSATILE, but may be used for pre-negotiated - exchange of message attributes when needed. - properties: - type: - type: string - const: Extension - description: MUST be "Extension". 
- name: - type: string - description: A name for the Extension - value: - type: - - number - - string - - boolean - - object - - array - - 'null' - description: Any primitive or structured object - required: - - name - additionalProperties: false - Code: - maturity: draft - description: Indicates that the value is taken from a set of controlled strings - defined elsewhere. Technically, a code is restricted to a string which has at - least one character and no leading or trailing whitespace, and where there - is no whitespace other than single spaces in the contents. - type: string - pattern: \S+( \S+)* - example: ENSG00000139618 - IRI: - maturity: draft - description: An IRI Reference (either an IRI or a relative-reference), according - to `RFC3986 section 4.1 ` - and `RFC3987 section 2.1 `. - MAY be a JSON Pointer as an IRI fragment, as described by `RFC6901 section - 6 `. - type: string - format: iri-reference - Condition: - description: A disease or other medical disorder. - oneOf: - - $ref: '#/$defs/Disease' - - $ref: '#/$defs/Phenotype' - - $ref: '#/$defs/TraitSet' - TraitSet: - type: object - maturity: draft - description: A set of phenotype and/or disease concepts that together constitute - a condition. - properties: - id: - type: string - description: The 'logical' identifier of the entity in the system of record, - e.g. a UUID. This 'id' is unique within a given system. The identified - entity may have a different 'id' in a different system, or may refer to - an 'id' for the shared concept in another system (e.g. a CURIE). - label: - type: string - description: A primary label for the entity. - description: - type: string - description: A free-text description of the entity. - extensions: - type: array - ordered: true - items: - $ref: '#/$defs/Extension' - mappings: - type: array - ordered: false - items: - $ref: '#/$defs/Mapping' - aliases: - description: Aliases are alternate labels for a Domain Entity. 
- ordered: false - type: array - items: - type: string - type: - type: string - const: TraitSet - default: TraitSet - description: MUST be "TraitSet". - traits: - type: array - ordered: false - items: - anyOf: - - $ref: '#/$defs/Disease' - - $ref: '#/$defs/Phenotype' - minItems: 2 - required: - - traits - - type - additionalProperties: false - Disease: - type: object - maturity: draft - description: A particular abnormal condition that negatively affects the structure - or function of all or part of an organism and is not immediately due to any - external injury. - properties: - id: - type: string - description: The 'logical' identifier of the entity in the system of record, - e.g. a UUID. This 'id' is unique within a given system. The identified - entity may have a different 'id' in a different system, or may refer to - an 'id' for the shared concept in another system (e.g. a CURIE). - label: - type: string - description: A primary label for the entity. - description: - type: string - description: A free-text description of the entity. - extensions: - type: array - ordered: true - items: - $ref: '#/$defs/Extension' - mappings: - type: array - ordered: false - items: - $ref: '#/$defs/Mapping' - aliases: - description: Aliases are alternate labels for a Domain Entity. - ordered: false - type: array - items: - type: string - type: - type: string - const: Disease - default: Disease - description: MUST be "Disease". - required: - - type - additionalProperties: false - Phenotype: - type: object - maturity: draft - description: An observable characteristic or trait of an organism. - properties: - id: - type: string - description: The 'logical' identifier of the entity in the system of record, - e.g. a UUID. This 'id' is unique within a given system. The identified - entity may have a different 'id' in a different system, or may refer to - an 'id' for the shared concept in another system (e.g. a CURIE). 
- label: - type: string - description: A primary label for the entity. - description: - type: string - description: A free-text description of the entity. - extensions: - type: array - ordered: true - items: - $ref: '#/$defs/Extension' - mappings: - type: array - ordered: false - items: - $ref: '#/$defs/Mapping' - aliases: - description: Aliases are alternate labels for a Domain Entity. - ordered: false - type: array - items: - type: string - type: - type: string - const: Phenotype - default: Phenotype - description: MUST be "Phenotype". - required: - - type - additionalProperties: false - TherapeuticProcedure: - description: An action or administration of therapeutic agents to produce an effect that - is intended to alter or stop a pathologic process. - oneOf: - - $ref: '#/$defs/CombinationTherapy' - - $ref: '#/$defs/TherapeuticAction' - - $ref: '#/$defs/TherapeuticAgent' - - $ref: '#/$defs/TherapeuticSubstituteGroup' - TherapeuticAction: - type: object - maturity: draft - description: A therapeutic action taken that is intended to alter or stop a pathologic - process. - properties: - id: - type: string - description: The 'logical' identifier of the entity in the system of record, - e.g. a UUID. This 'id' is unique within a given system. The identified - entity may have a different 'id' in a different system, or may refer to - an 'id' for the shared concept in another system (e.g. a CURIE). - label: - type: string - description: A primary label for the entity. - description: - type: string - description: A free-text description of the entity. - extensions: - type: array - ordered: true - items: - $ref: '#/$defs/Extension' - mappings: - type: array - ordered: false - items: - $ref: '#/$defs/Mapping' - aliases: - description: Aliases are alternate labels for a Domain Entity. - ordered: false - type: array - items: - type: string - type: - type: string - const: TherapeuticAction - default: TherapeuticAction - description: MUST be "TherapeuticAction". 
- required: - - type - additionalProperties: false - TherapeuticAgent: - type: object - maturity: draft - description: An administered therapeutic agent that is intended to alter or stop - a pathologic process. - properties: - id: - type: string - description: The 'logical' identifier of the entity in the system of record, - e.g. a UUID. This 'id' is unique within a given system. The identified - entity may have a different 'id' in a different system, or may refer to - an 'id' for the shared concept in another system (e.g. a CURIE). - label: - type: string - description: A primary label for the entity. - description: - type: string - description: A free-text description of the entity. - extensions: - type: array - ordered: true - items: - $ref: '#/$defs/Extension' - mappings: - type: array - ordered: false - items: - $ref: '#/$defs/Mapping' - aliases: - description: Aliases are alternate labels for a Domain Entity. - ordered: false - type: array - items: - type: string - type: - type: string - const: TherapeuticAgent - default: TherapeuticAgent - description: MUST be "TherapeuticAgent". - required: - - type - additionalProperties: false - CombinationTherapy: - type: object - maturity: draft - description: A therapeutic procedure that involves multiple different therapeutic - procedures performed in combination. - properties: - id: - type: string - description: The 'logical' identifier of the entity in the system of record, - e.g. a UUID. This 'id' is unique within a given system. The identified - entity may have a different 'id' in a different system, or may refer to - an 'id' for the shared concept in another system (e.g. a CURIE). - label: - type: string - description: A primary label for the entity. - description: - type: string - description: A free-text description of the entity. 
- extensions: - type: array - ordered: true - items: - $ref: '#/$defs/Extension' - mappings: - type: array - ordered: false - items: - $ref: '#/$defs/Mapping' - aliases: - description: Aliases are alternate labels for a Domain Entity. - ordered: false - type: array - items: - type: string - type: - type: string - const: CombinationTherapy - default: CombinationTherapy - description: MUST be "CombinationTherapy". - components: - type: array - ordered: false - description: The individual therapeutic procedure components that constitute - the combination therapy. - items: - anyOf: - - $ref: '#/$defs/TherapeuticSubstituteGroup' - - $ref: '#/$defs/TherapeuticAction' - - $ref: '#/$defs/TherapeuticAgent' - minItems: 2 - required: - - components - - type - additionalProperties: false - TherapeuticSubstituteGroup: - type: object - maturity: draft - description: A group of therapeutic procedures that may be treated as substitutes - for one another. - properties: - id: - type: string - description: The 'logical' identifier of the entity in the system of record, - e.g. a UUID. This 'id' is unique within a given system. The identified - entity may have a different 'id' in a different system, or may refer to - an 'id' for the shared concept in another system (e.g. a CURIE). - label: - type: string - description: A primary label for the entity. - description: - type: string - description: A free-text description of the entity. - extensions: - type: array - ordered: true - items: - $ref: '#/$defs/Extension' - mappings: - type: array - ordered: false - items: - $ref: '#/$defs/Mapping' - aliases: - description: Aliases are alternate labels for a Domain Entity. - ordered: false - type: array - items: - type: string - type: - type: string - const: TherapeuticSubstituteGroup - default: TherapeuticSubstituteGroup - description: MUST be "TherapeuticSubstituteGroup". 
- substitutes: - type: array - ordered: false - description: The individual therapeutic procedures that may be treated as - substitutes. - items: - anyOf: - - $ref: '#/$defs/TherapeuticAction' - - $ref: '#/$defs/TherapeuticAgent' - minItems: 2 - required: - - substitutes - - type - additionalProperties: false - Gene: - maturity: draft - description: A basic physical and functional unit of heredity. - type: object - properties: - id: - type: string - description: The 'logical' identifier of the entity in the system of record, - e.g. a UUID. This 'id' is unique within a given system. The identified - entity may have a different 'id' in a different system, or may refer to - an 'id' for the shared concept in another system (e.g. a CURIE). - label: - type: string - description: A primary label for the entity. - description: - type: string - description: A free-text description of the entity. - extensions: - type: array - ordered: true - items: - $ref: '#/$defs/Extension' - mappings: - type: array - ordered: false - items: - $ref: '#/$defs/Mapping' - type: - type: string - const: Gene - default: Gene - description: MUST be "Gene" - aliases: - description: Aliases are alternate labels for a Domain Entity. - ordered: false - type: array - items: - type: string - required: - - type - additionalProperties: false - Variation: - description: A representation of the state of one or more biomolecules. - oneOf: - - $ref: '#/$defs/Adjacency' - - $ref: '#/$defs/Allele' - - $ref: '#/$defs/CopyNumberChange' - - $ref: '#/$defs/CopyNumberCount' - - $ref: '#/$defs/Haplotype' - discriminator: - propertyName: type - Expression: - type: object - privateTo: Variation - maturity: draft - description: Representation of a variation by a specified nomenclature or syntax - for a Variation object. Common examples of expressions for the description - of molecular variation include the HGVS and ISCN nomenclatures. 
- properties: - syntax: - type: string - enum: - - hgvs.c - - hgvs.p - - hgvs.g - - hgvs.m - - hgvs.n - - hgvs.r - - iscn - - gnomad - - spdi - value: - type: string - syntax_version: - type: string - required: - - syntax - - value - additionalProperties: false - MolecularVariation: - description: A variation on a contiguous molecule. - oneOf: - - $ref: '#/$defs/Allele' - - $ref: '#/$defs/Haplotype' - discriminator: - propertyName: type - SystemicVariation: - description: A Variation of multiple molecules in the context of a system, e.g. - a genome, sample, or homologous chromosomes. - oneOf: - - $ref: '#/$defs/CopyNumberChange' - - $ref: '#/$defs/CopyNumberCount' - discriminator: - propertyName: type - Allele: - maturity: draft - ga4ghDigest: - prefix: VA - keys: - - location - - state - - type - description: The state of a molecule at a Location. - type: object - properties: - id: - type: string - description: The 'logical' identifier of the entity in the system of record, - e.g. a UUID. This 'id' is unique within a given system. The identified - entity may have a different 'id' in a different system, or may refer to - an 'id' for the shared concept in another system (e.g. a CURIE). - label: - type: string - description: A primary label for the entity. - description: - type: string - description: A free-text description of the entity. - extensions: - type: array - ordered: true - items: - $ref: '#/$defs/Extension' - type: - type: string - const: Allele - default: Allele - description: MUST be "Allele" - digest: - description: A sha512t24u digest created using the VRS Computed Identifier - algorithm. 
- type: string - pattern: ^[0-9A-Za-z_\-]{32}$ - expressions: - type: array - ordered: false - items: - $ref: '#/$defs/Expression' - location: - oneOf: - - $ref: '#/$defs/IRI' - - $ref: '#/$defs/SequenceLocation' - description: The location of the Allele - state: - description: An expression of the sequence state - oneOf: - - $ref: '#/$defs/LengthExpression' - - $ref: '#/$defs/LiteralSequenceExpression' - - $ref: '#/$defs/ReferenceLengthExpression' - required: - - location - - state - additionalProperties: false - Haplotype: - maturity: draft - ga4ghDigest: - prefix: HT - keys: - - members - - type - description: An ordered set of co-occurring variants on the same molecule. - type: object - properties: - id: - type: string - description: The 'logical' identifier of the entity in the system of record, - e.g. a UUID. This 'id' is unique within a given system. The identified - entity may have a different 'id' in a different system, or may refer to - an 'id' for the shared concept in another system (e.g. a CURIE). - label: - type: string - description: A primary label for the entity. - description: - type: string - description: A free-text description of the entity. - extensions: - type: array - ordered: true - items: - $ref: '#/$defs/Extension' - type: - type: string - const: Haplotype - default: Haplotype - description: MUST be "Haplotype" - digest: - description: A sha512t24u digest created using the VRS Computed Identifier - algorithm. - type: string - pattern: ^[0-9A-Za-z_\-]{32}$ - expressions: - type: array - ordered: false - items: - $ref: '#/$defs/Expression' - members: - type: array - ordered: true - minItems: 2 - uniqueItems: false - items: - oneOf: - - $ref: '#/$defs/Adjacency' - - $ref: '#/$defs/Allele' - - $ref: '#/$defs/IRI' - description: A list of Alleles that comprise a Haplotype. Members must share - the same reference sequence as adjacent members. Alleles should not have - overlapping or adjacent coordinates with neighboring Alleles. 
Neighboring - alleles should be ordered by ascending coordinates, unless represented - on a DNA inversion (following an Adjacency with end-defined adjoinedSequences), - in which case they should be ordered in descending coordinates. Sequence - references MUST be consistent for all members between and including the - end of one Adjacency and the beginning of another. - required: - - members - additionalProperties: false - CopyNumberCount: - maturity: draft - ga4ghDigest: - keys: - - copies - - location - - type - prefix: CN - type: object - description: The absolute count of discrete copies of a Location or Gene, within - a system (e.g. genome, cell, etc.). - properties: - id: - type: string - description: The 'logical' identifier of the entity in the system of record, - e.g. a UUID. This 'id' is unique within a given system. The identified - entity may have a different 'id' in a different system, or may refer to - an 'id' for the shared concept in another system (e.g. a CURIE). - label: - type: string - description: A primary label for the entity. - description: - type: string - description: A free-text description of the entity. - extensions: - type: array - ordered: true - items: - $ref: '#/$defs/Extension' - type: - type: string - const: CopyNumberCount - default: CopyNumberCount - description: MUST be "CopyNumberCount" - digest: - description: A sha512t24u digest created using the VRS Computed Identifier - algorithm. - type: string - pattern: ^[0-9A-Za-z_\-]{32}$ - expressions: - type: array - ordered: false - items: - $ref: '#/$defs/Expression' - location: - oneOf: - - $ref: '#/$defs/IRI' - - $ref: '#/$defs/SequenceLocation' - description: A location for which the number of systemic copies is described. 
- copies: - oneOf: - - $ref: '#/$defs/Range' - - type: integer - description: The integral number of copies of the subject in a system - required: - - copies - - location - additionalProperties: false - CopyNumberChange: - maturity: draft - ga4ghDigest: - keys: - - copyChange - - location - - type - prefix: CX - type: object - description: An assessment of the copy number of a Location or a Gene within a - system (e.g. genome, cell, etc.) relative to a baseline ploidy. - properties: - id: - type: string - description: The 'logical' identifier of the entity in the system of record, - e.g. a UUID. This 'id' is unique within a given system. The identified - entity may have a different 'id' in a different system, or may refer to - an 'id' for the shared concept in another system (e.g. a CURIE). - label: - type: string - description: A primary label for the entity. - description: - type: string - description: A free-text description of the entity. - extensions: - type: array - ordered: true - items: - $ref: '#/$defs/Extension' - type: - type: string - const: CopyNumberChange - default: CopyNumberChange - description: MUST be "CopyNumberChange" - digest: - description: A sha512t24u digest created using the VRS Computed Identifier - algorithm. - type: string - pattern: ^[0-9A-Za-z_\-]{32}$ - expressions: - type: array - ordered: false - items: - $ref: '#/$defs/Expression' - location: - oneOf: - - $ref: '#/$defs/IRI' - - $ref: '#/$defs/SequenceLocation' - description: A location for which the number of systemic copies is described. 
- copyChange: - type: string - enum: - - efo:0030069 - - efo:0020073 - - efo:0030068 - - efo:0030067 - - efo:0030064 - - efo:0030070 - - efo:0030071 - - efo:0030072 - description: MUST be one of "efo:0030069" (complete genomic loss), "efo:0020073" - (high-level loss), "efo:0030068" (low-level loss), "efo:0030067" (loss), - "efo:0030064" (regional base ploidy), "efo:0030070" (gain), "efo:0030071" - (low-level gain), "efo:0030072" (high-level gain). - required: - - copyChange - - location - additionalProperties: false - SequenceLocation: - maturity: draft - ga4ghDigest: - keys: - - end - - sequenceReference - - start - - type - prefix: SL - description: A Location defined by an interval on a referenced Sequence. - type: object - properties: - id: - type: string - description: The 'logical' identifier of the entity in the system of record, - e.g. a UUID. This 'id' is unique within a given system. The identified - entity may have a different 'id' in a different system, or may refer to - an 'id' for the shared concept in another system (e.g. a CURIE). - label: - type: string - description: A primary label for the entity. - description: - type: string - description: A free-text description of the entity. - extensions: - type: array - ordered: true - items: - $ref: '#/$defs/Extension' - type: - type: string - const: SequenceLocation - default: SequenceLocation - description: MUST be "SequenceLocation" - digest: - description: A sha512t24u digest created using the VRS Computed Identifier - algorithm. - type: string - pattern: ^[0-9A-Za-z_\-]{32}$ - sequenceReference: - oneOf: - - $ref: '#/$defs/IRI' - - $ref: '#/$defs/SequenceReference' - description: A SequenceReference. - start: - oneOf: - - $ref: '#/$defs/Range' - - type: integer - description: The start coordinate or range of the SequenceLocation. The minimum - value of this coordinate or range is 0. MUST represent a coordinate or range - less than or equal to the value of `end`. 
- end: - oneOf: - - $ref: '#/$defs/Range' - - type: integer - description: The end coordinate or range of the SequenceLocation. The minimum - value of this coordinate or range is 0. MUST represent a coordinate or range - greater than or equal to the value of `start`. - required: [] - additionalProperties: false - SequenceReference: - maturity: draft - ga4ghDigest: - assigned: true - type: object - description: A sequence of nucleic or amino acid character codes. - properties: - id: - type: string - description: The 'logical' identifier of the entity in the system of record, - e.g. a UUID. This 'id' is unique within a given system. The identified - entity may have a different 'id' in a different system, or may refer to - an 'id' for the shared concept in another system (e.g. a CURIE). - label: - type: string - description: A primary label for the entity. - description: - type: string - description: A free-text description of the entity. - extensions: - type: array - ordered: true - items: - $ref: '#/$defs/Extension' - type: - type: string - const: SequenceReference - refgetAccession: - description: A `GA4GH RefGet ` - identifier for the referenced sequence, using the sha512t24u digest. - type: string - pattern: ^SQ.[0-9A-Za-z_\-]{32}$ - residueAlphabet: - type: string - description: The interpretation of the character codes referred to by the - refget accession, where "aa" specifies an amino acid character set, and - "na" specifies a nucleic acid character set. - enum: - - aa - - na - required: - - refgetAccession - additionalProperties: false - SequenceExpression: - description: An expression describing a Sequence. - oneOf: - - $ref: '#/$defs/LiteralSequenceExpression' - - $ref: '#/$defs/ReferenceLengthExpression' - discriminator: - propertyName: type - ReferenceLengthExpression: - maturity: draft - ga4ghDigest: - keys: - - length - - repeatSubunitLength - - type - description: An expression of a length of a sequence from a repeating reference. 
- type: object - properties: - id: - type: string - description: The 'logical' identifier of the entity in the system of record, - e.g. a UUID. This 'id' is unique within a given system. The identified - entity may have a different 'id' in a different system, or may refer to - an 'id' for the shared concept in another system (e.g. a CURIE). - label: - type: string - description: A primary label for the entity. - description: - type: string - description: A free-text description of the entity. - extensions: - type: array - ordered: true - items: - $ref: '#/$defs/Extension' - type: - type: string - const: ReferenceLengthExpression - default: ReferenceLengthExpression - description: MUST be "ReferenceLengthExpression" - length: - oneOf: - - $ref: '#/$defs/Range' - - type: integer - description: The number of residues in the expressed sequence. - sequence: - $ref: '#/$defs/SequenceString' - description: the Sequence encoded by the Reference Length Expression. - repeatSubunitLength: - type: integer - description: The number of residues in the repeat subunit. - required: - - length - - repeatSubunitLength - - type - additionalProperties: false - LengthExpression: - maturity: draft - ga4ghDigest: - keys: - - length - - type - type: object - description: A sequence expressed only by its length. - properties: - id: - type: string - description: The 'logical' identifier of the entity in the system of record, - e.g. a UUID. This 'id' is unique within a given system. The identified - entity may have a different 'id' in a different system, or may refer to - an 'id' for the shared concept in another system (e.g. a CURIE). - label: - type: string - description: A primary label for the entity. - description: - type: string - description: A free-text description of the entity. 
- extensions: - type: array - ordered: true - items: - $ref: '#/$defs/Extension' - type: - type: string - const: LengthExpression - default: LengthExpression - description: MUST be "LengthExpression" - length: - oneOf: - - $ref: '#/$defs/Range' - - type: integer - required: - - type - additionalProperties: false - LiteralSequenceExpression: - maturity: draft - ga4ghDigest: - keys: - - sequence - - type - description: An explicit expression of a Sequence. - type: object - properties: - id: - type: string - description: The 'logical' identifier of the entity in the system of record, - e.g. a UUID. This 'id' is unique within a given system. The identified - entity may have a different 'id' in a different system, or may refer to - an 'id' for the shared concept in another system (e.g. a CURIE). - label: - type: string - description: A primary label for the entity. - description: - type: string - description: A free-text description of the entity. - extensions: - type: array - ordered: true - items: - $ref: '#/$defs/Extension' - type: - type: string - const: LiteralSequenceExpression - default: LiteralSequenceExpression - description: MUST be "LiteralSequenceExpression" - sequence: - $ref: '#/$defs/SequenceString' - description: the literal sequence - required: - - sequence - - type - additionalProperties: false - Range: - maturity: draft - description: An inclusive range of values bounded by one or more integers. - type: array - ordered: true - items: - oneOf: - - type: integer - - type: 'null' - maxItems: 2 - minItems: 2 - Residue: - maturity: draft - description: A character representing a specific residue (i.e., molecular species) - or groupings of these ("ambiguity codes"), using [one-letter IUPAC abbreviations](https://en.wikipedia.org/wiki/International_Union_of_Pure_and_Applied_Chemistry#Amino_acid_and_nucleotide_base_codes) - for nucleic acids and amino acids. 
- type: string - pattern: '[A-Z*\-]' - SequenceString: - maturity: draft - description: "A character string of Residues that represents a biological sequence\ - \ using the conventional sequence order (5\u2019-to-3\u2019 for nucleic acid\ - \ sequences, and amino-to-carboxyl for amino acid sequences). IUPAC ambiguity\ - \ codes are permitted in Sequence Strings." - type: string - pattern: ^[A-Z*\-]*$ - Adjacency: - maturity: draft - ga4ghDigest: - prefix: AJ - keys: - - adjoinedSequences - - linker - - type - description: The `Adjacency` class can represent either the termination of a sequence - or the adjoining of the end of a sequence with the beginning of an adjacent - sequence, potentially with an intervening linker sequence. - type: object - properties: - id: - type: string - description: The 'logical' identifier of the entity in the system of record, - e.g. a UUID. This 'id' is unique within a given system. The identified - entity may have a different 'id' in a different system, or may refer to - an 'id' for the shared concept in another system (e.g. a CURIE). - label: - type: string - description: A primary label for the entity. - description: - type: string - description: A free-text description of the entity. - extensions: - type: array - ordered: true - items: - $ref: '#/$defs/Extension' - type: - type: string - const: Adjacency - default: Adjacency - description: MUST be "Adjacency". - digest: - description: A sha512t24u digest created using the VRS Computed Identifier - algorithm. - type: string - pattern: ^[0-9A-Za-z_\-]{32}$ - expressions: - type: array - ordered: false - items: - $ref: '#/$defs/Expression' - adjoinedSequences: - type: array - uniqueItems: false - ordered: true - items: - oneOf: - - $ref: '#/$defs/IRI' - - $ref: '#/$defs/SequenceLocation' - description: The terminal sequence or pair of adjoined sequences that defines - in the adjacency. - minItems: 1 - maxItems: 2 - linker: - description: The sequence found between adjoined sequences. 
- oneOf: - - $ref: '#/$defs/LengthExpression' - - $ref: '#/$defs/LiteralSequenceExpression' - - $ref: '#/$defs/ReferenceLengthExpression' - required: - - adjoinedSequences - additionalProperties: false diff --git a/schema/vrs b/schema/vrs new file mode 120000 index 0000000..e412cb7 --- /dev/null +++ b/schema/vrs @@ -0,0 +1 @@ +../submodules/vrs/schema/vrs \ No newline at end of file diff --git a/schema/vrs-source.yaml b/schema/vrs-source.yaml deleted file mode 120000 index 6c5c5bf..0000000 --- a/schema/vrs-source.yaml +++ /dev/null @@ -1 +0,0 @@ -../submodules/vrs/schema/vrs-source.yaml \ No newline at end of file diff --git a/schema/vrs.json b/schema/vrs.json deleted file mode 120000 index 68e664b..0000000 --- a/schema/vrs.json +++ /dev/null @@ -1 +0,0 @@ -../submodules/vrs/schema/vrs.json \ No newline at end of file diff --git a/schema/vrs.yaml b/schema/vrs.yaml deleted file mode 120000 index 7d26cf7..0000000 --- a/schema/vrs.yaml +++ /dev/null @@ -1 +0,0 @@ -../submodules/vrs/schema/vrs.yaml \ No newline at end of file diff --git a/submodules/vrs b/submodules/vrs index 62271fe..3ba7ce1 160000 --- a/submodules/vrs +++ b/submodules/vrs @@ -1 +1 @@ -Subproject commit 62271fe053be437d6238b42bd9d40f1a576fa5cf +Subproject commit 3ba7ce11cb84bfdb2c1157d01f3ec0ae133f55fa diff --git a/tests/config.py b/tests/config.py index afa98c8..5ff7c69 100644 --- a/tests/config.py +++ b/tests/config.py @@ -1,16 +1,47 @@ from pathlib import Path +import json +from referencing import Registry, Resource +from referencing.jsonschema import DRAFT202012 +from jsonschema import Draft202012Validator +import re -root_path = Path(__file__).parent.parent +root_path = Path(__file__).parents[1] schema_path = root_path / "schema" -catvar_yaml_path = schema_path / "catvrs-source.yaml" -catvar_json_path = schema_path / "catvrs.json" -catvar_merged_yaml_path = schema_path / "merged.yaml" +catvrs_schema_path = schema_path / "catvrs" +catvrs_jsons_path = catvrs_schema_path / "json" +catvar_yaml_path = 
catvrs_schema_path / "catvrs-source.yaml" test_path = root_path / 'tests' fixtures_path = root_path / 'examples' +ga4gh_re = re.compile(r'.*\/ga4gh\/schema\/([\w\-\.]+)\/[\w\.]+\/(.*)$') -def get_schema_ref(schema_file, schema_class, kw="$defs"): - return { - "$ref": schema_path.as_uri() + f"/{schema_file}.json#/{kw}/{schema_class}" - } + +def retrieve_rel_ref(ga4gh_ref: str): + ga4gh_match = ga4gh_re.match(ga4gh_ref) + if ga4gh_match is None: + raise ValueError(f'ga4gh_ref {ga4gh_ref} is not a root GA4GH reference') + schema_module = ga4gh_match.group(1) + local_path = ga4gh_match.group(2) + resolved_path = (schema_path / schema_module / local_path).resolve() + schema = json.loads(resolved_path.read_text()) + return Resource.from_contents(schema) + + +js_registry = Registry(retrieve=retrieve_rel_ref) +js_def = dict() +validator = dict() + +for schema_path in schema_path.glob('*/json/*'): + content = json.loads(schema_path.read_text()) + schema_uri = schema_path.as_uri() + content['id'] = schema_uri + schema_resource = Resource(contents=content, specification=DRAFT202012) + js_def[schema_path.stem] = content + js_registry = js_registry.with_resources([ + (schema_path.name, schema_resource), + (schema_uri, schema_resource) + ]) + +for cls in js_def: + validator[cls] = Draft202012Validator(js_def[cls], registry=js_registry) \ No newline at end of file diff --git a/tests/test_basic.py b/tests/test_basic.py index 2d247e2..8903419 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -1,30 +1,50 @@ -import json - from jsonschema import validate import yaml from ga4gh.gks.metaschema.tools.source_proc import YamlSchemaProcessor -from config import catvar_json_path, catvar_yaml_path, catvar_merged_yaml_path, fixtures_path, test_path, get_schema_ref +from config import catvar_yaml_path, fixtures_path, test_path, validator # Are the yaml and json parsable and do they match? 
p = YamlSchemaProcessor(catvar_yaml_path) -j = json.load(open(catvar_json_path)) -m = yaml.safe_load(open(catvar_merged_yaml_path)) - - -def test_json_yaml_match(): - assert p.for_js == j, "parsed yaml and json do not match" - - -def test_examples(): - with open(test_path / 'test_definitions.yaml') as def_file: - test_spec = yaml.safe_load(def_file) - for test in test_spec['tests']: - with open(fixtures_path / test['test_file']) as datafile: - data = yaml.safe_load(datafile) - schema = get_schema_ref( - test['schema'], - test['definition'], - test.get('kw', '$defs') - ) - assert validate(data, schema) is None \ No newline at end of file + +def test_yaml_process(): + assert p.for_js, "processor loads and processes yaml" + + +def test_all_value_objects_with_digest_keys(): + for pc in p.processed_classes: + if p.class_is_abstract(pc) or p.class_is_primitive(pc) or not p.class_is_subclass(pc, 'ValueObject'): + continue + pc_properties = set(p.defs[pc]['properties'].keys()) + try: + pc_digest_keys = set(p.defs[pc]['ga4ghDigest']['keys']) + except KeyError: + if p.defs[pc]['ga4ghDigest']['assigned']: + continue + raise KeyError(f'{pc} has no keys defined.') + assert pc_digest_keys <= pc_properties + + +# Does the schema validate against a simple sequence location? 
+def test_simple_sequence_location(): + sl = { + 'sequenceReference': { + 'refgetAccession': 'SQ.9W6SPR3RMCHWCSGJLQHE6KBOD285V5SW', + 'type':'SequenceReference' + }, + 'start': 100, + 'end': [None, 150], + 'type': 'SequenceLocation' + } + validator['SequenceLocation'].validate(sl) + + a = { + 'location': sl, + 'state': { + 'type': 'ReferenceLengthExpression', + 'length': [32, 35], + 'repeatSubunitLength': 3 + }, + 'type': 'Allele' + } + validator['Allele'].validate(a) diff --git a/tests/test_examples.py b/tests/test_examples.py new file mode 100644 index 0000000..2d2ab89 --- /dev/null +++ b/tests/test_examples.py @@ -0,0 +1,16 @@ +from config import test_path, fixtures_path +import yaml +from config import validator + + +def test_examples(): + with open(test_path / 'test_definitions.yaml') as def_file: + test_spec = yaml.safe_load(def_file) + for test in test_spec['tests']: + with open(fixtures_path / test['test_file']) as datafile: + data = yaml.safe_load(datafile) + class_validator = validator[test['definition']] + try: + assert class_validator.validate(data) is None + except AssertionError as e: + raise AssertionError(f"AssertionError in {test['test_file']}: {e}")