diff --git a/.travis.yml b/.travis.yml index fc4ef1812..374e44e2c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,5 +3,13 @@ language: python # python versions to be used for testing python: - "3.6" - - + +install: + - pip install -r requirements.txt + +script: + - cd src + # python test.py + ## temporary patch + - python simple_test.py + - python sample_test.py diff --git a/README.md b/README.md index 334e98825..e92ae8074 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,12 @@ +[![Build Status](https://travis-ci.org/HumanCellAtlas/metadata-schema.svg)](https://travis-ci.org/HumanCellAtlas/metadata-schema) + + # The Human Cell Atlas Metadata Schema -This repo contains the HCA metadata metadata json schemas. +This repo contains the HCA metadata JSON schemas. -The **design-principles** can be read in the [linked google-doc](https://docs.google.com/document/d/1eUVpYDLu2AxmxRw2ZUMM-jpKNxQudJbznNyNRp35nLc/edit?usp=sharing) +The **design-principles** can be read in the [linked Google Doc](https://docs.google.com/document/d/1eUVpYDLu2AxmxRw2ZUMM-jpKNxQudJbznNyNRp35nLc/edit?usp=sharing). -How to contribute is described in [contributing.md](https://github.com/HumanCellAtlas/metadata-schema/blob/master/contributing.md) +How to contribute is described in [contributing.md](https://github.com/HumanCellAtlas/metadata-schema/blob/master/contributing.md). diff --git a/contributing.md b/contributing.md index dbdca4100..d412e43bf 100644 --- a/contributing.md +++ b/contributing.md @@ -2,4 +2,4 @@ This file is a place holder for the update process procedure. -You can read the work in progress in [HCA metadata lifecycle and versioning](https://docs.google.com/document/d/1eUVpYDLu2AxmxRw2ZUMM-jpKNxQudJbznNyNRp35nLc/edit?usp=sharing) +You can read the work in progress in [HCA metadata lifecycle and versioning](https://docs.google.com/document/d/1eUVpYDLu2AxmxRw2ZUMM-jpKNxQudJbznNyNRp35nLc/edit?usp=sharing). 
diff --git a/json_meta_schema/ontology_meta.json b/json_meta_schema/ontology_meta.json new file mode 100644 index 000000000..fb544df31 --- /dev/null +++ b/json_meta_schema/ontology_meta.json @@ -0,0 +1,35 @@ +{ + "graph_restriction": { + "description": "A JSON schema extension field used to specify which ontology terms are valid for a\n field, based on their graph location. A graph consists of edges representing \n classification (subClassOf) and simple existential restrictions \n (e.g. eye subClassOf part_of some head). Graph location is specified via a list \n of objectProperties to follow and a list of grouping classes. \n All ontologies and OWL entities are specified using CURIEs. \n CURIE resolution depends on an external resolution service (e.g. identifiers.org) \n or JSON-LD mapping file. \n", + "type": "object", + "required": [ + "ontologies", + "relations", + "classes", + "include_self", + "direct" + ], + "additionalProperties": false, + "properties": { + "ontologies": { + "description": "A list of valid ontologies. Each ontology is specified using a CURIE.\n", + "type": "array", + "items": "string" + } + }, + "relations": { + "description": "A list of relations (object properties) to use for graph-based term grouping. Each property is specified using a CURIE. type: array Items: string \n" + }, + "classes": { + "description": "A list of grouping classes. 
Each class is specified using a CURIE\n type: array\n items: string \n" + }, + "include_self": { + "description": "A boolean allowing specification of whether the graph query should return the specified grouping class(es).\n", + "type": "boolean" + }, + "direct": { + "description": "A boolean specifying return of all descendant classes (False) or direct child classes only (True).\n", + "type": "boolean" + } + } +} \ No newline at end of file diff --git a/json_meta_schema/ontology_meta.yaml b/json_meta_schema/ontology_meta.yaml new file mode 100644 index 000000000..47860c855 --- /dev/null +++ b/json_meta_schema/ontology_meta.yaml @@ -0,0 +1,38 @@ +graph_restriction: + description: > + A JSON schema extension field used to specify which ontology terms are valid for a + field, based on their graph location. A graph consists of edges representing + classification (subClassOf) and simple existential restrictions + (e.g. eye subClassOf part_of some head). Graph location is specified via a list + of objectProperties to follow and a list of grouping classes. + All ontologies and OWL entities are specified using CURIEs. + CURIE resolution depends on an external resolution service (e.g. identifiers.org) + or JSON-LD mapping file. + # Question: Should the mapping service or file be specified in schema? + type: object + required: [ontologies, relations, classes, include_self, direct] # we could potentially make relations optional if subClassOf-only is default. + additionalProperties: False + properties: + ontologies: + description: > + A list of valid ontologies. Each ontology is specified using a CURIE. + type: array + items: string + relations: + description: > + A list of relations (object properties) to use for graph-based term grouping. Each property is specified using a CURIE. + type: array + Items: string + classes: + description: > + A list of grouping classes. 
Each class is specified using a CURIE + type: array + items: string + include_self: + description: > + A boolean allowing specification of whether the graph query should return the specified grouping class(es). + type: boolean + direct: + description: > + A boolean specifying return of all descendant classes (False) or direct child classes only (True). + type: boolean diff --git a/json_schema/analysis.json b/json_schema/analysis.json index a4dd9ef6d..36cc5fdec 100644 --- a/json_schema/analysis.json +++ b/json_schema/analysis.json @@ -1,142 +1,155 @@ { - "additionalProperties" : true, - "properties" : { - "tasks" : { - "description" : "Descriptions of tasks in the workflow.", - "type" : "array", - "items" : { - "type" : { - "$ref" : "#/definitions/task" + "$schema": "http://json-schema.org/draft-04/schema#", + "additionalProperties": true, + "properties": { + "tasks": { + "description": "Descriptions of tasks in the workflow.", + "type": "array", + "items": { + "type": { + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/analysis.json#/definitions/task" + } } - } - }, - "analysis_id" : { - "description" : "ID give to the analysis run which refers back to the analysis instance in the green box.", - "type" : "number" - }, - "computational_method" : { - "description" : "A URI to a versioned workflow and versioned execution environment in a GA4GH compliant repository.", - "type" : "uri" - }, - "outputs" : { - "items" : { - "type" : { - "$ref" : "#/definitions/file" - } - }, - "description" : "Output generated by the pipeline run.", - "type" : "array" - }, - "input_bundles" : { - "items" : { - "type" : "uri" - }, - "description" : "The input bundles used in this analysis run.", - "type" : "array" - }, - "timestamp_stop_utc" : { - "type" : "date-time", - "description" : "Terminal stop time of the full pipeline." 
- }, - "metadata_schema" : { - "description" : "The version of the metadata schemas used for the json files.", - "type" : "string" - }, - "timestamp_start_utc" : { - "description" : "Initial start time of the full pipeline.", - "type" : "date-time" - }, - "inputs" : { - "items" : { - "type" : { - "$ref" : "#/definitions/parameter" - } - }, - "type" : "array", - "description" : "Input parameters used in the pipeline run, these can be files or string values (settings)." - }, - "analysis_run_type" : { - "enum" : [ - "run", - "copy-forward" - ], - "description" : "Indicator if the run was a actually ran or just copied forward as an optimization.", - "type" : "string" - }, - "reference_bundle" : { - "description" : "Bundle containing the reference used in running the pipeline.", - "type" : "uri" - } - }, - "type" : "object", - "definitions" : { - "file" : { - "additionalProperties" : false, - "required" : [ - "checksum", - "file_path", - "format" - ], - "properties" : { - "format" : "string", - "file_path" : "uri", - "checksum" : "string" - }, - "type" : "object" - }, - "task" : { - "properties" : { - "start_time" : "date-time", - "stop_time" : "date-time", - "name" : "string", - "log_err" : "uri", - "log_out" : "uri", - "disk_size" : "string", - "docker_image" : "string", - "cpus" : "int", - "memory" : "string", - "zone" : "string" - }, - "required" : [ - "name", - "start_time", - "stop_time", - "log", - "disk_size", - "docker_image", - "cpus", - "memory", - "zone" - ], - "additionalProperties" : false, - "type" : "object" - }, - "parameter" : { - "additionalProperties" : false, - "required" : [ - "name", - "value" - ], - "properties" : { - "checksum" : "string", - "value" : "string", - "name" : "string" - }, - "type" : "object" - } - }, - "$schema" : "https://json-schema.org/draft-04/schema#", - "required" : [ - "timestamp_start_utc", - "timestamp_stop_utc", - "computational_method", - "input_bundles", - "reference_bundle", - "analysis_id", - "analysis_run_type", - 
"metadata_schema", - "tasks", - "inputs", - "outputs" - ] -} + }, + "analysis_id": { + "description": "A unique ID for this analysis.", + "type": "number" + }, + "name": { + "description": "A short, descriptive name for the analysis that need not be unique.", + "type": "string" + }, + "description": { + "description": "A general description of the analysis.", + "type": "string" + }, + "computational_method": { + "description": "A URI to a versioned workflow and versioned execution environment in a GA4GH-compliant repository.", + "type": "uri" + }, + "outputs": { + "items": { + "type": { + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/analysis.json#/definitions/file" + } + }, + "description": "Output generated by the pipeline run.", + "type": "array" + }, + "input_bundles": { + "items": { + "type": "uri" + }, + "description": "The input bundles used in this analysis run.", + "type": "array" + }, + "timestamp_stop_utc": { + "type": "date-time", + "description": "Terminal stop time of the full pipeline." + }, + "metadata_schema": { + "description": "The version of the metadata schemas used for the json files.", + "type": "string" + }, + "timestamp_start_utc": { + "description": "Initial start time of the full pipeline.", + "type": "date-time" + }, + "inputs": { + "items": { + "type": { + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/analysis.json#/definitions/parameter" + } + }, + "type": "array", + "description": "Input parameters used in the pipeline run, these can be files or string values (settings)." 
+ }, + "analysis_run_type": { + "enum": [ + "run", + "copy-forward" + ], + "description": "Indicator of whether the analysis actually ran or was just copied forward as an optimization.", + "type": "string" + }, + "reference_bundle": { + "description": "Bundle containing the reference used in running the pipeline.", + "type": "uri" + }, + "core": { + "description": "Type and schema for this object.", + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/core.json" + } + }, + "type": "object", + "definitions": { + "file": { + "additionalProperties": false, + "required": [ + "checksum", + "file_path", + "format" + ], + "properties": { + "format": "string", + "file_path": "uri", + "checksum": "string" + }, + "type": "object" + }, + "task": { + "properties": { + "start_time": "date-time", + "stop_time": "date-time", + "name": "string", + "log_err": "uri", + "log_out": "uri", + "disk_size": "string", + "docker_image": "string", + "cpus": "int", + "memory": "string", + "zone": "string" + }, + "required": [ + "name", + "start_time", + "stop_time", + "log", + "disk_size", + "docker_image", + "cpus", + "memory", + "zone" + ], + "additionalProperties": false, + "type": "object" + }, + "parameter": { + "additionalProperties": false, + "required": [ + "name", + "value" + ], + "properties": { + "checksum": "string", + "value": "string", + "name": "string" + }, + "type": "object" + } + }, + "title": "analysis", + "required": [ + "timestamp_start_utc", + "timestamp_stop_utc", + "computational_method", + "input_bundles", + "reference_bundle", + "analysis_id", + "analysis_run_type", + "metadata_schema", + "tasks", + "inputs", + "outputs" + ] +} \ No newline at end of file diff --git a/json_schema/assay.json b/json_schema/assay.json index ca6d4cb2a..33176fbda 100644 --- a/json_schema/assay.json +++ b/json_schema/assay.json @@ -1,39 +1,46 @@ { - "$schema": "http://json-schema.org/draft-04/schema#", - "additionalProperties": false, - 
"definitions": {}, - "description": "An assay contains Information relevant to how RNA expression levels were assayed or otherwise the sample was converted into digital information", + "$schema": "http://json-schema.org/draft-04/schema#", + "additionalProperties": false, + "definitions": {}, + "description": "An assay contains information relevant to how a sample (in vitro) was converted into digital information (in silico).", "properties": { "core": { - "description": "type and schema for this object", - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/9f75c0b028341324c71ebd5cf1f3ab74e4761bb0/json_schema/core.json" - }, - "id": { - "description": "unique id for this assay", + "description": "Type and schema for this object.", + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/core.json" + }, + "assay_id": { + "description": "A unique ID for this assay.", "type": "string" - }, - "sample_id": { - "description": "id of sample that was assayed", + }, + "name": { + "description": "A short, descriptive name for the assay that need not be unique.", "type": "string" - }, + }, + "description": { + "description": "A general description of the assay.", + "type": "string" + }, + "imaging": { + "description": "Information on image based RNA quantification assays", + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/imaging.json" + }, "rna": { - "description": "Information about how RNA was converted to DNA or otherwise prepared for assay", - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/787f6175423222eee531e52834dc3749c2bd3f4e/json_schema/rna.json" - }, + "description": "Information about how RNA was converted to cDNA.", + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/rna.json" + }, "seq": { - "description": "Information about how a sample was sequenced.", - "$ref": 
"https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/787f6175423222eee531e52834dc3749c2bd3f4e/json_schema/seq.json" - }, + "description": "Information about how a cDNA sample was sequenced.", + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/seq.json" + }, "single_cell": { - "description": "Contains information on single cell aspects of an assay.", - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/787f6175423222eee531e52834dc3749c2bd3f4e/json_schema/single_cell.json" + "description": "Information on single-cell aspects of an assay.", + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/single_cell.json" } - }, + }, "required": [ - "core", - "id", - "sample_id" + "core", + "assay_id" ], - "title": "assay", + "title": "assay", "type": "object" } diff --git a/json_schema/assay_bundle.json b/json_schema/assay_bundle.json new file mode 100644 index 000000000..b43060463 --- /dev/null +++ b/json_schema/assay_bundle.json @@ -0,0 +1,36 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "description": "A schema for an assay bundle", + "type": "array", + "definitions": { + "assay_ingest": { + "type": "object", + "required": [ + "hca_ingest", + "content" + ], + "properties": { + "hca_ingest": { + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/ingest.json", + "description": "core fields added by HCA ingest service", + "type": "object" + }, + "content": { + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/assay.json", + "description": "Assay content", + "type": "object" + }, + "derivation_protocols": { + "items": { + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/protocol.json", + "description": "An array of protocols used in derivation of this sample." 
+ }, + "type": "array" + } + } + } + }, + "items": { + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/assay_bundle.json#/definitions/assay_ingest" + } +} \ No newline at end of file diff --git a/json_schema/barcode.json b/json_schema/barcode.json index 2ccd5d42b..37ee70394 100644 --- a/json_schema/barcode.json +++ b/json_schema/barcode.json @@ -1,41 +1,39 @@ { - "$schema": "http://json-schema.org/draft-04/schema#", - "additionalProperties": false, - "description": "This object describes where a particular type of barcode is in a read", + "$schema": "http://json-schema.org/draft-04/schema#", + "additionalProperties": false, + "description": "This object describes where a particular type of barcode is in a read.", "properties": { "offset": { - "description": "Offset in read of barcode. 0 for beginning of read", - "maximum": 50, - "minimum": 0, + "description": "Offset in read of barcode. 0 for beginning of read.", + "maximum": 50, + "minimum": 0, "type": "integer" - }, + }, "size": { - "description": "Size of barcode.", - "maximum": 50, - "minimum": 0, + "description": "Size of barcode in nucleotides.", + "maximum": 50, + "minimum": 0, "type": "integer" - }, + }, "read": { - "description": "Which read barcode is found on. Is either \"read1,\" \"read2,\" \"reads\" or \"index\" or some TBD field of seq.lane.", + "description": "The read that the barcode is found in. Should be one of Read 1, Read 2, i7 Index, or i5 Index.", "enum": [ - "r1", - "r2", - "i1" + "Read 1", + "Read 2", + "i7 Index", + "i5 Index" ] - }, - "white_list_file": { - "description": "Name of file containing legitimate bar code sequences. Unused for randomly generated barcodes." - }, - "core": { - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/9f75c0b028341324c71ebd5cf1f3ab74e4761bb0/json_schema/core.json" + }, + "white_list_file": { + "description": "Name of file containing legitimate barcode sequences. 
Unused for randomly generated barcodes.", + "type": "string" } - }, + }, "required": [ - "core", - "offset", - "size", - "read" - ] , - "title": "barcode", + "offset", + "size", + "read" + ], + "title": "barcode", "type": "object" -} +} \ No newline at end of file diff --git a/json_schema/cell_line.json b/json_schema/cell_line.json deleted file mode 100644 index 9f1592565..000000000 --- a/json_schema/cell_line.json +++ /dev/null @@ -1,62 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-04/schema#", - "additionalProperties": false, - "definitions": {}, - "description": "Information about the cell line used in the sample", - "properties": { - "catalog_number": { - "description": "the supplier catalogue number for the cell line", - "type": "string" - }, - "catalog_url": { - "description": "the supplier catalogue url for the cell line", - "type": "string" - }, - "cell_type": { - "description": "what cell type was the line derived from, CLO ontology usually", - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/9f75c0b028341324c71ebd5cf1f3ab74e4761bb0/json_schema/ontology.json" - }, - "core": { - "description": "type and schema for this object", - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/9f75c0b028341324c71ebd5cf1f3ab74e4761bb0/json_schema/core.json" - }, - "culture_protocol": { - "description": "pointer to the culture protocol", - "pattern": "^P-....-.*$", - "type": "string" - }, - "date_established": { - "description": "when was the cell line established", - "pattern": "^....-..-..$", - "type": "string" - }, - "derived_from": { - "description": "Which other biosample is this cell_line derived from, if available", - "type": "string" - }, - "disease": { - "description": "text describing disease association, preferable a disease found in EFO", - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/9f75c0b028341324c71ebd5cf1f3ab74e4761bb0/json_schema/ontology.json" - }, - "karyotype": { - 
"description": "the karyotype of the cell line", - "type": "string" - }, - "name": { - "description": "what is the official name for the cell line?", - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/9f75c0b028341324c71ebd5cf1f3ab74e4761bb0/json_schema/ontology.json" - }, - "passage_number": { - "description": "how many passages the cell line as been through", - "maximum": 1000, - "minimum": 0, - "type": "integer" - }, - "publication": { - "description": "what is the official name for the cell line?", - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/07599dd75f61ec1ea86a720542f1c2ba5ffd80d0/json_schema/publication.json" - } - }, - "title": "cell_line", - "type": "object" -} diff --git a/json_schema/cell_suspension.json b/json_schema/cell_suspension.json new file mode 100644 index 000000000..7bdae59dc --- /dev/null +++ b/json_schema/cell_suspension.json @@ -0,0 +1,31 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "description": "Information about the cell suspension derived from the collected or cultured specimen", + "properties": { + "target_cell_type": { + "description": "Cell types present in the suspension.", + "type": "array", + "items": { + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/ontology_json/cell_type_ontology.json" + } + }, + "enrichment": { + "description": "How sample was enriched for specific cell type(s).", + "type": "array", + "items": { + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/enrichment.json" + } + }, + "total_estimated_cells": { + "description": "Total estimated number of cells in sample. 
May be 1 for well-based assays.", + "maximum": 1000000000.0, + "minimum": 0.1, + "type": "number" + }, + "well": { + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/well.json", + "description": "Information about wells in a plate or chip used for single-cell isolation." + } + }, + "title": "cell_suspension" +} \ No newline at end of file diff --git a/json_schema/contact.json b/json_schema/contact.json index d720d8aa6..b873fd7cf 100644 --- a/json_schema/contact.json +++ b/json_schema/contact.json @@ -1,120 +1,117 @@ { - "$schema": "http://json-schema.org/draft-04/schema#", - "additionalProperties": false, - "definitions": {}, - "description": "contact schema generate by tagSchemaToJson from contact", + "$schema": "http://json-schema.org/draft-04/schema#", + "additionalProperties": false, + "definitions": {}, + "description": "contact schema generate by tagSchemaToJson from contact", "properties": { "address": { - "description": "Full mailing address except for name. Includes city, state, postal code, country.", + "description": "Full mailing address where contact works. Should include street name and number, city, state, postal code, country.", "type": "string" - }, + }, "city": { - "description": "City name. See also contact.address", + "description": "Name of city where contact works.", "type": "string" - }, - "core": { - "description": "type and schema for this object", - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/9f75c0b028341324c71ebd5cf1f3ab74e4761bb0/json_schema/core.json" - }, + }, "country": { - "description": "Country (USA or UK for those countries, otherwise spelled out)", + "description": "Name of country where contact works. 
Please spell out (except for USA or UK).", "enum": [ - "Austria", - "Canada", - "China", - "France", - "Germany", - "Japan", - "Sweden", - "Switzerland", - "UK", + "Austria", + "Canada", + "China", + "France", + "Germany", + "Israel", + "Japan", + "Sweden", + "Switzerland", + "UK", "USA" ] - }, + }, "country_division": { - "description": "State in the USA, provence, canton, or other subdivisions for other countries.", + "description": "Name of state, province, canton, or other country subdivision.", "enum": [ - "AL", - "AK", - "AR", - "AZ", - "CA", - "CO", - "CT", - "DE", - "FL", - "GA", - "HI", - "ID", - "IA", - "IL", - "IN", - "KS", - "KY", - "LA", - "MA", - "MD", - "ME", - "MI", - "MN", - "MO", - "MS", - "MT", - "NC", - "ND", - "NE", - "NH", - "NJ", - "NM", - "NV", - "NY", - "OH", - "OK", - "OR", - "PA", - "RI", - "SC", - "SD", - "TN", - "TX", - "UT", - "VT", - "VA", - "WA", - "WV", - "WI", - "XY", - "FU", - "PR", + "AL", + "AK", + "AR", + "AZ", + "CA", + "CO", + "CT", + "DE", + "FL", + "GA", + "HI", + "ID", + "IA", + "IL", + "IN", + "KS", + "KY", + "LA", + "MA", + "MD", + "ME", + "MI", + "MN", + "MO", + "MS", + "MT", + "NC", + "ND", + "NE", + "NH", + "NJ", + "NM", + "NV", + "NY", + "OH", + "OK", + "OR", + "PA", + "RI", + "SC", + "SD", + "TN", + "TX", + "UT", + "VT", + "VA", + "WA", + "WV", + "WI", + "XY", + "FU", + "PR", "VI" ] - }, + }, "email": { - "description": "Email of contact for contributingauthors", - "pattern": "^.*@.*..*$", + "description": "An email address for the contact.", + "pattern": "^.*@.*..*$", "type": "string" - }, - "id": { - "description": "describe_me_please", - "type": "string" - }, + }, "institution": { - "description": "Name of primary institute where contact works", + "description": "Name of primary institute where contact works.", "type": "string" - }, + }, "laboratory": { - "description": "Name of lab (often the PI name) within institute where contact works", + "description": "Name of lab (often the PI name) within institute where 
contact works.", "type": "string" - }, + }, "name": { - "description": "Text formatted with first name, middle, last in that order separated by commas. Usually middle is just an initial.", + "description": "The contact's name. Should be in the format first, middle, last name. Middle can be initial or left blank. e.g. John,D,Doe or Jane,,Smith", + "type": "string" + }, + "phone": { + "description": "Phone number (including country code) of contact or contact's lab.", "type": "string" } - }, + }, "required": [ - "core", - "name" - ], - "title": "contact", + "name", + "email" + ], + "title": "contact", "type": "object" -} +} \ No newline at end of file diff --git a/json_schema/core.json b/json_schema/core.json index 43ba5e26f..7e3f1da8b 100644 --- a/json_schema/core.json +++ b/json_schema/core.json @@ -1,47 +1,29 @@ { - "$schema": "http://json-schema.org/draft-04/schema#", - "additionalProperties": false, - "description": "core object found in all modules", + "$schema": "http://json-schema.org/draft-04/schema#", + "description": "Core object found in all modules.", "properties": { "schema_url": { - "description": "URL of json schema document that validates this version of this module.", + "description": "URL of JSON schema document that validates this version of this module.", "type": "string" - }, + }, "schema_version": { - "description": "Version number in major.minor.patch format. Example 3.2.4. Major version changes likely require change to software. Patches are just documentation changes", - "pattern": "^...*..*$", + "description": "Version number in major.minor.patch format. e.g. 3.2.4. Major version changes likely require changes to software. Minor version changes should not require software changes. Patches are just documentation changes.", + "pattern": "^...*..*$", "type": "string" - }, + }, "type": { - "description": "A name for the type of object/module. 
By convention also the file name (without dir or extension) of json schema or entity level metadata file", + "description": "The name of the core metadata entity type.", "enum": [ - "assay", - "barcode", - "cell_line", - "contact", - "core", - "death", - "donor", - "enrichment", - "imaging", - "lane", - "preservation", - "project", - "protocol", - "publication", - "rna", - "sample", - "seq", - "single_cell", - "well" + "assay", + "project", + "protocol", + "sample", + "file", + "analysis" ] } - }, + }, "required": [ - "type", - "schema_version", - "schema_url" - ], - "title": "core", - "type": "object" -} + "type" + ] +} \ No newline at end of file diff --git a/json_schema/death.json b/json_schema/death.json index 9bf1856a7..f316c5748 100644 --- a/json_schema/death.json +++ b/json_schema/death.json @@ -1,46 +1,39 @@ { - "$schema": "http://json-schema.org/draft-04/schema#", - "additionalProperties": false, - "definitions": {}, - "description": "death schema generate by tagSchemaToJson from death", + "$schema": "http://json-schema.org/draft-04/schema#", + "additionalProperties": false, + "definitions": {}, + "description": "Information relating to the death of the donor", "properties": { "cause_of_death": { - "description": "Cause of death from death report for human donor, from research lab for mouse", - "type": "string" - }, + "description": "Cause of death from death report for human donor, from research lab for mouse.", + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/ontology.json" + }, "cold_perfused": { - "description": "If yes, was perfused with cold fluid to help preserve tissues before heart stopped", - "enum": [ - "no", - "yes" - ] - }, - "core": { - "description": "type and schema for this object", - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/9f75c0b028341324c71ebd5cf1f3ab74e4761bb0/json_schema/core.json" - }, + "description": "Yes if perfused with cold fluid to help preserve 
tissues before heart stopped. No otherwise.", + "type": "boolean" + }, "days_on_ventilator": { - "description": "Days on ventilator before dying", - "maximum": 10000, - "minimum": 0, + "description": "Days on ventilator before dying.", + "maximum": 10000, + "minimum": 0, "type": "number" - }, + }, "hardy_scale": { - "description": "ventilator case, fast death -violent, fast-death -natural causes, intermediate death, slow death", - "maximum": 4, - "minimum": 0, + "description": "Should be integer representing: (0) ventilator case, (1) violent and fast death, (2) fast death of natural causes, (3) intermediate death, or (4) slow death.", + "maximum": 4, + "minimum": 0, "type": "integer" - }, + }, "time_of_death": { - "description": "Date and time of death on death certificate for deceased donor", - "pattern": "^....-..-.. ..:..:..$", + "description": "Date and time of death on death certificate for deceased donor.", + "format": "date-time", "type": "string" } - }, + }, "required": [ - "core", - "cause_of_death" - ], - "title": "death", + "cause_of_death", + "time_of_death" + ], + "title": "death", "type": "object" -} +} \ No newline at end of file diff --git a/json_schema/donor.json b/json_schema/donor.json index a751da5c9..e76d3992d 100644 --- a/json_schema/donor.json +++ b/json_schema/donor.json @@ -1,129 +1,123 @@ { - "$schema": "http://json-schema.org/draft-04/schema#", - "additionalProperties": false, - "definitions": {}, - "description": "donor schema generate by tagSchemaToJson from donor", + "$schema": "http://json-schema.org/draft-04/schema#", + "description": "Information about the donor from which a specimen was collected", + "additionalProperties": false, "properties": { "age": { - "description": "Age in age_units. Expressed since birth. For embryos measured since fertilization.", - "maximum": 150, - "minimum": 0, + "description": "Age in age_units. For embryos, measured since fertilization. 
For all others, measured since birth.", - "maximum": 150, - "minimum": 0, + "maximum": 150, + "minimum": 0, "type": "number" - }, + }, "age_unit": { - "description": "Unit age is expressed, typically day, week, month, or year", + "description": "The unit in which age is expressed. Must be one of hour, day, week, or year.", "enum": [ - "hour", - "day", - "week", + "hour", + "day", + "week", "year" ] - }, + }, + "alcohol_history": { + "description": "Number of drinks consumed on a typical day.", + "type": "string" + }, "ancestry": { - "description": "term from https://www.ebi.ac.uk/ols/ontologies/ancestro", - "type": "array", - "items": { - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/9f75c0b028341324c71ebd5cf1f3ab74e4761bb0/json_schema/ontology.json" - } - }, + "description": "An array of ontology terms from EMBL-EBI's Ancestry Ontology describing ancestral groups, uncategorised ancestral groups, and population isolates.", + "type": "array", + "items": { + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/ontology.json" + } + }, "body_mass_index": { - "description": "Body mass index of donor", - "maximum": 100, - "minimum": 5, + "description": "The body mass index of the donor.", + "maximum": 100, + "minimum": 5, "type": "number" - }, - "core": { - "description": "type and schema for this object", - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/9f75c0b028341324c71ebd5cf1f3ab74e4761bb0/json_schema/core.json" - }, + }, "death": { - "description": "Information about conditions of death (or info that donor was living at time of collection)", - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/07599dd75f61ec1ea86a720542f1c2ba5ffd80d0/json_schema/death.json" - }, + "description": "Information about conditions of death (or info that donor was living at time of collection).", + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/death.json" + }, 
"development_stage": { - "description": "More detailed (especially for embryos) version of life_stage. Might be \"E9\" or \"P17\" for a mouse.", - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/9f75c0b028341324c71ebd5cf1f3ab74e4761bb0/json_schema/ontology.json" - }, + "description": "More detailed (especially for embryos) version of life_stage. e.g. \"E9\" or \"P17\" for mouse.", + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/ontology.json" + }, "disease": { - "description": "Short description of disease status of individual.", - "type": "array", - "items": { - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/9f75c0b028341324c71ebd5cf1f3ab74e4761bb0/json_schema/ontology.json" - } - }, + "description": "Short description of disease status of individual.", + "type": "array", + "items": { + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/ontology_json/disease_ontology.json" + } + }, "genotype": { - "description": "Genotype. List of genetic changes from usual. Should be strain, cross and genetic modification info in MGI format for mice.", + "description": "Genotype of donor including strain, cross, and genetic modification information. Must be in MGI format for mice.", "type": "string" - }, + }, "height": { - "description": "Height of donor in meters", - "maximum": 10, - "minimum": 0, + "description": "Height of donor in meters.", + "maximum": 10, + "minimum": 0, "type": "number" - }, - "id": { - "description": "A unique ID for this donor", - "type": "string" - }, + }, "is_living": { - "description": "If yes donor is living at time of sample donation.", - "enum": [ - "no", - "yes" - ] - }, + "description": "Should be yes if donor is living at time of sample donation. 
Otherwise, should be no.", + "type": "boolean" + }, "life_stage": { - "description": "describe_me_please", + "description": "Should be one of adult, child, embryo, or postpartum.", "enum": [ - "adult", - "cell line", - "child", - "embryo", - "organoid", + "adult", + "child", + "embryo", "postpartum" ] - }, + }, "medication": { - "description": "List of medications donor is on", - "type": "array", - "items": { - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/9f75c0b028341324c71ebd5cf1f3ab74e4761bb0/json_schema/ontology.json" - } - }, + "description": "List of medications the donor was currently taking at time of sample donation.", + "type": "array", + "items": { + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/ontology.json" + } + }, + "nutritional_state": { + "description": "Should be one of normal, fasting, or feeding tube removed.", + "enum": [ + "normal", + "fasting", + "feeding tube removed" + ] + }, "sex": { - "description": "Sex of donor. Either 'male' 'female' 'mixed' or 'unknown'", + "description": "Sex of donor. Should be one of male, female, mixed, or unknown.", "enum": [ - "female", - "male", - "mixed", + "female", + "male", + "mixed", "unknown" ] - }, - "species": { - "description": "Scientific binomial name of donor species in text field, NCBI taxon in ontology field", - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/9f75c0b028341324c71ebd5cf1f3ab74e4761bb0/json_schema/ontology.json" - }, - "strain": { - "description": "Mouse inbred strain. 
Example C57BL/6.", - "type": "array", - "items": { - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/9f75c0b028341324c71ebd5cf1f3ab74e4761bb0/json_schema/ontology.json" - } - }, - "submitted_id": { - "description": "Free format string to help link back to submitter's donor database.", + }, + "smoking_history": { + "description": "Estimated number of cigarettes smoked per day and for how many years.", "type": "string" - }, + }, + "strain": { + "description": "The name of the mouse inbred strain. e.g. C57BL/6.", + "type": "array", + "items": { + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/ontology.json" + } + }, "weight": { - "description": "Weight of donor in kg", - "maximum": 1000, - "minimum": 0, + "description": "Weight of donor in kilograms.", + "maximum": 1000, + "minimum": 0, "type": "number" } - }, + }, "required": [ - "core", "species", "is_living" + "is_living" ], - "title": "donor", + "title": "donor", "type": "object" -} +} \ No newline at end of file diff --git a/json_schema/enrichment.json b/json_schema/enrichment.json index be05998b4..9e39fe9bd 100644 --- a/json_schema/enrichment.json +++ b/json_schema/enrichment.json @@ -1,51 +1,36 @@ { - "$schema": "http://json-schema.org/draft-04/schema#", - "description": "enrichment schema generate by tagSchemaToJson from enrichment", - "items": { - "additionalProperties": false, - "properties": { - "core": { - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/9f75c0b028341324c71ebd5cf1f3ab74e4761bb0/json_schema/core.json", - "description": "type and schema for this object" - }, - "markers": { - "description": "Text in the format of space delimited list of markers with +/-. 
Example: \"CD4+ CD8-\"", - "type": "string" - }, - "protocol": { - "description": "Refers to an enrichment protocol", - "items": { - "pattern": "^P-....-.*$", - "type": "string" - }, - "type": "array" - }, - "size_max": { - "description": "Maximum size passing selection in microns", - "maximum": 10000, - "minimum": 0.01, - "type": "number" - }, - "size_min": { - "description": "Minimum size passing selection in microns", - "maximum": 10000, - "minimum": 0.01, - "type": "number" - }, - "type": { - "description": "How enrichment was achieved. MACS, FACS, filter, etc", - "enum": [ - "FACS", - "MACS", - "Ficoll gradient" - ] - } - }, - "type": "object" - }, + "$schema": "http://json-schema.org/draft-04/schema#", + "description": "enrichment schema generate by tagSchemaToJson from enrichment", + "additionalProperties": false, + "properties": { + "markers": { + "description": "Space-delimited list of markers (with +/-) used for enrichment. e.g. CD4+ CD8-", + "type": "string" + }, + "size_max": { + "description": "Maximum size passing selection, in microns.", + "maximum": 10000, + "minimum": 0.01, + "type": "number" + }, + "size_min": { + "description": "Minimum size passing selection, in microns.", + "maximum": 10000, + "minimum": 0.01, + "type": "number" + }, + "type": { + "description": "The method by which enrichment was achieved. e.g. 
MACS, FACS, Ficoll gradient", + "enum": [ + "FACS", + "MACS", + "Ficoll gradient" + ] + } + }, "required": [ "type" - ], - "title": "enrichment", + ], + "title": "enrichment", "type": "object" -} +} \ No newline at end of file diff --git a/json_schema/file.json b/json_schema/file.json new file mode 100644 index 000000000..f3e15b643 --- /dev/null +++ b/json_schema/file.json @@ -0,0 +1,31 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "additionalProperties": false, + "description": "Information about the data files produced from single cells.", + "title": "file", + "type": "object", + "properties": { + "filename": { + "description": "A filename for a data file. Must have a valid extension.", + "pattern": "^.*.fastq$|^.*.fastq.gz$|^.*.jpg$|^.*.jpeg$|^.*.tiff$|^.*.png$", + "type": "string" + }, + "file_format": { + "description": "The format that the data file is in. Must be one of: fastq, fastq.gz, tiff, jpg, png.", + "enum": [ + "fastq", + "fastq.gz", + "tiff", + "jpg", + "png" + ] + }, + "core": { + "description": "Type and schema for this object.", + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/core.json" + } + }, + "required": [ + "filename" + ] +} \ No newline at end of file diff --git a/json_schema/imaging.json b/json_schema/imaging.json index 23e9517b5..9269f11f2 100644 --- a/json_schema/imaging.json +++ b/json_schema/imaging.json @@ -1,65 +1,61 @@ { - "$schema": "http://json-schema.org/draft-04/schema#", - "additionalProperties": false, - "definitions": {}, - "description": "imaging schema generate by tagSchemaToJson from imaging", + "$schema": "http://json-schema.org/draft-04/schema#", + "additionalProperties": false, + "definitions": {}, + "description": "imaging schema generate by tagSchemaToJson from imaging", "properties": { - "core": { - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/9f75c0b028341324c71ebd5cf1f3ab74e4761bb0/json_schema/core.json" - }, "embedding": 
{ - "description": "Description of embedding conditions", + "description": "Description of embedding conditions", "type": "string" - }, + }, "exposure_time": { - "description": "Exposure time as a floating point number in units of seconds", - "maximum": 1000, - "minimum": 1e-09, + "description": "Exposure time as a floating point number in units of seconds", + "maximum": 1000, + "minimum": 1e-09, "type": "number" - }, + }, "field_counts": { - "description": "number of fields in x and y dimensions", + "description": "number of fields in x and y dimensions", "items": { "type": "integer" - }, + }, "type": "array" - }, + }, "field_microns": { - "description": "microns covered by a field in x,y, and z. Z includes all focal planes in a single file", + "description": "microns covered by a field in x,y, and z. Z includes all focal planes in a single file", "items": { "type": "integer" - }, + }, "type": "array" - }, + }, "field_resolution": { - "description": "x, y, and z (number of focal planes) resolution of an individual field", + "description": "x, y, and z (number of focal planes) resolution of an individual field", "items": { "type": "integer" - }, + }, "type": "array" - }, + }, "fixation": { - "description": "Description of fixation conditions", + "description": "Description of fixation conditions", "type": "string" - }, + }, "microscope": { - "description": "Microscope used for imaging", + "description": "Microscope used for imaging", "enum": [ - "generic confocal", + "generic confocal", "generic two photon" ] - }, + }, "probes": { - "description": "A file containing information on probe sequence, genes they cover, and colors", + "description": "A file containing information on probe sequence, genes they cover, and colors", "type": "string" } - }, + }, "required": [ - "core", "field_counts", - "field_resolution", - "probes" - ], - "title": "imaging", + "field_resolution", + "probes" + ], + "title": "imaging", "type": "object" -} +} \ No newline at end of file diff 
--git a/json_schema/immortalized_cell_line.json b/json_schema/immortalized_cell_line.json new file mode 100644 index 000000000..adfb1d834 --- /dev/null +++ b/json_schema/immortalized_cell_line.json @@ -0,0 +1,48 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "additionalProperties": false, + "description": "Information about the immortalized cell line used in the sample", + "properties": { + "catalog_number": { + "description": "The supplier catalogue number for the cell line.", + "type": "string" + }, + "catalog_url": { + "description": "The supplier catalogue URL for the cell line.", + "type": "string" + }, + "cell_type": { + "description": "What cell type the line was derived from. CLO ontology.", + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/ontology_json/cell_type_ontology.json" + }, + "date_established": { + "description": "When the cell line was established.", + "format": "date-time", + "type": "string" + }, + "disease": { + "description": "A disease associated with the cell line. EFO ontology.", + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/ontology_json/disease_ontology.json" + }, + "karyotype": { + "description": "The karyotype of the cell line.", + "type": "string" + }, + "passage_number": { + "description": "The number of passages the cell line has been through.", + "maximum": 1000, + "minimum": 0, + "type": "integer" + }, + "publication": { + "description": "The publication in which the cell line creation was cited.", + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/publication.json" + }, + "cell_cycle": { + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/ontology.json", + "description": "The cell cycle phase, if known." 
+ } + }, + "title": "immortalized_cell_line", + "type": "object" +} \ No newline at end of file diff --git a/json_schema/ingest.json b/json_schema/ingest.json new file mode 100644 index 000000000..2a41ccad4 --- /dev/null +++ b/json_schema/ingest.json @@ -0,0 +1,39 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "additionalProperties": false, + "description": "Information added or generated at ingest time.", + "properties": { + "submissionDate": { + "description": "When project was first submitted to database.", + "format": "date-time", + "type": "string" + }, + "submitter_id": { + "description": "ID of contact who first submitted project", + "type": "string" + }, + "updateDate": { + "description": "When project was last updated", + "format": "date-time", + "type": "string" + }, + "updater_id": { + "description": "ID of contact who last updated project", + "type": "string" + }, + "document_id": { + "description": "Identifier for document.", + "comment": "This structure supports the current ingest API. 
It may change in future.", + "type": "string", + "pattern": ".{8}-.{4}-.{4}-.{4}-.{12}" + }, + "accession": { + "description": "A unique accession for this entity, provided by the broker.", + "type": "string" + } + }, + "required": [ + "document_id", + "submissionDate" + ] +} \ No newline at end of file diff --git a/json_schema/ontology.json b/json_schema/ontology.json index dc7075f63..b25986af3 100644 --- a/json_schema/ontology.json +++ b/json_schema/ontology.json @@ -1,20 +1,19 @@ { - "$schema": "http://json-schema.org/draft-04/schema#", - "additionalProperties": false, - "description": "A term that may be associated with an ontology term", + "$schema": "http://json-schema.org/draft-04/schema#", + "description": "A term that may be associated with an ontology term", "properties": { "text": { - "description": "The text for the term as the user provides it.", + "description": "The text for the term as the user provides it.", "type": "string" - }, + }, "ontology": { - "description": "An optional ontology reference in format where prefix_ indicates which ontology", + "description": "An optional ontology reference in format where prefix_ indicates which ontology", "type": "string" } - }, + }, "required": [ "text" - ], - "title": "ontology", + ], + "title": "ontology", "type": "object" -} +} \ No newline at end of file diff --git a/json_schema/ontology_json/body_part_ontology.json b/json_schema/ontology_json/body_part_ontology.json new file mode 100644 index 000000000..4c1cd7b41 --- /dev/null +++ b/json_schema/ontology_json/body_part_ontology.json @@ -0,0 +1,27 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "additionalProperties": false, + "description": "A term that may be associated with an anatomy-related ontology term", + "properties": { + "text": { + "description": "The text for the term as the user provides it.", + "type": "string" + }, + "ontology": { + "description": "An optional ontology reference in format where prefix_ indicates which 
ontology", + "type": "string", + "graph_restriction": { + "ontologies" : ["obo:uberon", "obo:efo"], + "classes": ["obo:UBERON_0000465"], + "relations": ["rdfs:subClassOf"], + "direct": false, + "include_self": true + } + } + }, + "required": [ + "text" + ], + "title": "body_part_ontology", + "type": "object" +} diff --git a/json_schema/ontology_json/cell_type_ontology.json b/json_schema/ontology_json/cell_type_ontology.json new file mode 100644 index 000000000..9b17220d9 --- /dev/null +++ b/json_schema/ontology_json/cell_type_ontology.json @@ -0,0 +1,27 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "additionalProperties": false, + "description": "A term that may be associated with a cell type-related ontology term", + "properties": { + "text": { + "description": "The name of a cell type supplied by a user.", + "type": "string" + }, + "ontology": { + "description": "An ontology term identifier in the form prefix:accession", + "type": "string", + "graph_restriction": { + "ontologies" : ["obo:cl", "obo:efo"], + "classes": ["obo:CL_0000003"], + "relations": ["rdfs:subClassOf"], + "direct": false, + "include_self": true + } + } + }, + "required": [ + "text" + ], + "title": "cell_type_ontology", + "type": "object" +} \ No newline at end of file diff --git a/json_schema/ontology_json/disease_ontology.json b/json_schema/ontology_json/disease_ontology.json new file mode 100644 index 000000000..fd974576e --- /dev/null +++ b/json_schema/ontology_json/disease_ontology.json @@ -0,0 +1,20 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "additionalProperties": false, + "description": "A term that may be associated with a disease-related ontology term", + "properties": { + "text": { + "description": "The text for the term as the user provides it.", + "type": "string" + }, + "ontology": { + "description": "An optional ontology reference in format where prefix_ indicates which ontology", + "type": "string" + } + }, + "required": [ + "text" + ], + 
"title": "disease_ontology", + "type": "object" +} \ No newline at end of file diff --git a/json_schema/organoid.json b/json_schema/organoid.json new file mode 100644 index 000000000..c89cf21ba --- /dev/null +++ b/json_schema/organoid.json @@ -0,0 +1,14 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "description": "A description of an organoid sample.", + "properties": { + "model_for_organ": { + "description": "Organ that this organoid is a model system for.", + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/ontology.json" + } + }, + "title": "organoid", + "required": [ + "model_for_organ" + ] +} \ No newline at end of file diff --git a/json_schema/preservation.json b/json_schema/preservation.json deleted file mode 100644 index b3753ca0d..000000000 --- a/json_schema/preservation.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-04/schema#", - "additionalProperties": false, - "definitions": {}, - "description": "State of body part at collection and how it was preserved after removal and/or cell enrichment", - "properties": { - "autolysis_score": { - "description": "State of tissue breakdown due to self-digestion: none, mild, moderate", - "enum": [ - "none", - "mild", - "moderate" - ] - }, - "core": { - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/9f75c0b028341324c71ebd5cf1f3ab74e4761bb0/json_schema/core.json" - }, - "gross_description": { - "description": "Color, size and other aspects of specimen as visible to naked eye", - "type": "string" - }, - "gross_image": { - "description": "Photographs of body part without magnification", - "items": { - "pattern": "^.*.jpg$|^.*.jpeg$|^.*.tiff$|^.*.png$", - "type": "string" - }, - "type": "array" - }, - "ischemic_time": { - "description": "Time in seconds when body part had insufficient blood supply.", - "maximum": 100000, - "minimum": 0, - "type": "integer" - }, - "microscopic_description": { - 
"description": "How the sample looks under the microscope and how it compares with normal cells", - "type": "string" - }, - "microscopic_image": { - "description": "Photographs of body part under microscope", - "items": { - "pattern": "^.*.jpg$|^.*.jpeg$|^.*.tiff$|^.*.png$", - "type": "string" - }, - "type": "array" - }, - "postmortem_interval": { - "description": "Time in seconds between when death declared and time the tissue is preserved or processed", - "maximum": 100000, - "minimum": 0, - "type": "integer" - }, - "preservation_protocol": { - "description": "Refers to a protocol that describes the use of chemicals, cold, or other means to prevent or retard biological or physical deterioration of sample.", - "pattern": "^P-....-.*$", - "type": "string" - }, - "storage_protocol": { - "description": "Refers to a protocol that says how sample was stored after preservation", - "pattern": "^P-....-.*$", - "type": "string" - } - }, - "required": [ - "core" - ], - "title": "preservation", - "type": "object" -} diff --git a/json_schema/primary_cell_line.json b/json_schema/primary_cell_line.json new file mode 100644 index 000000000..c3312e93a --- /dev/null +++ b/json_schema/primary_cell_line.json @@ -0,0 +1,32 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "additionalProperties": false, + "description": "Information about the primary cell line used in the sample", + "properties": { + "cell_type": { + "description": "The cell type that the cell line was derived from. Should be a CLO ontology.", + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/ontology_json/cell_type_ontology.json" + }, + "date_established": { + "description": "When the cell line was established, in date-time format.", + "format": "date-time", + "type": "string" + }, + "disease": { + "description": "Free text describing any disease association to the cell type. 
Should be found in EFO ontology.", + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/ontology_json/disease_ontology.json" + }, + "passage_number": { + "description": "The number of passages the cell line has been through.", + "maximum": 1000, + "minimum": 0, + "type": "integer" + }, + "cell_cycle": { + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/ontology.json", + "description": "The cell cycle phase if the cell line is synchronized growing cells or the phase is known." + } + }, + "title": "primary_cell_line", + "type": "object" +} \ No newline at end of file diff --git a/json_schema/project.json b/json_schema/project.json index 01158e792..1e67fedf1 100644 --- a/json_schema/project.json +++ b/json_schema/project.json @@ -1,131 +1,106 @@ { - "$schema": "http://json-schema.org/draft-04/schema#", - "additionalProperties": false, - "definitions": {}, - "description": "project schema generate by tagSchemaToJson from project", + "$schema": "http://json-schema.org/draft-04/schema#", + "additionalProperties": false, + "definitions": {}, + "description": "project schema generate by tagSchemaToJson from project", "patternProperties": { "^characteristics_.*$": { - "description": "FIelds that begin with characteristics_ can contain any value, and the field name can be anything after the prefix", + "description": "Fields that begin with characteristics_ can contain any value, and the field name can be anything after the prefix", "type": "string" } - }, + }, "properties": { + "project_id": { + "description": "A unique ID for this project.", + "type": "string" + }, + "name": { + "description": "A short, descriptive name for the project that need not be unique.", + "type": "string" + }, + "description": { + "description": "A general description of the project.", + "type": "string" + }, "array_express_investigation": { - "description": "EBI Array Express investigation accession", - "pattern": 
"^E-....-.*$" - }, + "description": "An EBI ArrayExpress investigation accession.", + "pattern": "^E-....-.*$", + "type": "string" + }, "contributors": { - "description": "List of scientists contributing to project", + "description": "List of people contributing to the project.", "items": { - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/07599dd75f61ec1ea86a720542f1c2ba5ffd80d0/json_schema/contact.json" - }, + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/contact.json" + }, "type": "array" - }, + }, "core": { - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/9f75c0b028341324c71ebd5cf1f3ab74e4761bb0/json_schema/core.json", + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/core.json", "description": "type and schema for this object" - }, - "ddjb_trace": { - "description": "Japanese trace archive project accession.", - "pattern": "^ERP.*$", - "type": "string" - }, - "description": { - "description": "A paragraph or so description of the the project", - "type": "string" - }, + }, "experimental_design": { "items": { - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/9f75c0b028341324c71ebd5cf1f3ab74e4761bb0/json_schema/ontology.json", - "description": "A short description of overall experiment type, such as \"single cell RNA sequencing.\"" - }, + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/ontology.json", + "description": "A short description of overall experiment type. e.g. \"single cell RNA sequencing.\"" + }, "type": "array" - }, + }, "experimental_factor_name": { - "description": "A list of the factors that vary between samples in the experiment.", + "description": "A list of the factors that vary between samples in the experiment. e.g. 
\"time since collection\", \"preservation method\"", "items": { "type": "string" - }, + }, "type": "array" - }, + }, "geo_series": { - "description": "NCBI GEO series accession", - "pattern": "^GSE.*$", + "description": "An NCBI GEO series accession.", + "pattern": "^GSE.*$", "type": "string" - }, - "id": { - "description": "A unique ID for project, may be GEO or Array Express ID for imported projects", + }, + "insdc_project": { + "description": "An INSDC (International Nucleotide Sequence Database Collaboration) project accession. Can be from the DDBJ, EMBL-EBI, or NCBI. Accession must start with DRP, ERP, or SRP.", + "pattern": "^[D|E|S]RP[0-9]+$", "type": "string" - }, - "ncbi_bioproject": { - "description": "NCBI bioproject ID", - "pattern": "^PRJNA.*$", + }, + "insdc_study": { + "description": "An INSDC (International Nucleotide Sequence Database Collaboration) study accession. Can be from the DDBJ, EMBL-EBI, or NCBI. Accession must start with PRJE, PRJN, or PRJD", + "pattern": "^PRJ[E|N|D][a-zA-Z][0-9]+$", "type": "string" - }, - "protocols": { - "items": { - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/07599dd75f61ec1ea86a720542f1c2ba5ffd80d0/json_schema/protocol.json", - "description": "An array of protocol modules. Assay and project protocols reference this." - }, - "type": "array" - }, + }, "publications": { "items": { - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/07599dd75f61ec1ea86a720542f1c2ba5ffd80d0/json_schema/publication.json", + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/publication.json", "description": "An array of publication modules." 
- }, + }, "type": "array" - }, + }, "related_projects": { - "description": "List of other projects that may be logically grouped with this one.", + "description": "A list of other projects that may be logically grouped with this one.", "items": { "type": "string" - }, + }, "type": "array" - }, - "sra_project": { - "description": "NCBI SRA project accession", - "pattern": "^SRP.*$", - "type": "string" - }, - "submit_date": { - "description": "When project was first submitted to database.", - "pattern": "^2...-..-..$", - "type": "string" - }, - "submitter": { - "description": "Contact who first submitted project", - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/07599dd75f61ec1ea86a720542f1c2ba5ffd80d0/json_schema/contact.json" - }, + }, + "submitters": { + "description": "List of people submitting data to the project.", + "items": { + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/contact.json" + }, + "type": "array" + }, "supplementary_files": { - "description": "Project level supplementary files such as experimental design documents, lab spreadsheets, manuscripts in preparation.", + "description": "Project-level supplementary files. e.g. 
experimental design documents, lab spreadsheets, manuscripts in preparation.", "items": { "type": "string" - }, + }, "type": "array" - }, - "title": { - "description": "A sentence long or so title for the project", - "type": "string" - }, - "update_date": { - "description": "When project was last updated", - "pattern": "^2...-..-..$", - "type": "string" - }, - "updater": { - "description": "Contact who last updated project", - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/07599dd75f61ec1ea86a720542f1c2ba5ffd80d0/json_schema/contact.json" } - }, + }, "required": [ - "core", - "contributors", - "description", - "id", - "submitter", - "title" - ], - "title": "project", + "core", + "contributors", + "project_id" + ], + "title": "project", "type": "object" -} +} \ No newline at end of file diff --git a/json_schema/project_bundle.json b/json_schema/project_bundle.json new file mode 100644 index 000000000..c60499a01 --- /dev/null +++ b/json_schema/project_bundle.json @@ -0,0 +1,29 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "description": "A schema for a project bundle", + "type": "array", + "definitions": { + "project_ingest": { + "type": "object", + "required": [ + "hca_ingest", + "content" + ], + "properties": { + "hca_ingest": { + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/ingest.json", + "description": "core fields added by HCA ingest service", + "type": "object" + }, + "content": { + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/project.json", + "description": "Project content", + "type": "object" + } + } + } + }, + "items": { + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/project_bundle.json#/definitions/project_ingest" + } +} \ No newline at end of file diff --git a/json_schema/protocol.json b/json_schema/protocol.json index 6929a97f3..581fe3670 100644 --- 
a/json_schema/protocol.json +++ b/json_schema/protocol.json @@ -1,62 +1,48 @@ { - "$schema": "http://json-schema.org/draft-04/schema#", - "additionalProperties": false, - "definitions": {}, - "description": "protocol schema generate by tagSchemaToJson from protocol", + "$schema": "http://json-schema.org/draft-04/schema#", + "additionalProperties": false, + "definitions": {}, + "description": "protocol schema generate by tagSchemaToJson from protocol", "properties": { - "batch_number": { - "description": "the batch of kit used", + "protocol_id": { + "description": "A unique ID for this protocol.", "type": "string" - }, - "core": { - "description": "type and schema for this object", - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/9f75c0b028341324c71ebd5cf1f3ab74e4761bb0/json_schema/core.json" - }, + }, + "name": { + "description": "A short, descriptive name for the protocol that need not be unique.", + "type": "string" + }, "description": { - "description": "a text based desciption about the protocol", + "description": "A general description of the protocol.", "type": "string" - }, - "id": { - "description": "Unique ID for this protocol.", - "pattern": "^.*-....-.*$", + }, + "batch_number": { + "description": "The batch of kit used.", "type": "string" - }, + }, + "core": { + "description": "type and schema for this object", + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/core.json" + }, "pdf": { - "description": "A pdf containing the details of the protocol", - "pattern": "^.*.pdf$", + "description": "A filename of a PDF containing the details of the protocol.", + "pattern": "^.*.pdf$", "type": "string" - }, + }, "retail_name": { - "description": "the retail name of the kit used e.g SureCell WTA 3\u2032 Library Prep Kit", + "description": "The retail name of the kit used. 
e.g SureCell WTA 3' Library Prep Kit", "type": "string" - }, - "submit_date": { - "description": "When protocol was first submitted to database.", - "pattern": "^2...-..-..$", - "type": "string" - }, - "submitter_id": { - "description": "who created this entry", - "type": "string" - }, + }, "type": { - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/9f75c0b028341324c71ebd5cf1f3ab74e4761bb0/json_schema/ontology.json", - "description": "Type of protocol, ideally an EFO term" - }, - "update_date": { - "description": "When protocol was last updated", - "pattern": "^2...-..-..$", - "type": "string" - }, - "updater_id": { - "description": "who last updated this entry", - "type": "string" + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/ontology.json", + "description": "The type of protocol. Ideally an EFO term." } - }, + }, "required": [ - "core", - "id" - ], - "title": "protocol", + "core", + "protocol_id", + "type" + ], + "title": "protocol", "type": "object" -} +} \ No newline at end of file diff --git a/json_schema/publication.json b/json_schema/publication.json index 3c4008992..87ea18210 100644 --- a/json_schema/publication.json +++ b/json_schema/publication.json @@ -1,39 +1,36 @@ { - "$schema": "http://json-schema.org/draft-04/schema#", - "additionalProperties": false, - "description": "A publication contains information about a journal article, book, web page or other external available documentation on a project", + "$schema": "http://json-schema.org/draft-04/schema#", + "additionalProperties": false, + "description": "A publication contains information about a journal article, book, web page or other external available documentation on a project", "properties": { "authors": { - "description": "Authors associated with piblication. 
Each is in last-name initials format such as: \"Tran NM\" or \"Kowalczyk M\" or \"de Silva-Schmidt CKJ\"", + "description": "A list of authors associated with the publication. Should be in \"surname initials\" format. e.g. \"Tran NM\", \"Kowalczyk M\", \"de Silva-Schmidt CKJ\"", "items": { "type": "string" - }, + }, "type": "array" - }, - "core": { - "description": "type and schema for this object", - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/9f75c0b028341324c71ebd5cf1f3ab74e4761bb0/json_schema/core.json" - }, + }, "doi": { - "description": "Publication digital object identifier. Example \"10.1016/j.cell.2016.07.054\"", + "description": "The publication digital object identifier (doi), if available. e.g. \"10.1016/j.cell.2016.07.054\"", "type": "string" - }, + }, "pmid": { - "description": "PubMed ID of publication", + "description": "A PubMed ID of the publication, if available. e.g. 27565351", "type": "integer" - }, + }, "title": { - "description": "Title of publication", + "description": "The full title of the publication.", "type": "string" - }, + }, "url": { - "description": "URL, preferably not behind a paywall, for publication", + "description": "A URL, preferably not behind a paywall, for the publication.", "type": "string" } - }, + }, "required": [ - "core" - ], - "title": "publication", + "authors", + "title" + ], + "title": "publication", "type": "object" -} +} \ No newline at end of file diff --git a/json_schema/rna.json b/json_schema/rna.json index f27723c2d..2124c6265 100644 --- a/json_schema/rna.json +++ b/json_schema/rna.json @@ -1,73 +1,70 @@ { - "$schema": "http://json-schema.org/draft-04/schema#", - "additionalProperties": false, - "definitions": {}, - "description": "Information about how RNA was converted to DNA or otherwise prepared for assay", + "$schema": "http://json-schema.org/draft-04/schema#", + "additionalProperties": false, + "definitions": {}, + "description": "Information about how RNA was converted to 
cDNA for sequencing.", "properties": { - "core": { - "description": "type and schema for this object", - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/9f75c0b028341324c71ebd5cf1f3ab74e4761bb0/json_schema/core.json" - }, "end_bias": { - "description": "\"3' bias\" or \"5' bias\" or \"3' end\" or \"5' end\" or \"none\"", + "description": "The type of end bias the library has. Must be one of 3' bias, 5' bias, 3' end, 5' end, or none.", "enum": [ - "three_prime_end", - "three_prime_bias", - "five_prime_end", - "five_prime_bias", - "full_transcript" + "three_prime_end", + "three_prime_bias", + "five_prime_end", + "five_prime_bias", + "none" ] - }, + }, "library_construction": { - "description": "Something like smart-seq, true-seq.", + "description": "The general approach for sequencing library construction. e.g. Smart-seq, Drop-seq, 10x.", "enum": [ - "unknown", - "CEL-seq", - "SMARTer Ultra Low RNA Kit", - "modified smart-seq2", + "unknown", + "CEL-seq", + "SMARTer Ultra Low RNA Kit", + "modified smart-seq2", "smart-seq2", - "QUARTZ-Seq", - "10x_v2", - "drop-seq", - "inDrop" + "QUARTZ-Seq", + "10x_v2", + "drop-seq", + "inDrop" ] - }, + }, "library_protocol": { - "description": "RNA preparation protocol", - "pattern": "^P-....-.*$", + "description": "RNA preparation protocol", + "pattern": "^P-....-.*$", "type": "string" - }, + }, "primer": { - "description": "Primer for cDNA synthesis from RNA. Usually random or poly-dt", + "description": "Primer used for cDNA synthesis from RNA. Must be either poly-dT or random.", "enum": [ - "poly-dt", + "poly-dT", "random" ] - }, + }, "spike_in": { - "description": "Name of RNA spike in kit. Usually ERCC", + "description": "Name of RNA spike-in kit. e.g. 
ERCC.", "enum": [ "ERCC" ] - }, + }, "spike_in_dilution": { - "description": "Dilution of RNA spike in", - "maximum": 1000000, - "minimum": 1, + "description": "Dilution of RNA spike-in.", + "maximum": 1000000, + "minimum": 1, "type": "integer" - }, + }, "strand": { - "description": "Single stranded?", + "description": "Single stranded?", "enum": [ - "both", + "both", "first" ] } - }, + }, "required": [ - "core", - "library_construction" - ], - "title": "rna", + "end_bias", + "library_construction", + "strand" + ], + "title": "rna", "type": "object" -} +} \ No newline at end of file diff --git a/json_schema/sample.json b/json_schema/sample.json index 496f27988..c96549a10 100644 --- a/json_schema/sample.json +++ b/json_schema/sample.json @@ -1,149 +1,126 @@ { - "$schema": "http://json-schema.org/draft-04/schema#", - "additionalProperties": false, - "description": "A sample contains information on the biosample that was sequenced or imaged. This includes donor, body part, and anything that comes between removing the sample from a body and the assay.", + "$schema": "http://json-schema.org/draft-04/schema#", + "description": "A sample contains information on the biosample that was sequenced or imaged. 
This includes donor, body part, and anything that comes between removing the sample from a donor and the assay.", + "additionalProperties": false, "patternProperties": { "^characteristics_.*$": { - "description": "FIelds that begin with sample.characteristics_ can contain any value, and the field name can be anything after the prefix", + "description": "Fields that begin with characteristics_ can contain any value, and the field name can be anything after the prefix.", "type": "string" } - }, + }, + "allOf": [ + { + "required": [ + "sample_id", + "core", + "ncbi_taxon_id" + ] + } + ], + "oneOf": [ + { + "required": [ + "donor" + ] + }, + { + "required": [ + "immortalized_cell_line" + ] + }, + { + "required": [ + "cell_suspension" + ] + }, + { + "required": [ + "organoid" + ] + }, + { + "required": [ + "primary_cell_line" + ] + }, + { + "required": [ + "specimen_from_organism" + ] + } + ], "properties": { - "biosd_sample": { - "description": "EBI biosample ID", - "type": "string" - }, - "body_part": { - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/9f75c0b028341324c71ebd5cf1f3ab74e4761bb0/json_schema/ontology.json", - "description": "A more detailed position within the organ where body came from." - }, - "cell_cycle": { - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/9f75c0b028341324c71ebd5cf1f3ab74e4761bb0/json_schema/ontology.json", - "description": "If sample is synchronized growing cells or a cell in a known phase of cell cycle, put which phase here" - }, - "cell_line": { - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/787f6175423222eee531e52834dc3749c2bd3f4e/json_schema/cell_line.json", - "description": "Description of cell line if any that this came from." 
- }, "core": { - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/9f75c0b028341324c71ebd5cf1f3ab74e4761bb0/json_schema/core.json", - "description": "type and schema for this object" - }, - "culture_type": { - "description": "either \"primary culture,\" \"tissue\" or \"cell_line\"", - "enum": [ - "cell line", - "primary culture", - "tissue" - ] - }, - "disassociation_protocol": { - "description": "How body part was treated to disassociate it into cells that can float individually in solution in sorter or microfluidics device.", - "pattern": "^P-....-.*$", + "description": "Type and schema for this object.", + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/core.json" + }, + "sample_id": { + "description": "A unique ID for this sample.", "type": "string" - }, - "donor": { - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/787f6175423222eee531e52834dc3749c2bd3f4e/json_schema/donor.json", - "description": "Information on the donors that the sample came from." - }, - "ena_sample": { - "description": "European nucleotide archive sample ID", - "pattern": "^ERS.*$", - "type": "string" - }, - "enrichment": { - "description": "How cells were enriched for specific type or to weed out common types after disassociation.", - "items": { - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/787f6175423222eee531e52834dc3749c2bd3f4e/json_schema/enrichment.json" - }, - "type": "array" - }, - "geo_sample": { - "description": "NCBI GEO sample accession", - "pattern": "^GSM.*$", - "type": "string" - }, - "id": { - "description": "A unique ID for this sample", - "type": "string" - }, + }, "name": { - "description": "A short descriptive name for sample. 
Should be enough to distinguish it from other samples in the project", - "type": "string" - }, - "ncbi_biosample": { - "description": "NCBI biosample ID", + "description": "A short, descriptive name for the sample that need not be unique.", "type": "string" - }, - "organ": { - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/9f75c0b028341324c71ebd5cf1f3ab74e4761bb0/json_schema/ontology.json", - "description": "The organ that the sample came from. \"Blood\" and \"connective tissue\" count as organs." - }, - "preservation": { - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/787f6175423222eee531e52834dc3749c2bd3f4e/json_schema/preservation.json", - "description": "State of body_part at collection and how it was preserved after removal." - }, - "project_id": { - "description": "The id field of the project that this connects to", + }, + "description": { + "description": "A general description of the sample.", "type": "string" - }, - "protocol_ids": { - "description": "Array of protocol IDs from project.protocols used for this sample.", - "items": { - "pattern": "^.*-....-.*$", - "type": "string" - }, - "type": "array" - }, - "submit_date": { - "description": "When project was first submitted to database.", - "pattern": "^2...-..-..$", - "type": "string" - }, - "submitter_id": { - "description": "ID of contact who first submitted project", - "type": "string" - }, + }, + "donor": { + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/donor.json" + }, + "specimen_from_organism": { + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/specimen_from_organism.json" + }, + "immortalized_cell_line": { + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/immortalized_cell_line.json" + }, + "primary_cell_line": { + "$ref": 
"https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/primary_cell_line.json" + }, + "organoid": { + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/organoid.json" + }, + "cell_suspension": { + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/cell_suspension.json" + }, + "ncbi_taxon_id": { + "description": "A taxonomy ID (taxonID) from NCBI.", + "type": "integer" + }, + "genus_species": { + "description": "Scientific binomial name of donor species. e.g. Homo sapiens.", + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/ontology.json" + }, + "derived_from": { + "type": "string", + "description": "If this sample is derived from another sample e.g. a tissue or donor, enter the sample_id for the sample this was derived from", + "comment": "This field is primarily required for spreadsheet based submissions" + }, "supplementary_files": { - "description": "Sample level supplementary files.", + "description": "A list of filenames of sample-level supplementary files.", "items": { "type": "string" - }, + }, "type": "array" - }, - "title": { - "description": "A descriptive phrase or sentence, ideally about 60 characters.", - "type": "string" - }, - "total_estimated_cells": { - "description": "Total estimated number of cells in sample. 
May be 1 for well based assays.", - "maximum": 1000000000.0, - "minimum": 0.1, - "type": "number" - }, - "update_date": { - "description": "When project was last updated", - "pattern": "^2...-..-..$", - "type": "string" - }, - "updater_id": { - "description": "ID of contact who last updated project", - "type": "string" - }, - "well": { - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/787f6175423222eee531e52834dc3749c2bd3f4e/json_schema/well.json", - "description": "If sample was isolated in a well in plate (or fluidics chip), information about that" + }, + "sample_accessions": { + "type": "object", + "description": "One or more accession numbers from a standard archive.", + "additionalProperties": false, + "properties": { + "biosd_sample": { + "description": "A DDBJ, NCBI, or EBI BioSample ID. Accessions must start with SAMD, SAMN, or SAME.", + "pattern": "^SAM[N|E|D][0-9]+$", + "type": "string" + }, + "insdc_sample": { + "description": "An INSDC (International Nucleotide Sequence Database Collaboration) sample accession. Can be from the DDBJ, EMBL-EBI, or NCBI. 
Accession must start with DRS, ERS, or SRS.", + "pattern": "^[D|E|S]RS[0-9]+$", + "type": "string" + } + } } - }, - "required": [ - "body_part", - "core", - "donor", - "id", - "name", - "organ", - "project_id" - ], - "title": "sample", - "type": "object" -} + }, + "title": "sample" +} \ No newline at end of file diff --git a/json_schema/sample_bundle.json b/json_schema/sample_bundle.json new file mode 100644 index 000000000..efd25a478 --- /dev/null +++ b/json_schema/sample_bundle.json @@ -0,0 +1,39 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "description": "A schema for a sample bundle", + "type": "array", + "definitions": { + "sample_ingest": { + "type": "object", + "required": [ + "hca_ingest", + "content" + ], + "properties": { + "hca_ingest": { + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/ingest.json", + "description": "core fields added by HCA ingest service", + "type": "object" + }, + "content": { + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/sample.json", + "description": "Sample content", + "type": "object" + }, + "derived_from": { + "type": "string" + }, + "derivation_protocols": { + "items": { + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/protocol.json", + "description": "An array of protocols used in derivation of this sample." 
+ }, + "type": "array" + } + } + } + }, + "items": { + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/sample_bundle.json#/definitions/sample_ingest" + } +} \ No newline at end of file diff --git a/json_schema/seq.json b/json_schema/seq.json index ffd48532c..931e918d4 100644 --- a/json_schema/seq.json +++ b/json_schema/seq.json @@ -1,134 +1,120 @@ { - "$schema": "http://json-schema.org/draft-04/schema#", - "additionalProperties": false, + "$schema": "http://json-schema.org/draft-04/schema#", + "additionalProperties": false, "definitions": { "lanes": { "items": { - "additionalProperties": false, + "additionalProperties": false, "properties": { "number": { - "description": "Which lane number", - "type": "number" - }, + "description": "Which lane number", + "type": "number" + }, "r1": { - "description": "File name of first read in paired read sequencing assay, or only read for unpaired", + "description": "File name of first read in paired-end read sequencing assay, or only read for unpaired.", "type": "string" - }, + }, "r2": { - "description": "File name of second read in paired read assay", + "description": "File name of second read in paired-end read assay.", "type": "string" - }, - "i1": { - "description": "A barcode index file. Often index off of first read", - "type": "string" - }, - "i2": { - "description": "A barcode index file. Often index off of second read", - "type": "string" - } - }, - "required": [ - "r1" - ], - "title": "a sequencing lane", + }, + "i1": { + "description": "A barcode index file. Often index off of first read.", + "type": "string" + }, + "i2": { + "description": "A barcode index file. 
Often index off of second read.", + "type": "string" + } + }, + "required": [ + "r1" + ], + "title": "A sequencing lane.", "type": "object" - }, - "title": "lane array", + }, + "title": "lane array", "type": "array" } - }, - "description": "Information about how a sample was sequenced.", + }, + "description": "Information about how a sample was sequenced.", "properties": { - "core": { - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/9f75c0b028341324c71ebd5cf1f3ab74e4761bb0/json_schema/core.json" - }, - "ena_experiment": { - "description": "European Nucleotide Archive experiment accession", - "pattern": "^ERX.*$", - "type": "string" - }, - "ena_run": { - "description": "European Nucleotide Archive run accession", - "pattern": "^ERR.*$", + "insdc_experiment": { + "description": "An INSDC (International Nucleotide Sequence Database Collaboration) experiment accession. Accession must start with DRX, ERX, or SRX.", + "pattern": "^[D|E|S]RX[0-9]+$", "type": "string" - }, + }, + "insdc_run": { + "description": "An INSDC (International Nucleotide Sequence Database Collaboration) run accession. Accession must start with DRR, ERR, or SRR.", + "items": { + "pattern": "^[D|E|S]RR[0-9]+$", + "type": "string" + }, + "type": "array" + }, "instrument_model": { - "description": "Examples: HiSeq 200, Pac Bio SEQuel", + "description": "The model of the sequencer used. e.g. HiSeq 2000, Sequel.", "enum": [ - "Illumina HiSeq 2000", - "Illumina HiSeq 2500", - "Illumina HiSeq 4000", - "Illumina MiSeq", - "Illumina NextSeq 500" + "HiSeq 2000", + "HiSeq 2500", + "HiSeq 4000", + "MiSeq", + "NextSeq 500", + "Sequel" ] - }, + }, "instrument_platform": { - "description": "Examples: Illumina, Ion Torrent, Pac Bio", + "description": "The sequencing platform used. e.g. 
Illumina, Ion Torrent, Pacific Biosciences.", "enum": [ - "Illumina" + "Illumina", + "Ion Torrent", + "Pacific Biosciences" ] - }, + }, "lanes": { - "$ref": "#/definitions/lanes" - }, + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/seq.json#/definitions/lanes" + }, "library_construction": { - "description": "How dna sequencing library was prepared from sample or rna library. Examples: \"Nextera XT\" \"TrueSeq\"", + "description": "How the DNA sequencing library was prepared. e.g. Nextera XT, TruSeq.", "enum": [ - "Nextera XT", - "TruSeq", + "Nextera XT", + "TruSeq", "modified Nextera XT" ] - }, + }, "library_protocol": { - "description": "DNA sequencing library preparation protocol", - "pattern": "^P-....-.*$", + "description": "DNA sequencing library preparation protocol.", + "pattern": "^P-....-.*$", "type": "string" - }, + }, "local_machine_name": { - "description": "Local lab name for particular machine of given platform and model this was run on.", + "description": "Local name for the particular machine on which the sample was sequenced.", "type": "string" - }, + }, "molecule": { - "description": "RNA, DNA, or protein, or more specifically \"total RNA,\" \"genomic DNA,\" etc.", + "description": "Specific type of molecule sequenced. e.g. total RNA, genomic DNA, polyA RNA.", "enum": [ - "RNA", - "polyA RNA", - "total RNA" + "polyA RNA", + "total RNA", + "genomic DNA" ] - }, + }, "paired_ends": { - "description": "Is a paired end sequencing strategy used? Values are yes/no", - "enum": [ - "no", - "yes" - ] - }, - "sra_experiment": { - "description": "NCBI Short Read Archive experiment accession (SRX)", - "pattern": "^SRX.*$", - "type": "string" - }, - "sra_run": { - "description": "NCBI Short Read Archive run accession (SRR)", - "items": { - "pattern": "^SRR.*$", - "type": "string" - }, - "type": "array" - }, + "description": "Was a paired-end sequencing strategy used? 
Must be either true or false.", + "type": "boolean" + }, "umi_barcode": { - "description": "Information about unique molecular identifier barcode", - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/07599dd75f61ec1ea86a720542f1c2ba5ffd80d0/json_schema/barcode.json" + "description": "Information about unique molecular identifier (UMI) barcode sequences.", + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/barcode.json" } - }, + }, "required": [ - "core", - "instrument_model", - "instrument_platform", - "lanes", - "molecule", + "instrument_model", + "instrument_platform", + "lanes", + "molecule", "paired_ends" - ], - "title": "seq", + ], + "title": "seq", "type": "object" -} +} \ No newline at end of file diff --git a/json_schema/single_cell.json b/json_schema/single_cell.json index aca115029..5b217cc97 100644 --- a/json_schema/single_cell.json +++ b/json_schema/single_cell.json @@ -1,33 +1,28 @@ { - "$schema": "http://json-schema.org/draft-04/schema#", - "additionalProperties": false, - "definitions": {}, - "description": "Contains information on single cell aspects of an assay.", + "$schema": "http://json-schema.org/draft-04/schema#", + "additionalProperties": false, + "description": "Contains information on single cell aspects of an assay.", "properties": { - "barcode": { - "description": "Information about cell identifier barcode", - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/07599dd75f61ec1ea86a720542f1c2ba5ffd80d0/json_schema/barcode.json" + "cell_barcode": { + "description": "Information about cell identifier barcode.", + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/barcode.json" }, "cell_handling": { - "description": "How cells are separated. FACS, drop-seq, tenX_v2, Fluidigm C1, etc", + "description": "How cells are separated. e.g. 
FACS, drop-seq, 10X_v2, Fluidigm C1.", "enum": [ - "10x_v2", - "FACS", - "Fluidigm C1", - "drop-seq", - "inDrop", - "mouth pipette", + "10x_v2", + "FACS", + "Fluidigm C1", + "drop-seq", + "inDrop", + "mouth pipette", "bulk" ] - }, - "core": { - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/9f75c0b028341324c71ebd5cf1f3ab74e4761bb0/json_schema/core.json" } - }, + }, "required": [ - "cell_handling", - "core" - ], - "title": "single_cell", + "cell_handling" + ], + "title": "single_cell", "type": "object" -} +} \ No newline at end of file diff --git a/json_schema/specimen_from_organism.json b/json_schema/specimen_from_organism.json new file mode 100644 index 000000000..f5797f920 --- /dev/null +++ b/json_schema/specimen_from_organism.json @@ -0,0 +1,25 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "additionalProperties": false, + "description": "Information about the specimen that was extracted from the donor organism.", + "title": "specimen_from_organism", + "type": "object", + "properties": { + "body_part": { + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/ontology_json/body_part_ontology.json", + "description": "A more detailed position within the body than the term given in the organ field." + }, + "organ": { + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/ontology.json", + "description": "The organ that the sample came from. e.g. liver, spleen. Blood and connective tissue count as organs." + }, + "state_of_specimen": { + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/state_of_specimen.json", + "description": "State of body_part at collection and how it was preserved after removal." 
+ } + }, + "required": [ + "body_part", + "organ" + ] +} \ No newline at end of file diff --git a/json_schema/state_of_specimen.json b/json_schema/state_of_specimen.json new file mode 100644 index 000000000..4867f0cb4 --- /dev/null +++ b/json_schema/state_of_specimen.json @@ -0,0 +1,54 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "additionalProperties": false, + "definitions": {}, + "description": "State of body part at collection and how it was preserved after removal and/or cell enrichment", + "properties": { + "autolysis_score": { + "description": "State of tissue breakdown due to self-digestion. Must be one of: none, mild, moderate.", + "enum": [ + "none", + "mild", + "moderate" + ] + }, + "gross_description": { + "description": "Color, size, and other aspects of specimen as visible to naked eye.", + "type": "string" + }, + "gross_image": { + "description": "List of filenames of photographs of body part without magnification. Must be of format JPEG, TIFF, or PNG.", + "items": { + "pattern": "^.*.jpg$|^.*.jpeg$|^.*.tiff$|^.*.png$", + "type": "string" + }, + "type": "array" + }, + "ischemic_time": { + "description": "Duration of time, in seconds, that the body part had insufficient blood supply.", + "maximum": 100000, + "minimum": 0, + "type": "integer" + }, + "microscopic_description": { + "description": "How the sample looks under the microscope and how it compares with normal cells.", + "type": "string" + }, + "microscopic_image": { + "description": "List of filenames of photographs of body part under microscope. 
Must be of format JPEG, TIFF, or PNG.", + "items": { + "pattern": "^.*.jpg$|^.*.jpeg$|^.*.tiff$|^.*.png$", + "type": "string" + }, + "type": "array" + }, + "postmortem_interval": { + "description": "Duration of time, in seconds, between when death is declared and when the tissue is preserved or processed.", + "maximum": 100000, + "minimum": 0, + "type": "integer" + } + }, + "title": "state_of_specimen", + "type": "object" +} \ No newline at end of file diff --git a/json_schema/submission.json b/json_schema/submission.json index 97a63153b..020b93d16 100644 --- a/json_schema/submission.json +++ b/json_schema/submission.json @@ -1,87 +1,87 @@ { - "$schema": "http://json-schema.org/draft-04/schema#", + "$schema": "http://json-schema.org/draft-04/schema#", "additionalProperties": false, - "definitions" : { - "transfer_service_version" : { - "additionalProperties" : false, - "type" : "string" - }, - "submitted_files" : { - "additionalProperties" : false, - "type" : "array", - "items" : { - "$ref" : "#/definitions/file" - } - }, - "file" : { - "type" : "object", - "additionalProperties" : false, - "properties" : { - "name" : { - "description" : "Name of the file", - "type" : "string" - }, - "content_type" :{ - "description" : "type of file e.g hca-sample-json, hca-assay-json, hca-rnaseq-fastq-gz", - "type" : "string" - }, - "size" : { - "description" : "Length of file in bytes", - "type" : "integer" - }, - "id" : { - "description" : "URI of the Data Storage System file resource that describes this file", - "type" : "string" - }, - "checksums" : { - "type" : "object", - "properties" : { - "s3etag" : { - "description" : "S3 ETAG checksum of this file", - "type" : "string" - }, - "sha1" : { - "description" : "SHA-1 checksum of this file", - "type" : "string" - }, - "sha256" : { - "description" : "SHA-256 checksum of this file", - "type" : "string" - }, - "crc32" : { - "description" : "CRC-32 checksum of this file", - "type" : "string" - } - }, - "required" : [ - "s3etag", - 
"sha1", - "sha256", - "crc32" - ] - } - }, - "required" : [ - "name", - "content_type", - "size", - "id", - "checksums" - ] - } + "definitions": { + "transfer_service_version": { + "additionalProperties": false, + "type": "string" + }, + "submitted_files": { + "additionalProperties": false, + "type": "array", + "items": { + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/submission.json#/definitions/file" + } + }, + "file": { + "type": "object", + "additionalProperties": false, + "properties": { + "name": { + "description": "Name of the file", + "type": "string" + }, + "content_type": { + "description": "type of file e.g hca-sample-json, hca-assay-json, hca-rnaseq-fastq-gz", + "type": "string" + }, + "size": { + "description": "Length of file in bytes", + "type": "integer" + }, + "id": { + "description": "URI of the Data Storage System file resource that describes this file", + "type": "string" + }, + "checksums": { + "type": "object", + "properties": { + "s3etag": { + "description": "S3 ETAG checksum of this file", + "type": "string" + }, + "sha1": { + "description": "SHA-1 checksum of this file", + "type": "string" + }, + "sha256": { + "description": "SHA-256 checksum of this file", + "type": "string" + }, + "crc32": { + "description": "CRC-32 checksum of this file", + "type": "string" + } + }, + "required": [ + "s3etag", + "sha1", + "sha256", + "crc32" + ] + } + }, + "required": [ + "name", + "content_type", + "size", + "id", + "checksums" + ] + } }, - "properties" : { - "transfer_service_version" : { - "$ref" : "#/definitions/transfer_service_version" - }, - "submitted_files" : { - "$ref" : "#/definitions/submitted_files" - } + "properties": { + "transfer_service_version": { + "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/submission.json#/definitions/transfer_service_version" + }, + "submitted_files": { + "$ref": 
"https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/4.0.0/json_schema/submission.json#/definitions/submitted_files" + } }, - "required" : [ - "transfer_service_version", - "submitted_files" + "required": [ + "transfer_service_version", + "submitted_files" ], - "title" : "submission", - "type" : "object" -} + "title": "submission", + "type": "object" +} \ No newline at end of file diff --git a/json_schema/well.json b/json_schema/well.json index 5d16da223..03eed4fdb 100644 --- a/json_schema/well.json +++ b/json_schema/well.json @@ -1,50 +1,34 @@ { - "$schema": "http://json-schema.org/draft-04/schema#", - "additionalProperties": false, - "description": "This describes the well inside the plate or fluidics chip where sample came from.", + "$schema": "http://json-schema.org/draft-04/schema#", + "additionalProperties": false, + "description": "This describes the well inside the plate or fluidics chip where sample came from.", "properties": { - "cell_type": { - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/9f75c0b028341324c71ebd5cf1f3ab74e4761bb0/json_schema/ontology.json", - "description": "the cell type which submitter thinks is in the well" - }, "col": { - "description": "well column in plate", + "description": "Well column in plate.", "type": "string" - }, - "core": { - "$ref": "https://raw.githubusercontent.com/HumanCellAtlas/metadata-schema/9f75c0b028341324c71ebd5cf1f3ab74e4761bb0/json_schema/core.json", - "description": "type and schema for this object" - }, + }, "name": { - "description": "A name for the well, should be unique for the plate", + "description": "A name for the well. 
Should be unique for the plate", "type": "string" - }, + }, "plate": { - "description": "plate id that well came from if any", + "description": "An ID for the plate that the well is located on.", "type": "string" - }, - "plating_protocol": { - "description": "all wells", - "pattern": "^P-....-.*$", - "type": "string" - }, + }, "quality": { - "description": "Note on how good cell looks if imaged in well before sequencing", + "description": "Note on how good cell looks if imaged in well before sequencing.", "enum": [ - "OK", - "control, 2-cell well", - "control, empty well", + "OK", + "control, 2-cell well", + "control, empty well", "low quality cell" ] - }, + }, "row": { - "description": "well row in plate", + "description": "Well row in plate.", "type": "string" } - }, - "required": [ - "core" - ], - "title": "well", + }, + "title": "well", "type": "object" -} +} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 000000000..f9b29d598 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +jsonschema + diff --git a/schema_test_files/10x_pbmc8k_donor_0.json b/schema_test_files/10x_pbmc8k_donor_0.json new file mode 100644 index 000000000..8d5874691 --- /dev/null +++ b/schema_test_files/10x_pbmc8k_donor_0.json @@ -0,0 +1,16 @@ +{ + "core" : { + "schema_version" : "4.0.0", + "type" : "sample" + }, + "sample_id": "d1", + "donor" : { + "is_living": true, + "life_stage": "adult" + }, + "ncbi_taxon_id": 9606, + "genus_species": { + "text": "Homo sapiens", + "ontology": "NCBITAXON_9606" + } +} \ No newline at end of file diff --git a/schema_test_files/10x_pbmc8k_sample_0.json b/schema_test_files/10x_pbmc8k_sample_0.json new file mode 100644 index 000000000..e8858452c --- /dev/null +++ b/schema_test_files/10x_pbmc8k_sample_0.json @@ -0,0 +1,20 @@ +{ + "core" : { + "schema_version" : "4.0.0", + "type" : "sample" + }, + "sample_id": "s1", + "name": "PBMCs", + "description": "peripheral blood mononuclear cells (PBMCs)", + 
"ncbi_taxon_id": 9606, + "specimen_from_organism" : { + "organ": { + "text": "blood", + "ontology": "UBERON_0000178" + }, + "body_part": { + "text": "peripheral blood mononuclear cells (PBMCs)" + } + }, + "derived_from": "d1" +} \ No newline at end of file diff --git a/schema_test_files/10x_pbmc8k_sample_bundle.json b/schema_test_files/10x_pbmc8k_sample_bundle.json new file mode 100644 index 000000000..8c2fb6a35 --- /dev/null +++ b/schema_test_files/10x_pbmc8k_sample_bundle.json @@ -0,0 +1,55 @@ +[ + { + "hca_ingest": { + "document_id": "a37dbd93-b93a-4838-a7bb-cd0796b3d9d0", + "submissionDate": "2017-10-13T09:18:49.422Z", + "updateDate": "2017-10-13T09:19:40.069Z", + "accession": "SAM0888697" + }, + "content": { + "core" : { + "schema_version" : "4.0.0", + "type" : "sample" + }, + "sample_id": "d1", + "donor" : { + "is_living": true, + "life_stage": "adult" + }, + "ncbi_taxon_id": 9606, + "genus_species": { + "text": "Homo sapiens", + "ontology": "NCBITAXON_9606" + } + } + }, + { + "hca_ingest" : { + "document_id": "a37dbd93-b93a-4838-a7bb-cd0796b3d9d1", + "submissionDate": "2017-10-13T09:18:49.422Z", + "updateDate": "2017-10-13T09:19:40.069Z", + "accession": "SAM0999697" + }, + "content" : { + "core" : { + "schema_version" : "4.0.0", + "type" : "sample" + }, + "sample_id": "s1", + "name": "PBMCs", + "description": "peripheral blood mononuclear cells (PBMCs)", + "ncbi_taxon_id": 9606, + "specimen_from_organism" : { + "organ": { + "text": "blood", + "ontology": "UBERON_0000178" + }, + "body_part": { + "text": "peripheral blood mononuclear cells (PBMCs)" + } + }, + "derived_from": "d1" + }, + "derived_from": "a37dbd93-b93a-4838-a7bb-cd0796b3d9d1" + } +] diff --git a/schema_tests/README.md b/schema_tests/README.md new file mode 100644 index 000000000..6d1d89d3e --- /dev/null +++ b/schema_tests/README.md @@ -0,0 +1,15 @@ +## Schema test files + +* For each schema to be tested: + * Add a directory to this folder named for the schema. + - e.g. 
to test sample.json, add a directory called 'sample'. + + * Add the name of the schema to conf.json + * Under this add a directory called 'pass' (for all instances that are meant + to pass), and a directory called 'fail' (for all instances that are meant + to fail). Any files with the .json extension in these directories will be + tested against the relevant schema. + + + + This behavior relies on /src/test.py. \ No newline at end of file diff --git a/schema_tests/conf.json b/schema_tests/conf.json new file mode 100644 index 000000000..f799166fc --- /dev/null +++ b/schema_tests/conf.json @@ -0,0 +1,4 @@ +{ "schemas_to_test": ["sample", + "sample_ingest", + "sample_bundle"] +} \ No newline at end of file diff --git a/schema_tests/sample/fail/sample-test-current.json b/schema_tests/sample/fail/sample-test-current.json new file mode 100644 index 000000000..a51fbf963 --- /dev/null +++ b/schema_tests/sample/fail/sample-test-current.json @@ -0,0 +1,75 @@ +{ + "core": { + "validationState": "Valid", + "uuid": "28b6cc77-6426-4686-b838-f845683fb320", + "accession": null, + "submissionDate": { + "date": "2017-09-29T14:47:00.395Z" + }, + "updateDate": { + "date": "2017-09-29T14:47:50.724Z" + }, + "events": [ + { + "endState": "Validating", + "originalState": "Draft", + "submissionDate": { + "date": "2017-09-29T14:47:40.669+0000" + } + }, + { + "endState": "Valid", + "originalState": "Validating", + "submissionDate": { + "date": "2017-09-29T14:47:50.724+0000" + } + } + ] + }, + "name": "Q3_DEMO-Single cell mRNA-seq_MGH30_A01", + "organ": { + "text": "glioblastoma" + }, + "body_part": { + "text": "brain" + }, + "donor_id": "Q3_DEMO-donor_MGH30", + "donor": { + "species": { + "text": "Homo sapiens", + "ontology": 9606 + }, + "is_living": "yes", + "core": { + "validationState": "Valid", + "uuid": "d9180483-1121-4d62-ae1c-fde00ba701c8", + "accession": null, + "submissionDate": { + "date": "2017-09-29T14:47:00.377Z" + }, + "updateDate": { + "date": "2017-09-29T14:47:51.699Z" + }, + 
"events": [ + { + "endState": "Validating", + "originalState": "Draft", + "submissionDate": { + "date": "2017-09-29T14:47:40.663+0000" + } + }, + { + "endState": "Valid", + "originalState": "Validating", + "submissionDate": { + "date": "2017-09-29T14:47:51.699+0000" + } + } + ] + }, + "ncbi_taxon": 9606, + "id": "Q3_DEMO-donor_MGH30" + }, + "id": "Q3_DEMO-sample_SAMN02797092", + "protocols": [] +} \ No newline at end of file diff --git a/schema_tests/sample/pass/10x_pbmc8k_donor_0.json b/schema_tests/sample/pass/10x_pbmc8k_donor_0.json new file mode 100644 index 000000000..b4ae3611c --- /dev/null +++ b/schema_tests/sample/pass/10x_pbmc8k_donor_0.json @@ -0,0 +1,14 @@ +{ + "id": "d1", + "type": "donor", + "donor" : { + "is_living": true, + "life_stage": "adult", + "species": { + "text": "Homo sapiens", + "ontology": "NCBITAXON_9606" + } + }, + "ncbi_taxon_id": 9606 + +} \ No newline at end of file diff --git a/schema_tests/sample/pass/10x_pbmc8k_sample_0.json b/schema_tests/sample/pass/10x_pbmc8k_sample_0.json new file mode 100644 index 000000000..f61b1471d --- /dev/null +++ b/schema_tests/sample/pass/10x_pbmc8k_sample_0.json @@ -0,0 +1,16 @@ +{ + "id": "s1", + "type": "sample_from_organism", + "name": "PBMCs", + "description": "peripheral blood mononuclear cells (PBMCs)", + "sample_from_organism" : { + "organ": { + "text": "blood", + "ontology": "UBERON_0000178" + }, + "body_part": { + "text": "peripheral blood mononuclear cells (PBMCs)" + }, + "donor_id": "d1" + } +} \ No newline at end of file diff --git a/schema_tests/sample/pass/donor_test1.json b/schema_tests/sample/pass/donor_test1.json new file mode 100644 index 000000000..127199770 --- /dev/null +++ b/schema_tests/sample/pass/donor_test1.json @@ -0,0 +1,16 @@ +{ + "core" : { + "id": "d1", + "type": "sample" + }, + "donor" : { + "is_living": true, + "life_stage": "adult", + "species": { + "text": "Homo sapiens", + "ontology": "NCBITAXON_9606" + } + }, + "ncbi_taxon_id": 9606 + +} \ No newline at end of file 
diff --git a/schema_tests/sample_bundle/pass/10x_pbmc8k_sample_bundle.json b/schema_tests/sample_bundle/pass/10x_pbmc8k_sample_bundle.json new file mode 100644 index 000000000..eac118891 --- /dev/null +++ b/schema_tests/sample_bundle/pass/10x_pbmc8k_sample_bundle.json @@ -0,0 +1,48 @@ +[ + { + "core": { + "document_id": "a37dbd93-b93a-4838-a7bb-cd0796b3d9d0", + "submissionDate": "2017-10-13T09:18:49.422Z", + "updateDate": "2017-10-13T09:19:40.069Z", + "accession": "SAM0888697" + }, + "content": { + "id": "s1", + "type": "sample_from_organism", + "name": "PBMCs", + "description": "peripheral blood mononuclear cells (PBMCs)", + "sample_from_organism": { + "organ": { + "text": "blood", + "ontology": "UBERON_0000178" + }, + "body_part": { + "text": "peripheral blood mononuclear cells (PBMCs)" + }, + "donor_id": "d1" + } + } + }, + { + "core" : { + "document_id": "a37dbd93-b93a-4838-a7bb-cd0796b3d9d1", + "submissionDate": "2017-10-13T09:18:49.422Z", + "updateDate": "2017-10-13T09:19:40.069Z", + "accession": "SAM0999697" + }, + "content" : { + "id": "d1", + "type": "donor", + "donor" : { + "is_living": true, + "life_stage": "adult", + "species": { + "text": "Homo sapiens", + "ontology": "NCBITAXON_9606" + } + }, + "ncbi_taxon_id": 9606 + }, + "derived_from": "a37dbd93-b93a-4838-a7bb-cd0796b3d9d1" + } +] diff --git a/schema_tests/sample_ingest/pass/10x_pbmc8k_donor_0-ingest.json b/schema_tests/sample_ingest/pass/10x_pbmc8k_donor_0-ingest.json new file mode 100644 index 000000000..0a949b1f5 --- /dev/null +++ b/schema_tests/sample_ingest/pass/10x_pbmc8k_donor_0-ingest.json @@ -0,0 +1,21 @@ +{ + "core" : { + "document_id": "a37dbd93-b93a-4838-a7bb-cd0796b3d9d1", + "submissionDate": "2017-10-13T09:18:49.422Z", + "updateDate": "2017-10-13T09:19:40.069Z", + "accession": "SAM0999697" + }, + "content" : { + "id": "d1", + "type": "donor", + "donor" : { + "is_living": true, + "life_stage": "adult", + "species": { + "text": "Homo sapiens", + "ontology": "NCBITAXON_9606" + } + }, + 
"ncbi_taxon_id": 9606 + } +} \ No newline at end of file diff --git a/schema_tests/sample_ingest/pass/10x_pbmc8k_sample_0-ingest.json b/schema_tests/sample_ingest/pass/10x_pbmc8k_sample_0-ingest.json new file mode 100644 index 000000000..a7bba3c92 --- /dev/null +++ b/schema_tests/sample_ingest/pass/10x_pbmc8k_sample_0-ingest.json @@ -0,0 +1,24 @@ +{ + "core" : { + "document_id": "a37dbd93-b93a-4838-a7bb-cd0796b3d9d0", + "submissionDate": "2017-10-13T09:18:49.422Z", + "updateDate": "2017-10-13T09:19:40.069Z", + "accession": "SAM0888697" + }, + "content" : { + "id": "s1", + "type": "sample_from_organism", + "name": "PBMCs", + "description": "peripheral blood mononuclear cells (PBMCs)", + "sample_from_organism" : { + "organ": { + "text": "blood", + "ontology": "UBERON_0000178" + }, + "body_part": { + "text": "peripheral blood mononuclear cells (PBMCs)" + }, + "donor_id": "d1" + } + } +} \ No newline at end of file diff --git a/src/js-validator/package.json b/src/js-validator/package.json new file mode 100644 index 000000000..18853aeb2 --- /dev/null +++ b/src/js-validator/package.json @@ -0,0 +1,11 @@ +{ + "name": "js-validator", + "version": "0.0.1", + "dependencies": { + "ajv": "^5.3.0", + "package.json": "^2.0.1" + }, + "scripts": { + "test": "mocha" + } +} diff --git a/src/js-validator/test/validate_json.js b/src/js-validator/test/validate_json.js new file mode 100644 index 000000000..3e5bb3c11 --- /dev/null +++ b/src/js-validator/test/validate_json.js @@ -0,0 +1,92 @@ +const Ajv = require('ajv'); +const fs = require('fs'); +const path = require('path') + +function fromDir(startPath,filter){ + var f = [] + if (!fs.existsSync(startPath)){ + console.log("no dir ",startPath); + return; + } + + var files=fs.readdirSync(startPath); + for(var i=0;i=0) { + console.log('-- found: ',filename); + f.push(filename) + }; + }; + return f; +}; + + +function createConfiguredAjv(){ + var ajv = new Ajv({allErrors: true, + verbose : true, + meta: false, // optional, to prevent adding 
draft-06 meta-schema + extendRefs: true, // optional, current default is to 'fail', spec behaviour is to 'ignore' + unknownFormats: 'ignore', // optional, current default is true (fail) + errorDataPath: 'property', + messages: true, + jsonPointers: false, + inlineRefs: false, + addUsedSchema: true, + passContext: true + }); + + var metaSchema = require('ajv/lib/refs/json-schema-draft-04.json'); + ajv.addMetaSchema(metaSchema); + ajv._opts.defaultMeta = metaSchema.id; + + // optional, using unversioned URI is out of spec, see https://github.com/json-schema-org/json-schema-spec/issues/216 + ajv._refs['http://json-schema.org/schema'] = 'http://json-schema.org/draft-04/schema'; + + // Optionally you can also disable keywords defined in draft-06 + ajv.removeKeyword('propertyNames'); + ajv.removeKeyword('contains'); + ajv.removeKeyword('const'); + + // add the schemas + var schemaFiles = fromDir("../../../json_schema/", ".json"); + + schemaFiles + .filter((schemaFile) => !schemaFile.includes("analysis.json")) + .forEach((schemaFile) => { + var schema = require(schemaFile); + schema.id = schemaFile; + ajv.addSchema(require(schemaFile), schemaFile); + }); + + return ajv; +} + +function getValidatorFunctionForSchema(ajvValidator, schemaPath){ + schema = require(schemaPath); + return ajvValidator.compile(schema); +} + + +var donor = require("../../../schema_tests/sample/pass/donor_test1.json"); +var sample = require("../../../schema_tests/sample/fail/sample-test-current.json"); + + +test(donor, "../../../json_schema/sample.json"); +test(sample, "../../../json_schema/sample.json"); + +function test(data, schema) { + var ajv = createConfiguredAjv() + var validator = getValidatorFunctionForSchema(ajv, schema) + var valid = validator(data); + if (valid) console.log('Valid!'); + else console.log('Invalid: ' + ajv.errorsText(validator.errors)); + + validator.errors.forEach((error) =>{ + console.log(error); + }); + +} diff --git a/src/ref_updater.py b/src/ref_updater.py new file 
mode 100644 index 000000000..0dc3fa043 --- /dev/null +++ b/src/ref_updater.py @@ -0,0 +1,70 @@ +from optparse import OptionParser +import os, json + + +def update_refs(args): + url = args.url + input = args.input + output = args.output + + + files = getJsonFiles(input) + + for f in files: + if ".json" in f: + updateJson(input, f, output, url) + +def updateJson(input, f, output, url): + with open(input + f) as data_file: + jsonData = json.load(data_file) + newJson = replaceUrl(jsonData, "$ref", url) + dumpJsonToFile(output, newJson, f) + + +def dumpJsonToFile(outputDir, object, name): + if outputDir: + dir = os.path.abspath(outputDir) + if not os.path.exists(dir): + os.makedirs(dir) + tmpFile = open(dir + "/" +name, "w") + tmpFile.write(json.dumps(object, indent=4)) + tmpFile.close() + + +def replaceUrl(jsonSchema, k, v): + for key in jsonSchema.keys(): + if key == k: + old = jsonSchema[key] + jsonSchema[key] = v + old + elif type(jsonSchema[key]) is dict: + replaceUrl(jsonSchema[key], k, v) + + return jsonSchema + + + + +def getJsonFiles(path): + if os.path.exists(path): + filesInDir = os.listdir(path) + + return filesInDir + + else: + print(path + " is not a valid directory") + exit(3) + + + +if __name__ == "__main__": + parser = OptionParser() + parser.add_option("-u", "--url", dest="url", help="Full reference URL") + parser.add_option("-i", "--input", dest="input", help="Path to json files") + parser.add_option("-o", "--output", dest="output", help="Path for outout json") + + (options, args) = parser.parse_args() + if not options.url: + print("You must supply a full URL") + exit(2) + + update_refs(options) \ No newline at end of file diff --git a/src/sample_test.py b/src/sample_test.py new file mode 100644 index 000000000..23660c6ef --- /dev/null +++ b/src/sample_test.py @@ -0,0 +1,54 @@ +from schema_test_suite import get_validator, get_json_from_file, validate +import os +import sys +import subprocess + +""" +Sample test: Are JSON files that are in the 
+schema_test* folders valid JSON schema? +""" + +# Flag for tracking the exit status of validate() calls +status_flag = True + +os.chdir('../json_schema') +pwd = subprocess.check_output('pwd').decode("utf-8").rstrip() +base_uri = "file://" + pwd + "/" +print(base_uri) + +print('\nValidating sample.json') +sv = get_validator('sample.json', base_uri) + +# Specific schema tests follow + +print('\nValidating schema_test_files/10x_pbmc8k_donor_0.json') +dt1 = get_json_from_file('../schema_test_files/10x_pbmc8k_donor_0.json') +if not validate(sv, dt1): # will return False if fails (show return value) + status_flag = False + +print('\nValidating schema_test_files/10x_pbmc8k_sample_0.json') +sfo1 = get_json_from_file('../schema_test_files/10x_pbmc8k_sample_0.json') +if not validate(sv, sfo1): + status_flag = False + +print('\nValidating schema_tests/sample/fail/sample-test-current.json\n(This should fail)') +sf1 = get_json_from_file('../schema_tests/sample/fail/sample-test-current.json') +# This should fail. If it fails, keep status_flag = True +if validate(sv, sf1): + status_flag = False + +# Specific bundle tests follow + +print('\nValidating sample_bundle.json') +sample_bundle_validator = get_validator('sample_bundle.json', base_uri) + +print('\nValidating schema_test_files/10x_pbmc8k_sample_bundle.json') +sample_bundle_file = get_json_from_file('../schema_test_files/10x_pbmc8k_sample_bundle.json') +if not validate(sample_bundle_validator, sample_bundle_file): + status_flag = False + +# If any of the validate() calls fail, set exit status to 1. +# Failed validate() calls on things that are supposed to fail will not affect exit status. +# Without the following line, failed validate() will result in exit status 0, which is not desirable. 
+if not status_flag: + sys.exit(1) diff --git a/src/schema_test_suite.py b/src/schema_test_suite.py new file mode 100644 index 000000000..b8abbd353 --- /dev/null +++ b/src/schema_test_suite.py @@ -0,0 +1,81 @@ +import json +from jsonschema import Draft4Validator, RefResolver, SchemaError +import warnings +import subprocess +import os +import glob + + +def get_json_from_file(filename, warn = False): + """Loads json from a file. + Optionally specify warn = True to warn, rather than + fail if file not found.""" + f = open(filename, 'r') + return json.loads(f.read()) + +def get_validator(filename, base_uri = ''): + """Load schema from JSON file; + Check whether it's a valid schema; + Return a Draft4Validator object. + Optionally specify a base URI for relative path + resolution of JSON pointers (This is especially useful + for local resolution via base_uri of form file://{some_path}/) + """ + + schema = get_json_from_file(filename) + try: + # Check schema via class method call. Works, despite IDE complaining + Draft4Validator.check_schema(schema) + print("Schema %s is valid" % filename) + except SchemaError: + raise + if base_uri: + resolver = RefResolver(base_uri = base_uri, + referrer = filename) + else: + resolver = None + return Draft4Validator(schema = schema, + resolver = resolver) + +def validate(validator, instance): + """Validate an instance of a schema and report errors.""" + if validator.is_valid(instance): + print("Validation Passes") + return True + else: + es = validator.iter_errors(instance) + recurse_through_errors(es) + print("Validation Fails") + return False + +def recurse_through_errors(es, level = 0): + """Recurse through errors posting message + and schema path until context is empty""" + # Assuming blank context is a sufficient escape clause here. 
+ for e in es: + warnings.warn( + "***"*level + "subschema level " + str(level) + "\t".join([e.message, + "Path to error:" + str(e.absolute_schema_path)]) + "\n") + if e.context: + level += 1 + recurse_through_errors(e.context, level = level) + + +def test_local(path_to_schema_dir, schema_file, test_dir): + """Tests all instances in a test_folder against a single schema. + Assumes all schema files in single dir. + Assumes all *.json files in the test_dir should validate against the schema. + * path_to_schema_dir: Absolute or relative path to schema dir + * schema_file: schema file name + * test_dir: path to test directory (absolute or local to schema dir) + """ + os.chdir(path_to_schema_dir) + pwd = subprocess.check_output('pwd').decode("utf-8").rstrip() + base_uri = "file://" + pwd + "/" + sv = get_validator(schema_file, base_uri) + test_files = glob.glob(pathname=test_dir + '/*.json') + #print("Found test files: %s in %s" % (str(test_files), test_dir)) + for instance_file in test_files: + i = get_json_from_file(instance_file) + print("Testing: %s" % instance_file) + validate(sv, i) diff --git a/src/schema_tests.py b/src/schema_tests.py new file mode 100644 index 000000000..f36bf54a2 --- /dev/null +++ b/src/schema_tests.py @@ -0,0 +1,40 @@ +import unittest +from schema_test_suite import get_validator, validate +import os +import subprocess + +# This => instant tests, but still not sure it is best way to manage. +# Might be better to have configs mapping schemas to tests. +# Or could run the whole thing with a generic script using a +# standard directory struc and file name convention (brittle to dir +# restructuring). Even with that, probably need a config to list which +# tests to run. + + +def get_local_base_uri(script_to_json_relative_path): + """ + Takes the relative path from script to json schema files as an input. + Switches script to run in json schema folder. 
+ returns base URI for schema folder on local file system + """ + # Might be better done in shell as part of travis job and stored as env? + os.chdir(script_to_json_relative_path) + pwd = subprocess.check_output('pwd').decode("utf-8").rstrip() + return "file://" + pwd + '/' + +class TestSchemas(unittest.TestCase): + + def setUp(self): + # Need a better way to manage paths. Nasty bit of hard wiring for now + base_uri = get_local_base_uri('../jsonschema/') + + def positive_tests(self): + v = get_validator('sample.json', self.base_uri) + validate(v, {}) + return + + def negative_tests(self): + return + + + \ No newline at end of file diff --git a/src/simple_test.py b/src/simple_test.py new file mode 100644 index 000000000..c9149a089 --- /dev/null +++ b/src/simple_test.py @@ -0,0 +1,15 @@ +from schema_test_suite import get_validator +import glob + +""" +Simple test: Are JSON schemas that are in the +json_schema folder valid JSON schema? +Specific schema tests to follow. +""" + +# a*.json skipped for now +schemas = glob.glob('../json_schema/' +"[b-z]*.json") + +for s in schemas: + print("Checking whether %s is a valid json schema" % s) + get_validator(s) diff --git a/src/test.py b/src/test.py new file mode 100644 index 000000000..aa67a954e --- /dev/null +++ b/src/test.py @@ -0,0 +1,24 @@ +from schema_test_suite import test_local, get_json_from_file +import re +import os + +# Could move these into config +path_to_schemas = '../json_schema' +path_to_test = '../schema_tests/' + +# Assume all dir in test directory not +# beginning with a '.' are the names of schemas to test. +# (Note - maybe simpler to use glob?) 
+dtree = list(os.walk(top = path_to_test)) +dlist = dtree[0][1] +schemas_to_test = [] +for d in dlist: + if not re.match('\..+', d): + schemas_to_test.append(d) + +valid = True +for s in schemas_to_test: + print("Checking whether %s.json is a valid json schema" % s) + test_local(path_to_schema_dir = path_to_schemas , + schema_file = s + '.json', + test_dir = path_to_test + s + '/pass/') \ No newline at end of file