diff --git a/.buildinfo b/.buildinfo new file mode 100644 index 0000000..7dbc837 --- /dev/null +++ b/.buildinfo @@ -0,0 +1,4 @@ +# Sphinx build info version 1 +# This file records the configuration used when building these files. When it is not found, a full rebuild will be done. +config: acfbbaecd20da8e792384ef7f306c5ac +tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/.doctrees/bkbit.cli.doctree b/.doctrees/bkbit.cli.doctree new file mode 100644 index 0000000..a983148 Binary files /dev/null and b/.doctrees/bkbit.cli.doctree differ diff --git a/.doctrees/bkbit.data_translators.anatomical_structure_translator.doctree b/.doctrees/bkbit.data_translators.anatomical_structure_translator.doctree new file mode 100644 index 0000000..a220428 Binary files /dev/null and b/.doctrees/bkbit.data_translators.anatomical_structure_translator.doctree differ diff --git a/.doctrees/bkbit.data_translators.doctree b/.doctrees/bkbit.data_translators.doctree new file mode 100644 index 0000000..ace54fa Binary files /dev/null and b/.doctrees/bkbit.data_translators.doctree differ diff --git a/.doctrees/bkbit.data_translators.file_manifest_translator.doctree b/.doctrees/bkbit.data_translators.file_manifest_translator.doctree new file mode 100644 index 0000000..9a67a37 Binary files /dev/null and b/.doctrees/bkbit.data_translators.file_manifest_translator.doctree differ diff --git a/.doctrees/bkbit.data_translators.genome_annotation_translator.doctree b/.doctrees/bkbit.data_translators.genome_annotation_translator.doctree new file mode 100644 index 0000000..67d2fca Binary files /dev/null and b/.doctrees/bkbit.data_translators.genome_annotation_translator.doctree differ diff --git a/.doctrees/bkbit.data_translators.library_generation_translator.doctree b/.doctrees/bkbit.data_translators.library_generation_translator.doctree new file mode 100644 index 0000000..cb83ff1 Binary files /dev/null and b/.doctrees/bkbit.data_translators.library_generation_translator.doctree differ diff --git 
a/.doctrees/bkbit.data_translators.specimen_metadata_translator.doctree b/.doctrees/bkbit.data_translators.specimen_metadata_translator.doctree new file mode 100644 index 0000000..8795bfb Binary files /dev/null and b/.doctrees/bkbit.data_translators.specimen_metadata_translator.doctree differ diff --git a/.doctrees/bkbit.doctree b/.doctrees/bkbit.doctree new file mode 100644 index 0000000..8f266cf Binary files /dev/null and b/.doctrees/bkbit.doctree differ diff --git a/.doctrees/bkbit.model_converters.doctree b/.doctrees/bkbit.model_converters.doctree new file mode 100644 index 0000000..9a09690 Binary files /dev/null and b/.doctrees/bkbit.model_converters.doctree differ diff --git a/.doctrees/bkbit.model_converters.sheets_converter.doctree b/.doctrees/bkbit.model_converters.sheets_converter.doctree new file mode 100644 index 0000000..53326da Binary files /dev/null and b/.doctrees/bkbit.model_converters.sheets_converter.doctree differ diff --git a/.doctrees/bkbit.model_converters.yaml2sheet_converter.doctree b/.doctrees/bkbit.model_converters.yaml2sheet_converter.doctree new file mode 100644 index 0000000..74b9a61 Binary files /dev/null and b/.doctrees/bkbit.model_converters.yaml2sheet_converter.doctree differ diff --git a/.doctrees/bkbit.model_editors.add_dunderMethods_genomeAnnotation.doctree b/.doctrees/bkbit.model_editors.add_dunderMethods_genomeAnnotation.doctree new file mode 100644 index 0000000..7330b96 Binary files /dev/null and b/.doctrees/bkbit.model_editors.add_dunderMethods_genomeAnnotation.doctree differ diff --git a/.doctrees/bkbit.model_editors.doctree b/.doctrees/bkbit.model_editors.doctree new file mode 100644 index 0000000..6b13075 Binary files /dev/null and b/.doctrees/bkbit.model_editors.doctree differ diff --git a/.doctrees/bkbit.model_editors.linkml_trimmer.doctree b/.doctrees/bkbit.model_editors.linkml_trimmer.doctree new file mode 100644 index 0000000..f02963d Binary files /dev/null and b/.doctrees/bkbit.model_editors.linkml_trimmer.doctree 
differ diff --git a/.doctrees/bkbit.models.anatomical_structure.doctree b/.doctrees/bkbit.models.anatomical_structure.doctree new file mode 100644 index 0000000..5320398 Binary files /dev/null and b/.doctrees/bkbit.models.anatomical_structure.doctree differ diff --git a/.doctrees/bkbit.models.doctree b/.doctrees/bkbit.models.doctree new file mode 100644 index 0000000..dd73a17 Binary files /dev/null and b/.doctrees/bkbit.models.doctree differ diff --git a/.doctrees/bkbit.models.genome_annotation.doctree b/.doctrees/bkbit.models.genome_annotation.doctree new file mode 100644 index 0000000..ba78308 Binary files /dev/null and b/.doctrees/bkbit.models.genome_annotation.doctree differ diff --git a/.doctrees/bkbit.models.library_generation.doctree b/.doctrees/bkbit.models.library_generation.doctree new file mode 100644 index 0000000..e75cc12 Binary files /dev/null and b/.doctrees/bkbit.models.library_generation.doctree differ diff --git a/.doctrees/bkbit.utils.doctree b/.doctrees/bkbit.utils.doctree new file mode 100644 index 0000000..30fa2f2 Binary files /dev/null and b/.doctrees/bkbit.utils.doctree differ diff --git a/.doctrees/bkbit.utils.get_ncbi_taxonomy.doctree b/.doctrees/bkbit.utils.get_ncbi_taxonomy.doctree new file mode 100644 index 0000000..207081f Binary files /dev/null and b/.doctrees/bkbit.utils.get_ncbi_taxonomy.doctree differ diff --git a/.doctrees/bkbit.utils.load_json.doctree b/.doctrees/bkbit.utils.load_json.doctree new file mode 100644 index 0000000..e92b9a0 Binary files /dev/null and b/.doctrees/bkbit.utils.load_json.doctree differ diff --git a/.doctrees/bkbit.utils.nimp_api_endpoints.doctree b/.doctrees/bkbit.utils.nimp_api_endpoints.doctree new file mode 100644 index 0000000..aa33578 Binary files /dev/null and b/.doctrees/bkbit.utils.nimp_api_endpoints.doctree differ diff --git a/.doctrees/bkbit.utils.setup_logger.doctree b/.doctrees/bkbit.utils.setup_logger.doctree new file mode 100644 index 0000000..b65a423 Binary files /dev/null and 
b/.doctrees/bkbit.utils.setup_logger.doctree differ diff --git a/.doctrees/contributing.doctree b/.doctrees/contributing.doctree new file mode 100644 index 0000000..fc5f07a Binary files /dev/null and b/.doctrees/contributing.doctree differ diff --git a/.doctrees/environment.pickle b/.doctrees/environment.pickle new file mode 100644 index 0000000..8f867f8 Binary files /dev/null and b/.doctrees/environment.pickle differ diff --git a/.doctrees/genome_annotation.doctree b/.doctrees/genome_annotation.doctree new file mode 100644 index 0000000..0549b29 Binary files /dev/null and b/.doctrees/genome_annotation.doctree differ diff --git a/.doctrees/index.doctree b/.doctrees/index.doctree new file mode 100644 index 0000000..aaacc2b Binary files /dev/null and b/.doctrees/index.doctree differ diff --git a/.doctrees/install.doctree b/.doctrees/install.doctree new file mode 100644 index 0000000..f40dadb Binary files /dev/null and b/.doctrees/install.doctree differ diff --git a/.doctrees/linkml_trimmer.doctree b/.doctrees/linkml_trimmer.doctree new file mode 100644 index 0000000..ebbce04 Binary files /dev/null and b/.doctrees/linkml_trimmer.doctree differ diff --git a/.doctrees/modules.doctree b/.doctrees/modules.doctree new file mode 100644 index 0000000..6dc4126 Binary files /dev/null and b/.doctrees/modules.doctree differ diff --git a/.doctrees/quickstart.doctree b/.doctrees/quickstart.doctree new file mode 100644 index 0000000..e3493af Binary files /dev/null and b/.doctrees/quickstart.doctree differ diff --git a/.doctrees/specimen_file_manifest.doctree b/.doctrees/specimen_file_manifest.doctree new file mode 100644 index 0000000..92052e1 Binary files /dev/null and b/.doctrees/specimen_file_manifest.doctree differ diff --git a/.doctrees/specimen_metadata.doctree b/.doctrees/specimen_metadata.doctree new file mode 100644 index 0000000..44bc9ef Binary files /dev/null and b/.doctrees/specimen_metadata.doctree differ diff --git a/.doctrees/spreadsheet_converter.doctree 
b/.doctrees/spreadsheet_converter.doctree new file mode 100644 index 0000000..710ce47 Binary files /dev/null and b/.doctrees/spreadsheet_converter.doctree differ diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 0000000..e69de29 diff --git a/_modules/bkbit/data_translators/anatomical_structure_translator.html b/_modules/bkbit/data_translators/anatomical_structure_translator.html new file mode 100644 index 0000000..4030525 --- /dev/null +++ b/_modules/bkbit/data_translators/anatomical_structure_translator.html @@ -0,0 +1,326 @@ + + + + + + + + bkbit.data_translators.anatomical_structure_translator — bkbit documentation + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+
    +
  • + + +
  • +
  • +
+
+
+
+
+ +

Source code for bkbit.data_translators.anatomical_structure_translator

+import csv
+import inspect
+import os
+import json
+from bkbit.models import anatomical_structure as ans
+
+
class AnS():
    """Translate anatomical-structure CSV tables into model objects and JSON-LD.

    Every ``generate_*`` method builds one kind of ``ans`` model object from
    the columns of a CSV row and appends it to the matching list attribute.
    ``read_data`` selects the method whose parameter names equal the CSV
    header, so the parameter names of the ``generate_*`` methods are part of
    the input-file contract (including parameters a method does not use).
    """

    def __init__(self):
        self.anatomical_annotation_set = []
        self.anatomical_space = []
        self.image_dataset = []
        self.parcellation_annotation = []
        self.parcellation_annotation_term_map = []
        self.parcellation_atlas = []
        self.parcellation_color_assignment = []
        self.parcellation_terminology = []
        self.parcellation_term_set = []
        self.parcellation_term = []
        self.parcellation_color_scheme = []
        # frozenset of CSV column names -> unbound generate_* function
        self.func_header_mapping = self.__class__.assign_func_header()

    @classmethod
    def assign_func_header(cls):
        """Map each ``generate_*`` method's parameter names to the method.

        Only ``generate_*`` methods are registered. Registering every function
        of the class would let a CSV whose header happens to be ``file_name``
        (or ``dir_path,output_file_name``) dispatch into ``read_data`` or
        ``provide_data`` instead of a generator.

        Returns:
            dict: ``frozenset`` of parameter names (without ``self``) mapped
            to the unbound function.
        """
        mapping = {}
        for attr_name in dir(cls):
            if not attr_name.startswith("generate_"):
                continue
            func = getattr(cls, attr_name)
            if inspect.isfunction(func):
                func_params = inspect.signature(func).parameters
                header = frozenset(param for param in func_params if param != 'self')
                mapping[header] = func
        return mapping

    def read_data(self, file_name):
        """Read one CSV file and build an object for each data row.

        The first row is taken as the header. Files that are empty, or whose
        header matches no ``generate_*`` method, are skipped without error.
        Blank rows are ignored.

        Parameters:
            file_name (str): Path of the CSV file to read.
        """
        # newline="" lets the csv module handle line endings inside quoted fields.
        with open(file_name, mode='r', encoding='utf-8', newline='') as file:
            reader = csv.reader(file)

            column_names = next(reader, None)
            if column_names is None:
                # Completely empty file: nothing to dispatch on.
                return

            func = self.func_header_mapping.get(frozenset(column_names))
            if func is None:
                return

            for row in reader:
                if not row:
                    # csv.reader yields [] for blank lines.
                    continue
                func(self, **dict(zip(column_names, row)))

    def provide_data(self, dir_path, output_file_name):
        """Read every ``.csv`` file in a directory and write one JSON-LD file.

        Parameters:
            dir_path (str): Directory that holds the CSV files; the output
                file is written into the same directory.
            output_file_name (str): File name of the JSON-LD output.
        """
        # Sorted so that the order of generated objects does not depend on
        # the file-system listing order.
        for file in sorted(os.listdir(dir_path)):
            if file.endswith(".csv"):
                self.read_data(os.path.join(dir_path, file))
        jsonld_file = os.path.join(dir_path, output_file_name)
        self.serialize_to_jsonld(jsonld_file)

    def generate_parcellation_atlas(self, label, name, description, specialization_of, revision_of, version, anatomical_space_label, anatomical_annotation_set_label, parcellation_terminology_label):
        """Create a ``ParcellationAtlas``, store it, and return it."""
        parcellation_atlas = ans.ParcellationAtlas(
            id=label,
            name=name,
            description=description,
            specialization_of=specialization_of,
            revision_of=revision_of,
            version=version,
            has_anatomical_space=anatomical_space_label,
            has_anatomical_annotation_set=anatomical_annotation_set_label,
            has_parcellation_terminology=parcellation_terminology_label,
        )
        self.parcellation_atlas.append(parcellation_atlas)
        return parcellation_atlas

    def generate_anatomical_space(self, label, name, description, version, image_dataset_label):
        """Create an ``AnatomicalSpace``, store it, and return it."""
        anat_space = ans.AnatomicalSpace(
            id=label,
            name=name,
            description=description,
            version=version,
            measures=image_dataset_label,
        )
        self.anatomical_space.append(anat_space)
        return anat_space

    def generate_anatomical_annotation_set(self, label, name, description, revision_of, version, anatomical_space_label):
        """Create an ``AnatomicalAnnotationSet``, store it, and return it."""
        anat_annot_set = ans.AnatomicalAnnotationSet(
            id=label,
            name=name,
            description=description,
            revision_of=revision_of,
            version=version,
            parameterizes=anatomical_space_label,
        )
        self.anatomical_annotation_set.append(anat_annot_set)
        return anat_annot_set

    def generate_parcellation_annotation(self, internal_identifier, anatomical_annotation_set_label, voxel_count):
        """Create a ``ParcellationAnnotation``, store it, and return it."""
        parcellation_annotation = ans.ParcellationAnnotation(
            internal_identifier=internal_identifier,
            part_of_anatomical_annotation_set=anatomical_annotation_set_label,
            voxel_count=voxel_count,
        )
        self.parcellation_annotation.append(parcellation_annotation)
        return parcellation_annotation

    def generate_parcellation_terminology(self, label, name, description, revision_of, version):
        """Create a ``ParcellationTerminology``, store it, and return it."""
        parcellation_terminology = ans.ParcellationTerminology(
            id=label,
            name=name,
            description=description,
            revision_of=revision_of,
            version=version,
        )
        self.parcellation_terminology.append(parcellation_terminology)
        return parcellation_terminology

    def generate_parcellation_term_set(self, label, name, description, parcellation_terminology_label, parcellation_term_set_order, parcellation_parent_term_set_label):
        """Create a ``ParcellationTermSet``, store it, and return it."""
        parcellation_term_set = ans.ParcellationTermSet(
            id=label,
            name=name,
            description=description,
            part_of_parcellation_terminology=parcellation_terminology_label,
            ordinal=parcellation_term_set_order,
            has_parent_parcellation_term_set=parcellation_parent_term_set_label,
        )
        self.parcellation_term_set.append(parcellation_term_set)
        return parcellation_term_set

    def generate_parcellation_term(self, name, symbol, description, parcellation_term_set_label, parcellation_terminology_label, parcellation_term_identifier, parcellation_term_order, parcellation_parent_term_set_label, parcellation_parent_term_identifier):
        """Create a ``ParcellationTerm``, store it, and return it.

        ``parcellation_terminology_label`` and
        ``parcellation_parent_term_set_label`` are accepted only so that the
        signature matches the CSV header; they are not stored.
        """
        parcellation_term = ans.ParcellationTerm(
            id=parcellation_term_identifier,
            name=name,
            symbol=symbol,
            description=description,
            part_of_parcellation_term_set=parcellation_term_set_label,
            ordinal=parcellation_term_order,
            has_parent_parcellation_term=parcellation_parent_term_identifier,
        )
        self.parcellation_term.append(parcellation_term)
        return parcellation_term

    def generate_parcellation_annotation_term_map(self, internal_identifier, anatomical_annotation_set_label, parcellation_term_identifier, parcellation_term_set_label, parcellation_terminology_label):
        """Create a ``ParcellationAnnotationTermMap``, store it, and return it.

        Only ``internal_identifier`` and ``parcellation_term_identifier`` are
        stored; the remaining parameters exist to match the CSV header.
        """
        parcellation_annotation_term_map = ans.ParcellationAnnotationTermMap(
            subject_parcellation_annotation=internal_identifier,
            subject_parcellation_term=parcellation_term_identifier,
        )
        self.parcellation_annotation_term_map.append(parcellation_annotation_term_map)
        return parcellation_annotation_term_map

    def generate_parcellation_color_scheme(self, label, name, description, revision_of, version, parcellation_terminology_label):
        """Create a ``ParcellationColorScheme``, store it, and return it."""
        parcellation_color_scheme = ans.ParcellationColorScheme(
            id=label,
            name=name,
            description=description,
            revision_of=revision_of,
            version=version,
            subject_parcellation_terminology=parcellation_terminology_label,
        )
        self.parcellation_color_scheme.append(parcellation_color_scheme)
        return parcellation_color_scheme

    def generate_parcellation_color_assignment(self, parcellation_color_scheme_label, parcellation_term_identifier, parcellation_terminology_label, color_hex_triplet):
        """Create a ``ParcellationColorAssignment``, store it, and return it.

        ``parcellation_terminology_label`` is accepted only to match the CSV
        header; it is not stored.
        """
        parcellation_color_assignment = ans.ParcellationColorAssignment(
            part_of_parcellation_color_scheme=parcellation_color_scheme_label,
            subject_parcellation_term=parcellation_term_identifier,
            color=color_hex_triplet,
        )
        self.parcellation_color_assignment.append(parcellation_color_assignment)
        return parcellation_color_assignment

    def generate_image_dataset(self, label, name, description, revision_of, version, x_direction, y_direction, z_direction, x_size, y_size, z_size, x_resolution, y_resolution, z_resolution, unit):
        """Create an ``ImageDataset``, store it, and return it.

        Direction strings are looked up on ``ans.ANATOMICALDIRECTION`` after
        replacing ``-`` with ``_``; ``unit`` is looked up on
        ``ans.DISTANCEUNIT``. An unknown name raises ``AttributeError``.
        """
        image_dataset = ans.ImageDataset(
            id=label,
            name=name,
            description=description,
            version=version,
            revision_of=revision_of,
            x_direction=getattr(ans.ANATOMICALDIRECTION, x_direction.replace('-', '_')),
            y_direction=getattr(ans.ANATOMICALDIRECTION, y_direction.replace('-', '_')),
            z_direction=getattr(ans.ANATOMICALDIRECTION, z_direction.replace('-', '_')),
            x_size=x_size,
            y_size=y_size,
            z_size=z_size,
            x_resolution=x_resolution,
            y_resolution=y_resolution,
            z_resolution=z_resolution,
            unit=getattr(ans.DISTANCEUNIT, unit),
        )
        self.image_dataset.append(image_dataset)
        return image_dataset

    def serialize_to_jsonld(
        self, output_file: str, exclude_none: bool = True, exclude_unset: bool = False
    ):
        """
        Serialize all generated objects and write them to the output file.

        Parameters:
            output_file (str): The path of the output file.
            exclude_none (bool): Accepted for interface compatibility;
                currently not applied to the output.
            exclude_unset (bool): Accepted for interface compatibility;
                currently not applied to the output.

        Returns:
            None
        """
        # Order of the groups determines the order of entries in "@graph".
        groups = (
            self.anatomical_annotation_set,
            self.anatomical_space,
            self.image_dataset,
            self.parcellation_annotation,
            self.parcellation_annotation_term_map,
            self.parcellation_atlas,
            self.parcellation_color_assignment,
            self.parcellation_color_scheme,
            self.parcellation_terminology,
            self.parcellation_term_set,
            self.parcellation_term,
        )
        data = [obj.__dict__ for group in groups for obj in group]

        output_data = {
            "@context": "https://raw.githubusercontent.com/brain-bican/models/main/jsonld-context-autogen/anatomical_structure.context.jsonld",
            "@graph": data,
        }
        # Build the text before opening the file so a serialization error
        # does not leave a truncated output file behind.
        serialized = json.dumps(output_data, indent=2)
        with open(output_file, "w", encoding="utf-8") as f:
            f.write(serialized)
+
+ + + + + +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/bkbit/data_translators/genome_annotation_translator.html b/_modules/bkbit/data_translators/genome_annotation_translator.html new file mode 100644 index 0000000..7eda0f8 --- /dev/null +++ b/_modules/bkbit/data_translators/genome_annotation_translator.html @@ -0,0 +1,1098 @@ + + + + + + + + bkbit.data_translators.genome_annotation_translator — bkbit documentation + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+
    +
  • + + +
  • +
  • +
+
+
+
+
+ +

Source code for bkbit.data_translators.genome_annotation_translator

+"""
+Module for downloading, parsing, and processing GFF3 files from NCBI and Ensembl repositories. This module provides functionality to:
+
+1. Download a GFF3 file from a specified URL and calculate its checksums.
+2. Parse the GFF3 file to extract gene annotations.
+3. Generate various metadata objects such as organism taxon, genome assembly, and genome annotation.
+4. Serialize the extracted information into JSON-LD format for further use.
+
+Classes:
+    Gff3: The Gff3 class is designed to handle the complete lifecycle of downloading, parsing, and processing GFF3 files from NCBI or Ensembl repositories. It extracts gene annotations and serializes the data into JSON-LD format.
+
+Functions:
+    gff2jsonld: The gff2jsonld function is responsible for creating GeneAnnotation objects from a provided GFF3 file and serializing the extracted information into the JSON-LD format.
+
+Usage:
+    The module can be run as a standalone script by executing it with appropriate arguments and options:
+    
+    ```
+    python genome_annotation_translator.py <content_url> -a <assembly_accession> -s <assembly_strain> -l <log_level> -f
+    ```
+    
+    The script will download the GFF3 file from the specified URL, parse it, and serialize the extracted information into JSON-LD format.
+
+Example:
+    ```
+    python genome_annotation_translator.py "https://example.com/path/to/gff3.gz" -a "GCF_000001405.39" -s "strain_name" -l "INFO" -f True
+    ```
+    
+Dependencies:
+    - re
+    - hashlib
+    - tempfile
+    - uuid
+    - urllib
+    - urllib.request
+    - urllib.parse
+    - os
+    - sys
+    - json
+    - datetime
+    - collections.defaultdict
+    - subprocess
+    - gzip
+    - tqdm
+    - click
+    - pkg_resources
+    - bkbit.models.genome_annotation as ga
+    - bkbit.utils.setup_logger as setup_logger
+    - bkbit.utils.load_json as load_json    
+"""
+
+import re
+import hashlib
+import tempfile
+import uuid
+import urllib
+import urllib.request
+from urllib.parse import urlparse
+import os
+import json
+from datetime import datetime
+from collections import defaultdict
+import subprocess
+import gzip
+import sys
+from tqdm import tqdm
+import click
+import pkg_resources
+from bkbit.models import genome_annotation as ga
+from bkbit.utils.setup_logger import setup_logger
+from bkbit.utils.load_json import load_json
+
+
+
+## CONSTANTS ##
+
+PREFIX_MAP = {  # CURIE prefix -> base IRI to which a local identifier is appended
+    "NCBITaxon": "http://purl.obolibrary.org/obo/NCBITaxon_",
+    "NCBIGene": "http://identifiers.org/ncbigene/",
+    "ENSEMBL": "http://identifiers.org/ensembl/",
+    "NCBIAssembly": "https://www.ncbi.nlm.nih.gov/assembly/",
+}
+NCBI_GENE_ID_PREFIX = "NCBIGene"  # each *_PREFIX value below is a key of PREFIX_MAP
+ENSEMBL_GENE_ID_PREFIX = "ENSEMBL"
+TAXON_PREFIX = "NCBITaxon"
+ASSEMBLY_PREFIX = "NCBIAssembly"
+BICAN_ANNOTATION_PREFIX = "bican:annotation-"  # prepended to the upper-cased genome label to form a GenomeAnnotation id
+GENOME_ANNOTATION_DESCRIPTION_FORMAT = (  # str.format() template: authority, taxon_scientific_name, genome_version
+    "{authority} {taxon_scientific_name} Annotation Release {genome_version}"
+)
+DEFAULT_FEATURE_FILTER = ("gene", "pseudogene", "ncRNA_gene")  # default feature_filter documented for Gff3.parse()
+DEFAULT_HASH = ("MD5",)  # digest algorithms recorded by default (one-element tuple)
+LOG_FILE_NAME = (  # timestamp is evaluated once, at import time; NOTE(review): contains ':' which Windows file names do not allow
+    "gff3_translator_" + datetime.now().strftime("%Y-%m-%d_%H:%M:%S") + ".log"
+)
+TAXON_DIR_PATH = "../utils/ncbi_taxonomy/"  # relative to this module; resolved through pkg_resources below
+SCIENTIFIC_NAME_TO_TAXONID_PATH = pkg_resources.resource_filename(__name__, TAXON_DIR_PATH + "scientific_name_to_taxid.json")  # scientific name -> taxon id
+TAXON_SCIENTIFIC_NAME_PATH = pkg_resources.resource_filename(__name__, TAXON_DIR_PATH + "taxid_to_scientific_name.json")  # taxon id -> scientific name
+TAXON_COMMON_NAME_PATH = pkg_resources.resource_filename(__name__, TAXON_DIR_PATH + "taxid_to_common_name.json")  # taxon id -> common name
+
+
+[docs] +class Gff3: + """ + The Gff3 class is responsible for downloading, parsing, and processing of GFF3 files from NCBI and Ensembl repositories. + + Attributes: + content_url (str): The URL of the GFF file. + assembly_accession (str): The ID of the genome assembly. + assembly_strain (str, optional): The strain of the genome assembly. Defaults to None. + log_level (str): The logging level. Defaults to 'WARNING'. + log_to_file (bool): Flag to log messages to a file. Defaults to False. + + Methods: + __init__(content_url, assembly_accession=None, assembly_strain=None, log_level="WARNING", log_to_file=False): + Initializes the Gff3 class with the provided parameters. + + parse_url(): + Parses the content URL and extracts information about the genome annotation. + + __download_gff_file(): + Downloads a GFF file from a given URL and calculates the MD5, SHA256, and SHA1 hashes. + + generate_organism_taxon(taxon_id): + Generates an organism taxon object based on the provided taxon ID. + + assign_authority_type(authority): + Assigns the authority type based on the given authority string. + + generate_genome_assembly(assembly_id, assembly_version, assembly_label, assembly_strain=None): + Generates a genome assembly object based on the provided parameters. + + generate_genome_annotation(genome_label, genome_version): + Generates a genome annotation object based on the provided parameters. + + generate_digest(hash_values, hash_functions=DEFAULT_HASH): + Generates checksum digests for the GFF file using the specified hash functions. + + __get_line_count(file_path): + Returns the line count of a file. + + parse(feature_filter=DEFAULT_FEATURE_FILTER): + Parses the GFF file and extracts gene annotations based on the provided feature filter. + + generate_ensembl_gene_annotation(attributes, curr_line_num): + Generates a GeneAnnotation object for Ensembl based on the provided attributes. 
+ + generate_ncbi_gene_annotation(attributes, curr_line_num): + Generates a GeneAnnotation object for NCBI based on the provided attributes. + + __get_attribute(attributes, attribute_name, curr_line_num): + Retrieves the value of a specific attribute from the given attributes dictionary. + + __resolve_ncbi_gene_annotation(new_gene_annotation, curr_line_num): + Resolves conflicts between existing and new gene annotations based on certain conditions. + + __merge_values(t): + Merges values from a list of lists into a dictionary of sets. + + serialize_to_jsonld(exclude_none=True, exclude_unset=False): + Serializes the object and either writes it to the specified output file or prints it to the CLI. + """ + +
def __init__(
    self,
    content_url,
    assembly_accession=None,
    assembly_strain=None,
    log_level="WARNING",
    log_to_file=False,
):
    """
    Parse the content URL, download the GFF3 file, and build the metadata objects.

    Parameters:
    - content_url (str): The URL of the GFF file.
    - assembly_accession (str, optional): The accession of the genome assembly.
      Required for Ensembl URLs; for NCBI URLs the accession found in the URL is used.
    - assembly_strain (str, optional): The strain of the genome assembly. Defaults to None.
    - log_level (str): The logging level passed to the logger setup. Defaults to 'WARNING'.
    - log_to_file (bool): Whether log messages are also written to a file. Defaults to False.

    Raises:
    - ValueError: If the URL is not supported, if an Ensembl URL is given without
      an assembly accession, or if no taxon ID can be determined.
    - SystemExit: With status 2 if the NCBI taxonomy files are missing.
    """
    self.logger = setup_logger(LOG_FILE_NAME, log_level, log_to_file)
    try:
        self.scientific_name_to_taxonid = load_json(SCIENTIFIC_NAME_TO_TAXONID_PATH)
        self.taxon_scientific_name = load_json(TAXON_SCIENTIFIC_NAME_PATH)
        self.taxon_common_name = load_json(TAXON_COMMON_NAME_PATH)
    except FileNotFoundError as e:
        self.logger.critical("NCBI Taxonomy not downloaded. Run 'bkbit download-ncbi-taxonomy' command first." )
        print(e)
        sys.exit(2)

    self.content_url = content_url

    ## STEP 1: Parse the content URL to get metadata
    url_metadata = self.parse_url()
    if url_metadata is None:
        self.logger.critical(
            "The provided content URL is not supported. Please provide a valid URL."
        )
        raise ValueError(
            "The provided content URL is not supported. Please provide a valid URL."
        )

    taxon_id = None
    assembly_id = None

    # Assign the authority type
    self.authority = url_metadata.get("authority")

    # Assign the taxon_id and assembly_id based on the authority
    if self.authority.value == ga.AuthorityType.NCBI.value:
        taxon_id = url_metadata.get("taxonid")
        assembly_id = url_metadata.get("assembly_accession")
    elif self.authority.value == ga.AuthorityType.ENSEMBL.value:
        # Ensembl URLs carry the scientific name with underscores, not a taxon ID.
        taxon_id = self.scientific_name_to_taxonid.get(
            url_metadata.get("scientific_name").replace("_", " ")
        )
        if assembly_accession is None:
            self.logger.critical(
                "The assembly ID is required for Ensembl URLs. Please provide the assembly ID."
            )
            raise ValueError(
                "The assembly ID is required for Ensembl URLs. Please provide the assembly ID."
            )
        assembly_id = assembly_accession

    if taxon_id is None:
        # Without this check the label concatenation below fails with an
        # uninformative TypeError.
        self.logger.critical(
            "The taxon ID could not be determined from the provided content URL."
        )
        raise ValueError(
            "The taxon ID could not be determined from the provided content URL."
        )

    # The version is the part after the first '.' of the accession; an
    # accession without a '.' has no version.
    accession_parts = assembly_id.split(".")
    assembly_version = accession_parts[1] if len(accession_parts) > 1 else None
    assembly_label = url_metadata.get("assembly_name")
    genome_version = url_metadata.get("release_version")
    genome_label = self.authority.value + "-" + taxon_id + "-" + genome_version

    ## STEP 2: Download the GFF file
    self.gff_file, hash_values = self.__download_gff_file()

    ## STEP 3: Generate the organism taxon, genome assembly, checksums, and genome annotation objects
    # Order matters: the assembly needs the taxon, and the annotation needs
    # the taxon, the assembly, and the checksums.
    self.organism_taxon = self.generate_organism_taxon(taxon_id)
    self.genome_assembly = self.generate_genome_assembly(
        assembly_id, assembly_version, assembly_label, assembly_strain
    )
    self.checksums = self.generate_digest(hash_values, DEFAULT_HASH)
    self.genome_annotation = self.generate_genome_annotation(
        genome_label, genome_version
    )

    self.gene_annotations = {}
+ + +
def parse_url(self):
    """
    Extract genome annotation metadata from ``self.content_url``.

    The host decides which layout is tried: a host containing "ncbi" is
    matched against the NCBI annotation-release path layout, otherwise a host
    containing "ensembl" is matched against the Ensembl release path layout.

    Returns:
        dict or None: ``None`` when the URL matches neither layout. Otherwise
        a dictionary with:
        - 'authority': The authority type (NCBI or ENSEMBL).
        - 'release_version': The release version of the genome annotation.
        - 'assembly_name': The name of the assembly.
        - 'taxonid' and 'assembly_accession' (NCBI URLs only).
        - 'scientific_name' (ENSEMBL URLs only).
    """
    # NCBI : [assembly accession.version]_[assembly name]_[content type].[optional format]
    # ENSEMBL : <species>.<assembly>.<_version>.gff3.gz -> organism full name, assembly name, genome version
    ncbi_pattern = r"/genomes/all/annotation_releases/(\d+)(?:/(\d+))?/(GCF_\d+\.\d+)[_-]([^/]+)/(GCF_\d+\.\d+)[_-]([^/]+)_genomic\.gff\.gz"
    ensembl_pattern = (
        r"/pub/release-(\d+)/gff3/([^/]+)/([^/.]+)\.([^/.]+)\.([^/.]+)\.gff3\.gz"
    )

    url_parts = urlparse(self.content_url)
    host = url_parts.netloc
    url_path = url_parts.path

    if "ncbi" in host:
        found = re.search(ncbi_pattern, url_path)
        if not found:
            return None
        taxon, release, accession, dir_suffix, _, assembly_name = found.groups()
        return {
            "authority": ga.AuthorityType.NCBI,
            "taxonid": taxon,
            # Without a release directory, the suffix of the accession
            # directory is used as the release version.
            "release_version": release or dir_suffix,
            "assembly_accession": accession,
            "assembly_name": assembly_name,
        }

    if "ensembl" in host:
        found = re.search(ensembl_pattern, url_path)
        if not found:
            return None
        return {
            "authority": ga.AuthorityType.ENSEMBL,
            "release_version": found.group(1),
            "scientific_name": found.group(3),
            "assembly_name": found.group(4),
        }

    return None
def __download_gff_file(self):
    """
    Download the GFF file at ``self.content_url`` and hash it while streaming.

    The data is written to a temporary ``.gz`` file that is NOT deleted
    automatically (``delete=False``); the caller owns the returned path.

    Returns:
        tuple: A tuple containing the path to the downloaded gzip file and a dictionary
        with the MD5, SHA256, and SHA1 hashes of the file (hex digests, keyed
        by "MD5", "SHA256", and "SHA1").
    """
    block_size = 1024  # 1 Kilobyte

    md5_hash = hashlib.md5()
    sha256_hash = hashlib.sha256()
    sha1_hash = hashlib.sha1()

    # The response is a context manager; closing it releases the connection
    # even when reading or writing fails part-way.
    with urllib.request.urlopen(self.content_url) as response:
        # A missing content-length header yields a total of 0.
        total_size = int(response.headers.get("content-length", 0))

        with tempfile.NamedTemporaryFile(suffix=".gz", delete=False) as f_gzip:
            gzip_file_path = f_gzip.name

            progress_bar = tqdm(
                total=total_size,
                unit="iB",
                unit_scale=True,
                desc="Downloading GFF file",
            )
            try:
                # Stream in chunks: write to disk and feed every hash in one pass.
                while True:
                    data = response.read(block_size)
                    if not data:
                        break
                    f_gzip.write(data)
                    md5_hash.update(data)
                    sha256_hash.update(data)
                    sha1_hash.update(data)
                    progress_bar.update(len(data))
            finally:
                progress_bar.close()

    # Return the path to the temporary file and all three digests
    return gzip_file_path, {
        "MD5": md5_hash.hexdigest(),
        "SHA256": sha256_hash.hexdigest(),
        "SHA1": sha1_hash.hexdigest(),
    }
def generate_organism_taxon(self, taxon_id: str):
    """
    Build the organism taxon object for a taxon ID.

    The scientific and common names are looked up in the taxonomy tables
    loaded on the instance; an ID missing from either table raises KeyError.

    Args:
        taxon_id (str): The taxon ID of the organism.

    Returns:
        ga.OrganismTaxon: The generated organism taxon object.
    """
    curie = TAXON_PREFIX + ":" + taxon_id
    scientific_name = self.taxon_scientific_name[taxon_id]
    common_name = self.taxon_common_name[taxon_id]
    taxon_iri = PREFIX_MAP[TAXON_PREFIX] + taxon_id
    return ga.OrganismTaxon(
        id=curie,
        full_name=scientific_name,
        name=common_name,
        iri=taxon_iri,
    )
+ + +
def assign_authority_type(self, authority: str):
    """
    Return the authority type that matches the given authority string.

    The comparison is case-insensitive against the values of
    ``ga.AuthorityType.NCBI`` and ``ga.AuthorityType.ENSEMBL``.

    Args:
        authority (str): The authority string to be assigned.

    Returns:
        ga.AuthorityType: The corresponding authority type.

    Raises:
        ValueError: If the authority is not supported. Only NCBI and Ensembl authorities are supported.
    """
    normalized = authority.upper()
    if normalized == ga.AuthorityType.NCBI.value:
        return ga.AuthorityType.NCBI
    if normalized == ga.AuthorityType.ENSEMBL.value:
        return ga.AuthorityType.ENSEMBL
    self.logger.critical(
        "Authority %s is not supported. Please use NCBI or Ensembl.", authority
    )
    # Report the rejected argument, not self.authority, which may be unset
    # or hold a different (valid) value.
    raise ValueError(
        f"Authority {authority} is not supported. Please use NCBI or Ensembl."
    )
+ + +
def generate_genome_assembly(
    self,
    assembly_id: str,
    assembly_version: str,
    assembly_label: str,
    assembly_strain: str = None,
):
    """
    Build the genome assembly record, linked to ``self.organism_taxon``.

    Parameters:
        assembly_id (str): The ID of the assembly.
        assembly_version (str): The version of the assembly.
        assembly_label (str): The label of the assembly; stored as the record's name.
        assembly_strain (str, optional): The strain of the assembly. Defaults to None.

    Returns:
        ga.GenomeAssembly: The generated genome assembly object.
    """
    taxon = self.organism_taxon
    assembly_fields = {
        "id": ASSEMBLY_PREFIX + ":" + assembly_id,
        "in_taxon": [taxon.id],
        "in_taxon_label": taxon.full_name,
        "version": assembly_version,
        "name": assembly_label,
        "strain": assembly_strain,
    }
    return ga.GenomeAssembly(**assembly_fields)
+ + +
def generate_genome_annotation(self, genome_label: str, genome_version: str):
    """
    Build the genome annotation record for this GFF file.

    The record references the checksums, content URL, genome assembly, organism taxon and
    authority already stored on ``self``.

    Args:
        genome_label (str): The label of the genome; upper-cased to form the record ID.
        genome_version (str): The version of the genome.

    Returns:
        ga.GenomeAnnotation: The generated genome annotation.
    """
    annotation_id = BICAN_ANNOTATION_PREFIX + genome_label.upper()
    checksum_ids = [checksum.id for checksum in self.checksums]
    taxon = self.organism_taxon
    description = GENOME_ANNOTATION_DESCRIPTION_FORMAT.format(
        authority=self.authority.value,
        taxon_scientific_name=taxon.full_name,
        genome_version=genome_version,
    )
    return ga.GenomeAnnotation(
        id=annotation_id,
        digest=checksum_ids,
        content_url=[self.content_url],
        reference_assembly=self.genome_assembly.id,
        version=genome_version,
        in_taxon=[taxon.id],
        in_taxon_label=taxon.full_name,
        description=description,
        authority=self.authority,
    )
+ + +
def generate_digest(
    self,
    hash_values: dict,
    hash_functions: tuple[str] = DEFAULT_HASH,
) -> list[ga.Checksum]:
    """
    Wrap precomputed hash values of the GFF file in ``ga.Checksum`` objects.

    Args:
        hash_values (dict): Maps an algorithm name (``"SHA256"``, ``"MD5"``, ``"SHA1"``)
            to the hex digest already computed for the file. A missing entry produces a
            checksum whose value is ``None``.
        hash_functions (tuple[str]): Names of the algorithms to emit checksums for.
            Names are stripped and upper-cased before matching.

    Returns:
        list[ga.Checksum]: One Checksum per supported algorithm requested, each with a
            freshly generated ``urn:uuid:`` identifier.

    Note:
        An unsupported algorithm name is logged as an error and skipped; no exception is
        raised.
    """
    supported = {
        digest_type.name: digest_type
        for digest_type in (
            ga.DigestType.SHA256,
            ga.DigestType.MD5,
            ga.DigestType.SHA1,
        )
    }

    checksums = []
    for hash_type in hash_functions:
        hash_type = hash_type.strip().upper()
        digest_type = supported.get(hash_type)
        if digest_type is None:
            self.logger.error(
                "Hash algorithm %s is not supported. Please use SHA256, MD5, or SHA1.",
                hash_type,
            )
            continue

        # Each checksum gets its own random (version 4) UUID wrapped in a URN.
        checksums.append(
            ga.Checksum(
                id=f"urn:uuid:{uuid.uuid4()}",
                checksum_algorithm=digest_type,
                value=hash_values.get(hash_type),
            )
        )
    return checksums
def __get_line_count(self, file_path):
    """
    Count the lines of a file.

    The file is read in binary mode and counted in-process, so this works on platforms
    without the ``wc`` utility and also counts a final line that has no trailing newline.

    Args:
        file_path (str): The path to the file.

    Returns:
        int: The number of lines in the file (0 for an empty file).

    Raises:
        OSError: If the file cannot be opened (e.g. ``FileNotFoundError``).
    """
    with open(file_path, "rb") as handle:
        return sum(1 for _ in handle)
def parse(self, feature_filter: tuple[str] = DEFAULT_FEATURE_FILTER):
    """
    Parse the GFF3 file and collect gene annotations for the selected feature types.

    A ``.gz`` input is first decompressed into a temporary file, which is removed again
    when parsing finishes. Comment/directive lines and blank lines are skipped. Rows that
    do not have exactly 9 tab-separated columns are logged and skipped. For every
    remaining row whose type (column 3) is in ``feature_filter`` a gene annotation is
    generated according to ``self.genome_annotation.authority`` and stored in
    ``self.gene_annotations``.

    Args:
        feature_filter (tuple[str]): Tuple of feature types to include in the gene annotations.

    Raises:
        FileNotFoundError: If the GFF file does not exist.

    Returns:
        None
    """
    gff_file = self.gff_file
    decompressed_file = None
    try:
        if self.gff_file.endswith(".gz"):
            # Stream the decompressed data to a temporary file in chunks so that large
            # annotation files are never held in memory all at once.
            with gzip.open(self.gff_file, "rb") as f_in:
                with tempfile.NamedTemporaryFile(delete=False) as f_out:
                    decompressed_file = f_out.name
                    while True:
                        chunk = f_in.read(1024 * 1024)
                        if not chunk:
                            break
                        f_out.write(chunk)
            gff_file = decompressed_file

        if not os.path.isfile(gff_file):
            raise FileNotFoundError(f"File {gff_file} does not exist.")

        with open(gff_file, "r", encoding="utf-8") as file:
            with tqdm(
                total=self.__get_line_count(gff_file), desc="Parsing GFF3 File"
            ) as progress_bar:
                # enumerate() keeps the reported line numbers equal to the physical line
                # numbers, including for skipped blank lines.
                for curr_line_num, line_raw in enumerate(file, start=1):
                    progress_bar.update(1)
                    line_strip = line_raw.strip()

                    if curr_line_num == 1 and not line_strip.startswith(
                        "##gff-version 3"
                    ):
                        # Only warn; the line is still examined below in case it is
                        # already a feature row.
                        self.logger.warning(
                            '"##gff-version 3" missing from the first line of the file. The given file may not be a valid GFF3 file.'
                        )

                    if not line_strip:  # blank line
                        continue
                    if line_strip.startswith("#"):
                        # Directives ("##") and comments ("#").
                        # TODO: parse more metadata
                        continue

                    # line may be a feature or unknown
                    tokens = list(map(str.strip, line_raw.split("\t")))
                    if len(tokens) != 9:
                        self.logger.warning(
                            "Line %s: Features are expected 9 columns, found %s.",
                            curr_line_num,
                            len(tokens),
                        )
                        # The type (column 3) and attributes (column 9) cannot be
                        # trusted on a malformed row.
                        continue

                    # only look at rows that have a type that is included in feature_filter
                    if tokens[2] not in feature_filter:
                        continue

                    attributes = self.__merge_values(
                        tuple(a.split("=") for a in tokens[8].split(";"))
                    )
                    # TODO: Write cleaner code that calls respective generate function based on the authority automatically
                    if self.genome_annotation.authority == ga.AuthorityType.ENSEMBL:
                        gene_annotation = self.generate_ensembl_gene_annotation(
                            attributes, curr_line_num
                        )
                        if gene_annotation is not None:
                            # Ensembl entries are keyed by the annotation object itself,
                            # matching the membership test in
                            # generate_ensembl_gene_annotation.
                            self.gene_annotations[gene_annotation] = gene_annotation
                    elif self.genome_annotation.authority == ga.AuthorityType.NCBI:
                        gene_annotation = self.generate_ncbi_gene_annotation(
                            attributes, curr_line_num
                        )
                        if gene_annotation is not None:
                            self.gene_annotations[gene_annotation.id] = gene_annotation
    finally:
        # Remove the decompressed copy; it is only needed while parsing.
        if decompressed_file is not None and os.path.exists(decompressed_file):
            os.remove(decompressed_file)
+ + +
def generate_ensembl_gene_annotation(self, attributes, curr_line_num):
    """
    Generates a GeneAnnotation object for Ensembl based on the provided attributes.

    The stable ID is taken from the ``gene_id`` attribute with any ``.version`` suffix
    removed. ``Name``, ``description`` and ``biotype`` are optional and are left as
    ``None`` when they cannot be read unambiguously.

    Args:
        attributes (dict): A dictionary containing the attributes of the gene.
        curr_line_num (int): The line number of the current row in the input file.

    Returns:
        GeneAnnotation or None: The generated GeneAnnotation object, or None if the row
            has no usable ``gene_id`` or the annotation is already present in
            ``self.gene_annotations``.
    """
    stable_id = self.__get_attribute(attributes, "gene_id", curr_line_num)
    if not stable_id:
        # Without a gene_id no identifier can be built for the annotation.
        self.logger.error(
            "Line %s: No GeneAnnotation object created for this row due to missing or ambiguous gene_id attribute.",
            curr_line_num,
        )
        return None
    stable_id = stable_id.split(".")[0]

    # Check and validate the name attribute
    name = self.__get_attribute(attributes, "Name", curr_line_num)

    # Check and validate the description attribute
    description = self.__get_attribute(attributes, "description", curr_line_num)

    # Check and validate the biotype attribute
    biotype = self.__get_attribute(attributes, "biotype", curr_line_num)

    gene_annotation = ga.GeneAnnotation(
        id=ENSEMBL_GENE_ID_PREFIX + ":" + stable_id,
        source_id=stable_id,
        symbol=name,
        name=name,
        description=description,
        molecular_type=biotype,
        referenced_in=self.genome_annotation.id,
        in_taxon=[self.organism_taxon.id],
        in_taxon_label=self.organism_taxon.full_name,
    )
    # handle duplicates
    if gene_annotation not in self.gene_annotations:
        return gene_annotation
    return None
+ + +
def generate_ncbi_gene_annotation(self, attributes, curr_line_num):
    """
    Generates a GeneAnnotation object for NCBI based on the provided attributes.

    The stable ID is the single ``GeneID`` found in the ``Dbxref`` attribute (version
    suffix removed). Rows without ``Dbxref``, or with zero or several distinct GeneIDs,
    are logged and skipped. Dbxref entries that are not of the form ``key:value`` are
    ignored.

    Args:
        attributes (dict): A dictionary containing the attributes of the gene.
        curr_line_num (int): The line number of the current row in the input file.

    Returns:
        GeneAnnotation or None: The annotation to store for this row, or None if the row
            is unusable, duplicates an existing entry, or loses the conflict resolution
            against an existing entry with the same ID.
    """
    if "Dbxref" not in attributes:
        self.logger.error(
            "Line %s: No GeneAnnotation object created for this row due to missing dbxref attribute.",
            curr_line_num,
        )
        return None

    dbxref = {t.strip() for s in attributes["Dbxref"] for t in s.split(",")}
    geneid_values = set()
    for reference in dbxref:
        # partition() tolerates entries without ":" (e.g. an empty entry produced by a
        # trailing comma) instead of failing on tuple unpacking.
        key, separator, value = reference.partition(":")
        if separator and key == "GeneID":
            geneid_values.add(value.split(".")[0])

    stable_id = geneid_values.pop() if len(geneid_values) == 1 else None
    if not stable_id:
        self.logger.error(
            "Line %s: No GeneAnnotation object created for this row due to number of GeneIDs provided in dbxref attribute is not equal to one.",
            curr_line_num,
        )
        return None

    # Check and validate the name attribute
    name = self.__get_attribute(attributes, "Name", curr_line_num)

    # Check and validate the description attribute
    description = self.__get_attribute(attributes, "description", curr_line_num)

    # Check and validate the biotype attribute
    biotype = self.__get_attribute(attributes, "gene_biotype", curr_line_num)

    # Parse synonyms
    synonyms = []
    if "gene_synonym" in attributes:
        # Sorting is not required, but it makes the output more predictable and
        # therefore easier to test.
        synonyms = sorted(
            {t.strip() for s in attributes["gene_synonym"] for t in s.split(",")}
        )
    else:
        self.logger.debug(
            "Line %s: synonym is not set for this row's GeneAnnotation object due to missing gene_synonym attribute.",
            curr_line_num,
        )

    gene_annotation = ga.GeneAnnotation(
        id=NCBI_GENE_ID_PREFIX + ":" + stable_id,
        source_id=stable_id,
        symbol=name,
        name=name,
        description=description,
        molecular_type=biotype,
        referenced_in=self.genome_annotation.id,
        in_taxon=[self.organism_taxon.id],
        in_taxon_label=self.organism_taxon.full_name,
        synonym=synonyms,
    )

    if gene_annotation.id not in self.gene_annotations:
        return gene_annotation

    existing = self.gene_annotations[gene_annotation.id]
    if gene_annotation != existing:
        # Same ID, different content: let the resolver pick a winner.
        return self.__resolve_ncbi_gene_annotation(gene_annotation, curr_line_num)
    if name != existing.name:
        self.logger.debug(
            "Line %s: GeneAnnotation object with id %s already exists with a different name. Current name: %s, Existing name: %s",
            curr_line_num,
            stable_id,
            name,
            existing.name,
        )
    return None
def __get_attribute(self, attributes, attribute_name, curr_line_num):
    """
    Get the single value of an attribute from the attributes dictionary.

    The dictionary is only read; it is never modified.

    Args:
        attributes (dict): Maps attribute names to sets of values.
        attribute_name (str): The name of the attribute to retrieve.
        curr_line_num (int): The current line number for logging purposes.

    Returns:
        str or None: The value if the attribute is present with exactly one value.
            ``description`` values are URL-unquoted and stripped of a trailing
            ``[Source...]`` tag. For every other attribute a value containing ","
            is rejected. None is returned (and the reason logged at debug level) when
            the attribute is missing, has several values, or is rejected.
    """
    if attribute_name not in attributes:
        self.logger.debug(
            "Line %s: %s not set for this row's GeneAnnotation object due to missing %s attribute.",
            curr_line_num,
            attribute_name,
            attribute_name,
        )
        return None

    values = attributes[attribute_name]
    if len(values) != 1:
        self.logger.debug(
            "Line %s: %s not set for this row's GeneAnnotation object due to more than one %s provided.",
            curr_line_num,
            attribute_name,
            attribute_name,
        )
        return None

    # Read the only element without removing it from the caller's set.
    raw_value = next(iter(values))

    if attribute_name == "description":
        return re.sub(r"\s*\[Source.*?\]", "", urllib.parse.unquote(raw_value))

    if "," in raw_value:
        self.logger.debug(
            'Line %s: %s not set for this row\'s GeneAnnotation object due to value of %s attribute containing ",".',
            curr_line_num,
            attribute_name,
            attribute_name,
        )
        return None
    return raw_value


def __resolve_ncbi_gene_annotation(self, new_gene_annotation, curr_line_num):
    """
    Decide between an existing and a new gene annotation that share the same ID.

    Rules, applied in order:
      1. An annotation with a description beats one without.
      2. An annotation with a molecular type beats one without.
      3. A protein-coding annotation wins; the existing one is preferred if it is
         protein-coding.
    If none of the rules applies the conflict is logged as an error and the existing
    annotation is kept.

    Args:
        new_gene_annotation (GeneAnnotation): The new gene annotation to be resolved.
        curr_line_num (int): The current line number in the file.

    Returns:
        GeneAnnotation or None: ``new_gene_annotation`` if it should replace the existing
            entry, otherwise None (existing entry kept, including the unresolvable case).
    """
    existing_gene_annotation = self.gene_annotations[new_gene_annotation.id]

    existing_has_description = existing_gene_annotation.description is not None
    new_has_description = new_gene_annotation.description is not None
    if existing_has_description and not new_has_description:
        return None
    if new_has_description and not existing_has_description:
        return new_gene_annotation

    existing_has_type = existing_gene_annotation.molecular_type is not None
    new_has_type = new_gene_annotation.molecular_type is not None
    if existing_has_type and not new_has_type:
        return None
    if new_has_type and not existing_has_type:
        return new_gene_annotation

    if existing_gene_annotation.molecular_type == ga.BioType.protein_coding.value:
        return None
    if new_gene_annotation.molecular_type == ga.BioType.protein_coding.value:
        return new_gene_annotation

    self.logger.error(
        "Line %s: Unable to resolve duplicates for GeneID: %s.\nexisting gene: %s\nnew gene: %s",
        curr_line_num,
        new_gene_annotation.id,
        existing_gene_annotation,
        new_gene_annotation,
    )
    return None


def __merge_values(self, t):
    """
    Merge ``[key, value, ...]`` sequences into a dictionary of value sets.

    Keys and values are stripped of surrounding whitespace. A sequence that contains only
    a key (no values) does not create an entry.

    Args:
        t (iterable): Sequences whose first element is the key and whose remaining
            elements are values for that key.

    Returns:
        dict: A ``defaultdict(set)`` where each key maps to the set of its values.
    """
    result = defaultdict(set)
    for lst in t:
        key = lst[0].strip()
        for value in lst[1:]:
            result[key].add(value.strip())
    return result
def serialize_to_jsonld(
    self, exclude_none: bool = True, exclude_unset: bool = False
):
    """
    Print the collected objects to standard output as a JSON-LD document.

    The ``@graph`` lists, in order: the organism taxon, the genome assembly, the genome
    annotation, every checksum and every gene annotation, each converted with its
    ``dict()`` method.

    Parameters:
        exclude_none (bool): Whether to exclude None values in the output.
        exclude_unset (bool): Whether to exclude unset values in the output.

    Returns:
        None
    """
    dump_options = {"exclude_none": exclude_none, "exclude_unset": exclude_unset}

    records = [
        self.organism_taxon,
        self.genome_assembly,
        self.genome_annotation,
        *self.checksums,
        *self.gene_annotations.values(),
    ]
    graph = [record.dict(**dump_options) for record in records]

    document = {
        "@context": "https://raw.githubusercontent.com/brain-bican/models/main/jsonld-context-autogen/genome_annotation.context.jsonld",
        "@graph": graph,
    }
    print(json.dumps(document, indent=2))
+
@click.command()
##ARGUMENTS##
# Argument #1: The URL of the GFF file
@click.argument("content_url", type=str)

##OPTIONS##
# Option #1: The ID of the genome assembly
# NOTE: only the short flag "-a" is declared; the dash-less "assembly_accession" is the
# name of the Python parameter, not a command-line flag.
@click.option("assembly_accession", "-a", required=False, default=None, type=str)
# Option #2: The strain of the genome assembly
@click.option(
    "--assembly_strain",
    "-s",
    required=False,
    default=None,
    type=str,
    help="The strain of the genome assembly. Defaults to None.",
)
# Option #3: The log level
@click.option(
    "--log_level",
    "-l",
    required=False,
    default="WARNING",
    help="The log level. Defaults to WARNING.",
)
# Option #4: Log to file
@click.option(
    "--log_to_file",
    "-f",
    is_flag=True,
    help="Log to a file instead of the console.",
)
def gff2jsonld(content_url, assembly_accession, assembly_strain, log_level, log_to_file):
    '''
    Creates GeneAnnotation objects from a GFF3 file and serializes them to JSON-LD format.
    '''
    # The docstring above doubles as the command's --help text, so it is kept short.
    # Build the translator from the CLI values, passed positionally in declaration order.
    gff3 = Gff3(
        content_url, assembly_accession, assembly_strain, log_level, log_to_file
    )
    # Read the GFF3 rows into gene annotations, then print the JSON-LD document to stdout.
    gff3.parse()
    gff3.serialize_to_jsonld()


# Allow running this module directly as a script.
if __name__ == "__main__":
    gff2jsonld()
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/bkbit/data_translators/specimen_metadata_translator.html b/_modules/bkbit/data_translators/specimen_metadata_translator.html new file mode 100644 index 0000000..76bd98f --- /dev/null +++ b/_modules/bkbit/data_translators/specimen_metadata_translator.html @@ -0,0 +1,159 @@ + + + + + + + + bkbit.data_translators.specimen_metadata_translator — bkbit documentation + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+
    +
  • + + +
  • +
  • +
+
+
+
+
+ +

Source code for bkbit.data_translators.specimen_metadata_translator

+import csv
+import click
+
+
def extract_specimen_ids(csv_file_path):
    """
    Reads a CSV file and prints the 'Specimen ID' values that start with "LA".

    Rows whose 'Specimen ID' cell is missing or empty are skipped.

    Parameters:
    - csv_file_path: str, path to the input CSV file.

    Raises:
    - ValueError: if the file has no header row or no 'Specimen ID' column.
    """
    # newline='' lets the csv module handle line endings itself, as its documentation
    # recommends (matters for quoted fields that contain newlines).
    with open(csv_file_path, 'r', encoding='utf-8', newline='') as csv_file:
        reader = csv.DictReader(csv_file)

        # Ensure 'Specimen ID' column exists in the CSV. ``fieldnames`` is None for an
        # empty file, which is treated the same as a missing column.
        if 'Specimen ID' not in (reader.fieldnames or ()):
            raise ValueError("The CSV file does not contain the 'Specimen ID' column.")

        # Print matching 'Specimen ID' values to the command line
        for row in reader:
            specimen_id = row['Specimen ID']
            # A row shorter than the header yields None for the missing cells.
            if specimen_id and specimen_id.startswith("LA"):
                print(specimen_id)
# ``click.command`` is called explicitly: the bare-decorator form is only accepted by
# click 8.1 and later, and the other commands in this package use the called form.
@click.command()
@click.argument('specimen_metadata_file_path')
def list_library_aliquot(specimen_metadata_file_path):
    """
    Extracts and prints all the Library Aliquot NHash IDs from Data Catalog's specimen metadata file.

    Args:
        specimen_metadata_file_path (str): Path to the specimen metadata file.
    """
    # The docstring above doubles as the command's --help text.
    extract_specimen_ids(specimen_metadata_file_path)


if __name__ == '__main__':
    list_library_aliquot()
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/bkbit/model_converters/sheets_converter.html b/_modules/bkbit/model_converters/sheets_converter.html new file mode 100644 index 0000000..b3786f5 --- /dev/null +++ b/_modules/bkbit/model_converters/sheets_converter.html @@ -0,0 +1,438 @@ + + + + + + + + bkbit.model_converters.sheets_converter — bkbit documentation + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+
    +
  • + + +
  • +
  • +
+
+
+
+
+ +

Source code for bkbit.model_converters.sheets_converter

+import os, sys, shutil
+import csv, yaml
+from io import StringIO
+import click
+from pathlib import Path
+import requests
+
+from linkml_runtime.linkml_model.meta import SchemaDefinition, SlotDefinition
+from linkml_runtime.utils.schema_as_dict import schema_as_dict
+from linkml_runtime.utils.schemaview import SchemaView
+from schemasheets import schemamaker as sm
+import pandas as pd
+
+
+SIMPLE_TYPES_NOSTR = ["integer", "float", "boolean", "date", "datetime"]
+
+
+
def fix_tsv_files(tsv_files, inlined=False, ref_by_ind=True):
    """
    Fix all the tsv files: modify the range column and add any_of, exactly_one_of and
    inlined columns. The fixed copies are written to a ``fixed_sheets`` directory next to
    the first input file; the inputs themselves are not changed.

    Each file is expected to start with three header lines (the human-readable header and
    two schemasheets descriptor lines).

    :param tsv_files: list of tsv files
    :param inlined: if True, the inlined column will be added
    :param ref_by_ind: if True (and if inlined is True) the range will be modified
        (adding string) to be able to reference by index
    :return: list of fixed tsv files (empty if no input files were given)
    """
    tsv_files = list(tsv_files)
    if not tsv_files:
        return []

    tsv_file_fixed_list = []
    dir_fixed = Path(tsv_files[0]).parent / "fixed_sheets"
    dir_fixed.mkdir(exist_ok=True)
    for tsv_file in tsv_files:
        # TODO: check if the file indeed has 3 lines of headers
        tsv_file_fixed = dir_fixed / Path(tsv_file).name
        tsv_file_fixed_list.append(str(tsv_file_fixed))

        with open(tsv_file, 'r', newline='') as file:
            # cleaned of any ^M characters
            content = file.read().replace('\r', '')
        # convert the cleaned content back to a file-like object
        data = StringIO(content)

        # read the file-like object to a pandas dataframe
        df = pd.read_csv(data, header=[0, 1, 2], delimiter='\t')

        # spaces are not valid inside mapping identifiers, so they are percent-encoded
        for col in [ind for ind in df.columns if "mapping" in ind[1].lower()]:
            df[col] = df[col].str.replace(" ", "%20")

        # finding the range column, and other columns that are relevant for the following changes
        range_ind, range_col = None, None
        multival_col, exactlyone_col, valset_col = None, None, None
        for ind, col in enumerate(df.columns):
            if "range" in col[1].lower().strip():
                range_ind = ind
                range_col = col
            elif "multivalued" in col[0].lower().strip():
                multival_col = col
            elif "exactlyoneof" in col[0].lower().strip():
                exactlyone_col = col
            elif "permissible" in col[0].lower().strip():
                valset_col = col

        # Everything below rewrites the range column, so it only applies when one exists.
        if range_ind is not None:
            any_of_col = (f"{range_col[0]}: any_of", "any_of", "inner_key: range")
            exactly_one_of_col = (f"{range_col[0]}: exactly_one_of", "exactly_one_of", "inner_key: range")
            if inlined:
                inline_col = ("inlined", "inlined", "")
            else:  # ignoring if inlined is set to False
                inline_col = ("inlined", "ignore", "")
            df.insert(range_ind + 1, any_of_col, None)
            df.insert(range_ind + 2, exactly_one_of_col, None)
            df.insert(range_ind + 3, inline_col, None)

            def fix_range(row):
                """ Fixing the range column, moving some ranges to any_of or exactly_one_of columns
                It also depends on the values of ref_by_ind and inlined.
                """
                if pd.isna(row[range_col]):
                    return row
                # do not add string to range if range already has string or all the elements are simple types
                elif "string" in row[range_col] or all(el in SIMPLE_TYPES_NOSTR for el in row[range_col].split("|")):
                    pass
                # checking if the range is not value set (TODO: in the future might need modification)
                # NOTE(review): an empty cell is read as NaN, which is truthy - confirm this
                # is the intended behavior when a "permissible" column exists.
                elif valset_col is not None and row[valset_col]:
                    pass
                elif inlined:  # setting inlined to True for range that have complex types
                    row[inline_col] = True
                    if ref_by_ind:  # adding string to the range to be able to reference by index
                        row[range_col] = row[range_col] + "|string"

                # checking if range has multiple values, and if it should be treated as any_of or exactly_one_of
                if "|" in row[range_col]:
                    # Both marker columns must exist before they can be consulted.
                    is_exactly_one_of = (
                        multival_col is not None
                        and exactlyone_col is not None
                        and row[multival_col] is True
                        and row[exactlyone_col] is True
                    )
                    if is_exactly_one_of:
                        row[exactly_one_of_col] = row[range_col]
                    else:
                        row[any_of_col] = row[range_col]
                    row[range_col] = None
                return row

            df = df.apply(fix_range, axis=1)

        df.to_csv(tsv_file_fixed, sep='\t', index=False)

        # fixing the headers that are saved by pandas: empty header cells come back as
        # "Unnamed: ..." placeholders and must be blanked again in header lines 2 and 3
        with open(tsv_file_fixed, 'r', encoding='utf-8') as file:
            lines = file.readlines()
        for header_ind in (1, 2):
            # strip the line ending first so that the last cell is compared without it
            # and exactly one newline is written back
            cells = lines[header_ind].rstrip("\n").split("\t")
            lines[header_ind] = "\t".join("" if el.startswith("Unnamed") else el for el in cells) + "\n"
        with open(tsv_file_fixed, 'w', encoding='utf-8') as file:
            file.writelines(lines)

    return tsv_file_fixed_list
+ + + + +
def bican_fix(schema: SchemaDefinition) -> SchemaDefinition:
    """
    Apply BICAN specific fixes to the schema (the schema is modified in place).

    The fixes are: constrain every class's ``category`` slot to the ``bican:`` pattern,
    drop the ``name`` slot, drop slots that are also defined by an imported schema whose
    ``<import>.yaml`` file exists relative to the current directory, and drop subsets
    that have neither a description nor a ``from_schema``.

    :param schema: the schema to fix
    :return: the same schema object
    """
    # fixing values for categories
    for class_def in schema.classes.values():
        class_def.slot_usage["category"] = SlotDefinition(
            name="category", pattern=r"^bican:[A-Z][A-Za-z]+$"
        )

    # removing names from the slots
    if "name" in schema.slots:
        del schema.slots["name"]

    # removing slots that are from the imported schemas
    imported_slot_names = set()
    for import_name in schema.imports:
        if Path(f"{import_name}.yaml").exists():
            imported_view = SchemaView(f'{import_name}.yaml')
            imported_slot_names.update(imported_view.schema.slots.keys())
    for slot_name in set(schema.slots) & imported_slot_names:
        del schema.slots[slot_name]

    # removing empty subsets that are from imported biolink schema
    empty_subsets = [
        subset_name
        for subset_name, subset in schema.subsets.items()
        if not subset.description and not subset.from_schema
    ]
    for subset_name in empty_subsets:
        del schema.subsets[subset_name]

    return schema
+ + + +
def adding_template(schema:SchemaDefinition, template_yaml) -> SchemaDefinition:
    """
    Copy the classes and slots of a template YAML file into the schema.

    Entries in the template replace schema entries with the same name. The schema is
    modified in place.

    :param schema: the schema to extend
    :param template_yaml: path object (must provide ``open()``) of a YAML file with
        top-level ``classes`` and ``slots`` mappings
    :return: the same schema object
    """
    with template_yaml.open() as template_file:
        template = yaml.safe_load(template_file)

    template_classes = template["classes"]
    for class_name in template_classes:
        schema.classes[class_name] = template_classes[class_name]

    template_slots = template["slots"]
    for slot_name in template_slots:
        schema.slots[slot_name] = template_slots[slot_name]

    return schema
+ + + +
def download_google_sheet_as_tsv(sheet_id, save_path, sheet_gid, timeout=60):
    """
    Download one tab of a Google Sheet as TSV and save it to ``save_path``.

    :param sheet_id: ID of the Google Sheet document
    :param save_path: path of the file to write (overwritten if it exists)
    :param sheet_gid: ``gid`` of the individual tab to export
    :param timeout: seconds to wait for the server before giving up; without a timeout
        ``requests`` would wait indefinitely on an unresponsive connection
    :raises requests.HTTPError: if the server answers with an error status
    :raises requests.Timeout: if the server does not answer within ``timeout`` seconds
    """
    # Construct the URL to export the Google Sheet as TSV
    tsv_url = f'https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=tsv&gid={sheet_gid}'
    response = requests.get(tsv_url, timeout=timeout)
    response.raise_for_status()  # Ensure the request was successful
    # Save the TSV content to a file (raw bytes, exactly as served)
    with open(save_path, 'wb') as file:
        file.write(response.content)
+ + + +
def read_and_parse_gsheet_yaml(gsheet_yaml):
    """
    Read the YAML file that describes which Google Sheet tabs to download.

    :param gsheet_yaml: path to a YAML file with top-level keys ``gsheet_id`` and ``sheets``
    :return: tuple ``(gsheet_id, sheets)`` with the values of those two keys
    :raises KeyError: if one of the two keys is missing
    """
    with open(gsheet_yaml, 'r') as stream:
        config = yaml.safe_load(stream)
    return config['gsheet_id'], config['sheets']
+ + + +
def download_gsheets(gsheet_id, sheets, gsheet_download_dir):
    """
    Download every listed tab of a Google Sheet as ``<name>.tsv`` into a directory.

    :param gsheet_id: ID of the Google Sheet document
    :param sheets: list of dicts, each with a mandatory ``gid`` and an optional ``name``;
        the file is named after ``name`` if present, otherwise after ``gid``
    :param gsheet_download_dir: existing directory to save the files in (str or Path)
    :return: list of paths of the downloaded files, in the order of ``sheets``
    :raises ValueError: if a sheet entry has no ``gid``
    """
    # Accept plain strings as well as Path objects.
    gsheet_download_dir = Path(gsheet_download_dir)

    downloaded_files = []
    for sht in sheets:
        if "gid" not in sht:
            raise ValueError(f"Each sheet has to have gid, but not found in {sht}")
        shnm = sht.get("name", sht["gid"])
        gsheet_save_path = gsheet_download_dir / f"{shnm}.tsv"
        download_google_sheet_as_tsv(gsheet_id, gsheet_save_path, sht["gid"])
        downloaded_files.append(gsheet_save_path)
    return downloaded_files
@click.command()
@click.option('-o', '--output',
              type=click.File(mode="w"),
              default=sys.stdout,
              help="Path for the yaml output file.")
@click.option('-t', '--template',
              type=click.Path(exists=True),
              default=None,
              help="Optional template yaml file with standard classes that will be added to the model.")
@click.option("--gsheet/--no-gsheet",
              default=False,
              show_default=True,
              help="Using Google Sheet as a source. "
                   "If True, the arguments MUST be a yaml file with gsheet_id and gid of all the sheets")
@click.option("--gsheet-download-dir",
              type=click.Path(),
              default=None,
              help="Path used to download Google Sheets. If not specified a default directory will be created.")
@click.option("--fix_tsv/--no-fix_tsv",
              default=True,
              show_default=True,
              help="Fixing known issues with tsv files from Google Sheets.")
@click.option("--fix_tsv_save/--no-fix_tsv_save",
              default=False,
              show_default=True,
              help="Keeping the fixed files, relevant only if fix_tsv=True")
@click.option("--repair/--no-repair",
              default=True,
              show_default=True,
              help="Standard Linkml auto-repair schema")
@click.option("--inlined/--no-inlined",
              default=True,
              show_default=True,
              help="Adding inlined=True to all slots that have complex type as a range")
@click.option("--ref_by_ind/--no-ref_by_ind",
              default=True,
              show_default=True,
              help="Adding string to the range to be able to reference by index (relevant only if inlined=True)")
@click.option("--fix_bican_model/--no-fix_bican_model",
              default=True,
              show_default=True,
              help="Automated repair specifically for the BICAN YAML model")
@click.argument('spreadsheets', nargs=-1)
def schema2model(spreadsheets, output, fix_tsv, fix_tsv_save, repair, fix_bican_model, template,
                 gsheet, gsheet_download_dir, inlined, ref_by_ind):
    """
    This converter allows creating a yaml linkml model from a set of spreadsheets.
    It can either use tsv files or Google Sheet as an input.

    The default behavior is to run the converter starting with TSV files,
    specifying their paths as arguments, for example, model_spreadsheets/*tsv.

    If `--gsheet` option is used, the converter starts from downloading spreadsheets
    from Google Sheets.
    The argument must be a YAML file that has `gsheet_id` and a list of `sheets`
    with `gid` (a unique identifier for each individual sheet) and `name` (optionally)
    that will be used as a name of the downloaded TSV file (if not available `gid` will be used).
    """
    # NOTE: the docstring above is shown verbatim as the command's --help text.

    schema_maker = sm.SchemaMaker()

    if gsheet:
        # Exactly one argument is expected and it must be an existing YAML file.
        if len(spreadsheets) != 1 or not Path(spreadsheets[0]).exists():
            raise click.UsageError(f"if gsheet is used the argument must be a yaml file with gsheet_id, "
                                   f"but file {spreadsheets} doesn't exist")
        gsheet_id, sheets = read_and_parse_gsheet_yaml(spreadsheets[0])
        if gsheet_download_dir:
            gsheet_download_dir = Path(gsheet_download_dir)
        else:
            gsheet_download_dir = Path(".") / f"google_sheet_{gsheet_id}"

        gsheet_download_dir.mkdir(exist_ok=True)
        spreadsheets = download_gsheets(gsheet_id, sheets, gsheet_download_dir)

    # Everything below indexes spreadsheets[0], so at least one file is required.
    if not spreadsheets:
        raise click.UsageError("at least one spreadsheet has to be provided")

    # checking template and default name of template
    if template:
        template = Path(template)
    elif (Path(spreadsheets[0]).parent / "classes_base.yaml").exists():
        template = Path(spreadsheets[0]).parent / "classes_base.yaml"

    if fix_tsv:
        spreadsheets = fix_tsv_files(list(spreadsheets), inlined=inlined, ref_by_ind=ref_by_ind)
    schema = schema_maker.create_schema(list(spreadsheets))
    if repair:
        schema = schema_maker.repair_schema(schema)

    if fix_bican_model:
        schema = bican_fix(schema)

    if template:
        schema = adding_template(schema, template_yaml=template)

    schema_dict = schema_as_dict(schema)
    output.write(yaml.dump(schema_dict, sort_keys=False))
    if output is sys.stdout:
        # Never close the process-wide stdout; just make sure the model is written out.
        output.flush()
    else:
        output.close()

    # removing the fixed files: after fix_tsv_files, spreadsheets[0] lives in the
    # "fixed_sheets" directory, which is what gets removed here
    if fix_tsv and not fix_tsv_save:
        shutil.rmtree(Path(spreadsheets[0]).parent)


if __name__ == '__main__':
    schema2model()
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/bkbit/model_converters/yaml2sheet_converter.html b/_modules/bkbit/model_converters/yaml2sheet_converter.html new file mode 100644 index 0000000..823ea79 --- /dev/null +++ b/_modules/bkbit/model_converters/yaml2sheet_converter.html @@ -0,0 +1,296 @@ + + + + + + + + bkbit.model_converters.yaml2sheet_converter — bkbit documentation + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+
    +
  • + + +
  • +
  • +
+
+
+
+
+ +

Source code for bkbit.model_converters.yaml2sheet_converter

+import csv, yaml
+import click
+from pathlib import Path
+
+from linkml_runtime.utils.schemaview import SchemaView
+
# Non-string primitive range names; the same list is used by
# _removing_str_type to decide whether "string" may be dropped from a range.
SIMPLE_TYPES_NOSTR = ["integer", "float", "boolean", "date", "datetime"]

# Each *_HEADERS table holds one tuple per CSV column. The tuple members become
# the successive header rows of the generated CSV file.

CLASS_HEADERS = [
    # header, linkml_header, linkml_header_minor
    ("Class Name", "> class", ">"),
    ("Inheritance: is_a", "is_a", ""),
    ("Inheritance: mixin", "mixins", 'internal_separator: "|"'),
    ("Subsets", "in_subset", 'internal_separator: "|"'),
    ("Description", "description", ""),
    ("NIMP Terminology NHash", "exact_mappings: {curie_prefix: NIMP}", ""),
]

SLOTS_HEADERS = [
    # header, linkml_header, linkml_header_minor
    ("Proposed BICAN Field", "> alias", ">"),
    ("LinkML Slot or Attribute Name", "attribute", ""),
    ("BICAN UUID", "slot_uri: {curie_prefix: bican}", ""),
    ("SubGroup/LinkML Class Name", "class", ""),
    ("Definition", "description", ""),
    ("Required (True/False)", "is_required", ""),
    ("Multivalued (True/False)", "multivalued", ""),
    ("Data Type/Value Set", "range", ""),
    ("Data Examples", "ignore", ""),
    ("Min Value", "ignore", ""),
    ("Max Value", "ignore", ""),
    ("Unit", "ignore", ""),
    ("Statistical Type", "ignore", ""),
    # Same "|" separator as the "Subsets" column of CLASS_HEADERS.
    ("Subsets", "in_subset", 'internal_separator: "|"'),
    ("Notes", "ignore", ""),
    ("NIMP Category", "ignore", ""),
    ("NIMP Terminology NHash", "exact_mappings: {curie_prefix: NIMP}", ""),
    ("Local Variable Name (e.g. NIMP)", "local_names", "inner_key: local_name_value"),
    ("Local Variable Source (e.g. NIMP)", "local_names", "inner_key: local_name_source"),
]

ENUM_HEADERS = [
    # header, linkml_header, linkml_header_minor
    ("Value Set Name", "> enum", ">"),
    ("Permissible Value", "permissible_value", ""),
    ("Description", "description", ""),
    ("NIMP Terminology NHash", "meaning: {curie_prefix: NIMP}", ""),
]

PREFIXES_HEADERS = [
    # header, linkml_header
    ("Schema Name", "> schema"),
    ("Title", "title"),
    ("Description", "description"),
    ("ID", "id"),
    ("Default Prefix", "default_prefix"),
    ("Imports", "imports"),
    ("Prefix", "prefix"),
    ("Prefix URI", "prefix_reference"),
]
+
+
+
def create_classes_slots_cvs(classes: dict, output_dir: Path):
    """Write ``classes.csv`` and ``slots.csv`` for the given classes.

    Args:
        classes: Mapping of class name to class definition. Each definition
            is read for ``is_a``, ``mixins``, ``in_subset``, ``description``,
            ``attributes`` and ``slot_usage``.
        output_dir: Directory in which both CSV files are created.

    The class named exactly ``NamedThing`` is skipped. For every other class
    one row is added to ``classes.csv`` and one row per attribute (with
    ``slot_usage`` entries overriding attributes of the same name) is added
    to ``slots.csv``.
    """
    # Three header rows: display header, LinkML header, LinkML minor header.
    classes_cvs = [[], [], []]
    for header, linkml_header, linkml_header_minor in CLASS_HEADERS:
        classes_cvs[0].append(header)
        classes_cvs[1].append(linkml_header)
        classes_cvs[2].append(linkml_header_minor)

    # The three header rows written to slots.csv are spelled out here; the
    # "Subsets" column carries the "|" internal separator.
    sl_header = [
        "Proposed BICAN Field", "LinkML Slot or Attribute Name", "BICAN UUID",
        "SubGroup/LinkML Class Name", "Definition", "Required (True/False)",
        "Multivalued (True/False)", "Data Type/Value Set", "Data Examples",
        "Min Value", "Max Value", "Unit", "Statistical Type", "Subsets",
        "Notes", "NIMP Category", "NIMP Terminology NHash",
        "Local Variable Name (e.g. NIMP)", "Local Variable Source (e.g. NIMP)",
    ]
    sl_linkml_header = [
        "> alias", "attribute", "slot_uri: {curie_prefix: bican}", "class",
        "description", "is_required", "multivalued", "range", "ignore",
        "ignore", "ignore", "ignore", "ignore", "in_subset", "ignore",
        "ignore", "exact_mappings: {curie_prefix: NIMP}", "local_names",
        "local_names",
    ]
    sl_linkml_header_minor = [
        ">", "", "", "", "", "", "", "", "", "", "", "", "",
        'internal_separator: "|"', "", "", "",
        "inner_key: local_name_value", "inner_key: local_name_source",
    ]
    slots_cvs = [sl_header, sl_linkml_header, sl_linkml_header_minor]

    for class_name, class_d in classes.items():
        # Exact comparison: a substring test would also skip classes such as
        # "Thing" or "Named".
        if class_name == "NamedThing":
            continue
        cl_l = [
            class_name,
            class_d.is_a,
            "|".join(class_d.mixins),
            "|".join(class_d.in_subset),
            class_d.description,
            "",
        ]
        classes_cvs.append(cl_l)

        # Merge into a fresh dict so the class definition's own ``attributes``
        # mapping is not modified as a side effect of exporting.
        class_attr_dict = dict(class_d.attributes)
        class_attr_dict.update(class_d.slot_usage)
        for slot_name, slot_obj in class_attr_dict.items():
            if slot_obj.range:
                slot_range = slot_obj.range
            elif slot_obj.any_of:
                # removing an additional type
                slot_range = "|".join(_removing_str_type(slot_obj.any_of))
            else:
                slot_range = "string"  # default range
            sl_l = [
                "", slot_name, slot_obj.slot_uri, class_name,
                slot_obj.description, slot_obj.required, slot_obj.multivalued,
                slot_range, "", "", "", "", "", "", "", "", "", "", "",
            ]
            slots_cvs.append(sl_l)

    # Path(...) first so that a plain string directory is accepted as well.
    out_dir = Path(output_dir)
    _write_cvs(out_dir / "classes.csv", classes_cvs)
    _write_cvs(out_dir / "slots.csv", slots_cvs)
+ + + +
def create_enums_cvs(enums: dict, output_dir: Path):
    """Write ``enums.csv`` with one row per permissible value.

    Args:
        enums: Mapping of enum name to enum definition; a falsy value
            produces a file that contains only the header rows.
        output_dir: Directory in which ``enums.csv`` is created.
    """
    # Transpose ENUM_HEADERS: row 0 = display headers, row 1 = LinkML
    # headers, row 2 = LinkML minor headers.
    rows = [[column[level] for column in ENUM_HEADERS] for level in range(3)]

    for enum_name, enum_def in (enums or {}).items():
        rows.extend(
            [enum_name, pv_name, pv.title, pv.meaning]
            for pv_name, pv in enum_def.permissible_values.items()
        )

    _write_cvs(Path(output_dir / "enums.csv"), rows)
+ + +
def create_prefix_headers_csv(schema: SchemaView, output_dir: Path):
    """Write ``prefixes.csv`` describing the schema, its imports and prefixes.

    Args:
        schema: Object exposing ``name``, ``title``, ``description``, ``id``,
            ``default_prefix``, ``imports`` and ``prefixes``.
            NOTE(review): the command in this module passes
            ``SchemaView(...).schema`` here, not the SchemaView itself.
        output_dir: Directory in which ``prefixes.csv`` is created.
    """
    # Two header rows: display headers, then LinkML headers.
    rows = [
        [display for display, _ in PREFIXES_HEADERS],
        [linkml for _, linkml in PREFIXES_HEADERS],
    ]

    # Schema-level values fill the first five columns of a single row.
    rows.append(
        [schema.name, schema.title, schema.description, schema.id,
         schema.default_prefix, "", "", ""]
    )

    # One row per import; "linkml:types" is imported by default and skipped.
    rows.extend(
        ["", "", "", "", "", imported, "", ""]
        for imported in schema.imports
        if imported != "linkml:types"
    )

    # One row per prefix, using the last two columns.
    rows.extend(
        ["", "", "", "", "", "", pfx.prefix_prefix, pfx.prefix_reference]
        for pfx in schema.prefixes.values()
    )

    _write_cvs(Path(output_dir / "prefixes.csv"), rows)
def _removing_str_type(any_of_list: list):
    """Return the ranges of ``any_of_list``, dropping a redundant "string".

    If the range list contains only more complex types, "string" is removed
    from the list. String is used in these cases as an additional type to be
    able to refer by id, so there is no need to include it in the Google
    sheet.

    Args:
        any_of_list: Objects exposing a ``range`` attribute.

    Returns:
        list: The collected ranges, with the first "string" entry removed
        unless one of the simple types listed below is also present.
    """
    range_list = [el.range for el in any_of_list]
    simple_types = ("integer", "float", "boolean", "date", "datetime")
    if "string" in range_list and not any(el in simple_types for el in range_list):
        range_list.remove("string")
    return range_list


def _write_cvs(filename, data):
    """Write the rows in ``data`` to ``filename`` as CSV.

    The file is written as UTF-8 so the output does not depend on the
    platform's default encoding.
    """
    # newline='' is required by the csv module to avoid doubled line endings.
    with open(filename, 'w', newline='', encoding='utf-8') as file:
        csv_writer = csv.writer(file)
        csv_writer.writerows(data)


@click.command()
@click.option('-o', '--output_dir',
              type=click.Path(),
              default="output_dir_cvs",
              help="Path to the output directory, where csv files will be stored.")
@click.argument('yaml_model', type=click.Path(exists=True))
def yaml2cvs(yaml_model, output_dir):
    """
    This converter creates csv files from the yaml model.
    The csv files can be used to create Google Spreadsheet (automation TODO)
    Takes a path to yaml model as an input.
    """
    output_dir = Path(output_dir)
    # parents=True so a nested output path works even if its parents are missing.
    output_dir.mkdir(parents=True, exist_ok=True)

    schema = SchemaView(yaml_model)
    create_prefix_headers_csv(schema.schema, output_dir)
    create_enums_cvs(schema.all_enums(), output_dir)
    create_classes_slots_cvs(schema.all_classes(), output_dir)


if __name__ == "__main__":
    yaml2cvs()
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/bkbit/model_editors/linkml_trimmer.html b/_modules/bkbit/model_editors/linkml_trimmer.html new file mode 100644 index 0000000..1af221a --- /dev/null +++ b/_modules/bkbit/model_editors/linkml_trimmer.html @@ -0,0 +1,320 @@ + + + + + + + + bkbit.model_editors.linkml_trimmer — bkbit documentation + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+
    +
  • + + +
  • +
  • +
+
+
+
+
+ +

Source code for bkbit.model_editors.linkml_trimmer

+"""
+This script provides a utility for trimming a LinkML schema by retaining specified classes, slots, and enums, along with their dependencies.
+
+It defines a `YamlTrimmer` class for schema manipulation and offers a command-line interface using Click for easy usage from the terminal.
+
+Usage:
+    python script.py [OPTIONS] SCHEMA
+
+Options:
+    --classes, -c TEXT  Comma-separated list of classes to include in the trimmed schema (required).
+    --slots, -s TEXT    Comma-separated list of slots to include in the trimmed schema.
+    --enums, -e TEXT    Comma-separated list of enums to include in the trimmed schema.
+
+Example:
+    python script.py schema.yaml -c Person,Organization -s name,age -e StatusEnum
+
+The script performs the following steps:
+1. Loads the specified LinkML schema.
+2. Trims the schema by keeping only the specified classes, slots, and enums, along with their dependencies.
+3. Serializes and prints the trimmed schema in YAML format.
+
+Dependencies:
+    - click
+    - linkml-runtime
+    - linkml
+
+"""
+
+from dataclasses import dataclass
+from typing import Union
+from pathlib import Path
+from linkml_runtime.linkml_model.meta import SchemaDefinition
+from linkml_runtime.utils.schemaview import SchemaView
+from linkml._version import __version__
+from linkml.generators.yamlgen import YAMLGenerator
+import click
+
+
class YamlTrimmer:
    """
    A utility class for trimming a LinkML schema by retaining specified classes, slots, and enums, along with their dependencies.

    This class helps in generating a simplified version of a LinkML schema by removing all elements that are not reachable from the specified classes, slots, and enums to keep.

    Args:
        schema (Union[str, Path, SchemaDefinition]): The LinkML schema to be trimmed. It can be a file path, URL, or a `SchemaDefinition` object.

    Attributes:
        schemaview (SchemaView): An object representing the loaded schema, used for manipulation and traversal.

    Methods:
        trim_model(keep_classes: list[str], keep_slots: list[str] = None, keep_enums: list[str] = None):
            Trims the schema by keeping only the specified classes, slots, and enums, and their dependencies.

        serialize():
            Serializes and prints the trimmed schema in YAML format.

    Example:
        >>> yt = YamlTrimmer('path/to/schema.yaml')
        >>> yt.trim_model(['Person', 'Organization'], keep_slots=['name'], keep_enums=['StatusEnum'])
        >>> yt.serialize()
    """

    # No @dataclass here: the class declares no fields and defines its own
    # __init__, so the decorator would only generate an __eq__ that treats
    # every instance as equal and make instances unhashable.

    def __init__(self, schema: Union[str, Path, SchemaDefinition]):
        self.schemaview = SchemaView(schema)

    def trim_model(
        self,
        keep_classes: list[str],
        keep_slots: Union[list[str], None] = None,
        keep_enums: Union[list[str], None] = None,
    ):
        """
        Trims the model by removing classes, slots, and enums that are not reachable from the specified keep_classes, keep_slots, and keep_enums.

        The schema held by ``self.schemaview`` is modified in place.

        Args:
            keep_classes (list[str]): List of classes to keep.
            keep_slots (list[str], optional): List of slots to keep. Defaults to None (no extra slots).
            keep_enums (list[str], optional): List of enums to keep. Defaults to None (no extra enums).

        Raises:
            ValueError: If a requested or reached name is not a class, slot, or enum defined in the schema.
        """
        sv = self.schemaview
        # visited_classes, visited_enums, and visited_slots keep track of the classes, enums, and slots
        # that are reachable from the input classes, slots, and enums we are interested in keeping
        visited_classes = set()
        visited_slots = set()
        visited_enums = set()

        # stack is a list of classes, enums, and slots that we will traverse to find all reachable
        # classes, enums, and slots
        stack = list(keep_classes)
        stack.extend(keep_slots or [])
        stack.extend(keep_enums or [])

        # all_classes, all_enums, and all_slots are the sets of all classes, enums, and slots defined
        # in the given schema (imports excluded; all_slots also excludes class attributes)
        all_classes = set(sv.all_classes(imports=False))
        all_enums = set(sv.all_enums(imports=False))
        all_slots = set(sv.all_slots(imports=False, attributes=False))

        while stack:
            curr_node = stack.pop()
            if (
                curr_node in visited_classes
                or curr_node in visited_enums
                or curr_node in visited_slots
            ):
                continue

            # if curr_node is a class
            if curr_node in all_classes:
                visited_classes.add(curr_node)
                # add parent classes to stack
                for inherited_class in sv.class_parents(curr_node, imports=False):
                    if (
                        inherited_class not in visited_classes
                        and inherited_class in all_classes
                    ):
                        stack.append(inherited_class)

                # add the class's direct slots to the stack; their ranges are followed when the slot
                # itself is processed.
                # NOTE(review): names that are not in all_slots (built with attributes=False) are
                # filtered out here, so ranges of class attributes do not appear to be traversed --
                # confirm this is intended.
                for slot in sv.class_slots(
                    curr_node, imports=False, direct=True, attributes=True
                ):
                    if slot not in visited_slots and slot in all_slots:
                        stack.append(slot)

            elif curr_node in all_slots:
                visited_slots.add(curr_node)
                # add class/enum ranges of the slot to the stack
                for slot_range in sv.slot_range_as_union(
                    sv.get_slot(curr_node, strict=True)
                ):
                    if (
                        slot_range in all_classes and slot_range not in visited_classes
                    ) or (slot_range in all_enums and slot_range not in visited_enums):
                        stack.append(slot_range)
                # add parent slots to stack
                for parent_slot in sv.slot_parents(curr_node, imports=False):
                    if parent_slot not in visited_slots and parent_slot in all_slots:
                        stack.append(parent_slot)

            elif curr_node in all_enums:
                visited_enums.add(curr_node)
                # add parent enums to stack
                for parent_enum in sv.enum_parents(curr_node, imports=False):
                    if parent_enum not in visited_enums and parent_enum in all_enums:
                        stack.append(parent_enum)

            else:
                raise ValueError(
                    f"ERROR: {curr_node} not found in schema classes, slots, or enums"
                )

        # delete everything that was never reached
        for c in all_classes - visited_classes:
            sv.delete_class(c)
        for e in all_enums - visited_enums:
            sv.delete_enum(e)
        for s in all_slots - visited_slots:
            sv.delete_slot(s)

    def serialize(self):
        """
        Serializes the schema using YAMLGenerator and prints the serialized output.
        """
        print(YAMLGenerator(self.schemaview.schema).serialize())
+
def _split_names(value):
    """Split a comma-separated option value into stripped, non-empty names."""
    if not value:
        return []
    return [name.strip() for name in value.split(',') if name.strip()]


@click.command()
## ARGUMENTS ##
# Argument #1: Schema file
@click.argument("schema", type=click.Path(exists=True))

## OPTIONS ##
# Option #1: Classes
@click.option('--classes', '-c', required=True, help='Comma-separated list of classes to include in trimmed schema')
# Option #2: Slots
@click.option('--slots', '-s', help='Comma-separated list of slots to include in trimmed schema')
# Option #3: Enums
@click.option('--enums', '-e', help='Comma-separated list of enums to include in trimmed schema')
def linkml_trimmer(schema, classes, slots, enums):
    """
    Trim a LinkML schema based on a list of classes, slots, and enums to keep.
    """
    # Whitespace around names and empty entries (e.g. "A, B" or a trailing
    # comma) are tolerated instead of being looked up as literal names.
    classes = _split_names(classes)
    slots = _split_names(slots)
    enums = _split_names(enums)

    if not classes:
        # --classes is required; a value such as "," names no class at all.
        raise click.UsageError("--classes must name at least one class")

    yt = YamlTrimmer(schema)
    yt.trim_model(classes, slots, enums)
    yt.serialize()


if __name__ == "__main__":
    linkml_trimmer()
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/bkbit/models/anatomical_structure.html b/_modules/bkbit/models/anatomical_structure.html new file mode 100644 index 0000000..5065737 --- /dev/null +++ b/_modules/bkbit/models/anatomical_structure.html @@ -0,0 +1,586 @@ + + + + + + + + bkbit.models.anatomical_structure — bkbit documentation + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+
    +
  • + + +
  • +
  • +
+
+
+
+
+ +

Source code for bkbit.models.anatomical_structure

+from __future__ import annotations 
+
+import re
+import sys
+from datetime import (
+    date,
+    datetime,
+    time
+)
+from decimal import Decimal 
+from enum import Enum 
+from typing import (
+    Any,
+    ClassVar,
+    Dict,
+    List,
+    Literal,
+    Optional,
+    Union
+)
+
+from pydantic import (
+    BaseModel,
+    ConfigDict,
+    Field,
+    RootModel,
+    field_validator
+)
+
+
# NOTE(review): both values are the literal string "None", not the None
# object -- presumably a generator placeholder; confirm before relying on them.
metamodel_version = "None"
version = "None"
+
+
+
class ConfiguredBaseModel(BaseModel):
    # Base class that carries the pydantic configuration shared by the model
    # classes deriving from it.
    model_config = ConfigDict(
        validate_assignment = True,  # re-validate when an attribute is assigned
        validate_default = True,  # validate default values as well
        extra = "forbid",  # reject fields that are not declared on the model
        arbitrary_types_allowed = True,
        use_enum_values = True,  # store the enum's value rather than the member
        strict = False,  # allow pydantic's lax type coercion
    )
    pass
+ + + + + +
class LinkMLMeta(RootModel):
    # Thin wrapper around a metadata dict that offers mapping-style access.
    root: Dict[str, Any] = {}
    # NOTE(review): the model is declared frozen, yet __setitem__ below still
    # mutates the wrapped dict in place -- confirm that is intended.
    model_config = ConfigDict(frozen=True)

    def __getattr__(self, key:str):
        # Delegates to attributes of the wrapped dict object itself (e.g. its
        # methods), not to the dict's keys.
        return getattr(self.root, key)

    def __getitem__(self, key:str):
        # Key lookup in the wrapped dict; raises KeyError for a missing key.
        return self.root[key]

    def __setitem__(self, key:str, value):
        # Writes straight into the wrapped dict.
        self.root[key] = value

    def __contains__(self, key:str) -> bool:
        # Membership test against the wrapped dict's keys.
        return key in self.root
# Schema-level metadata for the anatomical structure schema: identifiers,
# imports, prefix map and the named regex settings.
linkml_meta = LinkMLMeta({'default_prefix': 'AnS',
     'default_range': 'string',
     'description': 'The Anatomical Structure schema is designed to represent '
                    'types and relationships of anatomical brain structures. ',
     'id': 'https://w3id.org/my-org/anatomical-structure-schema',
     'imports': ['linkml:types', 'anatomical_structure_core'],
     'license': 'MIT',
     'name': 'anatomical-structure-schema',
     'prefixes': {'AnS': {'prefix_prefix': 'AnS',
                          'prefix_reference': 'https://w3id.org/my-org/anatomical-structure-schema/'},
                  'PATO': {'prefix_prefix': 'PATO',
                           'prefix_reference': 'http://purl.obolibrary.org/obo/PATO_'},
                  'biolink': {'prefix_prefix': 'biolink',
                              'prefix_reference': 'https://w3id.org/biolink/'},
                  'linkml': {'prefix_prefix': 'linkml',
                             'prefix_reference': 'https://w3id.org/linkml/'},
                  'schema': {'prefix_prefix': 'schema',
                             'prefix_reference': 'http://schema.org/'}},
     # Named patterns; fields below refer to them through
     # 'structured_pattern': {'syntax': '{Name}'} metadata.
     'settings': {'ColorHexTriplet': {'setting_key': 'ColorHexTriplet',
                                      'setting_value': '#[0-9a-fA-F]{6}'},
                  'PositiveFloat': {'setting_key': 'PositiveFloat',
                                    'setting_value': '^[+]?\\d*\\.?\\d+$'}},
     'source_file': 'anatomical_structure.yaml',
     'title': 'Anatomical Structure Schema'} )
class ANATOMICALDIRECTION(str, Enum):
    """
    A controlled vocabulary term defining axis direction in terms of anatomical direction.
    """
    # Each member's value is identical to its name. Only left_to_right has no
    # reversed counterpart among the members.
    left_to_right = "left_to_right"
    posterior_to_anterior = "posterior_to_anterior"
    inferior_to_superior = "inferior_to_superior"
    superior_to_inferior = "superior_to_inferior"
    anterior_to_posterior = "anterior_to_posterior"
+ + + +
class DISTANCEUNIT(str, Enum):
    # Length units; member names are spelled out, values are the unit symbols.
    millimeter = "mm"
    micrometer = "um"
    meter = "m"
+ + + + +
class NamedThing(ConfiguredBaseModel):
    """
    Core base entity for Anatomical Structure schema representing an entity with an identifier name and description.
    """
    # Class-level LinkML metadata (marked abstract in the schema).
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'abstract': True,
         'from_schema': 'https://w3id.org/my-org/anatomical-structure-core-schema'})

    # id, name and description are all required (Field(...)).
    id: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'id', 'domain_of': ['NamedThing']} })
    name: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'name', 'domain_of': ['NamedThing']} })
    description: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'description', 'domain_of': ['NamedThing']} })
    # Type designator: only this class's full URI or CURIE is accepted; the
    # default is the CURIE form.
    category: List[Literal["https://w3id.org/my-org/anatomical-structure-schema/NamedThing","AnS:NamedThing"]] = Field(["AnS:NamedThing"], json_schema_extra = { "linkml_meta": {'alias': 'category',
         'designates_type': True,
         'domain_of': ['NamedThing'],
         'is_class_field': True} })
+ + + +
class VersionedNamedThing(NamedThing):
    """
    Core base entity for Anatomical Structure schema representing an versioned named thing.
    """
    # Class-level LinkML metadata (marked abstract in the schema).
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'abstract': True,
         'from_schema': 'https://w3id.org/my-org/anatomical-structure-core-schema'})

    # version is required; revision_of is optional and defaults to None.
    version: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'version', 'domain_of': ['VersionedNamedThing']} })
    revision_of: Optional[str] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'revision_of', 'domain_of': ['VersionedNamedThing']} })
    # Fields inherited from NamedThing are declared again explicitly.
    id: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'id', 'domain_of': ['NamedThing']} })
    name: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'name', 'domain_of': ['NamedThing']} })
    description: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'description', 'domain_of': ['NamedThing']} })
    # Type designator narrowed to this class's URI / CURIE.
    category: List[Literal["https://w3id.org/my-org/anatomical-structure-schema/VersionedNamedThing","AnS:VersionedNamedThing"]] = Field(["AnS:VersionedNamedThing"], json_schema_extra = { "linkml_meta": {'alias': 'category',
         'designates_type': True,
         'domain_of': ['NamedThing'],
         'is_class_field': True} })
+ + + +
class ImageDataset(VersionedNamedThing):
    """
    An image dataset is versioned release of a multidimensional regular grid of measurements and metadata required for a morphological representation of an entity such as an anatomical structure (ref: OBI_0003327, RRID:SCR_006266)
    """
    # slot_usage records that revision_of may refer to an ImageDataset or be
    # a plain string.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'https://w3id.org/my-org/anatomical-structure-schema',
         'slot_usage': {'revision_of': {'any_of': [{'range': 'ImageDataset'},
                                                   {'range': 'string'}],
                                        'name': 'revision_of'}}})

    # Axis directions: optional, restricted to ANATOMICALDIRECTION values.
    x_direction: Optional[ANATOMICALDIRECTION] = Field(None, description="""A controlled vocabulary attribute defining the x axis direction in terms of anatomical direction.""", json_schema_extra = { "linkml_meta": {'alias': 'x_direction', 'domain_of': ['ImageDataset']} })
    y_direction: Optional[ANATOMICALDIRECTION] = Field(None, description="""A controlled vocabulary attribute defining the y axis direction in terms of anatomical direction.""", json_schema_extra = { "linkml_meta": {'alias': 'y_direction', 'domain_of': ['ImageDataset']} })
    z_direction: Optional[ANATOMICALDIRECTION] = Field(None, description="""A controlled vocabulary attribute defining the z axis direction in terms of anatomical direction.""", json_schema_extra = { "linkml_meta": {'alias': 'z_direction', 'domain_of': ['ImageDataset']} })
    # Grid sizes: optional integers, each constrained to be >= 1.
    x_size: Optional[int] = Field(None, description="""The number of pixels/voxels (size) along the x axis.""", ge=1, json_schema_extra = { "linkml_meta": {'alias': 'x_size', 'domain_of': ['ImageDataset']} })
    y_size: Optional[int] = Field(None, description="""The number of pixels/voxels (size) along the y axis.""", ge=1, json_schema_extra = { "linkml_meta": {'alias': 'y_size', 'domain_of': ['ImageDataset']} })
    # Description corrected: this field is the z axis size (it previously
    # repeated the y axis wording).
    z_size: Optional[int] = Field(None, description="""The number of pixels/voxels (size) along the z axis.""", ge=1, json_schema_extra = { "linkml_meta": {'alias': 'z_size', 'domain_of': ['ImageDataset']} })
    # Resolutions: the PositiveFloat pattern is recorded as metadata only; no
    # validator for it is declared in this class.
    x_resolution: Optional[float] = Field(None, description="""The resolution (length / pixel) in along the x axis (numerical value part).""", json_schema_extra = { "linkml_meta": {'alias': 'x_resolution',
         'domain_of': ['ImageDataset'],
         'structured_pattern': {'syntax': '{PositiveFloat}'}} })
    y_resolution: Optional[float] = Field(None, description="""The resolution (length / pixel) in along the y axis (numerical value part).""", json_schema_extra = { "linkml_meta": {'alias': 'y_resolution',
         'domain_of': ['ImageDataset'],
         'structured_pattern': {'syntax': '{PositiveFloat}'}} })
    z_resolution: Optional[float] = Field(None, description="""The resolution (length / pixel) in along the z axis (numerical value part).""", json_schema_extra = { "linkml_meta": {'alias': 'z_resolution',
         'domain_of': ['ImageDataset'],
         'structured_pattern': {'syntax': '{PositiveFloat}'}} })
    unit: Optional[DISTANCEUNIT] = Field(None, description="""A controlled vocabulary attribute defining the length unit of the x, y, and z resolution values.""", json_schema_extra = { "linkml_meta": {'alias': 'unit', 'domain_of': ['ImageDataset']} })
    # Fields inherited from VersionedNamedThing / NamedThing, declared again.
    version: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'version', 'domain_of': ['VersionedNamedThing']} })
    revision_of: Optional[str] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'revision_of',
         'any_of': [{'range': 'ImageDataset'}, {'range': 'string'}],
         'domain_of': ['VersionedNamedThing']} })
    id: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'id', 'domain_of': ['NamedThing']} })
    name: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'name', 'domain_of': ['NamedThing']} })
    description: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'description', 'domain_of': ['NamedThing']} })
    # Type designator narrowed to this class's URI / CURIE.
    category: List[Literal["https://w3id.org/my-org/anatomical-structure-schema/ImageDataset","AnS:ImageDataset"]] = Field(["AnS:ImageDataset"], json_schema_extra = { "linkml_meta": {'alias': 'category',
         'designates_type': True,
         'domain_of': ['NamedThing'],
         'is_class_field': True} })
+ + + +
class AnatomicalSpace(VersionedNamedThing):
    """
    An anatomical space is versioned release of a mathematical space with a defined mapping between the anatomical axes and the mathematical axes. An anatomical space may be defined by a reference image chosen as the biological reference for an anatomical structure of interest derived from a single or multiple specimens (ref: ILX:0777106, RRID:SCR_023499)
    """
    # slot_usage records that revision_of may refer to an AnatomicalSpace or
    # be a plain string.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'https://w3id.org/my-org/anatomical-structure-schema',
         'slot_usage': {'revision_of': {'any_of': [{'range': 'ImageDataset'},
                                                   {'range': 'string'}],
                                        'name': 'revision_of'}}}) if False else LinkMLMeta({'from_schema': 'https://w3id.org/my-org/anatomical-structure-schema',
         'slot_usage': {'revision_of': {'any_of': [{'range': 'AnatomicalSpace'},
                                                   {'range': 'string'}],
                                        'name': 'revision_of'}}})

    # Required reference, typed as a plain string; the any_of metadata lists
    # ImageDataset and string as the schema-level ranges.
    measures: str = Field(..., description="""Reference to the specific image dataset used to define the anatomical space.""", json_schema_extra = { "linkml_meta": {'alias': 'measures',
         'any_of': [{'range': 'ImageDataset'}, {'range': 'string'}],
         'domain_of': ['AnatomicalSpace']} })
    # Fields inherited from VersionedNamedThing / NamedThing, declared again.
    version: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'version', 'domain_of': ['VersionedNamedThing']} })
    revision_of: Optional[str] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'revision_of',
         'any_of': [{'range': 'AnatomicalSpace'}, {'range': 'string'}],
         'domain_of': ['VersionedNamedThing']} })
    id: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'id', 'domain_of': ['NamedThing']} })
    name: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'name', 'domain_of': ['NamedThing']} })
    description: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'description', 'domain_of': ['NamedThing']} })
    # Type designator narrowed to this class's URI / CURIE.
    category: List[Literal["https://w3id.org/my-org/anatomical-structure-schema/AnatomicalSpace","AnS:AnatomicalSpace"]] = Field(["AnS:AnatomicalSpace"], json_schema_extra = { "linkml_meta": {'alias': 'category',
         'designates_type': True,
         'domain_of': ['NamedThing'],
         'is_class_field': True} })
+ + + +
class ParcellationTerminology(VersionedNamedThing):
    """
    A parcellation terminology is a versioned release set of terms that can be used to label annotations in an atlas, providing human readability and context and allowing communication about brain locations and structural properties. Typically, a terminology is a set of descriptive anatomical terms following a specific naming convention and/or approach to organization scheme. The terminology may be a flat list of controlled vocabulary, a taxonomy and partonomy, or an ontology (ref: ILX:0777107, RRID:SCR_023499)
    """
    # slot_usage records that revision_of may refer to a
    # ParcellationTerminology or be a plain string.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'https://w3id.org/my-org/anatomical-structure-schema',
         'slot_usage': {'revision_of': {'any_of': [{'range': 'ParcellationTerminology'},
                                                   {'range': 'string'}],
                                        'name': 'revision_of'}}})

    # No fields of its own: everything below is inherited from
    # VersionedNamedThing / NamedThing and declared again.
    version: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'version', 'domain_of': ['VersionedNamedThing']} })
    revision_of: Optional[str] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'revision_of',
         'any_of': [{'range': 'ParcellationTerminology'}, {'range': 'string'}],
         'domain_of': ['VersionedNamedThing']} })
    id: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'id', 'domain_of': ['NamedThing']} })
    name: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'name', 'domain_of': ['NamedThing']} })
    description: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'description', 'domain_of': ['NamedThing']} })
    # Type designator narrowed to this class's URI / CURIE.
    category: List[Literal["https://w3id.org/my-org/anatomical-structure-schema/ParcellationTerminology","AnS:ParcellationTerminology"]] = Field(["AnS:ParcellationTerminology"], json_schema_extra = { "linkml_meta": {'alias': 'category',
         'designates_type': True,
         'domain_of': ['NamedThing'],
         'is_class_field': True} })
+ + + +
class ParcellationTermSet(NamedThing):
    """
    A parcellation term set is the set of parcellation terms within a specific parcellation terminology. A parcellation term set belongs to one and only one parcellation terminology and each parcellation term in a parcellation terminology belongs to one and only one term set. If the parcellation terminology is a taxonomy, parcellation term sets can be used to represent taxonomic ranks. For consistency, if the terminology does not have the notion of taxonomic ranks, all terms are grouped into a single parcellation term set.
    """
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'https://w3id.org/my-org/anatomical-structure-schema'})

    # Required reference, typed as a plain string; the any_of metadata lists
    # ParcellationTerminology and string as the schema-level ranges.
    part_of_parcellation_terminology: str = Field(..., description="""Reference to the parcellation terminology for which the parcellation term set partitions.""", json_schema_extra = { "linkml_meta": {'alias': 'part_of_parcellation_terminology',
         'any_of': [{'range': 'ParcellationTerminology'}, {'range': 'string'}],
         'domain_of': ['ParcellationTermSet']} })
    # Optional, constrained to be >= 0.
    ordinal: Optional[int] = Field(None, description="""Ordinal of the parcellation term set among other term sets within the context of the associated parcellation terminology.""", ge=0, json_schema_extra = { "linkml_meta": {'alias': 'ordinal', 'domain_of': ['ParcellationTermSet', 'ParcellationTerm']} })
    # Optional self-reference to another term set, typed as a plain string.
    has_parent_parcellation_term_set: Optional[str] = Field(None, description="""Reference to the parent parcellation term set for which the parcellation term set is a child (lower taxonomic rank) of.""", json_schema_extra = { "linkml_meta": {'alias': 'has_parent_parcellation_term_set',
         'any_of': [{'range': 'ParcellationTermSet'}, {'range': 'string'}],
         'domain_of': ['ParcellationTermSet']} })
    # Fields inherited from NamedThing, declared again.
    id: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'id', 'domain_of': ['NamedThing']} })
    name: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'name', 'domain_of': ['NamedThing']} })
    description: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'description', 'domain_of': ['NamedThing']} })
    # Type designator narrowed to this class's URI / CURIE.
    category: List[Literal["https://w3id.org/my-org/anatomical-structure-schema/ParcellationTermSet","AnS:ParcellationTermSet"]] = Field(["AnS:ParcellationTermSet"], json_schema_extra = { "linkml_meta": {'alias': 'category',
         'designates_type': True,
         'domain_of': ['NamedThing'],
         'is_class_field': True} })
+ + + +
class ParcellationTerm(NamedThing):
    """
    A parcellation term is an individual term within a specific parcellation terminology describing a single anatomical entity by a persistent identifier, name, symbol and description. A parcellation term is a unique and exclusive member of a versioned release parcellation terminology. Although term identifiers must be unique within the context of one versioned release of a parcellation terminology, they can be reused in different parcellation terminology versions enabling the representation of terminology updates and modifications over time.
    """
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'https://w3id.org/my-org/anatomical-structure-schema'})

    symbol: Optional[str] = Field(None, description="""Symbol representing a parcellation term.""", json_schema_extra = { "linkml_meta": {'alias': 'symbol', 'domain_of': ['ParcellationTerm']} })
    # Required reference, typed as a plain string; the any_of metadata lists
    # ParcellationTermSet and string as the schema-level ranges.
    part_of_parcellation_term_set: str = Field(..., description="""Reference to the parcellation term set for which the parcellation term is part of.""", json_schema_extra = { "linkml_meta": {'alias': 'part_of_parcellation_term_set',
         'any_of': [{'range': 'ParcellationTermSet'}, {'range': 'string'}],
         'domain_of': ['ParcellationTerm']} })
    # Optional, constrained to be >= 0.
    ordinal: Optional[int] = Field(None, description="""Ordinal of the parcellation term among other terms within the context of the associated parcellation terminology.""", ge=0, json_schema_extra = { "linkml_meta": {'alias': 'ordinal', 'domain_of': ['ParcellationTermSet', 'ParcellationTerm']} })
    # Optional self-reference to another term, typed as a plain string.
    has_parent_parcellation_term: Optional[str] = Field(None, description="""Reference to the parent parcellation term for which the parcellation term is a child ( spatially part) of""", json_schema_extra = { "linkml_meta": {'alias': 'has_parent_parcellation_term',
         'any_of': [{'range': 'ParcellationTerm'}, {'range': 'string'}],
         'domain_of': ['ParcellationTerm']} })
    # Fields inherited from NamedThing, declared again.
    id: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'id', 'domain_of': ['NamedThing']} })
    name: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'name', 'domain_of': ['NamedThing']} })
    description: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'description', 'domain_of': ['NamedThing']} })
    # Type designator narrowed to this class's URI / CURIE.
    category: List[Literal["https://w3id.org/my-org/anatomical-structure-schema/ParcellationTerm","AnS:ParcellationTerm"]] = Field(["AnS:ParcellationTerm"], json_schema_extra = { "linkml_meta": {'alias': 'category',
         'designates_type': True,
         'domain_of': ['NamedThing'],
         'is_class_field': True} })
+ + + +
class ParcellationColorScheme(VersionedNamedThing):
    """
    A parcellation color scheme is a versioned release color palette that can be used to visualize a parcellation terminology or its related parcellation annotation. A parcellation terminology may have zero or more parcellation color schemes and each color scheme is in context of a specific parcellation terminology, where each parcellation term is assigned a hex color value. A parcellation color scheme is defined as a part of one and only one parcellation terminology.
    """
    # slot_usage records that revision_of may refer to a
    # ParcellationColorScheme or be a plain string.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'https://w3id.org/my-org/anatomical-structure-schema',
         'slot_usage': {'revision_of': {'any_of': [{'range': 'ParcellationColorScheme'},
                                                   {'range': 'string'}],
                                        'name': 'revision_of'}}})

    # Required reference, typed as a plain string; the any_of metadata lists
    # ParcellationTerminology and string as the schema-level ranges.
    subject_parcellation_terminology: str = Field(..., description="""Reference to the parcellation terminology for which the parcellation color scheme is in context of.""", json_schema_extra = { "linkml_meta": {'alias': 'subject_parcellation_terminology',
         'any_of': [{'range': 'ParcellationTerminology'}, {'range': 'string'}],
         'domain_of': ['ParcellationColorScheme']} })
    # Fields inherited from VersionedNamedThing / NamedThing, declared again.
    version: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'version', 'domain_of': ['VersionedNamedThing']} })
    revision_of: Optional[str] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'revision_of',
         'any_of': [{'range': 'ParcellationColorScheme'}, {'range': 'string'}],
         'domain_of': ['VersionedNamedThing']} })
    id: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'id', 'domain_of': ['NamedThing']} })
    name: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'name', 'domain_of': ['NamedThing']} })
    description: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'description', 'domain_of': ['NamedThing']} })
    # Type designator narrowed to this class's URI / CURIE.
    category: List[Literal["https://w3id.org/my-org/anatomical-structure-schema/ParcellationColorScheme","AnS:ParcellationColorScheme"]] = Field(["AnS:ParcellationColorScheme"], json_schema_extra = { "linkml_meta": {'alias': 'category',
         'designates_type': True,
         'domain_of': ['NamedThing'],
         'is_class_field': True} })
+ + + +
+[docs] +class ParcellationColorAssignment(ConfiguredBaseModel): + """ + The parcellation color assignment associates hex color value to a parcellation term within a versioned release of a color scheme. A parcellation term is uniquely denoted by a parcellation term identifier and the parcellation terminology it belongs to. + """ + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'https://w3id.org/my-org/anatomical-structure-schema'}) + + part_of_parcellation_color_scheme: str = Field(..., description="""Reference to the parcellation color scheme for which the color assignment is part of.""", json_schema_extra = { "linkml_meta": {'alias': 'part_of_parcellation_color_scheme', + 'any_of': [{'range': 'ParcellationColorScheme'}, {'range': 'string'}], + 'domain_of': ['ParcellationColorAssignment']} }) + subject_parcellation_term: str = Field(..., description="""Reference to the parcellation term identifier for which the color assignment is about.""", json_schema_extra = { "linkml_meta": {'alias': 'subject_parcellation_term', + 'any_of': [{'range': 'ParcellationTerm'}, {'range': 'string'}], + 'domain_of': ['ParcellationColorAssignment', 'ParcellationAnnotationTermMap']} }) + color: Optional[str] = Field(None, description="""A string representing to hex triplet code of a color""", json_schema_extra = { "linkml_meta": {'alias': 'color', + 'domain_of': ['ParcellationColorAssignment'], + 'structured_pattern': {'syntax': '{ColorHexTriplet}'}} })
+ + + +
+[docs] +class AnatomicalAnnotationSet(VersionedNamedThing): + """ + An anatomical annotation set is a versioned release of a set of anatomical annotations anchored in the same anatomical space that divides the space into distinct segments following some annotation criteria or parcellation scheme. For example, the anatomical annotation set of 3D image based reference atlases (e.g. Allen Mouse CCF) can be expressed as a set of label indices of single multi-valued image annotations or as a set of segmentation masks (ref: ILX:0777108, RRID:SCR_023499) + """ + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'https://w3id.org/my-org/anatomical-structure-schema', + 'slot_usage': {'revision_of': {'any_of': [{'range': 'AnatomicalAnnotationSet'}, + {'range': 'string'}], + 'name': 'revision_of'}}}) + + parameterizes: str = Field(..., description="""Reference to the anatomical space for which the anatomical annotation set is anchored""", json_schema_extra = { "linkml_meta": {'alias': 'parameterizes', + 'any_of': [{'range': 'AnatomicalSpace'}, {'range': 'string'}], + 'domain_of': ['AnatomicalAnnotationSet']} }) + version: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'version', 'domain_of': ['VersionedNamedThing']} }) + revision_of: Optional[str] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'revision_of', + 'any_of': [{'range': 'AnatomicalAnnotationSet'}, {'range': 'string'}], + 'domain_of': ['VersionedNamedThing']} }) + id: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'id', 'domain_of': ['NamedThing']} }) + name: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'name', 'domain_of': ['NamedThing']} }) + description: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'description', 'domain_of': ['NamedThing']} }) + category: List[Literal["https://w3id.org/my-org/anatomical-structure-schema/AnatomicalAnnotationSet","AnS:AnatomicalAnnotationSet"]] = 
Field(["AnS:AnatomicalAnnotationSet"], json_schema_extra = { "linkml_meta": {'alias': 'category', + 'designates_type': True, + 'domain_of': ['NamedThing'], + 'is_class_field': True} })
+ + + +
+[docs] +class ParcellationAnnotation(ConfiguredBaseModel): + """ + A parcellation annotation defines a specific segment of an anatomical space denoted by an internal identifier and is a unique and exclusive member of a versioned release anatomical annotation set. For example, in the case where the anatomical annotation set is a single multi-value image mask (e.g. Allen Mouse CCF), a specific annotation corresponds to a specific label index (internal identifier) in the mask. + """ + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'https://w3id.org/my-org/anatomical-structure-schema'}) + + part_of_anatomical_annotation_set: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'part_of_anatomical_annotation_set', + 'any_of': [{'range': 'AnatomicalAnnotationSet'}, {'range': 'string'}], + 'domain_of': ['ParcellationAnnotation']} }) + internal_identifier: str = Field(..., description="""An identifier that uniquely denotes a specific parcellation annotation within the context of an anatomical annotation set""", json_schema_extra = { "linkml_meta": {'alias': 'internal_identifier', 'domain_of': ['ParcellationAnnotation']} }) + voxel_count: Optional[int] = Field(None, description="""The number of voxels (3D pixels) spanned by the parcellation annotation (optional).""", ge=0, json_schema_extra = { "linkml_meta": {'alias': 'voxel_count', 'domain_of': ['ParcellationAnnotation']} })
+ + + +
+[docs] +class ParcellationAnnotationTermMap(ConfiguredBaseModel): + """ + The parcellation annotation term map table defines the relationship between parcellation annotations and parcellation terms. A parcellation term is uniquely denoted by a parcellation term identifier and the parcellation terminology it belongs to. A parcellation term can be spatially parameterized by the union of one or more parcellation annotations within a versioned release of an anatomical annotation set. For example, annotations defining individual cortical layers in cortical region R (R1, R2/3, R4, etc) can be combined to define the parent region R. + """ + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'https://w3id.org/my-org/anatomical-structure-schema'}) + + subject_parcellation_annotation: Union[ParcellationAnnotation, str] = Field(..., description="""Reference to the parcellation annotation that is the subject of the association.""", json_schema_extra = { "linkml_meta": {'alias': 'subject_parcellation_annotation', + 'any_of': [{'range': 'ParcellationAnnotation'}, {'range': 'string'}], + 'domain_of': ['ParcellationAnnotationTermMap']} }) + subject_parcellation_term: str = Field(..., description="""Reference to the parcellation term that is the subject of the association.""", json_schema_extra = { "linkml_meta": {'alias': 'subject_parcellation_term', + 'any_of': [{'range': 'ParcellationTerm'}, {'range': 'string'}], + 'domain_of': ['ParcellationColorAssignment', 'ParcellationAnnotationTermMap']} })
+ + + +
+[docs] +class ParcellationAtlas(VersionedNamedThing): + """ + A parcellation atlas is a versioned release reference used to guide experiments or deal with the spatial relationship between objects or the location of objects within the context of some anatomical structure. An atlas is minimally defined by a notion of space (either implicit or explicit) and an annotation set. Reference atlases usually have additional parts that make them more useful in certain situations, such as a well defined coordinate system, delineations indicating the boundaries of various regions or cell populations, landmarks, and labels and names to make it easier to communicate about well known and useful locations (ref: ILX:0777109, RRID:SCR_023499). + """ + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'https://w3id.org/my-org/anatomical-structure-schema', + 'slot_usage': {'revision_of': {'any_of': [{'range': 'ParcellationAtlas'}, + {'range': 'string'}], + 'name': 'revision_of'}}}) + + has_anatomical_space: str = Field(..., description="""Reference to the anatomical space component of the parcellation atlas""", json_schema_extra = { "linkml_meta": {'alias': 'has_anatomical_space', + 'any_of': [{'range': 'AnatomicalSpace'}, {'range': 'string'}], + 'domain_of': ['ParcellationAtlas']} }) + has_anatomical_annotation_set: str = Field(..., description="""Reference to the anatomical annotation set component of the parcellation atlas""", json_schema_extra = { "linkml_meta": {'alias': 'has_anatomical_annotation_set', + 'any_of': [{'range': 'AnatomicalAnnotationSet'}, {'range': 'string'}], + 'domain_of': ['ParcellationAtlas']} }) + has_parcellation_terminology: str = Field(..., description="""Reference to the parcellation terminology component of the parcellation atlas""", json_schema_extra = { "linkml_meta": {'alias': 'has_parcellation_terminology', + 'any_of': [{'range': 'ParcellationTerminology'}, {'range': 'string'}], + 'domain_of': ['ParcellationAtlas']} }) + 
specialization_of: Optional[str] = Field(None, description="""Reference to the general (non versioned) parcellation atlas for which the parcellation atlas is a specific version release of.""", json_schema_extra = { "linkml_meta": {'alias': 'specialization_of', + 'any_of': [{'range': 'ParcellationAtlas'}, {'range': 'string'}], + 'domain_of': ['ParcellationAtlas']} }) + version: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'version', 'domain_of': ['VersionedNamedThing']} }) + revision_of: Optional[str] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'revision_of', + 'any_of': [{'range': 'ParcellationAtlas'}, {'range': 'string'}], + 'domain_of': ['VersionedNamedThing']} }) + id: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'id', 'domain_of': ['NamedThing']} }) + name: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'name', 'domain_of': ['NamedThing']} }) + description: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'description', 'domain_of': ['NamedThing']} }) + category: List[Literal["https://w3id.org/my-org/anatomical-structure-schema/ParcellationAtlas","AnS:ParcellationAtlas"]] = Field(["AnS:ParcellationAtlas"], json_schema_extra = { "linkml_meta": {'alias': 'category', + 'designates_type': True, + 'domain_of': ['NamedThing'], + 'is_class_field': True} })
+ + + +# Model rebuild +# see https://pydantic-docs.helpmanual.io/usage/models/#rebuilding-a-model +NamedThing.model_rebuild() +VersionedNamedThing.model_rebuild() +ImageDataset.model_rebuild() +AnatomicalSpace.model_rebuild() +ParcellationTerminology.model_rebuild() +ParcellationTermSet.model_rebuild() +ParcellationTerm.model_rebuild() +ParcellationColorScheme.model_rebuild() +ParcellationColorAssignment.model_rebuild() +AnatomicalAnnotationSet.model_rebuild() +ParcellationAnnotation.model_rebuild() +ParcellationAnnotationTermMap.model_rebuild() +ParcellationAtlas.model_rebuild() + +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/bkbit/models/genome_annotation.html b/_modules/bkbit/models/genome_annotation.html new file mode 100644 index 0000000..65d6cbe --- /dev/null +++ b/_modules/bkbit/models/genome_annotation.html @@ -0,0 +1,6465 @@ + + + + + + + + bkbit.models.genome_annotation — bkbit documentation + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+
    +
  • + + +
  • +
  • +
+
+
+
+
+ +

Source code for bkbit.models.genome_annotation

+from __future__ import annotations 
+
+import re
+import sys
+from datetime import (
+    date,
+    datetime,
+    time
+)
+from decimal import Decimal 
+from enum import Enum 
+from typing import (
+    Any,
+    ClassVar,
+    Dict,
+    List,
+    Literal,
+    Optional,
+    Union
+)
+
+from pydantic import (
+    BaseModel,
+    ConfigDict,
+    Field,
+    RootModel,
+    field_validator
+)
+
+
+metamodel_version = "None"
+version = "None"
+
+
+
+[docs] +class ConfiguredBaseModel(BaseModel): + model_config = ConfigDict( + validate_assignment = True, + validate_default = True, + extra = "forbid", + arbitrary_types_allowed = True, + use_enum_values = True, + strict = False, + ) + pass
+ + + + + +
+[docs] +class LinkMLMeta(RootModel): + root: Dict[str, Any] = {} + model_config = ConfigDict(frozen=True) + + def __getattr__(self, key:str): + return getattr(self.root, key) + + def __getitem__(self, key:str): + return self.root[key] + + def __setitem__(self, key:str, value): + self.root[key] = value + + def __contains__(self, key:str) -> bool: + return key in self.root
+ + + +linkml_meta = LinkMLMeta({'default_prefix': 'bican', + 'default_range': 'string', + 'description': 'The Genome Annotation schema is designed to represent types ' + "and relationships of an organism's annotated genome.", + 'id': 'https://identifiers.org/brain-bican/genome-annotation-schema', + 'imports': ['linkml:types', 'bican_biolink', 'bican_core'], + 'name': 'genome-annotation-schema', + 'prefixes': {'NCBIAssembly': {'prefix_prefix': 'NCBIAssembly', + 'prefix_reference': 'https://www.ncbi.nlm.nih.gov/assembly/'}, + 'bican': {'prefix_prefix': 'bican', + 'prefix_reference': 'https://identifiers.org/brain-bican/vocab/'}, + 'linkml': {'prefix_prefix': 'linkml', + 'prefix_reference': 'https://w3id.org/linkml/'}, + 'ncbi': {'prefix_prefix': 'ncbi', + 'prefix_reference': 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id='}, + 'schema': {'prefix_prefix': 'schema', + 'prefix_reference': 'http://schema.org/'}}, + 'source_file': 'genome_annotation.yaml', + 'title': 'Genome Annotation Schema'} ) + +
+[docs] +class DigestType(str, Enum): + SHA1 = "spdx:checksumAlgorithm_sha1" + MD5 = "spdx:checksumAlgorithm_md5" + SHA256 = "spdx:checksumAlgorithm_sha256"
+ + + +
+[docs] +class BioType(str, Enum): + protein_coding = "protein_coding" + noncoding = "noncoding"
+ + + +
+[docs] +class AuthorityType(str, Enum): + ENSEMBL = "ENSEMBL" + NCBI = "NCBI"
+ + + + +
+[docs] +class OntologyClass(ConfiguredBaseModel): + """ + a concept or class in an ontology, vocabulary or thesaurus. Note that nodes in a biolink compatible KG can be considered both instances of biolink classes, and OWL classes in their own right. In general you should not need to use this class directly. Instead, use the appropriate biolink class. For example, for the GO concept of endocytosis (GO:0006897), use bl:BiologicalProcess as the type. + """ + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'class_uri': 'biolink:OntologyClass', + 'comments': ["This is modeled as a mixin. 'ontology class' should not be the " + 'primary type of a node in the KG. Instead you should use an ' + 'informative bioloink category, such as AnatomicalEntity (for ' + 'Uberon classes), ChemicalSubstance (for CHEBI or CHEMBL), etc', + 'Note that formally this is a metaclass. Instances of this class ' + "are instances in the graph, but can be the object of 'type' " + 'edges. For example, if we had a node in the graph representing ' + 'a specific brain of a specific patient (e.g brain001), this ' + 'could have a category of bl:Sample, and by typed more ' + 'specifically with an ontology class UBERON:nnn, which has as ' + 'category bl:AnatomicalEntity'], + 'definition_uri': 'https://w3id.org/biolink/vocab/OntologyClass', + 'exact_mappings': ['owl:Class', 'schema:Class'], + 'examples': [{'description': "the class 'brain' from the Uberon anatomy " + 'ontology', + 'value': 'UBERON:0000955'}], + 'from_schema': 'https://w3id.org/biolink/bican-biolink-schema', + 'id_prefixes': ['MESH', 'UMLS', 'KEGG.BRITE'], + 'mixin': True, + 'see_also': ['https://github.com/biolink/biolink-model/issues/486']}) + + id: str = Field(..., description="""A unique identifier for an entity. 
Must be either a CURIE shorthand for a URI or a complete URI""", json_schema_extra = { "linkml_meta": {'alias': 'id', + 'definition_uri': 'https://w3id.org/biolink/vocab/id', + 'domain': 'entity', + 'domain_of': ['ontology class', + 'entity', + 'attribute', + 'named thing', + 'taxonomic rank', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['AGRKB:primaryId', 'gff3:ID', 'gpi:DB_Object_ID'], + 'in_subset': ['translator_minimal'], + 'slot_uri': 'biolink:id'} })
+ + + +
+[docs] +class Annotation(ConfiguredBaseModel): + """ + Biolink Model root class for entity annotations. + """ + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'abstract': True, + 'class_uri': 'biolink:Annotation', + 'definition_uri': 'https://w3id.org/biolink/vocab/Annotation', + 'from_schema': 'https://w3id.org/biolink/bican-biolink-schema'}) + + pass
+ + + +
+[docs] +class QuantityValue(Annotation): + """ + A value of an attribute that is quantitative and measurable, expressed as a combination of a unit and a numeric value + """ + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'class_uri': 'biolink:QuantityValue', + 'definition_uri': 'https://w3id.org/biolink/vocab/QuantityValue', + 'from_schema': 'https://w3id.org/biolink/bican-biolink-schema'}) + + has_unit: Optional[str] = Field(None, description="""connects a quantity value to a unit""", json_schema_extra = { "linkml_meta": {'alias': 'has_unit', + 'close_mappings': ['EFO:0001697', 'UO-PROPERTY:is_unit_of'], + 'definition_uri': 'https://w3id.org/biolink/vocab/has_unit', + 'domain': 'quantity value', + 'domain_of': ['quantity value'], + 'exact_mappings': ['qud:unit', 'IAO:0000039'], + 'in_subset': ['samples'], + 'narrow_mappings': ['SNOMED:has_concentration_strength_denominator_unit', + 'SNOMED:has_concentration_strength_numerator_unit', + 'SNOMED:has_presentation_strength_denominator_unit', + 'SNOMED:has_presentation_strength_numerator_unit', + 'SNOMED:has_unit_of_presentation'], + 'slot_uri': 'biolink:has_unit'} }) + has_numeric_value: Optional[float] = Field(None, description="""connects a quantity value to a number""", json_schema_extra = { "linkml_meta": {'alias': 'has_numeric_value', + 'definition_uri': 'https://w3id.org/biolink/vocab/has_numeric_value', + 'domain': 'quantity value', + 'domain_of': ['quantity value'], + 'exact_mappings': ['qud:quantityValue'], + 'in_subset': ['samples'], + 'slot_uri': 'biolink:has_numeric_value'} })
+ + + +
+[docs] +class Entity(ConfiguredBaseModel): + """ + Root Biolink Model class for all things and informational relationships, real or imagined. + """ + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'abstract': True, + 'class_uri': 'biolink:Entity', + 'definition_uri': 'https://w3id.org/biolink/vocab/Entity', + 'from_schema': 'https://w3id.org/biolink/bican-biolink-schema'}) + + id: str = Field(..., description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI""", json_schema_extra = { "linkml_meta": {'alias': 'id', + 'definition_uri': 'https://w3id.org/biolink/vocab/id', + 'domain': 'entity', + 'domain_of': ['ontology class', + 'entity', + 'attribute', + 'named thing', + 'taxonomic rank', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['AGRKB:primaryId', 'gff3:ID', 'gpi:DB_Object_ID'], + 'in_subset': ['translator_minimal'], + 'slot_uri': 'biolink:id'} }) + iri: Optional[str] = Field(None, description="""An IRI for an entity. This is determined by the id using expansion rules.""", json_schema_extra = { "linkml_meta": {'alias': 'iri', + 'definition_uri': 'https://w3id.org/biolink/vocab/iri', + 'domain_of': ['attribute', + 'entity', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['WIKIDATA_PROPERTY:P854'], + 'in_subset': ['translator_minimal', 'samples'], + 'slot_uri': 'biolink:iri'} }) + category: List[Literal["https://w3id.org/biolink/vocab/Entity","biolink:Entity"]] = Field(["biolink:Entity"], description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. 
In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}""", json_schema_extra = { "linkml_meta": {'alias': 'category', + 'definition_uri': 'https://w3id.org/biolink/vocab/category', + 'designates_type': True, + 'domain': 'entity', + 'domain_of': ['entity', + 'attribute', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'in_subset': ['translator_minimal'], + 'is_a': 'type', + 'is_class_field': True, + 'slot_uri': 'biolink:category'} }) + type: Optional[List[str]] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'type', + 'definition_uri': 'https://w3id.org/biolink/vocab/type', + 'domain': 'entity', + 'domain_of': ['entity', + 'attribute', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['gff3:type', 'gpi:DB_Object_Type'], + 'mappings': ['rdf:type'], + 'slot_uri': 'rdf:type'} }) + name: Optional[str] = Field(None, description="""A human-readable name for an attribute or entity.""", json_schema_extra = { "linkml_meta": {'alias': 
'name', + 'aliases': ['label', 'display name', 'title'], + 'definition_uri': 'https://w3id.org/biolink/vocab/name', + 'domain': 'entity', + 'domain_of': ['attribute', + 'entity', + 'macromolecular machine mixin', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene or gene product', + 'gene', + 'genome'], + 'exact_mappings': ['gff3:Name', 'gpi:DB_Object_Name'], + 'in_subset': ['translator_minimal', 'samples'], + 'mappings': ['rdfs:label'], + 'narrow_mappings': ['dct:title', 'WIKIDATA_PROPERTY:P1476'], + 'slot_uri': 'rdfs:label'} }) + description: Optional[str] = Field(None, description="""a human-readable description of an entity""", json_schema_extra = { "linkml_meta": {'alias': 'description', + 'aliases': ['definition'], + 'definition_uri': 'https://w3id.org/biolink/vocab/description', + 'domain_of': ['entity', + 'attribute', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['IAO:0000115', 'skos:definitions'], + 'in_subset': ['translator_minimal'], + 'mappings': ['dct:description'], + 'narrow_mappings': ['gff3:Description'], + 'slot_uri': 'dct:description'} }) + has_attribute: Optional[List[str]] = Field(None, description="""connects any entity to an attribute""", json_schema_extra = { "linkml_meta": {'alias': 'has_attribute', + 'close_mappings': ['OBI:0001927'], + 'definition_uri': 'https://w3id.org/biolink/vocab/has_attribute', + 'domain': 'entity', + 'domain_of': ['entity', + 'attribute', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['SIO:000008'], + 'in_subset': ['samples'], + 
'narrow_mappings': ['OBAN:association_has_subject_property', + 'OBAN:association_has_object_property', + 'CPT:has_possibly_included_panel_element', + 'DRUGBANK:category', + 'EFO:is_executed_in', + 'HANCESTRO:0301', + 'LOINC:has_action_guidance', + 'LOINC:has_adjustment', + 'LOINC:has_aggregation_view', + 'LOINC:has_approach_guidance', + 'LOINC:has_divisor', + 'LOINC:has_exam', + 'LOINC:has_method', + 'LOINC:has_modality_subtype', + 'LOINC:has_object_guidance', + 'LOINC:has_scale', + 'LOINC:has_suffix', + 'LOINC:has_time_aspect', + 'LOINC:has_time_modifier', + 'LOINC:has_timing_of', + 'NCIT:R88', + 'NCIT:eo_disease_has_property_or_attribute', + 'NCIT:has_data_element', + 'NCIT:has_pharmaceutical_administration_method', + 'NCIT:has_pharmaceutical_basic_dose_form', + 'NCIT:has_pharmaceutical_intended_site', + 'NCIT:has_pharmaceutical_release_characteristics', + 'NCIT:has_pharmaceutical_state_of_matter', + 'NCIT:has_pharmaceutical_transformation', + 'NCIT:is_qualified_by', + 'NCIT:qualifier_applies_to', + 'NCIT:role_has_domain', + 'NCIT:role_has_range', + 'INO:0000154', + 'HANCESTRO:0308', + 'OMIM:has_inheritance_type', + 'orphanet:C016', + 'orphanet:C017', + 'RO:0000053', + 'RO:0000086', + 'RO:0000087', + 'SNOMED:has_access', + 'SNOMED:has_clinical_course', + 'SNOMED:has_count_of_base_of_active_ingredient', + 'SNOMED:has_dose_form_administration_method', + 'SNOMED:has_dose_form_release_characteristic', + 'SNOMED:has_dose_form_transformation', + 'SNOMED:has_finding_context', + 'SNOMED:has_finding_informer', + 'SNOMED:has_inherent_attribute', + 'SNOMED:has_intent', + 'SNOMED:has_interpretation', + 'SNOMED:has_laterality', + 'SNOMED:has_measurement_method', + 'SNOMED:has_method', + 'SNOMED:has_priority', + 'SNOMED:has_procedure_context', + 'SNOMED:has_process_duration', + 'SNOMED:has_property', + 'SNOMED:has_revision_status', + 'SNOMED:has_scale_type', + 'SNOMED:has_severity', + 'SNOMED:has_specimen', + 'SNOMED:has_state_of_matter', + 
'SNOMED:has_subject_relationship_context', + 'SNOMED:has_surgical_approach', + 'SNOMED:has_technique', + 'SNOMED:has_temporal_context', + 'SNOMED:has_time_aspect', + 'SNOMED:has_units', + 'UMLS:has_structural_class', + 'UMLS:has_supported_concept_property', + 'UMLS:has_supported_concept_relationship', + 'UMLS:may_be_qualified_by'], + 'slot_uri': 'biolink:has_attribute'} }) + deprecated: Optional[bool] = Field(None, description="""A boolean flag indicating that an entity is no longer considered current or valid.""", json_schema_extra = { "linkml_meta": {'alias': 'deprecated', + 'definition_uri': 'https://w3id.org/biolink/vocab/deprecated', + 'domain_of': ['entity', + 'attribute', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['oboInOwl:ObsoleteClass'], + 'slot_uri': 'biolink:deprecated'} })
+ + + +
+[docs] +class NamedThing(Entity): + """ + a databased entity or concept/class + """ + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'class_uri': 'biolink:NamedThing', + 'definition_uri': 'https://w3id.org/biolink/vocab/NamedThing', + 'exact_mappings': ['BFO:0000001', + 'WIKIDATA:Q35120', + 'UMLSSG:OBJC', + 'STY:T071', + 'dcid:Thing'], + 'from_schema': 'https://w3id.org/biolink/bican-biolink-schema'}) + + id: str = Field(..., description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI""", json_schema_extra = { "linkml_meta": {'alias': 'id', + 'definition_uri': 'https://w3id.org/biolink/vocab/id', + 'domain': 'entity', + 'domain_of': ['ontology class', + 'entity', + 'attribute', + 'named thing', + 'taxonomic rank', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['AGRKB:primaryId', 'gff3:ID', 'gpi:DB_Object_ID'], + 'in_subset': ['translator_minimal'], + 'slot_uri': 'biolink:id'} }) + iri: Optional[str] = Field(None, description="""An IRI for an entity. This is determined by the id using expansion rules.""", json_schema_extra = { "linkml_meta": {'alias': 'iri', + 'definition_uri': 'https://w3id.org/biolink/vocab/iri', + 'domain_of': ['attribute', + 'entity', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['WIKIDATA_PROPERTY:P854'], + 'in_subset': ['translator_minimal', 'samples'], + 'slot_uri': 'biolink:iri'} }) + category: List[Literal["https://w3id.org/biolink/vocab/NamedThing","biolink:NamedThing"]] = Field(["biolink:NamedThing"], description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. 
In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}""", json_schema_extra = { "linkml_meta": {'alias': 'category', + 'definition_uri': 'https://w3id.org/biolink/vocab/category', + 'designates_type': True, + 'domain': 'entity', + 'domain_of': ['entity', + 'attribute', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'in_subset': ['translator_minimal'], + 'is_a': 'type', + 'is_class_field': True, + 'slot_uri': 'biolink:category'} }) + type: Optional[List[str]] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'type', + 'definition_uri': 'https://w3id.org/biolink/vocab/type', + 'domain': 'entity', + 'domain_of': ['entity', + 'attribute', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['gff3:type', 'gpi:DB_Object_Type'], + 'mappings': ['rdf:type'], + 'slot_uri': 'rdf:type'} }) + name: Optional[str] = Field(None, description="""A human-readable name for an attribute or entity.""", json_schema_extra = { "linkml_meta": {'alias': 
'name', + 'aliases': ['label', 'display name', 'title'], + 'definition_uri': 'https://w3id.org/biolink/vocab/name', + 'domain': 'entity', + 'domain_of': ['attribute', + 'entity', + 'macromolecular machine mixin', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene or gene product', + 'gene', + 'genome'], + 'exact_mappings': ['gff3:Name', 'gpi:DB_Object_Name'], + 'in_subset': ['translator_minimal', 'samples'], + 'mappings': ['rdfs:label'], + 'narrow_mappings': ['dct:title', 'WIKIDATA_PROPERTY:P1476'], + 'slot_uri': 'rdfs:label'} }) + description: Optional[str] = Field(None, description="""a human-readable description of an entity""", json_schema_extra = { "linkml_meta": {'alias': 'description', + 'aliases': ['definition'], + 'definition_uri': 'https://w3id.org/biolink/vocab/description', + 'domain_of': ['entity', + 'attribute', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['IAO:0000115', 'skos:definitions'], + 'in_subset': ['translator_minimal'], + 'mappings': ['dct:description'], + 'narrow_mappings': ['gff3:Description'], + 'slot_uri': 'dct:description'} }) + has_attribute: Optional[List[str]] = Field(None, description="""connects any entity to an attribute""", json_schema_extra = { "linkml_meta": {'alias': 'has_attribute', + 'close_mappings': ['OBI:0001927'], + 'definition_uri': 'https://w3id.org/biolink/vocab/has_attribute', + 'domain': 'entity', + 'domain_of': ['entity', + 'attribute', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['SIO:000008'], + 'in_subset': ['samples'], + 
'narrow_mappings': ['OBAN:association_has_subject_property', + 'OBAN:association_has_object_property', + 'CPT:has_possibly_included_panel_element', + 'DRUGBANK:category', + 'EFO:is_executed_in', + 'HANCESTRO:0301', + 'LOINC:has_action_guidance', + 'LOINC:has_adjustment', + 'LOINC:has_aggregation_view', + 'LOINC:has_approach_guidance', + 'LOINC:has_divisor', + 'LOINC:has_exam', + 'LOINC:has_method', + 'LOINC:has_modality_subtype', + 'LOINC:has_object_guidance', + 'LOINC:has_scale', + 'LOINC:has_suffix', + 'LOINC:has_time_aspect', + 'LOINC:has_time_modifier', + 'LOINC:has_timing_of', + 'NCIT:R88', + 'NCIT:eo_disease_has_property_or_attribute', + 'NCIT:has_data_element', + 'NCIT:has_pharmaceutical_administration_method', + 'NCIT:has_pharmaceutical_basic_dose_form', + 'NCIT:has_pharmaceutical_intended_site', + 'NCIT:has_pharmaceutical_release_characteristics', + 'NCIT:has_pharmaceutical_state_of_matter', + 'NCIT:has_pharmaceutical_transformation', + 'NCIT:is_qualified_by', + 'NCIT:qualifier_applies_to', + 'NCIT:role_has_domain', + 'NCIT:role_has_range', + 'INO:0000154', + 'HANCESTRO:0308', + 'OMIM:has_inheritance_type', + 'orphanet:C016', + 'orphanet:C017', + 'RO:0000053', + 'RO:0000086', + 'RO:0000087', + 'SNOMED:has_access', + 'SNOMED:has_clinical_course', + 'SNOMED:has_count_of_base_of_active_ingredient', + 'SNOMED:has_dose_form_administration_method', + 'SNOMED:has_dose_form_release_characteristic', + 'SNOMED:has_dose_form_transformation', + 'SNOMED:has_finding_context', + 'SNOMED:has_finding_informer', + 'SNOMED:has_inherent_attribute', + 'SNOMED:has_intent', + 'SNOMED:has_interpretation', + 'SNOMED:has_laterality', + 'SNOMED:has_measurement_method', + 'SNOMED:has_method', + 'SNOMED:has_priority', + 'SNOMED:has_procedure_context', + 'SNOMED:has_process_duration', + 'SNOMED:has_property', + 'SNOMED:has_revision_status', + 'SNOMED:has_scale_type', + 'SNOMED:has_severity', + 'SNOMED:has_specimen', + 'SNOMED:has_state_of_matter', + 
'SNOMED:has_subject_relationship_context', + 'SNOMED:has_surgical_approach', + 'SNOMED:has_technique', + 'SNOMED:has_temporal_context', + 'SNOMED:has_time_aspect', + 'SNOMED:has_units', + 'UMLS:has_structural_class', + 'UMLS:has_supported_concept_property', + 'UMLS:has_supported_concept_relationship', + 'UMLS:may_be_qualified_by'], + 'slot_uri': 'biolink:has_attribute'} }) + deprecated: Optional[bool] = Field(None, description="""A boolean flag indicating that an entity is no longer considered current or valid.""", json_schema_extra = { "linkml_meta": {'alias': 'deprecated', + 'definition_uri': 'https://w3id.org/biolink/vocab/deprecated', + 'domain_of': ['entity', + 'attribute', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['oboInOwl:ObsoleteClass'], + 'slot_uri': 'biolink:deprecated'} }) + provided_by: Optional[List[str]] = Field(None, description="""The value in this node property represents the knowledge provider that created or assembled the node and all of its attributes. Used internally to represent how a particular node made its way into a knowledge provider or graph.""", json_schema_extra = { "linkml_meta": {'alias': 'provided_by', + 'definition_uri': 'https://w3id.org/biolink/vocab/provided_by', + 'domain': 'named thing', + 'domain_of': ['named thing', + 'attribute', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'is_a': 'node property', + 'slot_uri': 'biolink:provided_by'} }) + xref: Optional[List[str]] = Field(None, description="""A database cross reference or alternative identifier for a NamedThing or edge between two NamedThings. 
This property should point to a database record or webpage that supports the existence of the edge, or gives more detail about the edge. This property can be used on a node or edge to provide multiple URIs or CURIE cross references.""", json_schema_extra = { "linkml_meta": {'alias': 'xref', + 'aliases': ['dbxref', 'Dbxref', 'DbXref', 'record_url', 'source_record_urls'], + 'definition_uri': 'https://w3id.org/biolink/vocab/xref', + 'domain': 'named thing', + 'domain_of': ['named thing', + 'gene', + 'attribute', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'genome'], + 'in_subset': ['translator_minimal'], + 'narrow_mappings': ['gff3:Dbxref', 'gpi:DB_Xrefs'], + 'slot_uri': 'biolink:xref'} }) + full_name: Optional[str] = Field(None, description="""a long-form human readable name for a thing""", json_schema_extra = { "linkml_meta": {'alias': 'full_name', + 'definition_uri': 'https://w3id.org/biolink/vocab/full_name', + 'domain': 'named thing', + 'domain_of': ['named thing', + 'attribute', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'is_a': 'node property', + 'slot_uri': 'biolink:full_name'} }) + synonym: Optional[List[str]] = Field(None, description="""Alternate human-readable names for a thing""", json_schema_extra = { "linkml_meta": {'alias': 'synonym', + 'aliases': ['alias'], + 'definition_uri': 'https://w3id.org/biolink/vocab/synonym', + 'domain': 'named thing', + 'domain_of': ['named thing', + 'attribute', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'in_subset': ['translator_minimal'], + 'is_a': 'node property', + 'narrow_mappings': ['skos:altLabel', + 'gff3:Alias', + 
'AGRKB:synonyms', + 'gpi:DB_Object_Synonyms', + 'HANCESTRO:0330', + 'IAO:0000136', + 'RXNORM:has_tradename'], + 'slot_uri': 'biolink:synonym'} })
+ + + +
class Attribute(NamedThing, OntologyClass):
    """
    A property or characteristic of an entity. For example, an apple may have properties such as color, shape, age, crispiness. An environmental sample may have attributes such as depth, lat, long, material.
    """

    # Class-level LinkML metadata. The `slot_usage` entry carries the same
    # description text that the `name` field below declares.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {
            "class_uri": "biolink:Attribute",
            "definition_uri": "https://w3id.org/biolink/vocab/Attribute",
            "exact_mappings": ["SIO:000614"],
            "from_schema": "https://w3id.org/biolink/bican-biolink-schema",
            "id_prefixes": ["EDAM-DATA", "EDAM-FORMAT", "EDAM-OPERATION", "EDAM-TOPIC"],
            "in_subset": ["samples"],
            "mixins": ["ontology class"],
            "slot_usage": {
                "name": {
                    "description": (
                        "The human-readable 'attribute name' can be set to a string "
                        "which reflects its context of interpretation, e.g. SEPIO "
                        "evidence/provenance/confidence annotation or it can default "
                        "to the name associated with the 'has attribute type' slot "
                        "ontology term."
                    ),
                    "name": "name",
                }
            },
        }
    )

    # Required: no default value.
    id: str = Field(
        default=...,
        description=(
            "A unique identifier for an entity. "
            "Must be either a CURIE shorthand for a URI or a complete URI"
        ),
        json_schema_extra={
            "linkml_meta": {
                "alias": "id",
                "definition_uri": "https://w3id.org/biolink/vocab/id",
                "domain": "entity",
                "domain_of": [
                    "ontology class", "entity", "attribute", "named thing",
                    "taxonomic rank", "organism taxon", "information content entity",
                    "dataset", "physical entity", "activity", "procedure",
                    "material sample", "biological entity", "gene", "genome",
                ],
                "exact_mappings": ["AGRKB:primaryId", "gff3:ID", "gpi:DB_Object_ID"],
                "in_subset": ["translator_minimal"],
                "slot_uri": "biolink:id",
            }
        },
    )
    # Type designator: values are restricted to this class's URI / CURIE.
    category: List[
        Literal["https://w3id.org/biolink/vocab/Attribute", "biolink:Attribute"]
    ] = Field(
        default=["biolink:Attribute"],
        description=(
            "Name of the high level ontology class in which this entity is categorized. "
            "Corresponds to the label for the biolink entity type class. "
            "In a neo4j database this MAY correspond to the neo4j label tag. "
            "In an RDF database it should be a biolink model class URI. "
            "This field is multi-valued. "
            "It should include values for ancestors of the biolink class; for example, "
            "a protein such as Shh would have category values `biolink:Protein`, "
            "`biolink:GeneProduct`, `biolink:MolecularEntity`. "
            "In an RDF database, nodes will typically have an rdf:type triples. "
            "This can be to the most specific biolink class, or potentially to a class "
            "more specific than something in biolink. "
            "For example, a sequence feature `f` may have a rdf:type assertion to a SO "
            "class such as TF_binding_site, which is more specific than anything in biolink. "
            "Here we would have categories {biolink:GenomicEntity, "
            "biolink:MolecularEntity, biolink:NamedThing}"
        ),
        json_schema_extra={
            "linkml_meta": {
                "alias": "category",
                "definition_uri": "https://w3id.org/biolink/vocab/category",
                "designates_type": True,
                "domain": "entity",
                "domain_of": [
                    "entity", "attribute", "named thing", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample", "biological entity",
                    "gene", "genome",
                ],
                "in_subset": ["translator_minimal"],
                "is_a": "type",
                "is_class_field": True,
                "slot_uri": "biolink:category",
            }
        },
    )
    type: Optional[List[str]] = Field(
        default=None,
        json_schema_extra={
            "linkml_meta": {
                "alias": "type",
                "definition_uri": "https://w3id.org/biolink/vocab/type",
                "domain": "entity",
                "domain_of": [
                    "entity", "attribute", "named thing", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample", "biological entity",
                    "gene", "genome",
                ],
                "exact_mappings": ["gff3:type", "gpi:DB_Object_Type"],
                "mappings": ["rdf:type"],
                "slot_uri": "rdf:type",
            }
        },
    )
    description: Optional[str] = Field(
        default=None,
        description="a human-readable description of an entity",
        json_schema_extra={
            "linkml_meta": {
                "alias": "description",
                "aliases": ["definition"],
                "definition_uri": "https://w3id.org/biolink/vocab/description",
                "domain_of": [
                    "entity", "attribute", "named thing", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample", "biological entity",
                    "gene", "genome",
                ],
                "exact_mappings": ["IAO:0000115", "skos:definitions"],
                "in_subset": ["translator_minimal"],
                "mappings": ["dct:description"],
                "narrow_mappings": ["gff3:Description"],
                "slot_uri": "dct:description",
            }
        },
    )
    has_attribute: Optional[List[str]] = Field(
        default=None,
        description="connects any entity to an attribute",
        json_schema_extra={
            "linkml_meta": {
                "alias": "has_attribute",
                "close_mappings": ["OBI:0001927"],
                "definition_uri": "https://w3id.org/biolink/vocab/has_attribute",
                "domain": "entity",
                "domain_of": [
                    "entity", "attribute", "named thing", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample", "biological entity",
                    "gene", "genome",
                ],
                "exact_mappings": ["SIO:000008"],
                "in_subset": ["samples"],
                "narrow_mappings": [
                    "OBAN:association_has_subject_property",
                    "OBAN:association_has_object_property",
                    "CPT:has_possibly_included_panel_element",
                    "DRUGBANK:category", "EFO:is_executed_in", "HANCESTRO:0301",
                    "LOINC:has_action_guidance", "LOINC:has_adjustment",
                    "LOINC:has_aggregation_view", "LOINC:has_approach_guidance",
                    "LOINC:has_divisor", "LOINC:has_exam", "LOINC:has_method",
                    "LOINC:has_modality_subtype", "LOINC:has_object_guidance",
                    "LOINC:has_scale", "LOINC:has_suffix", "LOINC:has_time_aspect",
                    "LOINC:has_time_modifier", "LOINC:has_timing_of",
                    "NCIT:R88", "NCIT:eo_disease_has_property_or_attribute",
                    "NCIT:has_data_element",
                    "NCIT:has_pharmaceutical_administration_method",
                    "NCIT:has_pharmaceutical_basic_dose_form",
                    "NCIT:has_pharmaceutical_intended_site",
                    "NCIT:has_pharmaceutical_release_characteristics",
                    "NCIT:has_pharmaceutical_state_of_matter",
                    "NCIT:has_pharmaceutical_transformation",
                    "NCIT:is_qualified_by", "NCIT:qualifier_applies_to",
                    "NCIT:role_has_domain", "NCIT:role_has_range",
                    "INO:0000154", "HANCESTRO:0308", "OMIM:has_inheritance_type",
                    "orphanet:C016", "orphanet:C017",
                    "RO:0000053", "RO:0000086", "RO:0000087",
                    "SNOMED:has_access", "SNOMED:has_clinical_course",
                    "SNOMED:has_count_of_base_of_active_ingredient",
                    "SNOMED:has_dose_form_administration_method",
                    "SNOMED:has_dose_form_release_characteristic",
                    "SNOMED:has_dose_form_transformation",
                    "SNOMED:has_finding_context", "SNOMED:has_finding_informer",
                    "SNOMED:has_inherent_attribute", "SNOMED:has_intent",
                    "SNOMED:has_interpretation", "SNOMED:has_laterality",
                    "SNOMED:has_measurement_method", "SNOMED:has_method",
                    "SNOMED:has_priority", "SNOMED:has_procedure_context",
                    "SNOMED:has_process_duration", "SNOMED:has_property",
                    "SNOMED:has_revision_status", "SNOMED:has_scale_type",
                    "SNOMED:has_severity", "SNOMED:has_specimen",
                    "SNOMED:has_state_of_matter",
                    "SNOMED:has_subject_relationship_context",
                    "SNOMED:has_surgical_approach", "SNOMED:has_technique",
                    "SNOMED:has_temporal_context", "SNOMED:has_time_aspect",
                    "SNOMED:has_units", "UMLS:has_structural_class",
                    "UMLS:has_supported_concept_property",
                    "UMLS:has_supported_concept_relationship",
                    "UMLS:may_be_qualified_by",
                ],
                "slot_uri": "biolink:has_attribute",
            }
        },
    )
    deprecated: Optional[bool] = Field(
        default=None,
        description=(
            "A boolean flag indicating that an entity is no longer considered "
            "current or valid."
        ),
        json_schema_extra={
            "linkml_meta": {
                "alias": "deprecated",
                "definition_uri": "https://w3id.org/biolink/vocab/deprecated",
                "domain_of": [
                    "entity", "attribute", "named thing", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample", "biological entity",
                    "gene", "genome",
                ],
                "exact_mappings": ["oboInOwl:ObsoleteClass"],
                "slot_uri": "biolink:deprecated",
            }
        },
    )
    # The next four slots declare 'named thing' as their domain.
    provided_by: Optional[List[str]] = Field(
        default=None,
        description=(
            "The value in this node property represents the knowledge provider that "
            "created or assembled the node and all of its attributes. "
            "Used internally to represent how a particular node made its way into a "
            "knowledge provider or graph."
        ),
        json_schema_extra={
            "linkml_meta": {
                "alias": "provided_by",
                "definition_uri": "https://w3id.org/biolink/vocab/provided_by",
                "domain": "named thing",
                "domain_of": [
                    "named thing", "attribute", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample", "biological entity",
                    "gene", "genome",
                ],
                "is_a": "node property",
                "slot_uri": "biolink:provided_by",
            }
        },
    )
    xref: Optional[List[str]] = Field(
        default=None,
        description=(
            "A database cross reference or alternative identifier for a NamedThing or "
            "edge between two NamedThings. "
            "This property should point to a database record or webpage that supports "
            "the existence of the edge, or gives more detail about the edge. "
            "This property can be used on a node or edge to provide multiple URIs or "
            "CURIE cross references."
        ),
        json_schema_extra={
            "linkml_meta": {
                "alias": "xref",
                "aliases": ["dbxref", "Dbxref", "DbXref", "record_url", "source_record_urls"],
                "definition_uri": "https://w3id.org/biolink/vocab/xref",
                "domain": "named thing",
                "domain_of": [
                    "named thing", "gene", "attribute", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample", "biological entity",
                    "genome",
                ],
                "in_subset": ["translator_minimal"],
                "narrow_mappings": ["gff3:Dbxref", "gpi:DB_Xrefs"],
                "slot_uri": "biolink:xref",
            }
        },
    )
    full_name: Optional[str] = Field(
        default=None,
        description="a long-form human readable name for a thing",
        json_schema_extra={
            "linkml_meta": {
                "alias": "full_name",
                "definition_uri": "https://w3id.org/biolink/vocab/full_name",
                "domain": "named thing",
                "domain_of": [
                    "named thing", "attribute", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample", "biological entity",
                    "gene", "genome",
                ],
                "is_a": "node property",
                "slot_uri": "biolink:full_name",
            }
        },
    )
    synonym: Optional[List[str]] = Field(
        default=None,
        description="Alternate human-readable names for a thing",
        json_schema_extra={
            "linkml_meta": {
                "alias": "synonym",
                "aliases": ["alias"],
                "definition_uri": "https://w3id.org/biolink/vocab/synonym",
                "domain": "named thing",
                "domain_of": [
                    "named thing", "attribute", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample", "biological entity",
                    "gene", "genome",
                ],
                "in_subset": ["translator_minimal"],
                "is_a": "node property",
                "narrow_mappings": [
                    "skos:altLabel", "gff3:Alias", "AGRKB:synonyms",
                    "gpi:DB_Object_Synonyms", "HANCESTRO:0330", "IAO:0000136",
                    "RXNORM:has_tradename",
                ],
                "slot_uri": "biolink:synonym",
            }
        },
    )
    # Description text here matches the `slot_usage` entry in `linkml_meta`.
    name: Optional[str] = Field(
        default=None,
        description=(
            "The human-readable 'attribute name' can be set to a string which reflects "
            "its context of interpretation, e.g. SEPIO evidence/provenance/confidence "
            "annotation or it can default to the name associated with the "
            "'has attribute type' slot ontology term."
        ),
        json_schema_extra={
            "linkml_meta": {
                "alias": "name",
                "aliases": ["label", "display name", "title"],
                "definition_uri": "https://w3id.org/biolink/vocab/name",
                "domain": "entity",
                "domain_of": [
                    "attribute", "entity", "macromolecular machine mixin",
                    "named thing", "organism taxon", "information content entity",
                    "dataset", "physical entity", "activity", "procedure",
                    "material sample", "biological entity", "gene or gene product",
                    "gene", "genome",
                ],
                "exact_mappings": ["gff3:Name", "gpi:DB_Object_Name"],
                "in_subset": ["translator_minimal", "samples"],
                "mappings": ["rdfs:label"],
                "narrow_mappings": ["dct:title", "WIKIDATA_PROPERTY:P1476"],
                "slot_uri": "rdfs:label",
            }
        },
    )
    # The next three slots declare 'attribute' as their only domain.
    # `has_attribute_type` is required: no default value.
    has_attribute_type: str = Field(
        default=...,
        description="connects an attribute to a class that describes it",
        json_schema_extra={
            "linkml_meta": {
                "alias": "has_attribute_type",
                "definition_uri": "https://w3id.org/biolink/vocab/has_attribute_type",
                "domain": "attribute",
                "domain_of": ["attribute"],
                "in_subset": ["samples"],
                "narrow_mappings": ["LOINC:has_modality_type", "LOINC:has_view_type"],
                "slot_uri": "biolink:has_attribute_type",
            }
        },
    )
    has_quantitative_value: Optional[List[QuantityValue]] = Field(
        default=None,
        description="connects an attribute to a value",
        json_schema_extra={
            "linkml_meta": {
                "alias": "has_quantitative_value",
                "definition_uri": "https://w3id.org/biolink/vocab/has_quantitative_value",
                "domain": "attribute",
                "domain_of": ["attribute"],
                "exact_mappings": ["qud:quantityValue"],
                "in_subset": ["samples"],
                "narrow_mappings": [
                    "SNOMED:has_concentration_strength_numerator_value",
                    "SNOMED:has_presentation_strength_denominator_value",
                    "SNOMED:has_presentation_strength_numerator_value",
                ],
                "slot_uri": "biolink:has_quantitative_value",
            }
        },
    )
    has_qualitative_value: Optional[str] = Field(
        default=None,
        description="connects an attribute to a value",
        json_schema_extra={
            "linkml_meta": {
                "alias": "has_qualitative_value",
                "definition_uri": "https://w3id.org/biolink/vocab/has_qualitative_value",
                "domain": "attribute",
                "domain_of": ["attribute"],
                "in_subset": ["samples"],
                "slot_uri": "biolink:has_qualitative_value",
            }
        },
    )
    iri: Optional[str] = Field(
        default=None,
        description=(
            "An IRI for an entity. "
            "This is determined by the id using expansion rules."
        ),
        json_schema_extra={
            "linkml_meta": {
                "alias": "iri",
                "definition_uri": "https://w3id.org/biolink/vocab/iri",
                "domain_of": [
                    "attribute", "entity", "named thing", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample", "biological entity",
                    "gene", "genome",
                ],
                "exact_mappings": ["WIKIDATA_PROPERTY:P854"],
                "in_subset": ["translator_minimal", "samples"],
                "slot_uri": "biolink:iri",
            }
        },
    )
+ + + +
class TaxonomicRank(OntologyClass):
    """
    A descriptor for the rank within a taxonomic classification. Example instance: TAXRANK:0000017 (kingdom)
    """

    # Class-level LinkML metadata.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {
            "class_uri": "biolink:TaxonomicRank",
            "definition_uri": "https://w3id.org/biolink/vocab/TaxonomicRank",
            "from_schema": "https://w3id.org/biolink/bican-biolink-schema",
            "id_prefixes": ["TAXRANK"],
            "mappings": ["WIKIDATA:Q427626"],
        }
    )

    # The only field declared on this class; required (no default value).
    id: str = Field(
        default=...,
        description=(
            "A unique identifier for an entity. "
            "Must be either a CURIE shorthand for a URI or a complete URI"
        ),
        json_schema_extra={
            "linkml_meta": {
                "alias": "id",
                "definition_uri": "https://w3id.org/biolink/vocab/id",
                "domain": "entity",
                "domain_of": [
                    "ontology class", "entity", "attribute", "named thing",
                    "taxonomic rank", "organism taxon", "information content entity",
                    "dataset", "physical entity", "activity", "procedure",
                    "material sample", "biological entity", "gene", "genome",
                ],
                "exact_mappings": ["AGRKB:primaryId", "gff3:ID", "gpi:DB_Object_ID"],
                "in_subset": ["translator_minimal"],
                "slot_uri": "biolink:id",
            }
        },
    )
+ + + +
class OrganismTaxon(NamedThing):
    """
    A classification of a set of organisms. Example instances: NCBITaxon:9606 (Homo sapiens), NCBITaxon:2 (Bacteria). Can also be used to represent strains or subspecies.
    """

    # Class-level LinkML metadata.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {
            "aliases": ["taxon", "taxonomic classification"],
            "class_uri": "biolink:OrganismTaxon",
            "definition_uri": "https://w3id.org/biolink/vocab/OrganismTaxon",
            "exact_mappings": ["WIKIDATA:Q16521", "STY:T001", "bioschemas:Taxon"],
            "from_schema": "https://w3id.org/biolink/bican-biolink-schema",
            "id_prefixes": ["NCBITaxon", "MESH", "UMLS"],
            "in_subset": ["model_organism_database"],
            "narrow_mappings": ["dcid:BiologicalSpecies"],
            "values_from": ["NCBITaxon"],
        }
    )

    # Required: no default value.
    id: str = Field(
        default=...,
        description=(
            "A unique identifier for an entity. "
            "Must be either a CURIE shorthand for a URI or a complete URI"
        ),
        json_schema_extra={
            "linkml_meta": {
                "alias": "id",
                "definition_uri": "https://w3id.org/biolink/vocab/id",
                "domain": "entity",
                "domain_of": [
                    "ontology class", "entity", "attribute", "named thing",
                    "taxonomic rank", "organism taxon", "information content entity",
                    "dataset", "physical entity", "activity", "procedure",
                    "material sample", "biological entity", "gene", "genome",
                ],
                "exact_mappings": ["AGRKB:primaryId", "gff3:ID", "gpi:DB_Object_ID"],
                "in_subset": ["translator_minimal"],
                "slot_uri": "biolink:id",
            }
        },
    )
    iri: Optional[str] = Field(
        default=None,
        description=(
            "An IRI for an entity. "
            "This is determined by the id using expansion rules."
        ),
        json_schema_extra={
            "linkml_meta": {
                "alias": "iri",
                "definition_uri": "https://w3id.org/biolink/vocab/iri",
                "domain_of": [
                    "attribute", "entity", "named thing", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample", "biological entity",
                    "gene", "genome",
                ],
                "exact_mappings": ["WIKIDATA_PROPERTY:P854"],
                "in_subset": ["translator_minimal", "samples"],
                "slot_uri": "biolink:iri",
            }
        },
    )
    # Type designator: values are restricted to this class's URI / CURIE.
    category: List[
        Literal["https://w3id.org/biolink/vocab/OrganismTaxon", "biolink:OrganismTaxon"]
    ] = Field(
        default=["biolink:OrganismTaxon"],
        description=(
            "Name of the high level ontology class in which this entity is categorized. "
            "Corresponds to the label for the biolink entity type class. "
            "In a neo4j database this MAY correspond to the neo4j label tag. "
            "In an RDF database it should be a biolink model class URI. "
            "This field is multi-valued. "
            "It should include values for ancestors of the biolink class; for example, "
            "a protein such as Shh would have category values `biolink:Protein`, "
            "`biolink:GeneProduct`, `biolink:MolecularEntity`. "
            "In an RDF database, nodes will typically have an rdf:type triples. "
            "This can be to the most specific biolink class, or potentially to a class "
            "more specific than something in biolink. "
            "For example, a sequence feature `f` may have a rdf:type assertion to a SO "
            "class such as TF_binding_site, which is more specific than anything in biolink. "
            "Here we would have categories {biolink:GenomicEntity, "
            "biolink:MolecularEntity, biolink:NamedThing}"
        ),
        json_schema_extra={
            "linkml_meta": {
                "alias": "category",
                "definition_uri": "https://w3id.org/biolink/vocab/category",
                "designates_type": True,
                "domain": "entity",
                "domain_of": [
                    "entity", "attribute", "named thing", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample", "biological entity",
                    "gene", "genome",
                ],
                "in_subset": ["translator_minimal"],
                "is_a": "type",
                "is_class_field": True,
                "slot_uri": "biolink:category",
            }
        },
    )
    type: Optional[List[str]] = Field(
        default=None,
        json_schema_extra={
            "linkml_meta": {
                "alias": "type",
                "definition_uri": "https://w3id.org/biolink/vocab/type",
                "domain": "entity",
                "domain_of": [
                    "entity", "attribute", "named thing", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample", "biological entity",
                    "gene", "genome",
                ],
                "exact_mappings": ["gff3:type", "gpi:DB_Object_Type"],
                "mappings": ["rdf:type"],
                "slot_uri": "rdf:type",
            }
        },
    )
    name: Optional[str] = Field(
        default=None,
        description="A human-readable name for an attribute or entity.",
        json_schema_extra={
            "linkml_meta": {
                "alias": "name",
                "aliases": ["label", "display name", "title"],
                "definition_uri": "https://w3id.org/biolink/vocab/name",
                "domain": "entity",
                "domain_of": [
                    "attribute", "entity", "macromolecular machine mixin",
                    "named thing", "organism taxon", "information content entity",
                    "dataset", "physical entity", "activity", "procedure",
                    "material sample", "biological entity", "gene or gene product",
                    "gene", "genome",
                ],
                "exact_mappings": ["gff3:Name", "gpi:DB_Object_Name"],
                "in_subset": ["translator_minimal", "samples"],
                "mappings": ["rdfs:label"],
                "narrow_mappings": ["dct:title", "WIKIDATA_PROPERTY:P1476"],
                "slot_uri": "rdfs:label",
            }
        },
    )
    description: Optional[str] = Field(
        default=None,
        description="a human-readable description of an entity",
        json_schema_extra={
            "linkml_meta": {
                "alias": "description",
                "aliases": ["definition"],
                "definition_uri": "https://w3id.org/biolink/vocab/description",
                "domain_of": [
                    "entity", "attribute", "named thing", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample", "biological entity",
                    "gene", "genome",
                ],
                "exact_mappings": ["IAO:0000115", "skos:definitions"],
                "in_subset": ["translator_minimal"],
                "mappings": ["dct:description"],
                "narrow_mappings": ["gff3:Description"],
                "slot_uri": "dct:description",
            }
        },
    )
    has_attribute: Optional[List[str]] = Field(
        default=None,
        description="connects any entity to an attribute",
        json_schema_extra={
            "linkml_meta": {
                "alias": "has_attribute",
                "close_mappings": ["OBI:0001927"],
                "definition_uri": "https://w3id.org/biolink/vocab/has_attribute",
                "domain": "entity",
                "domain_of": [
                    "entity", "attribute", "named thing", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample", "biological entity",
                    "gene", "genome",
                ],
                "exact_mappings": ["SIO:000008"],
                "in_subset": ["samples"],
                "narrow_mappings": [
                    "OBAN:association_has_subject_property",
                    "OBAN:association_has_object_property",
                    "CPT:has_possibly_included_panel_element",
                    "DRUGBANK:category", "EFO:is_executed_in", "HANCESTRO:0301",
                    "LOINC:has_action_guidance", "LOINC:has_adjustment",
                    "LOINC:has_aggregation_view", "LOINC:has_approach_guidance",
                    "LOINC:has_divisor", "LOINC:has_exam", "LOINC:has_method",
                    "LOINC:has_modality_subtype", "LOINC:has_object_guidance",
                    "LOINC:has_scale", "LOINC:has_suffix", "LOINC:has_time_aspect",
                    "LOINC:has_time_modifier", "LOINC:has_timing_of",
                    "NCIT:R88", "NCIT:eo_disease_has_property_or_attribute",
                    "NCIT:has_data_element",
                    "NCIT:has_pharmaceutical_administration_method",
                    "NCIT:has_pharmaceutical_basic_dose_form",
                    "NCIT:has_pharmaceutical_intended_site",
                    "NCIT:has_pharmaceutical_release_characteristics",
                    "NCIT:has_pharmaceutical_state_of_matter",
                    "NCIT:has_pharmaceutical_transformation",
                    "NCIT:is_qualified_by", "NCIT:qualifier_applies_to",
                    "NCIT:role_has_domain", "NCIT:role_has_range",
                    "INO:0000154", "HANCESTRO:0308", "OMIM:has_inheritance_type",
                    "orphanet:C016", "orphanet:C017",
                    "RO:0000053", "RO:0000086", "RO:0000087",
                    "SNOMED:has_access", "SNOMED:has_clinical_course",
                    "SNOMED:has_count_of_base_of_active_ingredient",
                    "SNOMED:has_dose_form_administration_method",
                    "SNOMED:has_dose_form_release_characteristic",
                    "SNOMED:has_dose_form_transformation",
                    "SNOMED:has_finding_context", "SNOMED:has_finding_informer",
                    "SNOMED:has_inherent_attribute", "SNOMED:has_intent",
                    "SNOMED:has_interpretation", "SNOMED:has_laterality",
                    "SNOMED:has_measurement_method", "SNOMED:has_method",
                    "SNOMED:has_priority", "SNOMED:has_procedure_context",
                    "SNOMED:has_process_duration", "SNOMED:has_property",
                    "SNOMED:has_revision_status", "SNOMED:has_scale_type",
                    "SNOMED:has_severity", "SNOMED:has_specimen",
                    "SNOMED:has_state_of_matter",
                    "SNOMED:has_subject_relationship_context",
                    "SNOMED:has_surgical_approach", "SNOMED:has_technique",
                    "SNOMED:has_temporal_context", "SNOMED:has_time_aspect",
                    "SNOMED:has_units", "UMLS:has_structural_class",
                    "UMLS:has_supported_concept_property",
                    "UMLS:has_supported_concept_relationship",
                    "UMLS:may_be_qualified_by",
                ],
                "slot_uri": "biolink:has_attribute",
            }
        },
    )
    deprecated: Optional[bool] = Field(
        default=None,
        description=(
            "A boolean flag indicating that an entity is no longer considered "
            "current or valid."
        ),
        json_schema_extra={
            "linkml_meta": {
                "alias": "deprecated",
                "definition_uri": "https://w3id.org/biolink/vocab/deprecated",
                "domain_of": [
                    "entity", "attribute", "named thing", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample", "biological entity",
                    "gene", "genome",
                ],
                "exact_mappings": ["oboInOwl:ObsoleteClass"],
                "slot_uri": "biolink:deprecated",
            }
        },
    )
    # The remaining slots declare 'named thing' as their domain.
    provided_by: Optional[List[str]] = Field(
        default=None,
        description=(
            "The value in this node property represents the knowledge provider that "
            "created or assembled the node and all of its attributes. "
            "Used internally to represent how a particular node made its way into a "
            "knowledge provider or graph."
        ),
        json_schema_extra={
            "linkml_meta": {
                "alias": "provided_by",
                "definition_uri": "https://w3id.org/biolink/vocab/provided_by",
                "domain": "named thing",
                "domain_of": [
                    "named thing", "attribute", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample", "biological entity",
                    "gene", "genome",
                ],
                "is_a": "node property",
                "slot_uri": "biolink:provided_by",
            }
        },
    )
    xref: Optional[List[str]] = Field(
        default=None,
        description=(
            "A database cross reference or alternative identifier for a NamedThing or "
            "edge between two NamedThings. "
            "This property should point to a database record or webpage that supports "
            "the existence of the edge, or gives more detail about the edge. "
            "This property can be used on a node or edge to provide multiple URIs or "
            "CURIE cross references."
        ),
        json_schema_extra={
            "linkml_meta": {
                "alias": "xref",
                "aliases": ["dbxref", "Dbxref", "DbXref", "record_url", "source_record_urls"],
                "definition_uri": "https://w3id.org/biolink/vocab/xref",
                "domain": "named thing",
                "domain_of": [
                    "named thing", "gene", "attribute", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample", "biological entity",
                    "genome",
                ],
                "in_subset": ["translator_minimal"],
                "narrow_mappings": ["gff3:Dbxref", "gpi:DB_Xrefs"],
                "slot_uri": "biolink:xref",
            }
        },
    )
    full_name: Optional[str] = Field(
        default=None,
        description="a long-form human readable name for a thing",
        json_schema_extra={
            "linkml_meta": {
                "alias": "full_name",
                "definition_uri": "https://w3id.org/biolink/vocab/full_name",
                "domain": "named thing",
                "domain_of": [
                    "named thing", "attribute", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample", "biological entity",
                    "gene", "genome",
                ],
                "is_a": "node property",
                "slot_uri": "biolink:full_name",
            }
        },
    )
    synonym: Optional[List[str]] = Field(
        default=None,
        description="Alternate human-readable names for a thing",
        json_schema_extra={
            "linkml_meta": {
                "alias": "synonym",
                "aliases": ["alias"],
                "definition_uri": "https://w3id.org/biolink/vocab/synonym",
                "domain": "named thing",
                "domain_of": [
                    "named thing", "attribute", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample", "biological entity",
                    "gene", "genome",
                ],
                "in_subset": ["translator_minimal"],
                "is_a": "node property",
                "narrow_mappings": [
                    "skos:altLabel", "gff3:Alias", "AGRKB:synonyms",
                    "gpi:DB_Object_Synonyms", "HANCESTRO:0330", "IAO:0000136",
                    "RXNORM:has_tradename",
                ],
                "slot_uri": "biolink:synonym",
            }
        },
    )
    # `domain_of` lists only 'organism taxon' for this slot.
    has_taxonomic_rank: Optional[str] = Field(
        default=None,
        json_schema_extra={
            "linkml_meta": {
                "alias": "has_taxonomic_rank",
                "definition_uri": "https://w3id.org/biolink/vocab/has_taxonomic_rank",
                "domain": "named thing",
                "domain_of": ["organism taxon"],
                "is_a": "node property",
                "mappings": ["WIKIDATA:P105"],
                "slot_uri": "biolink:has_taxonomic_rank",
            }
        },
    )
+ + + +
+[docs] +class InformationContentEntity(NamedThing): + """ + a piece of information that typically describes some topic of discourse or is used as support. + """ + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'abstract': True, + 'aliases': ['information', 'information artefact', 'information entity'], + 'class_uri': 'biolink:InformationContentEntity', + 'definition_uri': 'https://w3id.org/biolink/vocab/InformationContentEntity', + 'exact_mappings': ['IAO:0000030'], + 'from_schema': 'https://w3id.org/biolink/bican-biolink-schema', + 'id_prefixes': ['doi'], + 'narrow_mappings': ['UMLSSG:CONC', + 'STY:T077', + 'STY:T078', + 'STY:T079', + 'STY:T080', + 'STY:T081', + 'STY:T082', + 'STY:T089', + 'STY:T102', + 'STY:T169', + 'STY:T171', + 'STY:T185']}) + + id: str = Field(..., description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI""", json_schema_extra = { "linkml_meta": {'alias': 'id', + 'definition_uri': 'https://w3id.org/biolink/vocab/id', + 'domain': 'entity', + 'domain_of': ['ontology class', + 'entity', + 'attribute', + 'named thing', + 'taxonomic rank', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['AGRKB:primaryId', 'gff3:ID', 'gpi:DB_Object_ID'], + 'in_subset': ['translator_minimal'], + 'slot_uri': 'biolink:id'} }) + iri: Optional[str] = Field(None, description="""An IRI for an entity. 
This is determined by the id using expansion rules.""", json_schema_extra = { "linkml_meta": {'alias': 'iri', + 'definition_uri': 'https://w3id.org/biolink/vocab/iri', + 'domain_of': ['attribute', + 'entity', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['WIKIDATA_PROPERTY:P854'], + 'in_subset': ['translator_minimal', 'samples'], + 'slot_uri': 'biolink:iri'} }) + category: List[Literal["https://w3id.org/biolink/vocab/InformationContentEntity","biolink:InformationContentEntity"]] = Field(["biolink:InformationContentEntity"], description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. 
Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}""", json_schema_extra = { "linkml_meta": {'alias': 'category', + 'definition_uri': 'https://w3id.org/biolink/vocab/category', + 'designates_type': True, + 'domain': 'entity', + 'domain_of': ['entity', + 'attribute', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'in_subset': ['translator_minimal'], + 'is_a': 'type', + 'is_class_field': True, + 'slot_uri': 'biolink:category'} }) + type: Optional[List[str]] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'type', + 'definition_uri': 'https://w3id.org/biolink/vocab/type', + 'domain': 'entity', + 'domain_of': ['entity', + 'attribute', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['gff3:type', 'gpi:DB_Object_Type'], + 'mappings': ['rdf:type'], + 'slot_uri': 'rdf:type'} }) + name: Optional[str] = Field(None, description="""A human-readable name for an attribute or entity.""", json_schema_extra = { "linkml_meta": {'alias': 'name', + 'aliases': ['label', 'display name', 'title'], + 'definition_uri': 'https://w3id.org/biolink/vocab/name', + 'domain': 'entity', + 'domain_of': ['attribute', + 'entity', + 'macromolecular machine mixin', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene or gene product', + 'gene', + 'genome'], + 'exact_mappings': ['gff3:Name', 'gpi:DB_Object_Name'], + 'in_subset': ['translator_minimal', 'samples'], + 'mappings': ['rdfs:label'], + 'narrow_mappings': ['dct:title', 'WIKIDATA_PROPERTY:P1476'], + 'slot_uri': 'rdfs:label'} }) + 
description: Optional[str] = Field(None, description="""a human-readable description of an entity""", json_schema_extra = { "linkml_meta": {'alias': 'description', + 'aliases': ['definition'], + 'definition_uri': 'https://w3id.org/biolink/vocab/description', + 'domain_of': ['entity', + 'attribute', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['IAO:0000115', 'skos:definitions'], + 'in_subset': ['translator_minimal'], + 'mappings': ['dct:description'], + 'narrow_mappings': ['gff3:Description'], + 'slot_uri': 'dct:description'} }) + has_attribute: Optional[List[str]] = Field(None, description="""connects any entity to an attribute""", json_schema_extra = { "linkml_meta": {'alias': 'has_attribute', + 'close_mappings': ['OBI:0001927'], + 'definition_uri': 'https://w3id.org/biolink/vocab/has_attribute', + 'domain': 'entity', + 'domain_of': ['entity', + 'attribute', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['SIO:000008'], + 'in_subset': ['samples'], + 'narrow_mappings': ['OBAN:association_has_subject_property', + 'OBAN:association_has_object_property', + 'CPT:has_possibly_included_panel_element', + 'DRUGBANK:category', + 'EFO:is_executed_in', + 'HANCESTRO:0301', + 'LOINC:has_action_guidance', + 'LOINC:has_adjustment', + 'LOINC:has_aggregation_view', + 'LOINC:has_approach_guidance', + 'LOINC:has_divisor', + 'LOINC:has_exam', + 'LOINC:has_method', + 'LOINC:has_modality_subtype', + 'LOINC:has_object_guidance', + 'LOINC:has_scale', + 'LOINC:has_suffix', + 'LOINC:has_time_aspect', + 'LOINC:has_time_modifier', + 'LOINC:has_timing_of', + 'NCIT:R88', + 'NCIT:eo_disease_has_property_or_attribute', + 'NCIT:has_data_element', + 
'NCIT:has_pharmaceutical_administration_method', + 'NCIT:has_pharmaceutical_basic_dose_form', + 'NCIT:has_pharmaceutical_intended_site', + 'NCIT:has_pharmaceutical_release_characteristics', + 'NCIT:has_pharmaceutical_state_of_matter', + 'NCIT:has_pharmaceutical_transformation', + 'NCIT:is_qualified_by', + 'NCIT:qualifier_applies_to', + 'NCIT:role_has_domain', + 'NCIT:role_has_range', + 'INO:0000154', + 'HANCESTRO:0308', + 'OMIM:has_inheritance_type', + 'orphanet:C016', + 'orphanet:C017', + 'RO:0000053', + 'RO:0000086', + 'RO:0000087', + 'SNOMED:has_access', + 'SNOMED:has_clinical_course', + 'SNOMED:has_count_of_base_of_active_ingredient', + 'SNOMED:has_dose_form_administration_method', + 'SNOMED:has_dose_form_release_characteristic', + 'SNOMED:has_dose_form_transformation', + 'SNOMED:has_finding_context', + 'SNOMED:has_finding_informer', + 'SNOMED:has_inherent_attribute', + 'SNOMED:has_intent', + 'SNOMED:has_interpretation', + 'SNOMED:has_laterality', + 'SNOMED:has_measurement_method', + 'SNOMED:has_method', + 'SNOMED:has_priority', + 'SNOMED:has_procedure_context', + 'SNOMED:has_process_duration', + 'SNOMED:has_property', + 'SNOMED:has_revision_status', + 'SNOMED:has_scale_type', + 'SNOMED:has_severity', + 'SNOMED:has_specimen', + 'SNOMED:has_state_of_matter', + 'SNOMED:has_subject_relationship_context', + 'SNOMED:has_surgical_approach', + 'SNOMED:has_technique', + 'SNOMED:has_temporal_context', + 'SNOMED:has_time_aspect', + 'SNOMED:has_units', + 'UMLS:has_structural_class', + 'UMLS:has_supported_concept_property', + 'UMLS:has_supported_concept_relationship', + 'UMLS:may_be_qualified_by'], + 'slot_uri': 'biolink:has_attribute'} }) + deprecated: Optional[bool] = Field(None, description="""A boolean flag indicating that an entity is no longer considered current or valid.""", json_schema_extra = { "linkml_meta": {'alias': 'deprecated', + 'definition_uri': 'https://w3id.org/biolink/vocab/deprecated', + 'domain_of': ['entity', + 'attribute', + 'named thing', + 
'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['oboInOwl:ObsoleteClass'], + 'slot_uri': 'biolink:deprecated'} }) + provided_by: Optional[List[str]] = Field(None, description="""The value in this node property represents the knowledge provider that created or assembled the node and all of its attributes. Used internally to represent how a particular node made its way into a knowledge provider or graph.""", json_schema_extra = { "linkml_meta": {'alias': 'provided_by', + 'definition_uri': 'https://w3id.org/biolink/vocab/provided_by', + 'domain': 'named thing', + 'domain_of': ['named thing', + 'attribute', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'is_a': 'node property', + 'slot_uri': 'biolink:provided_by'} }) + xref: Optional[List[str]] = Field(None, description="""A database cross reference or alternative identifier for a NamedThing or edge between two NamedThings. This property should point to a database record or webpage that supports the existence of the edge, or gives more detail about the edge. 
This property can be used on a node or edge to provide multiple URIs or CURIE cross references.""", json_schema_extra = { "linkml_meta": {'alias': 'xref', + 'aliases': ['dbxref', 'Dbxref', 'DbXref', 'record_url', 'source_record_urls'], + 'definition_uri': 'https://w3id.org/biolink/vocab/xref', + 'domain': 'named thing', + 'domain_of': ['named thing', + 'gene', + 'attribute', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'genome'], + 'in_subset': ['translator_minimal'], + 'narrow_mappings': ['gff3:Dbxref', 'gpi:DB_Xrefs'], + 'slot_uri': 'biolink:xref'} }) + full_name: Optional[str] = Field(None, description="""a long-form human readable name for a thing""", json_schema_extra = { "linkml_meta": {'alias': 'full_name', + 'definition_uri': 'https://w3id.org/biolink/vocab/full_name', + 'domain': 'named thing', + 'domain_of': ['named thing', + 'attribute', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'is_a': 'node property', + 'slot_uri': 'biolink:full_name'} }) + synonym: Optional[List[str]] = Field(None, description="""Alternate human-readable names for a thing""", json_schema_extra = { "linkml_meta": {'alias': 'synonym', + 'aliases': ['alias'], + 'definition_uri': 'https://w3id.org/biolink/vocab/synonym', + 'domain': 'named thing', + 'domain_of': ['named thing', + 'attribute', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'in_subset': ['translator_minimal'], + 'is_a': 'node property', + 'narrow_mappings': ['skos:altLabel', + 'gff3:Alias', + 'AGRKB:synonyms', + 'gpi:DB_Object_Synonyms', + 'HANCESTRO:0330', + 'IAO:0000136', + 'RXNORM:has_tradename'], + 'slot_uri': 'biolink:synonym'} }) 
+ license: Optional[str] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'license', + 'definition_uri': 'https://w3id.org/biolink/vocab/license', + 'domain': 'information content entity', + 'domain_of': ['information content entity', 'dataset'], + 'exact_mappings': ['dct:license'], + 'is_a': 'node property', + 'narrow_mappings': ['WIKIDATA_PROPERTY:P275'], + 'slot_uri': 'biolink:license'} }) + rights: Optional[str] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'rights', + 'definition_uri': 'https://w3id.org/biolink/vocab/rights', + 'domain': 'information content entity', + 'domain_of': ['information content entity', 'dataset'], + 'exact_mappings': ['dct:rights'], + 'is_a': 'node property', + 'slot_uri': 'biolink:rights'} }) + format: Optional[str] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'format', + 'definition_uri': 'https://w3id.org/biolink/vocab/format', + 'domain': 'information content entity', + 'domain_of': ['information content entity', 'dataset'], + 'exact_mappings': ['dct:format', 'WIKIDATA_PROPERTY:P2701'], + 'is_a': 'node property', + 'slot_uri': 'biolink:format'} }) + creation_date: Optional[date] = Field(None, description="""date on which an entity was created. This can be applied to nodes or edges""", json_schema_extra = { "linkml_meta": {'alias': 'creation_date', + 'aliases': ['publication date'], + 'definition_uri': 'https://w3id.org/biolink/vocab/creation_date', + 'domain': 'named thing', + 'domain_of': ['information content entity', 'dataset'], + 'exact_mappings': ['dct:createdOn', 'WIKIDATA_PROPERTY:P577'], + 'is_a': 'node property', + 'slot_uri': 'biolink:creation_date'} })
+ + + +
+[docs] +class Dataset(InformationContentEntity): + """ + an item that refers to a collection of data from a data source. + """ + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'class_uri': 'biolink:Dataset', + 'definition_uri': 'https://w3id.org/biolink/vocab/Dataset', + 'exact_mappings': ['IAO:0000100', + 'dctypes:Dataset', + 'schema:dataset', + 'dcid:Dataset'], + 'from_schema': 'https://w3id.org/biolink/bican-biolink-schema'}) + + id: str = Field(..., description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI""", json_schema_extra = { "linkml_meta": {'alias': 'id', + 'definition_uri': 'https://w3id.org/biolink/vocab/id', + 'domain': 'entity', + 'domain_of': ['ontology class', + 'entity', + 'attribute', + 'named thing', + 'taxonomic rank', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['AGRKB:primaryId', 'gff3:ID', 'gpi:DB_Object_ID'], + 'in_subset': ['translator_minimal'], + 'slot_uri': 'biolink:id'} }) + iri: Optional[str] = Field(None, description="""An IRI for an entity. This is determined by the id using expansion rules.""", json_schema_extra = { "linkml_meta": {'alias': 'iri', + 'definition_uri': 'https://w3id.org/biolink/vocab/iri', + 'domain_of': ['attribute', + 'entity', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['WIKIDATA_PROPERTY:P854'], + 'in_subset': ['translator_minimal', 'samples'], + 'slot_uri': 'biolink:iri'} }) + category: List[Literal["https://w3id.org/biolink/vocab/Dataset","biolink:Dataset"]] = Field(["biolink:Dataset"], description="""Name of the high level ontology class in which this entity is categorized. 
Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}""", json_schema_extra = { "linkml_meta": {'alias': 'category', + 'definition_uri': 'https://w3id.org/biolink/vocab/category', + 'designates_type': True, + 'domain': 'entity', + 'domain_of': ['entity', + 'attribute', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'in_subset': ['translator_minimal'], + 'is_a': 'type', + 'is_class_field': True, + 'slot_uri': 'biolink:category'} }) + type: Optional[List[str]] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'type', + 'definition_uri': 'https://w3id.org/biolink/vocab/type', + 'domain': 'entity', + 'domain_of': ['entity', + 'attribute', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['gff3:type', 'gpi:DB_Object_Type'], + 'mappings': ['rdf:type'], + 'slot_uri': 'rdf:type'} }) + name: Optional[str] = Field(None, description="""A human-readable name for an attribute or 
entity.""", json_schema_extra = { "linkml_meta": {'alias': 'name', + 'aliases': ['label', 'display name', 'title'], + 'definition_uri': 'https://w3id.org/biolink/vocab/name', + 'domain': 'entity', + 'domain_of': ['attribute', + 'entity', + 'macromolecular machine mixin', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene or gene product', + 'gene', + 'genome'], + 'exact_mappings': ['gff3:Name', 'gpi:DB_Object_Name'], + 'in_subset': ['translator_minimal', 'samples'], + 'mappings': ['rdfs:label'], + 'narrow_mappings': ['dct:title', 'WIKIDATA_PROPERTY:P1476'], + 'slot_uri': 'rdfs:label'} }) + description: Optional[str] = Field(None, description="""a human-readable description of an entity""", json_schema_extra = { "linkml_meta": {'alias': 'description', + 'aliases': ['definition'], + 'definition_uri': 'https://w3id.org/biolink/vocab/description', + 'domain_of': ['entity', + 'attribute', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['IAO:0000115', 'skos:definitions'], + 'in_subset': ['translator_minimal'], + 'mappings': ['dct:description'], + 'narrow_mappings': ['gff3:Description'], + 'slot_uri': 'dct:description'} }) + has_attribute: Optional[List[str]] = Field(None, description="""connects any entity to an attribute""", json_schema_extra = { "linkml_meta": {'alias': 'has_attribute', + 'close_mappings': ['OBI:0001927'], + 'definition_uri': 'https://w3id.org/biolink/vocab/has_attribute', + 'domain': 'entity', + 'domain_of': ['entity', + 'attribute', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': 
['SIO:000008'], + 'in_subset': ['samples'], + 'narrow_mappings': ['OBAN:association_has_subject_property', + 'OBAN:association_has_object_property', + 'CPT:has_possibly_included_panel_element', + 'DRUGBANK:category', + 'EFO:is_executed_in', + 'HANCESTRO:0301', + 'LOINC:has_action_guidance', + 'LOINC:has_adjustment', + 'LOINC:has_aggregation_view', + 'LOINC:has_approach_guidance', + 'LOINC:has_divisor', + 'LOINC:has_exam', + 'LOINC:has_method', + 'LOINC:has_modality_subtype', + 'LOINC:has_object_guidance', + 'LOINC:has_scale', + 'LOINC:has_suffix', + 'LOINC:has_time_aspect', + 'LOINC:has_time_modifier', + 'LOINC:has_timing_of', + 'NCIT:R88', + 'NCIT:eo_disease_has_property_or_attribute', + 'NCIT:has_data_element', + 'NCIT:has_pharmaceutical_administration_method', + 'NCIT:has_pharmaceutical_basic_dose_form', + 'NCIT:has_pharmaceutical_intended_site', + 'NCIT:has_pharmaceutical_release_characteristics', + 'NCIT:has_pharmaceutical_state_of_matter', + 'NCIT:has_pharmaceutical_transformation', + 'NCIT:is_qualified_by', + 'NCIT:qualifier_applies_to', + 'NCIT:role_has_domain', + 'NCIT:role_has_range', + 'INO:0000154', + 'HANCESTRO:0308', + 'OMIM:has_inheritance_type', + 'orphanet:C016', + 'orphanet:C017', + 'RO:0000053', + 'RO:0000086', + 'RO:0000087', + 'SNOMED:has_access', + 'SNOMED:has_clinical_course', + 'SNOMED:has_count_of_base_of_active_ingredient', + 'SNOMED:has_dose_form_administration_method', + 'SNOMED:has_dose_form_release_characteristic', + 'SNOMED:has_dose_form_transformation', + 'SNOMED:has_finding_context', + 'SNOMED:has_finding_informer', + 'SNOMED:has_inherent_attribute', + 'SNOMED:has_intent', + 'SNOMED:has_interpretation', + 'SNOMED:has_laterality', + 'SNOMED:has_measurement_method', + 'SNOMED:has_method', + 'SNOMED:has_priority', + 'SNOMED:has_procedure_context', + 'SNOMED:has_process_duration', + 'SNOMED:has_property', + 'SNOMED:has_revision_status', + 'SNOMED:has_scale_type', + 'SNOMED:has_severity', + 'SNOMED:has_specimen', + 
'SNOMED:has_state_of_matter', + 'SNOMED:has_subject_relationship_context', + 'SNOMED:has_surgical_approach', + 'SNOMED:has_technique', + 'SNOMED:has_temporal_context', + 'SNOMED:has_time_aspect', + 'SNOMED:has_units', + 'UMLS:has_structural_class', + 'UMLS:has_supported_concept_property', + 'UMLS:has_supported_concept_relationship', + 'UMLS:may_be_qualified_by'], + 'slot_uri': 'biolink:has_attribute'} }) + deprecated: Optional[bool] = Field(None, description="""A boolean flag indicating that an entity is no longer considered current or valid.""", json_schema_extra = { "linkml_meta": {'alias': 'deprecated', + 'definition_uri': 'https://w3id.org/biolink/vocab/deprecated', + 'domain_of': ['entity', + 'attribute', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['oboInOwl:ObsoleteClass'], + 'slot_uri': 'biolink:deprecated'} }) + provided_by: Optional[List[str]] = Field(None, description="""The value in this node property represents the knowledge provider that created or assembled the node and all of its attributes. Used internally to represent how a particular node made its way into a knowledge provider or graph.""", json_schema_extra = { "linkml_meta": {'alias': 'provided_by', + 'definition_uri': 'https://w3id.org/biolink/vocab/provided_by', + 'domain': 'named thing', + 'domain_of': ['named thing', + 'attribute', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'is_a': 'node property', + 'slot_uri': 'biolink:provided_by'} }) + xref: Optional[List[str]] = Field(None, description="""A database cross reference or alternative identifier for a NamedThing or edge between two NamedThings. 
This property should point to a database record or webpage that supports the existence of the edge, or gives more detail about the edge. This property can be used on a node or edge to provide multiple URIs or CURIE cross references.""", json_schema_extra = { "linkml_meta": {'alias': 'xref', + 'aliases': ['dbxref', 'Dbxref', 'DbXref', 'record_url', 'source_record_urls'], + 'definition_uri': 'https://w3id.org/biolink/vocab/xref', + 'domain': 'named thing', + 'domain_of': ['named thing', + 'gene', + 'attribute', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'genome'], + 'in_subset': ['translator_minimal'], + 'narrow_mappings': ['gff3:Dbxref', 'gpi:DB_Xrefs'], + 'slot_uri': 'biolink:xref'} }) + full_name: Optional[str] = Field(None, description="""a long-form human readable name for a thing""", json_schema_extra = { "linkml_meta": {'alias': 'full_name', + 'definition_uri': 'https://w3id.org/biolink/vocab/full_name', + 'domain': 'named thing', + 'domain_of': ['named thing', + 'attribute', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'is_a': 'node property', + 'slot_uri': 'biolink:full_name'} }) + synonym: Optional[List[str]] = Field(None, description="""Alternate human-readable names for a thing""", json_schema_extra = { "linkml_meta": {'alias': 'synonym', + 'aliases': ['alias'], + 'definition_uri': 'https://w3id.org/biolink/vocab/synonym', + 'domain': 'named thing', + 'domain_of': ['named thing', + 'attribute', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'in_subset': ['translator_minimal'], + 'is_a': 'node property', + 'narrow_mappings': ['skos:altLabel', + 'gff3:Alias', + 
'AGRKB:synonyms', + 'gpi:DB_Object_Synonyms', + 'HANCESTRO:0330', + 'IAO:0000136', + 'RXNORM:has_tradename'], + 'slot_uri': 'biolink:synonym'} }) + license: Optional[str] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'license', + 'definition_uri': 'https://w3id.org/biolink/vocab/license', + 'domain': 'information content entity', + 'domain_of': ['information content entity', 'dataset'], + 'exact_mappings': ['dct:license'], + 'is_a': 'node property', + 'narrow_mappings': ['WIKIDATA_PROPERTY:P275'], + 'slot_uri': 'biolink:license'} }) + rights: Optional[str] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'rights', + 'definition_uri': 'https://w3id.org/biolink/vocab/rights', + 'domain': 'information content entity', + 'domain_of': ['information content entity', 'dataset'], + 'exact_mappings': ['dct:rights'], + 'is_a': 'node property', + 'slot_uri': 'biolink:rights'} }) + format: Optional[str] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'format', + 'definition_uri': 'https://w3id.org/biolink/vocab/format', + 'domain': 'information content entity', + 'domain_of': ['information content entity', 'dataset'], + 'exact_mappings': ['dct:format', 'WIKIDATA_PROPERTY:P2701'], + 'is_a': 'node property', + 'slot_uri': 'biolink:format'} }) + creation_date: Optional[date] = Field(None, description="""date on which an entity was created. This can be applied to nodes or edges""", json_schema_extra = { "linkml_meta": {'alias': 'creation_date', + 'aliases': ['publication date'], + 'definition_uri': 'https://w3id.org/biolink/vocab/creation_date', + 'domain': 'named thing', + 'domain_of': ['information content entity', 'dataset'], + 'exact_mappings': ['dct:createdOn', 'WIKIDATA_PROPERTY:P577'], + 'is_a': 'node property', + 'slot_uri': 'biolink:creation_date'} })
+ + + +
+[docs] +class PhysicalEssenceOrOccurrent(ConfiguredBaseModel): + """ + Either a physical or processual entity. + """ + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'class_uri': 'biolink:PhysicalEssenceOrOccurrent', + 'definition_uri': 'https://w3id.org/biolink/vocab/PhysicalEssenceOrOccurrent', + 'from_schema': 'https://w3id.org/biolink/bican-biolink-schema', + 'mixin': True}) + + pass
+ + + +
+[docs] +class PhysicalEssence(PhysicalEssenceOrOccurrent): + """ + Semantic mixin concept. Pertains to entities that have physical properties such as mass, volume, or charge. + """ + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'class_uri': 'biolink:PhysicalEssence', + 'definition_uri': 'https://w3id.org/biolink/vocab/PhysicalEssence', + 'from_schema': 'https://w3id.org/biolink/bican-biolink-schema', + 'mixin': True}) + + pass
+ + + +
class PhysicalEntity(PhysicalEssence, NamedThing):
    """
    An entity that has material reality (a.k.a. physical essence).
    """
    # Class-level LinkML metadata: URIs, ontology mappings and the mixin list.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {
            "class_uri": "biolink:PhysicalEntity",
            "definition_uri": "https://w3id.org/biolink/vocab/PhysicalEntity",
            "exact_mappings": ["STY:T072"],
            "from_schema": "https://w3id.org/biolink/bican-biolink-schema",
            "mixins": ["physical essence"],
            "narrow_mappings": ["STY:T073"],
        }
    )

    # `id` is the only slot declared without a default value.
    id: str = Field(
        ...,
        description=(
            "A unique identifier for an entity. "
            "Must be either a CURIE shorthand for a URI or a complete URI"
        ),
        json_schema_extra={
            "linkml_meta": {
                "alias": "id",
                "definition_uri": "https://w3id.org/biolink/vocab/id",
                "domain": "entity",
                "domain_of": [
                    "ontology class", "entity", "attribute", "named thing",
                    "taxonomic rank", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample",
                    "biological entity", "gene", "genome",
                ],
                "exact_mappings": ["AGRKB:primaryId", "gff3:ID", "gpi:DB_Object_ID"],
                "in_subset": ["translator_minimal"],
                "slot_uri": "biolink:id",
            }
        },
    )
    iri: Optional[str] = Field(
        None,
        description=(
            "An IRI for an entity. "
            "This is determined by the id using expansion rules."
        ),
        json_schema_extra={
            "linkml_meta": {
                "alias": "iri",
                "definition_uri": "https://w3id.org/biolink/vocab/iri",
                "domain_of": [
                    "attribute", "entity", "named thing", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample",
                    "biological entity", "gene", "genome",
                ],
                "exact_mappings": ["WIKIDATA_PROPERTY:P854"],
                "in_subset": ["translator_minimal", "samples"],
                "slot_uri": "biolink:iri",
            }
        },
    )
    # Type designator: restricted to this class's URI / CURIE and defaulting
    # to the CURIE form.
    category: List[
        Literal[
            "https://w3id.org/biolink/vocab/PhysicalEntity",
            "biolink:PhysicalEntity",
        ]
    ] = Field(
        ["biolink:PhysicalEntity"],
        description=(
            "Name of the high level ontology class in which this entity is categorized. "
            "Corresponds to the label for the biolink entity type class. "
            "In a neo4j database this MAY correspond to the neo4j label tag. "
            "In an RDF database it should be a biolink model class URI. "
            "This field is multi-valued. "
            "It should include values for ancestors of the biolink class; for example, "
            "a protein such as Shh would have category values `biolink:Protein`, "
            "`biolink:GeneProduct`, `biolink:MolecularEntity`. "
            "In an RDF database, nodes will typically have an rdf:type triples. "
            "This can be to the most specific biolink class, or potentially to a class "
            "more specific than something in biolink. "
            "For example, a sequence feature `f` may have a rdf:type assertion to a SO "
            "class such as TF_binding_site, which is more specific than anything in biolink. "
            "Here we would have categories {biolink:GenomicEntity, "
            "biolink:MolecularEntity, biolink:NamedThing}"
        ),
        json_schema_extra={
            "linkml_meta": {
                "alias": "category",
                "definition_uri": "https://w3id.org/biolink/vocab/category",
                "designates_type": True,
                "domain": "entity",
                "domain_of": [
                    "entity", "attribute", "named thing", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample",
                    "biological entity", "gene", "genome",
                ],
                "in_subset": ["translator_minimal"],
                "is_a": "type",
                "is_class_field": True,
                "slot_uri": "biolink:category",
            }
        },
    )
    type: Optional[List[str]] = Field(
        None,
        json_schema_extra={
            "linkml_meta": {
                "alias": "type",
                "definition_uri": "https://w3id.org/biolink/vocab/type",
                "domain": "entity",
                "domain_of": [
                    "entity", "attribute", "named thing", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample",
                    "biological entity", "gene", "genome",
                ],
                "exact_mappings": ["gff3:type", "gpi:DB_Object_Type"],
                "mappings": ["rdf:type"],
                "slot_uri": "rdf:type",
            }
        },
    )
    name: Optional[str] = Field(
        None,
        description="A human-readable name for an attribute or entity.",
        json_schema_extra={
            "linkml_meta": {
                "alias": "name",
                "aliases": ["label", "display name", "title"],
                "definition_uri": "https://w3id.org/biolink/vocab/name",
                "domain": "entity",
                "domain_of": [
                    "attribute", "entity", "macromolecular machine mixin",
                    "named thing", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample",
                    "biological entity", "gene or gene product", "gene", "genome",
                ],
                "exact_mappings": ["gff3:Name", "gpi:DB_Object_Name"],
                "in_subset": ["translator_minimal", "samples"],
                "mappings": ["rdfs:label"],
                "narrow_mappings": ["dct:title", "WIKIDATA_PROPERTY:P1476"],
                "slot_uri": "rdfs:label",
            }
        },
    )
    description: Optional[str] = Field(
        None,
        description="a human-readable description of an entity",
        json_schema_extra={
            "linkml_meta": {
                "alias": "description",
                "aliases": ["definition"],
                "definition_uri": "https://w3id.org/biolink/vocab/description",
                "domain_of": [
                    "entity", "attribute", "named thing", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample",
                    "biological entity", "gene", "genome",
                ],
                "exact_mappings": ["IAO:0000115", "skos:definitions"],
                "in_subset": ["translator_minimal"],
                "mappings": ["dct:description"],
                "narrow_mappings": ["gff3:Description"],
                "slot_uri": "dct:description",
            }
        },
    )
    has_attribute: Optional[List[str]] = Field(
        None,
        description="connects any entity to an attribute",
        json_schema_extra={
            "linkml_meta": {
                "alias": "has_attribute",
                "close_mappings": ["OBI:0001927"],
                "definition_uri": "https://w3id.org/biolink/vocab/has_attribute",
                "domain": "entity",
                "domain_of": [
                    "entity", "attribute", "named thing", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample",
                    "biological entity", "gene", "genome",
                ],
                "exact_mappings": ["SIO:000008"],
                "in_subset": ["samples"],
                "narrow_mappings": [
                    "OBAN:association_has_subject_property",
                    "OBAN:association_has_object_property",
                    "CPT:has_possibly_included_panel_element",
                    "DRUGBANK:category",
                    "EFO:is_executed_in",
                    "HANCESTRO:0301",
                    "LOINC:has_action_guidance",
                    "LOINC:has_adjustment",
                    "LOINC:has_aggregation_view",
                    "LOINC:has_approach_guidance",
                    "LOINC:has_divisor",
                    "LOINC:has_exam",
                    "LOINC:has_method",
                    "LOINC:has_modality_subtype",
                    "LOINC:has_object_guidance",
                    "LOINC:has_scale",
                    "LOINC:has_suffix",
                    "LOINC:has_time_aspect",
                    "LOINC:has_time_modifier",
                    "LOINC:has_timing_of",
                    "NCIT:R88",
                    "NCIT:eo_disease_has_property_or_attribute",
                    "NCIT:has_data_element",
                    "NCIT:has_pharmaceutical_administration_method",
                    "NCIT:has_pharmaceutical_basic_dose_form",
                    "NCIT:has_pharmaceutical_intended_site",
                    "NCIT:has_pharmaceutical_release_characteristics",
                    "NCIT:has_pharmaceutical_state_of_matter",
                    "NCIT:has_pharmaceutical_transformation",
                    "NCIT:is_qualified_by",
                    "NCIT:qualifier_applies_to",
                    "NCIT:role_has_domain",
                    "NCIT:role_has_range",
                    "INO:0000154",
                    "HANCESTRO:0308",
                    "OMIM:has_inheritance_type",
                    "orphanet:C016",
                    "orphanet:C017",
                    "RO:0000053",
                    "RO:0000086",
                    "RO:0000087",
                    "SNOMED:has_access",
                    "SNOMED:has_clinical_course",
                    "SNOMED:has_count_of_base_of_active_ingredient",
                    "SNOMED:has_dose_form_administration_method",
                    "SNOMED:has_dose_form_release_characteristic",
                    "SNOMED:has_dose_form_transformation",
                    "SNOMED:has_finding_context",
                    "SNOMED:has_finding_informer",
                    "SNOMED:has_inherent_attribute",
                    "SNOMED:has_intent",
                    "SNOMED:has_interpretation",
                    "SNOMED:has_laterality",
                    "SNOMED:has_measurement_method",
                    "SNOMED:has_method",
                    "SNOMED:has_priority",
                    "SNOMED:has_procedure_context",
                    "SNOMED:has_process_duration",
                    "SNOMED:has_property",
                    "SNOMED:has_revision_status",
                    "SNOMED:has_scale_type",
                    "SNOMED:has_severity",
                    "SNOMED:has_specimen",
                    "SNOMED:has_state_of_matter",
                    "SNOMED:has_subject_relationship_context",
                    "SNOMED:has_surgical_approach",
                    "SNOMED:has_technique",
                    "SNOMED:has_temporal_context",
                    "SNOMED:has_time_aspect",
                    "SNOMED:has_units",
                    "UMLS:has_structural_class",
                    "UMLS:has_supported_concept_property",
                    "UMLS:has_supported_concept_relationship",
                    "UMLS:may_be_qualified_by",
                ],
                "slot_uri": "biolink:has_attribute",
            }
        },
    )
    deprecated: Optional[bool] = Field(
        None,
        description=(
            "A boolean flag indicating that an entity is no longer considered "
            "current or valid."
        ),
        json_schema_extra={
            "linkml_meta": {
                "alias": "deprecated",
                "definition_uri": "https://w3id.org/biolink/vocab/deprecated",
                "domain_of": [
                    "entity", "attribute", "named thing", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample",
                    "biological entity", "gene", "genome",
                ],
                "exact_mappings": ["oboInOwl:ObsoleteClass"],
                "slot_uri": "biolink:deprecated",
            }
        },
    )
    provided_by: Optional[List[str]] = Field(
        None,
        description=(
            "The value in this node property represents the knowledge provider "
            "that created or assembled the node and all of its attributes. "
            "Used internally to represent how a particular node made its way "
            "into a knowledge provider or graph."
        ),
        json_schema_extra={
            "linkml_meta": {
                "alias": "provided_by",
                "definition_uri": "https://w3id.org/biolink/vocab/provided_by",
                "domain": "named thing",
                "domain_of": [
                    "named thing", "attribute", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample",
                    "biological entity", "gene", "genome",
                ],
                "is_a": "node property",
                "slot_uri": "biolink:provided_by",
            }
        },
    )
    xref: Optional[List[str]] = Field(
        None,
        description=(
            "A database cross reference or alternative identifier for a "
            "NamedThing or edge between two NamedThings. "
            "This property should point to a database record or webpage that "
            "supports the existence of the edge, or gives more detail about the edge. "
            "This property can be used on a node or edge to provide multiple "
            "URIs or CURIE cross references."
        ),
        json_schema_extra={
            "linkml_meta": {
                "alias": "xref",
                "aliases": [
                    "dbxref", "Dbxref", "DbXref", "record_url", "source_record_urls",
                ],
                "definition_uri": "https://w3id.org/biolink/vocab/xref",
                "domain": "named thing",
                "domain_of": [
                    "named thing", "gene", "attribute", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample",
                    "biological entity", "genome",
                ],
                "in_subset": ["translator_minimal"],
                "narrow_mappings": ["gff3:Dbxref", "gpi:DB_Xrefs"],
                "slot_uri": "biolink:xref",
            }
        },
    )
    full_name: Optional[str] = Field(
        None,
        description="a long-form human readable name for a thing",
        json_schema_extra={
            "linkml_meta": {
                "alias": "full_name",
                "definition_uri": "https://w3id.org/biolink/vocab/full_name",
                "domain": "named thing",
                "domain_of": [
                    "named thing", "attribute", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample",
                    "biological entity", "gene", "genome",
                ],
                "is_a": "node property",
                "slot_uri": "biolink:full_name",
            }
        },
    )
    synonym: Optional[List[str]] = Field(
        None,
        description="Alternate human-readable names for a thing",
        json_schema_extra={
            "linkml_meta": {
                "alias": "synonym",
                "aliases": ["alias"],
                "definition_uri": "https://w3id.org/biolink/vocab/synonym",
                "domain": "named thing",
                "domain_of": [
                    "named thing", "attribute", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample",
                    "biological entity", "gene", "genome",
                ],
                "in_subset": ["translator_minimal"],
                "is_a": "node property",
                "narrow_mappings": [
                    "skos:altLabel",
                    "gff3:Alias",
                    "AGRKB:synonyms",
                    "gpi:DB_Object_Synonyms",
                    "HANCESTRO:0330",
                    "IAO:0000136",
                    "RXNORM:has_tradename",
                ],
                "slot_uri": "biolink:synonym",
            }
        },
    )
+ + + +
class Occurrent(PhysicalEssenceOrOccurrent):
    """
    A processual entity.
    """
    # Mixin marker class: it carries LinkML metadata only and declares no slots.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {
            "class_uri": "biolink:Occurrent",
            "definition_uri": "https://w3id.org/biolink/vocab/Occurrent",
            "exact_mappings": ["BFO:0000003"],
            "from_schema": "https://w3id.org/biolink/bican-biolink-schema",
            "mixin": True,
        }
    )
+ + + +
class ActivityAndBehavior(Occurrent):
    """
    Activity or behavior of any independent integral living, organization or mechanical actor in the world
    """
    # Mixin marker class: it carries LinkML metadata only and declares no slots.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {
            "class_uri": "biolink:ActivityAndBehavior",
            "definition_uri": "https://w3id.org/biolink/vocab/ActivityAndBehavior",
            "exact_mappings": ["UMLSSG:ACTI"],
            "from_schema": "https://w3id.org/biolink/bican-biolink-schema",
            "mixin": True,
        }
    )
+ + + +
class Activity(ActivityAndBehavior, NamedThing):
    """
    An activity is something that occurs over a period of time and acts upon or with entities; it may include consuming, processing, transforming, modifying, relocating, using, or generating entities.
    """
    # Class-level LinkML metadata: URIs, ontology mappings and the mixin list.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {
            "class_uri": "biolink:Activity",
            "definition_uri": "https://w3id.org/biolink/vocab/Activity",
            "exact_mappings": ["prov:Activity", "NCIT:C43431", "STY:T052"],
            "from_schema": "https://w3id.org/biolink/bican-biolink-schema",
            "mixins": ["activity and behavior"],
            "narrow_mappings": [
                "STY:T056",
                "STY:T057",
                "STY:T064",
                "STY:T066",
                "STY:T062",
                "STY:T065",
                "STY:T058",
            ],
        }
    )

    # `id` is the only slot declared without a default value.
    id: str = Field(
        ...,
        description=(
            "A unique identifier for an entity. "
            "Must be either a CURIE shorthand for a URI or a complete URI"
        ),
        json_schema_extra={
            "linkml_meta": {
                "alias": "id",
                "definition_uri": "https://w3id.org/biolink/vocab/id",
                "domain": "entity",
                "domain_of": [
                    "ontology class", "entity", "attribute", "named thing",
                    "taxonomic rank", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample",
                    "biological entity", "gene", "genome",
                ],
                "exact_mappings": ["AGRKB:primaryId", "gff3:ID", "gpi:DB_Object_ID"],
                "in_subset": ["translator_minimal"],
                "slot_uri": "biolink:id",
            }
        },
    )
    iri: Optional[str] = Field(
        None,
        description=(
            "An IRI for an entity. "
            "This is determined by the id using expansion rules."
        ),
        json_schema_extra={
            "linkml_meta": {
                "alias": "iri",
                "definition_uri": "https://w3id.org/biolink/vocab/iri",
                "domain_of": [
                    "attribute", "entity", "named thing", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample",
                    "biological entity", "gene", "genome",
                ],
                "exact_mappings": ["WIKIDATA_PROPERTY:P854"],
                "in_subset": ["translator_minimal", "samples"],
                "slot_uri": "biolink:iri",
            }
        },
    )
    # Type designator: restricted to this class's URI / CURIE and defaulting
    # to the CURIE form.
    category: List[
        Literal[
            "https://w3id.org/biolink/vocab/Activity",
            "biolink:Activity",
        ]
    ] = Field(
        ["biolink:Activity"],
        description=(
            "Name of the high level ontology class in which this entity is categorized. "
            "Corresponds to the label for the biolink entity type class. "
            "In a neo4j database this MAY correspond to the neo4j label tag. "
            "In an RDF database it should be a biolink model class URI. "
            "This field is multi-valued. "
            "It should include values for ancestors of the biolink class; for example, "
            "a protein such as Shh would have category values `biolink:Protein`, "
            "`biolink:GeneProduct`, `biolink:MolecularEntity`. "
            "In an RDF database, nodes will typically have an rdf:type triples. "
            "This can be to the most specific biolink class, or potentially to a class "
            "more specific than something in biolink. "
            "For example, a sequence feature `f` may have a rdf:type assertion to a SO "
            "class such as TF_binding_site, which is more specific than anything in biolink. "
            "Here we would have categories {biolink:GenomicEntity, "
            "biolink:MolecularEntity, biolink:NamedThing}"
        ),
        json_schema_extra={
            "linkml_meta": {
                "alias": "category",
                "definition_uri": "https://w3id.org/biolink/vocab/category",
                "designates_type": True,
                "domain": "entity",
                "domain_of": [
                    "entity", "attribute", "named thing", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample",
                    "biological entity", "gene", "genome",
                ],
                "in_subset": ["translator_minimal"],
                "is_a": "type",
                "is_class_field": True,
                "slot_uri": "biolink:category",
            }
        },
    )
    type: Optional[List[str]] = Field(
        None,
        json_schema_extra={
            "linkml_meta": {
                "alias": "type",
                "definition_uri": "https://w3id.org/biolink/vocab/type",
                "domain": "entity",
                "domain_of": [
                    "entity", "attribute", "named thing", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample",
                    "biological entity", "gene", "genome",
                ],
                "exact_mappings": ["gff3:type", "gpi:DB_Object_Type"],
                "mappings": ["rdf:type"],
                "slot_uri": "rdf:type",
            }
        },
    )
    name: Optional[str] = Field(
        None,
        description="A human-readable name for an attribute or entity.",
        json_schema_extra={
            "linkml_meta": {
                "alias": "name",
                "aliases": ["label", "display name", "title"],
                "definition_uri": "https://w3id.org/biolink/vocab/name",
                "domain": "entity",
                "domain_of": [
                    "attribute", "entity", "macromolecular machine mixin",
                    "named thing", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample",
                    "biological entity", "gene or gene product", "gene", "genome",
                ],
                "exact_mappings": ["gff3:Name", "gpi:DB_Object_Name"],
                "in_subset": ["translator_minimal", "samples"],
                "mappings": ["rdfs:label"],
                "narrow_mappings": ["dct:title", "WIKIDATA_PROPERTY:P1476"],
                "slot_uri": "rdfs:label",
            }
        },
    )
    description: Optional[str] = Field(
        None,
        description="a human-readable description of an entity",
        json_schema_extra={
            "linkml_meta": {
                "alias": "description",
                "aliases": ["definition"],
                "definition_uri": "https://w3id.org/biolink/vocab/description",
                "domain_of": [
                    "entity", "attribute", "named thing", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample",
                    "biological entity", "gene", "genome",
                ],
                "exact_mappings": ["IAO:0000115", "skos:definitions"],
                "in_subset": ["translator_minimal"],
                "mappings": ["dct:description"],
                "narrow_mappings": ["gff3:Description"],
                "slot_uri": "dct:description",
            }
        },
    )
    has_attribute: Optional[List[str]] = Field(
        None,
        description="connects any entity to an attribute",
        json_schema_extra={
            "linkml_meta": {
                "alias": "has_attribute",
                "close_mappings": ["OBI:0001927"],
                "definition_uri": "https://w3id.org/biolink/vocab/has_attribute",
                "domain": "entity",
                "domain_of": [
                    "entity", "attribute", "named thing", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample",
                    "biological entity", "gene", "genome",
                ],
                "exact_mappings": ["SIO:000008"],
                "in_subset": ["samples"],
                "narrow_mappings": [
                    "OBAN:association_has_subject_property",
                    "OBAN:association_has_object_property",
                    "CPT:has_possibly_included_panel_element",
                    "DRUGBANK:category",
                    "EFO:is_executed_in",
                    "HANCESTRO:0301",
                    "LOINC:has_action_guidance",
                    "LOINC:has_adjustment",
                    "LOINC:has_aggregation_view",
                    "LOINC:has_approach_guidance",
                    "LOINC:has_divisor",
                    "LOINC:has_exam",
                    "LOINC:has_method",
                    "LOINC:has_modality_subtype",
                    "LOINC:has_object_guidance",
                    "LOINC:has_scale",
                    "LOINC:has_suffix",
                    "LOINC:has_time_aspect",
                    "LOINC:has_time_modifier",
                    "LOINC:has_timing_of",
                    "NCIT:R88",
                    "NCIT:eo_disease_has_property_or_attribute",
                    "NCIT:has_data_element",
                    "NCIT:has_pharmaceutical_administration_method",
                    "NCIT:has_pharmaceutical_basic_dose_form",
                    "NCIT:has_pharmaceutical_intended_site",
                    "NCIT:has_pharmaceutical_release_characteristics",
                    "NCIT:has_pharmaceutical_state_of_matter",
                    "NCIT:has_pharmaceutical_transformation",
                    "NCIT:is_qualified_by",
                    "NCIT:qualifier_applies_to",
                    "NCIT:role_has_domain",
                    "NCIT:role_has_range",
                    "INO:0000154",
                    "HANCESTRO:0308",
                    "OMIM:has_inheritance_type",
                    "orphanet:C016",
                    "orphanet:C017",
                    "RO:0000053",
                    "RO:0000086",
                    "RO:0000087",
                    "SNOMED:has_access",
                    "SNOMED:has_clinical_course",
                    "SNOMED:has_count_of_base_of_active_ingredient",
                    "SNOMED:has_dose_form_administration_method",
                    "SNOMED:has_dose_form_release_characteristic",
                    "SNOMED:has_dose_form_transformation",
                    "SNOMED:has_finding_context",
                    "SNOMED:has_finding_informer",
                    "SNOMED:has_inherent_attribute",
                    "SNOMED:has_intent",
                    "SNOMED:has_interpretation",
                    "SNOMED:has_laterality",
                    "SNOMED:has_measurement_method",
                    "SNOMED:has_method",
                    "SNOMED:has_priority",
                    "SNOMED:has_procedure_context",
                    "SNOMED:has_process_duration",
                    "SNOMED:has_property",
                    "SNOMED:has_revision_status",
                    "SNOMED:has_scale_type",
                    "SNOMED:has_severity",
                    "SNOMED:has_specimen",
                    "SNOMED:has_state_of_matter",
                    "SNOMED:has_subject_relationship_context",
                    "SNOMED:has_surgical_approach",
                    "SNOMED:has_technique",
                    "SNOMED:has_temporal_context",
                    "SNOMED:has_time_aspect",
                    "SNOMED:has_units",
                    "UMLS:has_structural_class",
                    "UMLS:has_supported_concept_property",
                    "UMLS:has_supported_concept_relationship",
                    "UMLS:may_be_qualified_by",
                ],
                "slot_uri": "biolink:has_attribute",
            }
        },
    )
    deprecated: Optional[bool] = Field(
        None,
        description=(
            "A boolean flag indicating that an entity is no longer considered "
            "current or valid."
        ),
        json_schema_extra={
            "linkml_meta": {
                "alias": "deprecated",
                "definition_uri": "https://w3id.org/biolink/vocab/deprecated",
                "domain_of": [
                    "entity", "attribute", "named thing", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample",
                    "biological entity", "gene", "genome",
                ],
                "exact_mappings": ["oboInOwl:ObsoleteClass"],
                "slot_uri": "biolink:deprecated",
            }
        },
    )
    provided_by: Optional[List[str]] = Field(
        None,
        description=(
            "The value in this node property represents the knowledge provider "
            "that created or assembled the node and all of its attributes. "
            "Used internally to represent how a particular node made its way "
            "into a knowledge provider or graph."
        ),
        json_schema_extra={
            "linkml_meta": {
                "alias": "provided_by",
                "definition_uri": "https://w3id.org/biolink/vocab/provided_by",
                "domain": "named thing",
                "domain_of": [
                    "named thing", "attribute", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample",
                    "biological entity", "gene", "genome",
                ],
                "is_a": "node property",
                "slot_uri": "biolink:provided_by",
            }
        },
    )
    xref: Optional[List[str]] = Field(
        None,
        description=(
            "A database cross reference or alternative identifier for a "
            "NamedThing or edge between two NamedThings. "
            "This property should point to a database record or webpage that "
            "supports the existence of the edge, or gives more detail about the edge. "
            "This property can be used on a node or edge to provide multiple "
            "URIs or CURIE cross references."
        ),
        json_schema_extra={
            "linkml_meta": {
                "alias": "xref",
                "aliases": [
                    "dbxref", "Dbxref", "DbXref", "record_url", "source_record_urls",
                ],
                "definition_uri": "https://w3id.org/biolink/vocab/xref",
                "domain": "named thing",
                "domain_of": [
                    "named thing", "gene", "attribute", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample",
                    "biological entity", "genome",
                ],
                "in_subset": ["translator_minimal"],
                "narrow_mappings": ["gff3:Dbxref", "gpi:DB_Xrefs"],
                "slot_uri": "biolink:xref",
            }
        },
    )
    full_name: Optional[str] = Field(
        None,
        description="a long-form human readable name for a thing",
        json_schema_extra={
            "linkml_meta": {
                "alias": "full_name",
                "definition_uri": "https://w3id.org/biolink/vocab/full_name",
                "domain": "named thing",
                "domain_of": [
                    "named thing", "attribute", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample",
                    "biological entity", "gene", "genome",
                ],
                "is_a": "node property",
                "slot_uri": "biolink:full_name",
            }
        },
    )
    synonym: Optional[List[str]] = Field(
        None,
        description="Alternate human-readable names for a thing",
        json_schema_extra={
            "linkml_meta": {
                "alias": "synonym",
                "aliases": ["alias"],
                "definition_uri": "https://w3id.org/biolink/vocab/synonym",
                "domain": "named thing",
                "domain_of": [
                    "named thing", "attribute", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample",
                    "biological entity", "gene", "genome",
                ],
                "in_subset": ["translator_minimal"],
                "is_a": "node property",
                "narrow_mappings": [
                    "skos:altLabel",
                    "gff3:Alias",
                    "AGRKB:synonyms",
                    "gpi:DB_Object_Synonyms",
                    "HANCESTRO:0330",
                    "IAO:0000136",
                    "RXNORM:has_tradename",
                ],
                "slot_uri": "biolink:synonym",
            }
        },
    )
+ + + +
class Procedure(ActivityAndBehavior, NamedThing):
    """
    A series of actions conducted in a certain order or manner
    """
    # Class-level LinkML metadata: URIs, ontology mappings, id prefixes and
    # the mixin list.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {
            "class_uri": "biolink:Procedure",
            "definition_uri": "https://w3id.org/biolink/vocab/Procedure",
            "exact_mappings": ["UMLSSG:PROC", "dcid:MedicalProcedure"],
            "from_schema": "https://w3id.org/biolink/bican-biolink-schema",
            "id_prefixes": ["CPT"],
            "mixins": ["activity and behavior"],
            "narrow_mappings": ["STY:T059", "STY:T060", "STY:T061", "STY:T063"],
        }
    )

    # `id` is the only slot declared without a default value.
    id: str = Field(
        ...,
        description=(
            "A unique identifier for an entity. "
            "Must be either a CURIE shorthand for a URI or a complete URI"
        ),
        json_schema_extra={
            "linkml_meta": {
                "alias": "id",
                "definition_uri": "https://w3id.org/biolink/vocab/id",
                "domain": "entity",
                "domain_of": [
                    "ontology class", "entity", "attribute", "named thing",
                    "taxonomic rank", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample",
                    "biological entity", "gene", "genome",
                ],
                "exact_mappings": ["AGRKB:primaryId", "gff3:ID", "gpi:DB_Object_ID"],
                "in_subset": ["translator_minimal"],
                "slot_uri": "biolink:id",
            }
        },
    )
    iri: Optional[str] = Field(
        None,
        description=(
            "An IRI for an entity. "
            "This is determined by the id using expansion rules."
        ),
        json_schema_extra={
            "linkml_meta": {
                "alias": "iri",
                "definition_uri": "https://w3id.org/biolink/vocab/iri",
                "domain_of": [
                    "attribute", "entity", "named thing", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample",
                    "biological entity", "gene", "genome",
                ],
                "exact_mappings": ["WIKIDATA_PROPERTY:P854"],
                "in_subset": ["translator_minimal", "samples"],
                "slot_uri": "biolink:iri",
            }
        },
    )
    # Type designator: restricted to this class's URI / CURIE and defaulting
    # to the CURIE form.
    category: List[
        Literal[
            "https://w3id.org/biolink/vocab/Procedure",
            "biolink:Procedure",
        ]
    ] = Field(
        ["biolink:Procedure"],
        description=(
            "Name of the high level ontology class in which this entity is categorized. "
            "Corresponds to the label for the biolink entity type class. "
            "In a neo4j database this MAY correspond to the neo4j label tag. "
            "In an RDF database it should be a biolink model class URI. "
            "This field is multi-valued. "
            "It should include values for ancestors of the biolink class; for example, "
            "a protein such as Shh would have category values `biolink:Protein`, "
            "`biolink:GeneProduct`, `biolink:MolecularEntity`. "
            "In an RDF database, nodes will typically have an rdf:type triples. "
            "This can be to the most specific biolink class, or potentially to a class "
            "more specific than something in biolink. "
            "For example, a sequence feature `f` may have a rdf:type assertion to a SO "
            "class such as TF_binding_site, which is more specific than anything in biolink. "
            "Here we would have categories {biolink:GenomicEntity, "
            "biolink:MolecularEntity, biolink:NamedThing}"
        ),
        json_schema_extra={
            "linkml_meta": {
                "alias": "category",
                "definition_uri": "https://w3id.org/biolink/vocab/category",
                "designates_type": True,
                "domain": "entity",
                "domain_of": [
                    "entity", "attribute", "named thing", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample",
                    "biological entity", "gene", "genome",
                ],
                "in_subset": ["translator_minimal"],
                "is_a": "type",
                "is_class_field": True,
                "slot_uri": "biolink:category",
            }
        },
    )
    type: Optional[List[str]] = Field(
        None,
        json_schema_extra={
            "linkml_meta": {
                "alias": "type",
                "definition_uri": "https://w3id.org/biolink/vocab/type",
                "domain": "entity",
                "domain_of": [
                    "entity", "attribute", "named thing", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample",
                    "biological entity", "gene", "genome",
                ],
                "exact_mappings": ["gff3:type", "gpi:DB_Object_Type"],
                "mappings": ["rdf:type"],
                "slot_uri": "rdf:type",
            }
        },
    )
    name: Optional[str] = Field(
        None,
        description="A human-readable name for an attribute or entity.",
        json_schema_extra={
            "linkml_meta": {
                "alias": "name",
                "aliases": ["label", "display name", "title"],
                "definition_uri": "https://w3id.org/biolink/vocab/name",
                "domain": "entity",
                "domain_of": [
                    "attribute", "entity", "macromolecular machine mixin",
                    "named thing", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample",
                    "biological entity", "gene or gene product", "gene", "genome",
                ],
                "exact_mappings": ["gff3:Name", "gpi:DB_Object_Name"],
                "in_subset": ["translator_minimal", "samples"],
                "mappings": ["rdfs:label"],
                "narrow_mappings": ["dct:title", "WIKIDATA_PROPERTY:P1476"],
                "slot_uri": "rdfs:label",
            }
        },
    )
    description: Optional[str] = Field(
        None,
        description="a human-readable description of an entity",
        json_schema_extra={
            "linkml_meta": {
                "alias": "description",
                "aliases": ["definition"],
                "definition_uri": "https://w3id.org/biolink/vocab/description",
                "domain_of": [
                    "entity", "attribute", "named thing", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample",
                    "biological entity", "gene", "genome",
                ],
                "exact_mappings": ["IAO:0000115", "skos:definitions"],
                "in_subset": ["translator_minimal"],
                "mappings": ["dct:description"],
                "narrow_mappings": ["gff3:Description"],
                "slot_uri": "dct:description",
            }
        },
    )
    has_attribute: Optional[List[str]] = Field(
        None,
        description="connects any entity to an attribute",
        json_schema_extra={
            "linkml_meta": {
                "alias": "has_attribute",
                "close_mappings": ["OBI:0001927"],
                "definition_uri": "https://w3id.org/biolink/vocab/has_attribute",
                "domain": "entity",
                "domain_of": [
                    "entity", "attribute", "named thing", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample",
                    "biological entity", "gene", "genome",
                ],
                "exact_mappings": ["SIO:000008"],
                "in_subset": ["samples"],
                "narrow_mappings": [
                    "OBAN:association_has_subject_property",
                    "OBAN:association_has_object_property",
                    "CPT:has_possibly_included_panel_element",
                    "DRUGBANK:category",
                    "EFO:is_executed_in",
                    "HANCESTRO:0301",
                    "LOINC:has_action_guidance",
                    "LOINC:has_adjustment",
                    "LOINC:has_aggregation_view",
                    "LOINC:has_approach_guidance",
                    "LOINC:has_divisor",
                    "LOINC:has_exam",
                    "LOINC:has_method",
                    "LOINC:has_modality_subtype",
                    "LOINC:has_object_guidance",
                    "LOINC:has_scale",
                    "LOINC:has_suffix",
                    "LOINC:has_time_aspect",
                    "LOINC:has_time_modifier",
                    "LOINC:has_timing_of",
                    "NCIT:R88",
                    "NCIT:eo_disease_has_property_or_attribute",
                    "NCIT:has_data_element",
                    "NCIT:has_pharmaceutical_administration_method",
                    "NCIT:has_pharmaceutical_basic_dose_form",
                    "NCIT:has_pharmaceutical_intended_site",
                    "NCIT:has_pharmaceutical_release_characteristics",
                    "NCIT:has_pharmaceutical_state_of_matter",
                    "NCIT:has_pharmaceutical_transformation",
                    "NCIT:is_qualified_by",
                    "NCIT:qualifier_applies_to",
                    "NCIT:role_has_domain",
                    "NCIT:role_has_range",
                    "INO:0000154",
                    "HANCESTRO:0308",
                    "OMIM:has_inheritance_type",
                    "orphanet:C016",
                    "orphanet:C017",
                    "RO:0000053",
                    "RO:0000086",
                    "RO:0000087",
                    "SNOMED:has_access",
                    "SNOMED:has_clinical_course",
                    "SNOMED:has_count_of_base_of_active_ingredient",
                    "SNOMED:has_dose_form_administration_method",
                    "SNOMED:has_dose_form_release_characteristic",
                    "SNOMED:has_dose_form_transformation",
                    "SNOMED:has_finding_context",
                    "SNOMED:has_finding_informer",
                    "SNOMED:has_inherent_attribute",
                    "SNOMED:has_intent",
                    "SNOMED:has_interpretation",
                    "SNOMED:has_laterality",
                    "SNOMED:has_measurement_method",
                    "SNOMED:has_method",
                    "SNOMED:has_priority",
                    "SNOMED:has_procedure_context",
                    "SNOMED:has_process_duration",
                    "SNOMED:has_property",
                    "SNOMED:has_revision_status",
                    "SNOMED:has_scale_type",
                    "SNOMED:has_severity",
                    "SNOMED:has_specimen",
                    "SNOMED:has_state_of_matter",
                    "SNOMED:has_subject_relationship_context",
                    "SNOMED:has_surgical_approach",
                    "SNOMED:has_technique",
                    "SNOMED:has_temporal_context",
                    "SNOMED:has_time_aspect",
                    "SNOMED:has_units",
                    "UMLS:has_structural_class",
                    "UMLS:has_supported_concept_property",
                    "UMLS:has_supported_concept_relationship",
                    "UMLS:may_be_qualified_by",
                ],
                "slot_uri": "biolink:has_attribute",
            }
        },
    )
    deprecated: Optional[bool] = Field(
        None,
        description=(
            "A boolean flag indicating that an entity is no longer considered "
            "current or valid."
        ),
        json_schema_extra={
            "linkml_meta": {
                "alias": "deprecated",
                "definition_uri": "https://w3id.org/biolink/vocab/deprecated",
                "domain_of": [
                    "entity", "attribute", "named thing", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample",
                    "biological entity", "gene", "genome",
                ],
                "exact_mappings": ["oboInOwl:ObsoleteClass"],
                "slot_uri": "biolink:deprecated",
            }
        },
    )
    provided_by: Optional[List[str]] = Field(
        None,
        description=(
            "The value in this node property represents the knowledge provider "
            "that created or assembled the node and all of its attributes. "
            "Used internally to represent how a particular node made its way "
            "into a knowledge provider or graph."
        ),
        json_schema_extra={
            "linkml_meta": {
                "alias": "provided_by",
                "definition_uri": "https://w3id.org/biolink/vocab/provided_by",
                "domain": "named thing",
                "domain_of": [
                    "named thing", "attribute", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample",
                    "biological entity", "gene", "genome",
                ],
                "is_a": "node property",
                "slot_uri": "biolink:provided_by",
            }
        },
    )
    xref: Optional[List[str]] = Field(
        None,
        description=(
            "A database cross reference or alternative identifier for a "
            "NamedThing or edge between two NamedThings. "
            "This property should point to a database record or webpage that "
            "supports the existence of the edge, or gives more detail about the edge. "
            "This property can be used on a node or edge to provide multiple "
            "URIs or CURIE cross references."
        ),
        json_schema_extra={
            "linkml_meta": {
                "alias": "xref",
                "aliases": [
                    "dbxref", "Dbxref", "DbXref", "record_url", "source_record_urls",
                ],
                "definition_uri": "https://w3id.org/biolink/vocab/xref",
                "domain": "named thing",
                "domain_of": [
                    "named thing", "gene", "attribute", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample",
                    "biological entity", "genome",
                ],
                "in_subset": ["translator_minimal"],
                "narrow_mappings": ["gff3:Dbxref", "gpi:DB_Xrefs"],
                "slot_uri": "biolink:xref",
            }
        },
    )
    full_name: Optional[str] = Field(
        None,
        description="a long-form human readable name for a thing",
        json_schema_extra={
            "linkml_meta": {
                "alias": "full_name",
                "definition_uri": "https://w3id.org/biolink/vocab/full_name",
                "domain": "named thing",
                "domain_of": [
                    "named thing", "attribute", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample",
                    "biological entity", "gene", "genome",
                ],
                "is_a": "node property",
                "slot_uri": "biolink:full_name",
            }
        },
    )
    synonym: Optional[List[str]] = Field(
        None,
        description="Alternate human-readable names for a thing",
        json_schema_extra={
            "linkml_meta": {
                "alias": "synonym",
                "aliases": ["alias"],
                "definition_uri": "https://w3id.org/biolink/vocab/synonym",
                "domain": "named thing",
                "domain_of": [
                    "named thing", "attribute", "organism taxon",
                    "information content entity", "dataset", "physical entity",
                    "activity", "procedure", "material sample",
                    "biological entity", "gene", "genome",
                ],
                "in_subset": ["translator_minimal"],
                "is_a": "node property",
                "narrow_mappings": [
                    "skos:altLabel",
                    "gff3:Alias",
                    "AGRKB:synonyms",
                    "gpi:DB_Object_Synonyms",
                    "HANCESTRO:0330",
                    "IAO:0000136",
                    "RXNORM:has_tradename",
                ],
                "slot_uri": "biolink:synonym",
            }
        },
    )
+ + + +
+[docs] +class SubjectOfInvestigation(ConfiguredBaseModel): + """ + An entity that has the role of being studied in an investigation, study, or experiment + """ + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'class_uri': 'biolink:SubjectOfInvestigation', + 'definition_uri': 'https://w3id.org/biolink/vocab/SubjectOfInvestigation', + 'from_schema': 'https://w3id.org/biolink/bican-biolink-schema', + 'mixin': True}) + + pass
+ + + +
+[docs] +class MaterialSample(SubjectOfInvestigation, PhysicalEntity): + """ + A sample is a limited quantity of something (e.g. an individual or set of individuals from a population, or a portion of a substance) to be used for testing, analysis, inspection, investigation, demonstration, or trial use. [SIO] + """ + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'aliases': ['biospecimen', 'sample', 'biosample', 'physical sample'], + 'class_uri': 'biolink:MaterialSample', + 'definition_uri': 'https://w3id.org/biolink/vocab/MaterialSample', + 'exact_mappings': ['OBI:0000747', 'SIO:001050'], + 'from_schema': 'https://w3id.org/biolink/bican-biolink-schema', + 'id_prefixes': ['BIOSAMPLE', 'GOLD.META'], + 'mixins': ['subject of investigation']}) + + id: str = Field(..., description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI""", json_schema_extra = { "linkml_meta": {'alias': 'id', + 'definition_uri': 'https://w3id.org/biolink/vocab/id', + 'domain': 'entity', + 'domain_of': ['ontology class', + 'entity', + 'attribute', + 'named thing', + 'taxonomic rank', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['AGRKB:primaryId', 'gff3:ID', 'gpi:DB_Object_ID'], + 'in_subset': ['translator_minimal'], + 'slot_uri': 'biolink:id'} }) + iri: Optional[str] = Field(None, description="""An IRI for an entity. 
This is determined by the id using expansion rules.""", json_schema_extra = { "linkml_meta": {'alias': 'iri', + 'definition_uri': 'https://w3id.org/biolink/vocab/iri', + 'domain_of': ['attribute', + 'entity', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['WIKIDATA_PROPERTY:P854'], + 'in_subset': ['translator_minimal', 'samples'], + 'slot_uri': 'biolink:iri'} }) + category: List[Literal["https://w3id.org/biolink/vocab/MaterialSample","biolink:MaterialSample"]] = Field(["biolink:MaterialSample"], description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. 
Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}""", json_schema_extra = { "linkml_meta": {'alias': 'category', + 'definition_uri': 'https://w3id.org/biolink/vocab/category', + 'designates_type': True, + 'domain': 'entity', + 'domain_of': ['entity', + 'attribute', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'in_subset': ['translator_minimal'], + 'is_a': 'type', + 'is_class_field': True, + 'slot_uri': 'biolink:category'} }) + type: Optional[List[str]] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'type', + 'definition_uri': 'https://w3id.org/biolink/vocab/type', + 'domain': 'entity', + 'domain_of': ['entity', + 'attribute', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['gff3:type', 'gpi:DB_Object_Type'], + 'mappings': ['rdf:type'], + 'slot_uri': 'rdf:type'} }) + name: Optional[str] = Field(None, description="""A human-readable name for an attribute or entity.""", json_schema_extra = { "linkml_meta": {'alias': 'name', + 'aliases': ['label', 'display name', 'title'], + 'definition_uri': 'https://w3id.org/biolink/vocab/name', + 'domain': 'entity', + 'domain_of': ['attribute', + 'entity', + 'macromolecular machine mixin', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene or gene product', + 'gene', + 'genome'], + 'exact_mappings': ['gff3:Name', 'gpi:DB_Object_Name'], + 'in_subset': ['translator_minimal', 'samples'], + 'mappings': ['rdfs:label'], + 'narrow_mappings': ['dct:title', 'WIKIDATA_PROPERTY:P1476'], + 'slot_uri': 'rdfs:label'} }) + 
description: Optional[str] = Field(None, description="""a human-readable description of an entity""", json_schema_extra = { "linkml_meta": {'alias': 'description', + 'aliases': ['definition'], + 'definition_uri': 'https://w3id.org/biolink/vocab/description', + 'domain_of': ['entity', + 'attribute', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['IAO:0000115', 'skos:definitions'], + 'in_subset': ['translator_minimal'], + 'mappings': ['dct:description'], + 'narrow_mappings': ['gff3:Description'], + 'slot_uri': 'dct:description'} }) + has_attribute: Optional[List[str]] = Field(None, description="""connects any entity to an attribute""", json_schema_extra = { "linkml_meta": {'alias': 'has_attribute', + 'close_mappings': ['OBI:0001927'], + 'definition_uri': 'https://w3id.org/biolink/vocab/has_attribute', + 'domain': 'entity', + 'domain_of': ['entity', + 'attribute', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['SIO:000008'], + 'in_subset': ['samples'], + 'narrow_mappings': ['OBAN:association_has_subject_property', + 'OBAN:association_has_object_property', + 'CPT:has_possibly_included_panel_element', + 'DRUGBANK:category', + 'EFO:is_executed_in', + 'HANCESTRO:0301', + 'LOINC:has_action_guidance', + 'LOINC:has_adjustment', + 'LOINC:has_aggregation_view', + 'LOINC:has_approach_guidance', + 'LOINC:has_divisor', + 'LOINC:has_exam', + 'LOINC:has_method', + 'LOINC:has_modality_subtype', + 'LOINC:has_object_guidance', + 'LOINC:has_scale', + 'LOINC:has_suffix', + 'LOINC:has_time_aspect', + 'LOINC:has_time_modifier', + 'LOINC:has_timing_of', + 'NCIT:R88', + 'NCIT:eo_disease_has_property_or_attribute', + 'NCIT:has_data_element', + 
'NCIT:has_pharmaceutical_administration_method', + 'NCIT:has_pharmaceutical_basic_dose_form', + 'NCIT:has_pharmaceutical_intended_site', + 'NCIT:has_pharmaceutical_release_characteristics', + 'NCIT:has_pharmaceutical_state_of_matter', + 'NCIT:has_pharmaceutical_transformation', + 'NCIT:is_qualified_by', + 'NCIT:qualifier_applies_to', + 'NCIT:role_has_domain', + 'NCIT:role_has_range', + 'INO:0000154', + 'HANCESTRO:0308', + 'OMIM:has_inheritance_type', + 'orphanet:C016', + 'orphanet:C017', + 'RO:0000053', + 'RO:0000086', + 'RO:0000087', + 'SNOMED:has_access', + 'SNOMED:has_clinical_course', + 'SNOMED:has_count_of_base_of_active_ingredient', + 'SNOMED:has_dose_form_administration_method', + 'SNOMED:has_dose_form_release_characteristic', + 'SNOMED:has_dose_form_transformation', + 'SNOMED:has_finding_context', + 'SNOMED:has_finding_informer', + 'SNOMED:has_inherent_attribute', + 'SNOMED:has_intent', + 'SNOMED:has_interpretation', + 'SNOMED:has_laterality', + 'SNOMED:has_measurement_method', + 'SNOMED:has_method', + 'SNOMED:has_priority', + 'SNOMED:has_procedure_context', + 'SNOMED:has_process_duration', + 'SNOMED:has_property', + 'SNOMED:has_revision_status', + 'SNOMED:has_scale_type', + 'SNOMED:has_severity', + 'SNOMED:has_specimen', + 'SNOMED:has_state_of_matter', + 'SNOMED:has_subject_relationship_context', + 'SNOMED:has_surgical_approach', + 'SNOMED:has_technique', + 'SNOMED:has_temporal_context', + 'SNOMED:has_time_aspect', + 'SNOMED:has_units', + 'UMLS:has_structural_class', + 'UMLS:has_supported_concept_property', + 'UMLS:has_supported_concept_relationship', + 'UMLS:may_be_qualified_by'], + 'slot_uri': 'biolink:has_attribute'} }) + deprecated: Optional[bool] = Field(None, description="""A boolean flag indicating that an entity is no longer considered current or valid.""", json_schema_extra = { "linkml_meta": {'alias': 'deprecated', + 'definition_uri': 'https://w3id.org/biolink/vocab/deprecated', + 'domain_of': ['entity', + 'attribute', + 'named thing', + 
'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['oboInOwl:ObsoleteClass'], + 'slot_uri': 'biolink:deprecated'} }) + provided_by: Optional[List[str]] = Field(None, description="""The value in this node property represents the knowledge provider that created or assembled the node and all of its attributes. Used internally to represent how a particular node made its way into a knowledge provider or graph.""", json_schema_extra = { "linkml_meta": {'alias': 'provided_by', + 'definition_uri': 'https://w3id.org/biolink/vocab/provided_by', + 'domain': 'named thing', + 'domain_of': ['named thing', + 'attribute', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'is_a': 'node property', + 'slot_uri': 'biolink:provided_by'} }) + xref: Optional[List[str]] = Field(None, description="""A database cross reference or alternative identifier for a NamedThing or edge between two NamedThings. This property should point to a database record or webpage that supports the existence of the edge, or gives more detail about the edge. 
This property can be used on a node or edge to provide multiple URIs or CURIE cross references.""", json_schema_extra = { "linkml_meta": {'alias': 'xref', + 'aliases': ['dbxref', 'Dbxref', 'DbXref', 'record_url', 'source_record_urls'], + 'definition_uri': 'https://w3id.org/biolink/vocab/xref', + 'domain': 'named thing', + 'domain_of': ['named thing', + 'gene', + 'attribute', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'genome'], + 'in_subset': ['translator_minimal'], + 'narrow_mappings': ['gff3:Dbxref', 'gpi:DB_Xrefs'], + 'slot_uri': 'biolink:xref'} }) + full_name: Optional[str] = Field(None, description="""a long-form human readable name for a thing""", json_schema_extra = { "linkml_meta": {'alias': 'full_name', + 'definition_uri': 'https://w3id.org/biolink/vocab/full_name', + 'domain': 'named thing', + 'domain_of': ['named thing', + 'attribute', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'is_a': 'node property', + 'slot_uri': 'biolink:full_name'} }) + synonym: Optional[List[str]] = Field(None, description="""Alternate human-readable names for a thing""", json_schema_extra = { "linkml_meta": {'alias': 'synonym', + 'aliases': ['alias'], + 'definition_uri': 'https://w3id.org/biolink/vocab/synonym', + 'domain': 'named thing', + 'domain_of': ['named thing', + 'attribute', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'in_subset': ['translator_minimal'], + 'is_a': 'node property', + 'narrow_mappings': ['skos:altLabel', + 'gff3:Alias', + 'AGRKB:synonyms', + 'gpi:DB_Object_Synonyms', + 'HANCESTRO:0330', + 'IAO:0000136', + 'RXNORM:has_tradename'], + 'slot_uri': 'biolink:synonym'} })
+ + + +
+[docs] +class ThingWithTaxon(ConfiguredBaseModel): + """ + A mixin that can be used on any entity that can be taxonomically classified. This includes individual organisms; genes, their products and other molecular entities; body parts; biological processes + """ + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'class_uri': 'biolink:ThingWithTaxon', + 'definition_uri': 'https://w3id.org/biolink/vocab/ThingWithTaxon', + 'from_schema': 'https://w3id.org/biolink/bican-biolink-schema', + 'mixin': True}) + + in_taxon: Optional[List[str]] = Field(None, description="""connects an entity to its taxonomic classification. Only certain kinds of entities can be taxonomically classified; see 'thing with taxon'""", json_schema_extra = { "linkml_meta": {'alias': 'in_taxon', + 'aliases': ['instance of', + 'is organism source of gene product', + 'organism has gene', + 'gene found in organism', + 'gene product has organism source'], + 'annotations': {'canonical_predicate': {'tag': 'canonical_predicate', + 'value': True}}, + 'definition_uri': 'https://w3id.org/biolink/vocab/in_taxon', + 'domain': 'thing with taxon', + 'domain_of': ['thing with taxon', 'biological entity', 'gene', 'genome'], + 'exact_mappings': ['RO:0002162', 'WIKIDATA_PROPERTY:P703'], + 'in_subset': ['translator_minimal'], + 'inherited': True, + 'is_a': 'related to at instance level', + 'narrow_mappings': ['RO:0002160'], + 'slot_uri': 'biolink:in_taxon'} }) + in_taxon_label: Optional[str] = Field(None, description="""The human readable scientific name for the taxon of the entity.""", json_schema_extra = { "linkml_meta": {'alias': 'in_taxon_label', + 'annotations': {'denormalized': {'tag': 'denormalized', 'value': True}}, + 'definition_uri': 'https://w3id.org/biolink/vocab/in_taxon_label', + 'domain': 'thing with taxon', + 'domain_of': ['thing with taxon', 'biological entity', 'gene', 'genome'], + 'exact_mappings': ['WIKIDATA_PROPERTY:P225'], + 'in_subset': ['translator_minimal'], + 'is_a': 'node property', + 
'slot_uri': 'biolink:in_taxon_label'} })
+ + + +
+[docs] +class BiologicalEntity(ThingWithTaxon, NamedThing): + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'abstract': True, + 'aliases': ['bioentity'], + 'class_uri': 'biolink:BiologicalEntity', + 'definition_uri': 'https://w3id.org/biolink/vocab/BiologicalEntity', + 'from_schema': 'https://w3id.org/biolink/bican-biolink-schema', + 'mixins': ['thing with taxon'], + 'narrow_mappings': ['WIKIDATA:Q28845870', + 'STY:T050', + 'SIO:010046', + 'STY:T129']}) + + id: str = Field(..., description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI""", json_schema_extra = { "linkml_meta": {'alias': 'id', + 'definition_uri': 'https://w3id.org/biolink/vocab/id', + 'domain': 'entity', + 'domain_of': ['ontology class', + 'entity', + 'attribute', + 'named thing', + 'taxonomic rank', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['AGRKB:primaryId', 'gff3:ID', 'gpi:DB_Object_ID'], + 'in_subset': ['translator_minimal'], + 'slot_uri': 'biolink:id'} }) + iri: Optional[str] = Field(None, description="""An IRI for an entity. This is determined by the id using expansion rules.""", json_schema_extra = { "linkml_meta": {'alias': 'iri', + 'definition_uri': 'https://w3id.org/biolink/vocab/iri', + 'domain_of': ['attribute', + 'entity', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['WIKIDATA_PROPERTY:P854'], + 'in_subset': ['translator_minimal', 'samples'], + 'slot_uri': 'biolink:iri'} }) + category: List[Literal["https://w3id.org/biolink/vocab/BiologicalEntity","biolink:BiologicalEntity"]] = Field(["biolink:BiologicalEntity"], description="""Name of the high level ontology class in which this entity is categorized. 
Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}""", json_schema_extra = { "linkml_meta": {'alias': 'category', + 'definition_uri': 'https://w3id.org/biolink/vocab/category', + 'designates_type': True, + 'domain': 'entity', + 'domain_of': ['entity', + 'attribute', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'in_subset': ['translator_minimal'], + 'is_a': 'type', + 'is_class_field': True, + 'slot_uri': 'biolink:category'} }) + type: Optional[List[str]] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'type', + 'definition_uri': 'https://w3id.org/biolink/vocab/type', + 'domain': 'entity', + 'domain_of': ['entity', + 'attribute', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['gff3:type', 'gpi:DB_Object_Type'], + 'mappings': ['rdf:type'], + 'slot_uri': 'rdf:type'} }) + name: Optional[str] = Field(None, description="""A human-readable name for an attribute or 
entity.""", json_schema_extra = { "linkml_meta": {'alias': 'name', + 'aliases': ['label', 'display name', 'title'], + 'definition_uri': 'https://w3id.org/biolink/vocab/name', + 'domain': 'entity', + 'domain_of': ['attribute', + 'entity', + 'macromolecular machine mixin', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene or gene product', + 'gene', + 'genome'], + 'exact_mappings': ['gff3:Name', 'gpi:DB_Object_Name'], + 'in_subset': ['translator_minimal', 'samples'], + 'mappings': ['rdfs:label'], + 'narrow_mappings': ['dct:title', 'WIKIDATA_PROPERTY:P1476'], + 'slot_uri': 'rdfs:label'} }) + description: Optional[str] = Field(None, description="""a human-readable description of an entity""", json_schema_extra = { "linkml_meta": {'alias': 'description', + 'aliases': ['definition'], + 'definition_uri': 'https://w3id.org/biolink/vocab/description', + 'domain_of': ['entity', + 'attribute', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['IAO:0000115', 'skos:definitions'], + 'in_subset': ['translator_minimal'], + 'mappings': ['dct:description'], + 'narrow_mappings': ['gff3:Description'], + 'slot_uri': 'dct:description'} }) + has_attribute: Optional[List[str]] = Field(None, description="""connects any entity to an attribute""", json_schema_extra = { "linkml_meta": {'alias': 'has_attribute', + 'close_mappings': ['OBI:0001927'], + 'definition_uri': 'https://w3id.org/biolink/vocab/has_attribute', + 'domain': 'entity', + 'domain_of': ['entity', + 'attribute', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': 
['SIO:000008'], + 'in_subset': ['samples'], + 'narrow_mappings': ['OBAN:association_has_subject_property', + 'OBAN:association_has_object_property', + 'CPT:has_possibly_included_panel_element', + 'DRUGBANK:category', + 'EFO:is_executed_in', + 'HANCESTRO:0301', + 'LOINC:has_action_guidance', + 'LOINC:has_adjustment', + 'LOINC:has_aggregation_view', + 'LOINC:has_approach_guidance', + 'LOINC:has_divisor', + 'LOINC:has_exam', + 'LOINC:has_method', + 'LOINC:has_modality_subtype', + 'LOINC:has_object_guidance', + 'LOINC:has_scale', + 'LOINC:has_suffix', + 'LOINC:has_time_aspect', + 'LOINC:has_time_modifier', + 'LOINC:has_timing_of', + 'NCIT:R88', + 'NCIT:eo_disease_has_property_or_attribute', + 'NCIT:has_data_element', + 'NCIT:has_pharmaceutical_administration_method', + 'NCIT:has_pharmaceutical_basic_dose_form', + 'NCIT:has_pharmaceutical_intended_site', + 'NCIT:has_pharmaceutical_release_characteristics', + 'NCIT:has_pharmaceutical_state_of_matter', + 'NCIT:has_pharmaceutical_transformation', + 'NCIT:is_qualified_by', + 'NCIT:qualifier_applies_to', + 'NCIT:role_has_domain', + 'NCIT:role_has_range', + 'INO:0000154', + 'HANCESTRO:0308', + 'OMIM:has_inheritance_type', + 'orphanet:C016', + 'orphanet:C017', + 'RO:0000053', + 'RO:0000086', + 'RO:0000087', + 'SNOMED:has_access', + 'SNOMED:has_clinical_course', + 'SNOMED:has_count_of_base_of_active_ingredient', + 'SNOMED:has_dose_form_administration_method', + 'SNOMED:has_dose_form_release_characteristic', + 'SNOMED:has_dose_form_transformation', + 'SNOMED:has_finding_context', + 'SNOMED:has_finding_informer', + 'SNOMED:has_inherent_attribute', + 'SNOMED:has_intent', + 'SNOMED:has_interpretation', + 'SNOMED:has_laterality', + 'SNOMED:has_measurement_method', + 'SNOMED:has_method', + 'SNOMED:has_priority', + 'SNOMED:has_procedure_context', + 'SNOMED:has_process_duration', + 'SNOMED:has_property', + 'SNOMED:has_revision_status', + 'SNOMED:has_scale_type', + 'SNOMED:has_severity', + 'SNOMED:has_specimen', + 
'SNOMED:has_state_of_matter', + 'SNOMED:has_subject_relationship_context', + 'SNOMED:has_surgical_approach', + 'SNOMED:has_technique', + 'SNOMED:has_temporal_context', + 'SNOMED:has_time_aspect', + 'SNOMED:has_units', + 'UMLS:has_structural_class', + 'UMLS:has_supported_concept_property', + 'UMLS:has_supported_concept_relationship', + 'UMLS:may_be_qualified_by'], + 'slot_uri': 'biolink:has_attribute'} }) + deprecated: Optional[bool] = Field(None, description="""A boolean flag indicating that an entity is no longer considered current or valid.""", json_schema_extra = { "linkml_meta": {'alias': 'deprecated', + 'definition_uri': 'https://w3id.org/biolink/vocab/deprecated', + 'domain_of': ['entity', + 'attribute', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['oboInOwl:ObsoleteClass'], + 'slot_uri': 'biolink:deprecated'} }) + provided_by: Optional[List[str]] = Field(None, description="""The value in this node property represents the knowledge provider that created or assembled the node and all of its attributes. Used internally to represent how a particular node made its way into a knowledge provider or graph.""", json_schema_extra = { "linkml_meta": {'alias': 'provided_by', + 'definition_uri': 'https://w3id.org/biolink/vocab/provided_by', + 'domain': 'named thing', + 'domain_of': ['named thing', + 'attribute', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'is_a': 'node property', + 'slot_uri': 'biolink:provided_by'} }) + xref: Optional[List[str]] = Field(None, description="""A database cross reference or alternative identifier for a NamedThing or edge between two NamedThings. 
This property should point to a database record or webpage that supports the existence of the edge, or gives more detail about the edge. This property can be used on a node or edge to provide multiple URIs or CURIE cross references.""", json_schema_extra = { "linkml_meta": {'alias': 'xref', + 'aliases': ['dbxref', 'Dbxref', 'DbXref', 'record_url', 'source_record_urls'], + 'definition_uri': 'https://w3id.org/biolink/vocab/xref', + 'domain': 'named thing', + 'domain_of': ['named thing', + 'gene', + 'attribute', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'genome'], + 'in_subset': ['translator_minimal'], + 'narrow_mappings': ['gff3:Dbxref', 'gpi:DB_Xrefs'], + 'slot_uri': 'biolink:xref'} }) + full_name: Optional[str] = Field(None, description="""a long-form human readable name for a thing""", json_schema_extra = { "linkml_meta": {'alias': 'full_name', + 'definition_uri': 'https://w3id.org/biolink/vocab/full_name', + 'domain': 'named thing', + 'domain_of': ['named thing', + 'attribute', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'is_a': 'node property', + 'slot_uri': 'biolink:full_name'} }) + synonym: Optional[List[str]] = Field(None, description="""Alternate human-readable names for a thing""", json_schema_extra = { "linkml_meta": {'alias': 'synonym', + 'aliases': ['alias'], + 'definition_uri': 'https://w3id.org/biolink/vocab/synonym', + 'domain': 'named thing', + 'domain_of': ['named thing', + 'attribute', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'in_subset': ['translator_minimal'], + 'is_a': 'node property', + 'narrow_mappings': ['skos:altLabel', + 'gff3:Alias', + 
'AGRKB:synonyms', + 'gpi:DB_Object_Synonyms', + 'HANCESTRO:0330', + 'IAO:0000136', + 'RXNORM:has_tradename'], + 'slot_uri': 'biolink:synonym'} }) + in_taxon: Optional[List[str]] = Field(None, description="""connects an entity to its taxonomic classification. Only certain kinds of entities can be taxonomically classified; see 'thing with taxon'""", json_schema_extra = { "linkml_meta": {'alias': 'in_taxon', + 'aliases': ['instance of', + 'is organism source of gene product', + 'organism has gene', + 'gene found in organism', + 'gene product has organism source'], + 'annotations': {'canonical_predicate': {'tag': 'canonical_predicate', + 'value': True}}, + 'definition_uri': 'https://w3id.org/biolink/vocab/in_taxon', + 'domain': 'thing with taxon', + 'domain_of': ['thing with taxon', 'biological entity', 'gene', 'genome'], + 'exact_mappings': ['RO:0002162', 'WIKIDATA_PROPERTY:P703'], + 'in_subset': ['translator_minimal'], + 'inherited': True, + 'is_a': 'related to at instance level', + 'narrow_mappings': ['RO:0002160'], + 'slot_uri': 'biolink:in_taxon'} }) + in_taxon_label: Optional[str] = Field(None, description="""The human readable scientific name for the taxon of the entity.""", json_schema_extra = { "linkml_meta": {'alias': 'in_taxon_label', + 'annotations': {'denormalized': {'tag': 'denormalized', 'value': True}}, + 'definition_uri': 'https://w3id.org/biolink/vocab/in_taxon_label', + 'domain': 'thing with taxon', + 'domain_of': ['thing with taxon', 'biological entity', 'gene', 'genome'], + 'exact_mappings': ['WIKIDATA_PROPERTY:P225'], + 'in_subset': ['translator_minimal'], + 'is_a': 'node property', + 'slot_uri': 'biolink:in_taxon_label'} })
+ + + +
+[docs] +class GenomicEntity(ConfiguredBaseModel): + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'class_uri': 'biolink:GenomicEntity', + 'definition_uri': 'https://w3id.org/biolink/vocab/GenomicEntity', + 'from_schema': 'https://w3id.org/biolink/bican-biolink-schema', + 'in_subset': ['translator_minimal'], + 'mixin': True, + 'narrow_mappings': ['STY:T028', 'GENO:0000897']}) + + has_biological_sequence: Optional[str] = Field(None, description="""connects a genomic feature to its sequence""", json_schema_extra = { "linkml_meta": {'alias': 'has_biological_sequence', + 'definition_uri': 'https://w3id.org/biolink/vocab/has_biological_sequence', + 'domain': 'named thing', + 'domain_of': ['genomic entity', 'gene', 'genome'], + 'is_a': 'node property', + 'slot_uri': 'biolink:has_biological_sequence'} })
+ + + +
+[docs] +class ChemicalEntityOrGeneOrGeneProduct(ConfiguredBaseModel): + """ + A union of chemical entities and children, and gene or gene product. This mixin is helpful to use when searching across chemical entities that must include genes and their children as chemical entities. + """ + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'class_uri': 'biolink:ChemicalEntityOrGeneOrGeneProduct', + 'definition_uri': 'https://w3id.org/biolink/vocab/ChemicalEntityOrGeneOrGeneProduct', + 'from_schema': 'https://w3id.org/biolink/bican-biolink-schema', + 'mixin': True}) + + pass
+ + + +
+[docs] +class MacromolecularMachineMixin(ConfiguredBaseModel): + """ + A union of gene locus, gene product, and macromolecular complex. These are the basic units of function in a cell. They either carry out individual biological activities, or they encode molecules which do this. + """ + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'class_uri': 'biolink:MacromolecularMachineMixin', + 'definition_uri': 'https://w3id.org/biolink/vocab/MacromolecularMachineMixin', + 'from_schema': 'https://w3id.org/biolink/bican-biolink-schema', + 'mixin': True}) + + name: Optional[str] = Field(None, description="""A human-readable name for an attribute or entity.""", json_schema_extra = { "linkml_meta": {'alias': 'name', + 'aliases': ['label', 'display name', 'title'], + 'definition_uri': 'https://w3id.org/biolink/vocab/name', + 'domain': 'entity', + 'domain_of': ['attribute', + 'entity', + 'macromolecular machine mixin', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene or gene product', + 'gene', + 'genome'], + 'exact_mappings': ['gff3:Name', 'gpi:DB_Object_Name'], + 'in_subset': ['translator_minimal', 'samples'], + 'mappings': ['rdfs:label'], + 'narrow_mappings': ['dct:title', 'WIKIDATA_PROPERTY:P1476'], + 'slot_uri': 'rdfs:label'} })
+ + + +
+[docs] +class GeneOrGeneProduct(MacromolecularMachineMixin): + """ + A union of gene loci or gene products. Frequently an identifier for one will be used as proxy for another + """ + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'class_uri': 'biolink:GeneOrGeneProduct', + 'definition_uri': 'https://w3id.org/biolink/vocab/GeneOrGeneProduct', + 'from_schema': 'https://w3id.org/biolink/bican-biolink-schema', + 'id_prefixes': ['CHEMBL.TARGET', 'IUPHAR.FAMILY'], + 'mixin': True}) + + name: Optional[str] = Field(None, description="""A human-readable name for an attribute or entity.""", json_schema_extra = { "linkml_meta": {'alias': 'name', + 'aliases': ['label', 'display name', 'title'], + 'definition_uri': 'https://w3id.org/biolink/vocab/name', + 'domain': 'entity', + 'domain_of': ['attribute', + 'entity', + 'macromolecular machine mixin', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene or gene product', + 'gene', + 'genome'], + 'exact_mappings': ['gff3:Name', 'gpi:DB_Object_Name'], + 'in_subset': ['translator_minimal', 'samples'], + 'mappings': ['rdfs:label'], + 'narrow_mappings': ['dct:title', 'WIKIDATA_PROPERTY:P1476'], + 'slot_uri': 'rdfs:label'} })
+ + + +
class Gene(GeneOrGeneProduct, ChemicalEntityOrGeneOrGeneProduct, GenomicEntity, BiologicalEntity, PhysicalEssence, OntologyClass):
    """
    A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene locus may include regulatory regions, transcribed regions and/or other functional sequence regions.
    """

    # NOTE: the docstring wording is left exactly as generated; pydantic may
    # expose it as the model description in emitted JSON schema.

    # Class-level LinkML metadata: ontology mappings, accepted id prefixes,
    # subsets and the mixins this class draws on.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({
        "broad_mappings": ["NCIT:C45822"],
        "class_uri": "biolink:Gene",
        "definition_uri": "https://w3id.org/biolink/vocab/Gene",
        "exact_mappings": ["SO:0000704", "SIO:010035", "WIKIDATA:Q7187", "dcid:Gene"],
        "from_schema": "https://w3id.org/biolink/bican-biolink-schema",
        "id_prefixes": [
            "NCBIGene", "ENSEMBL", "HGNC", "MGI", "ZFIN", "dictyBase", "WB",
            "WormBase", "FB", "RGD", "SGD", "PomBase", "OMIM", "KEGG.GENES",
            "UMLS", "Xenbase", "AspGD", "PHARMGKB.GENE",
        ],
        "in_subset": ["translator_minimal", "model_organism_database"],
        "mixins": [
            "gene or gene product",
            "genomic entity",
            "chemical entity or gene or gene product",
            "physical essence",
            "ontology class",
        ],
        "narrow_mappings": ["bioschemas:gene"],
    })

    # --- identity -----------------------------------------------------------
    # `id` is the only required field (Ellipsis default).
    id: str = Field(
        ...,
        description="A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI",
        json_schema_extra={"linkml_meta": {
            "alias": "id",
            "definition_uri": "https://w3id.org/biolink/vocab/id",
            "domain": "entity",
            "domain_of": [
                "ontology class", "entity", "attribute", "named thing",
                "taxonomic rank", "organism taxon", "information content entity",
                "dataset", "physical entity", "activity", "procedure",
                "material sample", "biological entity", "gene", "genome",
            ],
            "exact_mappings": ["AGRKB:primaryId", "gff3:ID", "gpi:DB_Object_ID"],
            "in_subset": ["translator_minimal"],
            "slot_uri": "biolink:id",
        }},
    )
    iri: Optional[str] = Field(
        default=None,
        description="An IRI for an entity. This is determined by the id using expansion rules.",
        json_schema_extra={"linkml_meta": {
            "alias": "iri",
            "definition_uri": "https://w3id.org/biolink/vocab/iri",
            "domain_of": [
                "attribute", "entity", "named thing", "organism taxon",
                "information content entity", "dataset", "physical entity",
                "activity", "procedure", "material sample", "biological entity",
                "gene", "genome",
            ],
            "exact_mappings": ["WIKIDATA_PROPERTY:P854"],
            "in_subset": ["translator_minimal", "samples"],
            "slot_uri": "biolink:iri",
        }},
    )
    # Type designator: values are restricted to the two Gene literals and the
    # default is the CURIE form.
    category: List[Literal["https://w3id.org/biolink/vocab/Gene", "biolink:Gene"]] = Field(
        default=["biolink:Gene"],
        description="Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}",
        json_schema_extra={"linkml_meta": {
            "alias": "category",
            "definition_uri": "https://w3id.org/biolink/vocab/category",
            "designates_type": True,
            "domain": "entity",
            "domain_of": [
                "entity", "attribute", "named thing", "organism taxon",
                "information content entity", "dataset", "physical entity",
                "activity", "procedure", "material sample", "biological entity",
                "gene", "genome",
            ],
            "in_subset": ["translator_minimal"],
            "is_a": "type",
            "is_class_field": True,
            "slot_uri": "biolink:category",
        }},
    )
    type: Optional[List[str]] = Field(
        default=None,
        json_schema_extra={"linkml_meta": {
            "alias": "type",
            "definition_uri": "https://w3id.org/biolink/vocab/type",
            "domain": "entity",
            "domain_of": [
                "entity", "attribute", "named thing", "organism taxon",
                "information content entity", "dataset", "physical entity",
                "activity", "procedure", "material sample", "biological entity",
                "gene", "genome",
            ],
            "exact_mappings": ["gff3:type", "gpi:DB_Object_Type"],
            "mappings": ["rdf:type"],
            "slot_uri": "rdf:type",
        }},
    )

    # --- descriptive entity slots (all optional) ------------------------------
    name: Optional[str] = Field(
        default=None,
        description="A human-readable name for an attribute or entity.",
        json_schema_extra={"linkml_meta": {
            "alias": "name",
            "aliases": ["label", "display name", "title"],
            "definition_uri": "https://w3id.org/biolink/vocab/name",
            "domain": "entity",
            "domain_of": [
                "attribute", "entity", "macromolecular machine mixin",
                "named thing", "organism taxon", "information content entity",
                "dataset", "physical entity", "activity", "procedure",
                "material sample", "biological entity", "gene or gene product",
                "gene", "genome",
            ],
            "exact_mappings": ["gff3:Name", "gpi:DB_Object_Name"],
            "in_subset": ["translator_minimal", "samples"],
            "mappings": ["rdfs:label"],
            "narrow_mappings": ["dct:title", "WIKIDATA_PROPERTY:P1476"],
            "slot_uri": "rdfs:label",
        }},
    )
    description: Optional[str] = Field(
        default=None,
        description="a human-readable description of an entity",
        json_schema_extra={"linkml_meta": {
            "alias": "description",
            "aliases": ["definition"],
            "definition_uri": "https://w3id.org/biolink/vocab/description",
            "domain_of": [
                "entity", "attribute", "named thing", "organism taxon",
                "information content entity", "dataset", "physical entity",
                "activity", "procedure", "material sample", "biological entity",
                "gene", "genome",
            ],
            "exact_mappings": ["IAO:0000115", "skos:definitions"],
            "in_subset": ["translator_minimal"],
            "mappings": ["dct:description"],
            "narrow_mappings": ["gff3:Description"],
            "slot_uri": "dct:description",
        }},
    )
    has_attribute: Optional[List[str]] = Field(
        default=None,
        description="connects any entity to an attribute",
        json_schema_extra={"linkml_meta": {
            "alias": "has_attribute",
            "close_mappings": ["OBI:0001927"],
            "definition_uri": "https://w3id.org/biolink/vocab/has_attribute",
            "domain": "entity",
            "domain_of": [
                "entity", "attribute", "named thing", "organism taxon",
                "information content entity", "dataset", "physical entity",
                "activity", "procedure", "material sample", "biological entity",
                "gene", "genome",
            ],
            "exact_mappings": ["SIO:000008"],
            "in_subset": ["samples"],
            "narrow_mappings": [
                "OBAN:association_has_subject_property",
                "OBAN:association_has_object_property",
                "CPT:has_possibly_included_panel_element",
                "DRUGBANK:category",
                "EFO:is_executed_in",
                "HANCESTRO:0301",
                "LOINC:has_action_guidance",
                "LOINC:has_adjustment",
                "LOINC:has_aggregation_view",
                "LOINC:has_approach_guidance",
                "LOINC:has_divisor",
                "LOINC:has_exam",
                "LOINC:has_method",
                "LOINC:has_modality_subtype",
                "LOINC:has_object_guidance",
                "LOINC:has_scale",
                "LOINC:has_suffix",
                "LOINC:has_time_aspect",
                "LOINC:has_time_modifier",
                "LOINC:has_timing_of",
                "NCIT:R88",
                "NCIT:eo_disease_has_property_or_attribute",
                "NCIT:has_data_element",
                "NCIT:has_pharmaceutical_administration_method",
                "NCIT:has_pharmaceutical_basic_dose_form",
                "NCIT:has_pharmaceutical_intended_site",
                "NCIT:has_pharmaceutical_release_characteristics",
                "NCIT:has_pharmaceutical_state_of_matter",
                "NCIT:has_pharmaceutical_transformation",
                "NCIT:is_qualified_by",
                "NCIT:qualifier_applies_to",
                "NCIT:role_has_domain",
                "NCIT:role_has_range",
                "INO:0000154",
                "HANCESTRO:0308",
                "OMIM:has_inheritance_type",
                "orphanet:C016",
                "orphanet:C017",
                "RO:0000053",
                "RO:0000086",
                "RO:0000087",
                "SNOMED:has_access",
                "SNOMED:has_clinical_course",
                "SNOMED:has_count_of_base_of_active_ingredient",
                "SNOMED:has_dose_form_administration_method",
                "SNOMED:has_dose_form_release_characteristic",
                "SNOMED:has_dose_form_transformation",
                "SNOMED:has_finding_context",
                "SNOMED:has_finding_informer",
                "SNOMED:has_inherent_attribute",
                "SNOMED:has_intent",
                "SNOMED:has_interpretation",
                "SNOMED:has_laterality",
                "SNOMED:has_measurement_method",
                "SNOMED:has_method",
                "SNOMED:has_priority",
                "SNOMED:has_procedure_context",
                "SNOMED:has_process_duration",
                "SNOMED:has_property",
                "SNOMED:has_revision_status",
                "SNOMED:has_scale_type",
                "SNOMED:has_severity",
                "SNOMED:has_specimen",
                "SNOMED:has_state_of_matter",
                "SNOMED:has_subject_relationship_context",
                "SNOMED:has_surgical_approach",
                "SNOMED:has_technique",
                "SNOMED:has_temporal_context",
                "SNOMED:has_time_aspect",
                "SNOMED:has_units",
                "UMLS:has_structural_class",
                "UMLS:has_supported_concept_property",
                "UMLS:has_supported_concept_relationship",
                "UMLS:may_be_qualified_by",
            ],
            "slot_uri": "biolink:has_attribute",
        }},
    )
    deprecated: Optional[bool] = Field(
        default=None,
        description="A boolean flag indicating that an entity is no longer considered current or valid.",
        json_schema_extra={"linkml_meta": {
            "alias": "deprecated",
            "definition_uri": "https://w3id.org/biolink/vocab/deprecated",
            "domain_of": [
                "entity", "attribute", "named thing", "organism taxon",
                "information content entity", "dataset", "physical entity",
                "activity", "procedure", "material sample", "biological entity",
                "gene", "genome",
            ],
            "exact_mappings": ["oboInOwl:ObsoleteClass"],
            "slot_uri": "biolink:deprecated",
        }},
    )

    # --- named-thing node properties ------------------------------------------
    provided_by: Optional[List[str]] = Field(
        default=None,
        description="The value in this node property represents the knowledge provider that created or assembled the node and all of its attributes. Used internally to represent how a particular node made its way into a knowledge provider or graph.",
        json_schema_extra={"linkml_meta": {
            "alias": "provided_by",
            "definition_uri": "https://w3id.org/biolink/vocab/provided_by",
            "domain": "named thing",
            "domain_of": [
                "named thing", "attribute", "organism taxon",
                "information content entity", "dataset", "physical entity",
                "activity", "procedure", "material sample", "biological entity",
                "gene", "genome",
            ],
            "is_a": "node property",
            "slot_uri": "biolink:provided_by",
        }},
    )
    full_name: Optional[str] = Field(
        default=None,
        description="a long-form human readable name for a thing",
        json_schema_extra={"linkml_meta": {
            "alias": "full_name",
            "definition_uri": "https://w3id.org/biolink/vocab/full_name",
            "domain": "named thing",
            "domain_of": [
                "named thing", "attribute", "organism taxon",
                "information content entity", "dataset", "physical entity",
                "activity", "procedure", "material sample", "biological entity",
                "gene", "genome",
            ],
            "is_a": "node property",
            "slot_uri": "biolink:full_name",
        }},
    )
    synonym: Optional[List[str]] = Field(
        default=None,
        description="Alternate human-readable names for a thing",
        json_schema_extra={"linkml_meta": {
            "alias": "synonym",
            "aliases": ["alias"],
            "definition_uri": "https://w3id.org/biolink/vocab/synonym",
            "domain": "named thing",
            "domain_of": [
                "named thing", "attribute", "organism taxon",
                "information content entity", "dataset", "physical entity",
                "activity", "procedure", "material sample", "biological entity",
                "gene", "genome",
            ],
            "in_subset": ["translator_minimal"],
            "is_a": "node property",
            "narrow_mappings": [
                "skos:altLabel",
                "gff3:Alias",
                "AGRKB:synonyms",
                "gpi:DB_Object_Synonyms",
                "HANCESTRO:0330",
                "IAO:0000136",
                "RXNORM:has_tradename",
            ],
            "slot_uri": "biolink:synonym",
        }},
    )

    # --- taxonomy -------------------------------------------------------------
    in_taxon: Optional[List[str]] = Field(
        default=None,
        description="connects an entity to its taxonomic classification. Only certain kinds of entities can be taxonomically classified; see 'thing with taxon'",
        json_schema_extra={"linkml_meta": {
            "alias": "in_taxon",
            "aliases": [
                "instance of",
                "is organism source of gene product",
                "organism has gene",
                "gene found in organism",
                "gene product has organism source",
            ],
            "annotations": {
                "canonical_predicate": {"tag": "canonical_predicate", "value": True},
            },
            "definition_uri": "https://w3id.org/biolink/vocab/in_taxon",
            "domain": "thing with taxon",
            "domain_of": ["thing with taxon", "biological entity", "gene", "genome"],
            "exact_mappings": ["RO:0002162", "WIKIDATA_PROPERTY:P703"],
            "in_subset": ["translator_minimal"],
            "inherited": True,
            "is_a": "related to at instance level",
            "narrow_mappings": ["RO:0002160"],
            "slot_uri": "biolink:in_taxon",
        }},
    )
    in_taxon_label: Optional[str] = Field(
        default=None,
        description="The human readable scientific name for the taxon of the entity.",
        json_schema_extra={"linkml_meta": {
            "alias": "in_taxon_label",
            "annotations": {"denormalized": {"tag": "denormalized", "value": True}},
            "definition_uri": "https://w3id.org/biolink/vocab/in_taxon_label",
            "domain": "thing with taxon",
            "domain_of": ["thing with taxon", "biological entity", "gene", "genome"],
            "exact_mappings": ["WIKIDATA_PROPERTY:P225"],
            "in_subset": ["translator_minimal"],
            "is_a": "node property",
            "slot_uri": "biolink:in_taxon_label",
        }},
    )

    # --- gene-specific and genomic slots --------------------------------------
    symbol: Optional[str] = Field(
        default=None,
        description="Symbol for a particular thing",
        json_schema_extra={"linkml_meta": {
            "alias": "symbol",
            "definition_uri": "https://w3id.org/biolink/vocab/symbol",
            "domain": "named thing",
            "domain_of": ["gene"],
            "exact_mappings": ["AGRKB:symbol", "gpi:DB_Object_Symbol"],
            "is_a": "node property",
            "slot_uri": "biolink:symbol",
        }},
    )
    xref: Optional[List[str]] = Field(
        default=None,
        description="A database cross reference or alternative identifier for a NamedThing or edge between two NamedThings. This property should point to a database record or webpage that supports the existence of the edge, or gives more detail about the edge. This property can be used on a node or edge to provide multiple URIs or CURIE cross references.",
        json_schema_extra={"linkml_meta": {
            "alias": "xref",
            "aliases": ["dbxref", "Dbxref", "DbXref", "record_url", "source_record_urls"],
            "definition_uri": "https://w3id.org/biolink/vocab/xref",
            "domain": "named thing",
            "domain_of": [
                "named thing", "gene", "attribute", "organism taxon",
                "information content entity", "dataset", "physical entity",
                "activity", "procedure", "material sample", "biological entity",
                "genome",
            ],
            "in_subset": ["translator_minimal"],
            "narrow_mappings": ["gff3:Dbxref", "gpi:DB_Xrefs"],
            "slot_uri": "biolink:xref",
        }},
    )
    has_biological_sequence: Optional[str] = Field(
        default=None,
        description="connects a genomic feature to its sequence",
        json_schema_extra={"linkml_meta": {
            "alias": "has_biological_sequence",
            "definition_uri": "https://w3id.org/biolink/vocab/has_biological_sequence",
            "domain": "named thing",
            "domain_of": ["genomic entity", "gene", "genome"],
            "is_a": "node property",
            "slot_uri": "biolink:has_biological_sequence",
        }},
    )
+ + + +
class Genome(GenomicEntity, BiologicalEntity, PhysicalEssence, OntologyClass):
    """
    A genome is the sum of genetic material within a cell or virion.
    """

    # NOTE: the docstring wording is left exactly as generated; pydantic may
    # expose it as the model description in emitted JSON schema.

    # Class-level LinkML metadata: ontology mappings, subset membership and
    # the mixins this class draws on.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({
        "class_uri": "biolink:Genome",
        "close_mappings": ["dcid:GenomeAssemblyUnit"],
        "definition_uri": "https://w3id.org/biolink/vocab/Genome",
        "exact_mappings": ["SO:0001026", "SIO:000984", "WIKIDATA:Q7020"],
        "from_schema": "https://w3id.org/biolink/bican-biolink-schema",
        "in_subset": ["model_organism_database"],
        "mixins": ["genomic entity", "physical essence", "ontology class"],
    })

    # --- identity -----------------------------------------------------------
    # `id` is the only required field (Ellipsis default).
    id: str = Field(
        ...,
        description="A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI",
        json_schema_extra={"linkml_meta": {
            "alias": "id",
            "definition_uri": "https://w3id.org/biolink/vocab/id",
            "domain": "entity",
            "domain_of": [
                "ontology class", "entity", "attribute", "named thing",
                "taxonomic rank", "organism taxon", "information content entity",
                "dataset", "physical entity", "activity", "procedure",
                "material sample", "biological entity", "gene", "genome",
            ],
            "exact_mappings": ["AGRKB:primaryId", "gff3:ID", "gpi:DB_Object_ID"],
            "in_subset": ["translator_minimal"],
            "slot_uri": "biolink:id",
        }},
    )
    iri: Optional[str] = Field(
        default=None,
        description="An IRI for an entity. This is determined by the id using expansion rules.",
        json_schema_extra={"linkml_meta": {
            "alias": "iri",
            "definition_uri": "https://w3id.org/biolink/vocab/iri",
            "domain_of": [
                "attribute", "entity", "named thing", "organism taxon",
                "information content entity", "dataset", "physical entity",
                "activity", "procedure", "material sample", "biological entity",
                "gene", "genome",
            ],
            "exact_mappings": ["WIKIDATA_PROPERTY:P854"],
            "in_subset": ["translator_minimal", "samples"],
            "slot_uri": "biolink:iri",
        }},
    )
    # Type designator: values are restricted to the two Genome literals and
    # the default is the CURIE form.
    category: List[Literal["https://w3id.org/biolink/vocab/Genome", "biolink:Genome"]] = Field(
        default=["biolink:Genome"],
        description="Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}",
        json_schema_extra={"linkml_meta": {
            "alias": "category",
            "definition_uri": "https://w3id.org/biolink/vocab/category",
            "designates_type": True,
            "domain": "entity",
            "domain_of": [
                "entity", "attribute", "named thing", "organism taxon",
                "information content entity", "dataset", "physical entity",
                "activity", "procedure", "material sample", "biological entity",
                "gene", "genome",
            ],
            "in_subset": ["translator_minimal"],
            "is_a": "type",
            "is_class_field": True,
            "slot_uri": "biolink:category",
        }},
    )
    type: Optional[List[str]] = Field(
        default=None,
        json_schema_extra={"linkml_meta": {
            "alias": "type",
            "definition_uri": "https://w3id.org/biolink/vocab/type",
            "domain": "entity",
            "domain_of": [
                "entity", "attribute", "named thing", "organism taxon",
                "information content entity", "dataset", "physical entity",
                "activity", "procedure", "material sample", "biological entity",
                "gene", "genome",
            ],
            "exact_mappings": ["gff3:type", "gpi:DB_Object_Type"],
            "mappings": ["rdf:type"],
            "slot_uri": "rdf:type",
        }},
    )

    # --- descriptive entity slots (all optional) ------------------------------
    name: Optional[str] = Field(
        default=None,
        description="A human-readable name for an attribute or entity.",
        json_schema_extra={"linkml_meta": {
            "alias": "name",
            "aliases": ["label", "display name", "title"],
            "definition_uri": "https://w3id.org/biolink/vocab/name",
            "domain": "entity",
            "domain_of": [
                "attribute", "entity", "macromolecular machine mixin",
                "named thing", "organism taxon", "information content entity",
                "dataset", "physical entity", "activity", "procedure",
                "material sample", "biological entity", "gene or gene product",
                "gene", "genome",
            ],
            "exact_mappings": ["gff3:Name", "gpi:DB_Object_Name"],
            "in_subset": ["translator_minimal", "samples"],
            "mappings": ["rdfs:label"],
            "narrow_mappings": ["dct:title", "WIKIDATA_PROPERTY:P1476"],
            "slot_uri": "rdfs:label",
        }},
    )
    description: Optional[str] = Field(
        default=None,
        description="a human-readable description of an entity",
        json_schema_extra={"linkml_meta": {
            "alias": "description",
            "aliases": ["definition"],
            "definition_uri": "https://w3id.org/biolink/vocab/description",
            "domain_of": [
                "entity", "attribute", "named thing", "organism taxon",
                "information content entity", "dataset", "physical entity",
                "activity", "procedure", "material sample", "biological entity",
                "gene", "genome",
            ],
            "exact_mappings": ["IAO:0000115", "skos:definitions"],
            "in_subset": ["translator_minimal"],
            "mappings": ["dct:description"],
            "narrow_mappings": ["gff3:Description"],
            "slot_uri": "dct:description",
        }},
    )
    has_attribute: Optional[List[str]] = Field(
        default=None,
        description="connects any entity to an attribute",
        json_schema_extra={"linkml_meta": {
            "alias": "has_attribute",
            "close_mappings": ["OBI:0001927"],
            "definition_uri": "https://w3id.org/biolink/vocab/has_attribute",
            "domain": "entity",
            "domain_of": [
                "entity", "attribute", "named thing", "organism taxon",
                "information content entity", "dataset", "physical entity",
                "activity", "procedure", "material sample", "biological entity",
                "gene", "genome",
            ],
            "exact_mappings": ["SIO:000008"],
            "in_subset": ["samples"],
            "narrow_mappings": [
                "OBAN:association_has_subject_property",
                "OBAN:association_has_object_property",
                "CPT:has_possibly_included_panel_element",
                "DRUGBANK:category",
                "EFO:is_executed_in",
                "HANCESTRO:0301",
                "LOINC:has_action_guidance",
                "LOINC:has_adjustment",
                "LOINC:has_aggregation_view",
                "LOINC:has_approach_guidance",
                "LOINC:has_divisor",
                "LOINC:has_exam",
                "LOINC:has_method",
                "LOINC:has_modality_subtype",
                "LOINC:has_object_guidance",
                "LOINC:has_scale",
                "LOINC:has_suffix",
                "LOINC:has_time_aspect",
                "LOINC:has_time_modifier",
                "LOINC:has_timing_of",
                "NCIT:R88",
                "NCIT:eo_disease_has_property_or_attribute",
                "NCIT:has_data_element",
                "NCIT:has_pharmaceutical_administration_method",
                "NCIT:has_pharmaceutical_basic_dose_form",
                "NCIT:has_pharmaceutical_intended_site",
                "NCIT:has_pharmaceutical_release_characteristics",
                "NCIT:has_pharmaceutical_state_of_matter",
                "NCIT:has_pharmaceutical_transformation",
                "NCIT:is_qualified_by",
                "NCIT:qualifier_applies_to",
                "NCIT:role_has_domain",
                "NCIT:role_has_range",
                "INO:0000154",
                "HANCESTRO:0308",
                "OMIM:has_inheritance_type",
                "orphanet:C016",
                "orphanet:C017",
                "RO:0000053",
                "RO:0000086",
                "RO:0000087",
                "SNOMED:has_access",
                "SNOMED:has_clinical_course",
                "SNOMED:has_count_of_base_of_active_ingredient",
                "SNOMED:has_dose_form_administration_method",
                "SNOMED:has_dose_form_release_characteristic",
                "SNOMED:has_dose_form_transformation",
                "SNOMED:has_finding_context",
                "SNOMED:has_finding_informer",
                "SNOMED:has_inherent_attribute",
                "SNOMED:has_intent",
                "SNOMED:has_interpretation",
                "SNOMED:has_laterality",
                "SNOMED:has_measurement_method",
                "SNOMED:has_method",
                "SNOMED:has_priority",
                "SNOMED:has_procedure_context",
                "SNOMED:has_process_duration",
                "SNOMED:has_property",
                "SNOMED:has_revision_status",
                "SNOMED:has_scale_type",
                "SNOMED:has_severity",
                "SNOMED:has_specimen",
                "SNOMED:has_state_of_matter",
                "SNOMED:has_subject_relationship_context",
                "SNOMED:has_surgical_approach",
                "SNOMED:has_technique",
                "SNOMED:has_temporal_context",
                "SNOMED:has_time_aspect",
                "SNOMED:has_units",
                "UMLS:has_structural_class",
                "UMLS:has_supported_concept_property",
                "UMLS:has_supported_concept_relationship",
                "UMLS:may_be_qualified_by",
            ],
            "slot_uri": "biolink:has_attribute",
        }},
    )
    deprecated: Optional[bool] = Field(
        default=None,
        description="A boolean flag indicating that an entity is no longer considered current or valid.",
        json_schema_extra={"linkml_meta": {
            "alias": "deprecated",
            "definition_uri": "https://w3id.org/biolink/vocab/deprecated",
            "domain_of": [
                "entity", "attribute", "named thing", "organism taxon",
                "information content entity", "dataset", "physical entity",
                "activity", "procedure", "material sample", "biological entity",
                "gene", "genome",
            ],
            "exact_mappings": ["oboInOwl:ObsoleteClass"],
            "slot_uri": "biolink:deprecated",
        }},
    )

    # --- named-thing node properties ------------------------------------------
    provided_by: Optional[List[str]] = Field(
        default=None,
        description="The value in this node property represents the knowledge provider that created or assembled the node and all of its attributes. Used internally to represent how a particular node made its way into a knowledge provider or graph.",
        json_schema_extra={"linkml_meta": {
            "alias": "provided_by",
            "definition_uri": "https://w3id.org/biolink/vocab/provided_by",
            "domain": "named thing",
            "domain_of": [
                "named thing", "attribute", "organism taxon",
                "information content entity", "dataset", "physical entity",
                "activity", "procedure", "material sample", "biological entity",
                "gene", "genome",
            ],
            "is_a": "node property",
            "slot_uri": "biolink:provided_by",
        }},
    )
    xref: Optional[List[str]] = Field(
        default=None,
        description="A database cross reference or alternative identifier for a NamedThing or edge between two NamedThings. This property should point to a database record or webpage that supports the existence of the edge, or gives more detail about the edge. This property can be used on a node or edge to provide multiple URIs or CURIE cross references.",
        json_schema_extra={"linkml_meta": {
            "alias": "xref",
            "aliases": ["dbxref", "Dbxref", "DbXref", "record_url", "source_record_urls"],
            "definition_uri": "https://w3id.org/biolink/vocab/xref",
            "domain": "named thing",
            "domain_of": [
                "named thing", "gene", "attribute", "organism taxon",
                "information content entity", "dataset", "physical entity",
                "activity", "procedure", "material sample", "biological entity",
                "genome",
            ],
            "in_subset": ["translator_minimal"],
            "narrow_mappings": ["gff3:Dbxref", "gpi:DB_Xrefs"],
            "slot_uri": "biolink:xref",
        }},
    )
    full_name: Optional[str] = Field(
        default=None,
        description="a long-form human readable name for a thing",
        json_schema_extra={"linkml_meta": {
            "alias": "full_name",
            "definition_uri": "https://w3id.org/biolink/vocab/full_name",
            "domain": "named thing",
            "domain_of": [
                "named thing", "attribute", "organism taxon",
                "information content entity", "dataset", "physical entity",
                "activity", "procedure", "material sample", "biological entity",
                "gene", "genome",
            ],
            "is_a": "node property",
            "slot_uri": "biolink:full_name",
        }},
    )
    synonym: Optional[List[str]] = Field(
        default=None,
        description="Alternate human-readable names for a thing",
        json_schema_extra={"linkml_meta": {
            "alias": "synonym",
            "aliases": ["alias"],
            "definition_uri": "https://w3id.org/biolink/vocab/synonym",
            "domain": "named thing",
            "domain_of": [
                "named thing", "attribute", "organism taxon",
                "information content entity", "dataset", "physical entity",
                "activity", "procedure", "material sample", "biological entity",
                "gene", "genome",
            ],
            "in_subset": ["translator_minimal"],
            "is_a": "node property",
            "narrow_mappings": [
                "skos:altLabel",
                "gff3:Alias",
                "AGRKB:synonyms",
                "gpi:DB_Object_Synonyms",
                "HANCESTRO:0330",
                "IAO:0000136",
                "RXNORM:has_tradename",
            ],
            "slot_uri": "biolink:synonym",
        }},
    )

    # --- taxonomy -------------------------------------------------------------
    in_taxon: Optional[List[str]] = Field(
        default=None,
        description="connects an entity to its taxonomic classification. Only certain kinds of entities can be taxonomically classified; see 'thing with taxon'",
        json_schema_extra={"linkml_meta": {
            "alias": "in_taxon",
            "aliases": [
                "instance of",
                "is organism source of gene product",
                "organism has gene",
                "gene found in organism",
                "gene product has organism source",
            ],
            "annotations": {
                "canonical_predicate": {"tag": "canonical_predicate", "value": True},
            },
            "definition_uri": "https://w3id.org/biolink/vocab/in_taxon",
            "domain": "thing with taxon",
            "domain_of": ["thing with taxon", "biological entity", "gene", "genome"],
            "exact_mappings": ["RO:0002162", "WIKIDATA_PROPERTY:P703"],
            "in_subset": ["translator_minimal"],
            "inherited": True,
            "is_a": "related to at instance level",
            "narrow_mappings": ["RO:0002160"],
            "slot_uri": "biolink:in_taxon",
        }},
    )
    in_taxon_label: Optional[str] = Field(
        default=None,
        description="The human readable scientific name for the taxon of the entity.",
        json_schema_extra={"linkml_meta": {
            "alias": "in_taxon_label",
            "annotations": {"denormalized": {"tag": "denormalized", "value": True}},
            "definition_uri": "https://w3id.org/biolink/vocab/in_taxon_label",
            "domain": "thing with taxon",
            "domain_of": ["thing with taxon", "biological entity", "gene", "genome"],
            "exact_mappings": ["WIKIDATA_PROPERTY:P225"],
            "in_subset": ["translator_minimal"],
            "is_a": "node property",
            "slot_uri": "biolink:in_taxon_label",
        }},
    )

    # --- genomic slot -----------------------------------------------------------
    has_biological_sequence: Optional[str] = Field(
        default=None,
        description="connects a genomic feature to its sequence",
        json_schema_extra={"linkml_meta": {
            "alias": "has_biological_sequence",
            "definition_uri": "https://w3id.org/biolink/vocab/has_biological_sequence",
            "domain": "named thing",
            "domain_of": ["genomic entity", "gene", "genome"],
            "is_a": "node property",
            "slot_uri": "biolink:has_biological_sequence",
        }},
    )
+ + + +
# Checksum: a declarative model class deriving from Entity. It declares two
# slots of its own (checksum_algorithm, value) and then re-declares the slots
# whose 'domain_of' metadata lists 'entity' (id, iri, category, type, name,
# description, has_attribute, deprecated), each with its full "linkml_meta"
# dictionary attached through json_schema_extra.
#
# NOTE(review): this looks like generator output for the schema named in
# linkml_meta['from_schema'] -- confirm before hand-editing; changes probably
# belong in the schema rather than here.
# NOTE(review): the class docstring is left verbatim on purpose; presumably
# the model framework exposes it as the schema description at runtime.
class Checksum(Entity):
    """
    Checksum values associated with digital entities.
    """
    # Class-level (ClassVar) metadata: records only the schema this class comes from.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'https://identifiers.org/brain-bican/bican-core-schema'})

    # --- Slots whose 'domain_of' is ['checksum'] only -------------------------
    # Optional; values are constrained to DigestType (declared elsewhere in the file).
    checksum_algorithm: Optional[DigestType] = Field(None, description="""The type of cryptographic hash function used to calculate the checksum value.""", json_schema_extra = { "linkml_meta": {'alias': 'checksum_algorithm', 'domain_of': ['checksum']} })
    # Optional free-form string holding the digest itself.
    # NOTE(review): "cryotographic" in the description below is a typo for
    # "cryptographic". It is a runtime string, so it is left untouched here;
    # fix it at the source of the description.
    value: Optional[str] = Field(None, description="""The checksum value obtained from a specific cryotographic hash function.""", json_schema_extra = { "linkml_meta": {'alias': 'value', 'domain_of': ['checksum']} })

    # --- Slots that list 'entity' in their 'domain_of' -------------------------
    # The only field declared with `...` instead of a default value, i.e. no
    # default is supplied for it.
    id: str = Field(..., description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI""", json_schema_extra = { "linkml_meta": {'alias': 'id',
         'definition_uri': 'https://w3id.org/biolink/vocab/id',
         'domain': 'entity',
         'domain_of': ['ontology class',
                       'entity',
                       'attribute',
                       'named thing',
                       'taxonomic rank',
                       'organism taxon',
                       'information content entity',
                       'dataset',
                       'physical entity',
                       'activity',
                       'procedure',
                       'material sample',
                       'biological entity',
                       'gene',
                       'genome'],
         'exact_mappings': ['AGRKB:primaryId', 'gff3:ID', 'gpi:DB_Object_ID'],
         'in_subset': ['translator_minimal'],
         'slot_uri': 'biolink:id'} })
    iri: Optional[str] = Field(None, description="""An IRI for an entity. This is determined by the id using expansion rules.""", json_schema_extra = { "linkml_meta": {'alias': 'iri',
         'definition_uri': 'https://w3id.org/biolink/vocab/iri',
         'domain_of': ['attribute',
                       'entity',
                       'named thing',
                       'organism taxon',
                       'information content entity',
                       'dataset',
                       'physical entity',
                       'activity',
                       'procedure',
                       'material sample',
                       'biological entity',
                       'gene',
                       'genome'],
         'exact_mappings': ['WIKIDATA_PROPERTY:P854'],
         'in_subset': ['translator_minimal', 'samples'],
         'slot_uri': 'biolink:iri'} })
    # Type designator ('designates_type': True). The Literal restricts list
    # items to the full-URI and CURIE spellings of this class; the default is
    # the CURIE form.
    category: List[Literal["https://identifiers.org/brain-bican/vocab/Checksum","bican:Checksum"]] = Field(["bican:Checksum"], description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}""", json_schema_extra = { "linkml_meta": {'alias': 'category',
         'definition_uri': 'https://w3id.org/biolink/vocab/category',
         'designates_type': True,
         'domain': 'entity',
         'domain_of': ['entity',
                       'attribute',
                       'named thing',
                       'organism taxon',
                       'information content entity',
                       'dataset',
                       'physical entity',
                       'activity',
                       'procedure',
                       'material sample',
                       'biological entity',
                       'gene',
                       'genome'],
         'in_subset': ['translator_minimal'],
         'is_a': 'type',
         'is_class_field': True,
         'slot_uri': 'biolink:category'} })
    # No description is supplied for this slot; it maps to rdf:type.
    type: Optional[List[str]] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'type',
         'definition_uri': 'https://w3id.org/biolink/vocab/type',
         'domain': 'entity',
         'domain_of': ['entity',
                       'attribute',
                       'named thing',
                       'organism taxon',
                       'information content entity',
                       'dataset',
                       'physical entity',
                       'activity',
                       'procedure',
                       'material sample',
                       'biological entity',
                       'gene',
                       'genome'],
         'exact_mappings': ['gff3:type', 'gpi:DB_Object_Type'],
         'mappings': ['rdf:type'],
         'slot_uri': 'rdf:type'} })
    name: Optional[str] = Field(None, description="""A human-readable name for an attribute or entity.""", json_schema_extra = { "linkml_meta": {'alias': 'name',
         'aliases': ['label', 'display name', 'title'],
         'definition_uri': 'https://w3id.org/biolink/vocab/name',
         'domain': 'entity',
         'domain_of': ['attribute',
                       'entity',
                       'macromolecular machine mixin',
                       'named thing',
                       'organism taxon',
                       'information content entity',
                       'dataset',
                       'physical entity',
                       'activity',
                       'procedure',
                       'material sample',
                       'biological entity',
                       'gene or gene product',
                       'gene',
                       'genome'],
         'exact_mappings': ['gff3:Name', 'gpi:DB_Object_Name'],
         'in_subset': ['translator_minimal', 'samples'],
         'mappings': ['rdfs:label'],
         'narrow_mappings': ['dct:title', 'WIKIDATA_PROPERTY:P1476'],
         'slot_uri': 'rdfs:label'} })
    description: Optional[str] = Field(None, description="""a human-readable description of an entity""", json_schema_extra = { "linkml_meta": {'alias': 'description',
         'aliases': ['definition'],
         'definition_uri': 'https://w3id.org/biolink/vocab/description',
         'domain_of': ['entity',
                       'attribute',
                       'named thing',
                       'organism taxon',
                       'information content entity',
                       'dataset',
                       'physical entity',
                       'activity',
                       'procedure',
                       'material sample',
                       'biological entity',
                       'gene',
                       'genome'],
         'exact_mappings': ['IAO:0000115', 'skos:definitions'],
         'in_subset': ['translator_minimal'],
         'mappings': ['dct:description'],
         'narrow_mappings': ['gff3:Description'],
         'slot_uri': 'dct:description'} })
    # Attributes are referenced as a list of strings here, not as nested objects.
    has_attribute: Optional[List[str]] = Field(None, description="""connects any entity to an attribute""", json_schema_extra = { "linkml_meta": {'alias': 'has_attribute',
         'close_mappings': ['OBI:0001927'],
         'definition_uri': 'https://w3id.org/biolink/vocab/has_attribute',
         'domain': 'entity',
         'domain_of': ['entity',
                       'attribute',
                       'named thing',
                       'organism taxon',
                       'information content entity',
                       'dataset',
                       'physical entity',
                       'activity',
                       'procedure',
                       'material sample',
                       'biological entity',
                       'gene',
                       'genome'],
         'exact_mappings': ['SIO:000008'],
         'in_subset': ['samples'],
         'narrow_mappings': ['OBAN:association_has_subject_property',
                             'OBAN:association_has_object_property',
                             'CPT:has_possibly_included_panel_element',
                             'DRUGBANK:category',
                             'EFO:is_executed_in',
                             'HANCESTRO:0301',
                             'LOINC:has_action_guidance',
                             'LOINC:has_adjustment',
                             'LOINC:has_aggregation_view',
                             'LOINC:has_approach_guidance',
                             'LOINC:has_divisor',
                             'LOINC:has_exam',
                             'LOINC:has_method',
                             'LOINC:has_modality_subtype',
                             'LOINC:has_object_guidance',
                             'LOINC:has_scale',
                             'LOINC:has_suffix',
                             'LOINC:has_time_aspect',
                             'LOINC:has_time_modifier',
                             'LOINC:has_timing_of',
                             'NCIT:R88',
                             'NCIT:eo_disease_has_property_or_attribute',
                             'NCIT:has_data_element',
                             'NCIT:has_pharmaceutical_administration_method',
                             'NCIT:has_pharmaceutical_basic_dose_form',
                             'NCIT:has_pharmaceutical_intended_site',
                             'NCIT:has_pharmaceutical_release_characteristics',
                             'NCIT:has_pharmaceutical_state_of_matter',
                             'NCIT:has_pharmaceutical_transformation',
                             'NCIT:is_qualified_by',
                             'NCIT:qualifier_applies_to',
                             'NCIT:role_has_domain',
                             'NCIT:role_has_range',
                             'INO:0000154',
                             'HANCESTRO:0308',
                             'OMIM:has_inheritance_type',
                             'orphanet:C016',
                             'orphanet:C017',
                             'RO:0000053',
                             'RO:0000086',
                             'RO:0000087',
                             'SNOMED:has_access',
                             'SNOMED:has_clinical_course',
                             'SNOMED:has_count_of_base_of_active_ingredient',
                             'SNOMED:has_dose_form_administration_method',
                             'SNOMED:has_dose_form_release_characteristic',
                             'SNOMED:has_dose_form_transformation',
                             'SNOMED:has_finding_context',
                             'SNOMED:has_finding_informer',
                             'SNOMED:has_inherent_attribute',
                             'SNOMED:has_intent',
                             'SNOMED:has_interpretation',
                             'SNOMED:has_laterality',
                             'SNOMED:has_measurement_method',
                             'SNOMED:has_method',
                             'SNOMED:has_priority',
                             'SNOMED:has_procedure_context',
                             'SNOMED:has_process_duration',
                             'SNOMED:has_property',
                             'SNOMED:has_revision_status',
                             'SNOMED:has_scale_type',
                             'SNOMED:has_severity',
                             'SNOMED:has_specimen',
                             'SNOMED:has_state_of_matter',
                             'SNOMED:has_subject_relationship_context',
                             'SNOMED:has_surgical_approach',
                             'SNOMED:has_technique',
                             'SNOMED:has_temporal_context',
                             'SNOMED:has_time_aspect',
                             'SNOMED:has_units',
                             'UMLS:has_structural_class',
                             'UMLS:has_supported_concept_property',
                             'UMLS:has_supported_concept_relationship',
                             'UMLS:may_be_qualified_by'],
         'slot_uri': 'biolink:has_attribute'} })
    deprecated: Optional[bool] = Field(None, description="""A boolean flag indicating that an entity is no longer considered current or valid.""", json_schema_extra = { "linkml_meta": {'alias': 'deprecated',
         'definition_uri': 'https://w3id.org/biolink/vocab/deprecated',
         'domain_of': ['entity',
                       'attribute',
                       'named thing',
                       'organism taxon',
                       'information content entity',
                       'dataset',
                       'physical entity',
                       'activity',
                       'procedure',
                       'material sample',
                       'biological entity',
                       'gene',
                       'genome'],
         'exact_mappings': ['oboInOwl:ObsoleteClass'],
         'slot_uri': 'biolink:deprecated'} })
+ + + +
# GeneAnnotation: a declarative model class deriving from Gene. It declares
# three slots whose 'domain_of' is ['gene annotation'] (molecular_type,
# source_id, referenced_in), re-declares the slots shared with 'gene' in their
# 'domain_of' metadata, and defines an explicit __hash__.
#
# NOTE(review): this looks like generator output for the schema named in
# linkml_meta['from_schema'] -- confirm before hand-editing the field
# declarations; changes to them probably belong in the schema.
# NOTE(review): the class docstring is left verbatim on purpose; presumably
# the model framework exposes it as the schema description at runtime.
class GeneAnnotation(Gene):
    """
    An annotation describing the location, boundaries, and functions of individual genes within a genome annotation.
    """

    def __hash__(self) -> int:
        """Return a hash derived from id, name, molecular_type and description.

        Only these four fields take part; every other field is ignored by the
        hash. The tuple is written as a literal so that no intermediate list
        is allocated on each call; the resulting value is the same as hashing
        ``tuple([...])`` of the same elements.
        """
        # presumably defined so instances can live in sets / act as dict keys
        # -- TODO confirm against callers
        return hash((self.id, self.name, self.molecular_type, self.description))

    # Class-level (ClassVar) metadata: source schema plus the accepted id prefixes.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'https://identifiers.org/brain-bican/genome-annotation-schema',
         'id_prefixes': ['ENSEMBL', 'MGI', 'NCBIGene']})

    # --- Slots whose 'domain_of' is ['gene annotation'] only -------------------
    # Either a BioType member or any plain string ('any_of' in the metadata).
    molecular_type: Optional[Union[BioType, str]] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'molecular_type',
         'any_of': [{'range': 'BioType'}, {'range': 'string'}],
         'domain_of': ['gene annotation']} })
    source_id: Optional[str] = Field(None, description="""The authority specific identifier.""", json_schema_extra = { "linkml_meta": {'alias': 'source_id',
         'domain_of': ['gene annotation'],
         'slot_uri': 'schema:identifier'} })
    # Declared with `...` (no default supplied); accepts either a
    # GenomeAnnotation object or a string.
    referenced_in: Union[GenomeAnnotation, str] = Field(..., description="""The genome annotation that this gene annotation was referenced from.""", json_schema_extra = { "linkml_meta": {'alias': 'referenced_in',
         'any_of': [{'range': 'genome annotation'}, {'range': 'string'}],
         'domain_of': ['gene annotation']} })

    # --- Slots that list 'gene' in their 'domain_of' ---------------------------
    # Declared with `...` instead of a default value, i.e. no default is supplied.
    id: str = Field(..., description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI""", json_schema_extra = { "linkml_meta": {'alias': 'id',
         'definition_uri': 'https://w3id.org/biolink/vocab/id',
         'domain': 'entity',
         'domain_of': ['ontology class',
                       'entity',
                       'attribute',
                       'named thing',
                       'taxonomic rank',
                       'organism taxon',
                       'information content entity',
                       'dataset',
                       'physical entity',
                       'activity',
                       'procedure',
                       'material sample',
                       'biological entity',
                       'gene',
                       'genome'],
         'exact_mappings': ['AGRKB:primaryId', 'gff3:ID', 'gpi:DB_Object_ID'],
         'in_subset': ['translator_minimal'],
         'slot_uri': 'biolink:id'} })
    iri: Optional[str] = Field(None, description="""An IRI for an entity. This is determined by the id using expansion rules.""", json_schema_extra = { "linkml_meta": {'alias': 'iri',
         'definition_uri': 'https://w3id.org/biolink/vocab/iri',
         'domain_of': ['attribute',
                       'entity',
                       'named thing',
                       'organism taxon',
                       'information content entity',
                       'dataset',
                       'physical entity',
                       'activity',
                       'procedure',
                       'material sample',
                       'biological entity',
                       'gene',
                       'genome'],
         'exact_mappings': ['WIKIDATA_PROPERTY:P854'],
         'in_subset': ['translator_minimal', 'samples'],
         'slot_uri': 'biolink:iri'} })
    # Type designator ('designates_type': True). The Literal restricts list
    # items to the full-URI and CURIE spellings of this class; the default is
    # the CURIE form.
    category: List[Literal["https://identifiers.org/brain-bican/vocab/GeneAnnotation","bican:GeneAnnotation"]] = Field(["bican:GeneAnnotation"], description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}""", json_schema_extra = { "linkml_meta": {'alias': 'category',
         'definition_uri': 'https://w3id.org/biolink/vocab/category',
         'designates_type': True,
         'domain': 'entity',
         'domain_of': ['entity',
                       'attribute',
                       'named thing',
                       'organism taxon',
                       'information content entity',
                       'dataset',
                       'physical entity',
                       'activity',
                       'procedure',
                       'material sample',
                       'biological entity',
                       'gene',
                       'genome'],
         'in_subset': ['translator_minimal'],
         'is_a': 'type',
         'is_class_field': True,
         'slot_uri': 'biolink:category'} })
    # No description is supplied for this slot; it maps to rdf:type.
    type: Optional[List[str]] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'type',
         'definition_uri': 'https://w3id.org/biolink/vocab/type',
         'domain': 'entity',
         'domain_of': ['entity',
                       'attribute',
                       'named thing',
                       'organism taxon',
                       'information content entity',
                       'dataset',
                       'physical entity',
                       'activity',
                       'procedure',
                       'material sample',
                       'biological entity',
                       'gene',
                       'genome'],
         'exact_mappings': ['gff3:type', 'gpi:DB_Object_Type'],
         'mappings': ['rdf:type'],
         'slot_uri': 'rdf:type'} })
    name: Optional[str] = Field(None, description="""A human-readable name for an attribute or entity.""", json_schema_extra = { "linkml_meta": {'alias': 'name',
         'aliases': ['label', 'display name', 'title'],
         'definition_uri': 'https://w3id.org/biolink/vocab/name',
         'domain': 'entity',
         'domain_of': ['attribute',
                       'entity',
                       'macromolecular machine mixin',
                       'named thing',
                       'organism taxon',
                       'information content entity',
                       'dataset',
                       'physical entity',
                       'activity',
                       'procedure',
                       'material sample',
                       'biological entity',
                       'gene or gene product',
                       'gene',
                       'genome'],
         'exact_mappings': ['gff3:Name', 'gpi:DB_Object_Name'],
         'in_subset': ['translator_minimal', 'samples'],
         'mappings': ['rdfs:label'],
         'narrow_mappings': ['dct:title', 'WIKIDATA_PROPERTY:P1476'],
         'slot_uri': 'rdfs:label'} })
    description: Optional[str] = Field(None, description="""a human-readable description of an entity""", json_schema_extra = { "linkml_meta": {'alias': 'description',
         'aliases': ['definition'],
         'definition_uri': 'https://w3id.org/biolink/vocab/description',
         'domain_of': ['entity',
                       'attribute',
                       'named thing',
                       'organism taxon',
                       'information content entity',
                       'dataset',
                       'physical entity',
                       'activity',
                       'procedure',
                       'material sample',
                       'biological entity',
                       'gene',
                       'genome'],
         'exact_mappings': ['IAO:0000115', 'skos:definitions'],
         'in_subset': ['translator_minimal'],
         'mappings': ['dct:description'],
         'narrow_mappings': ['gff3:Description'],
         'slot_uri': 'dct:description'} })
    # Attributes are referenced as a list of strings here, not as nested objects.
    has_attribute: Optional[List[str]] = Field(None, description="""connects any entity to an attribute""", json_schema_extra = { "linkml_meta": {'alias': 'has_attribute',
         'close_mappings': ['OBI:0001927'],
         'definition_uri': 'https://w3id.org/biolink/vocab/has_attribute',
         'domain': 'entity',
         'domain_of': ['entity',
                       'attribute',
                       'named thing',
                       'organism taxon',
                       'information content entity',
                       'dataset',
                       'physical entity',
                       'activity',
                       'procedure',
                       'material sample',
                       'biological entity',
                       'gene',
                       'genome'],
         'exact_mappings': ['SIO:000008'],
         'in_subset': ['samples'],
         'narrow_mappings': ['OBAN:association_has_subject_property',
                             'OBAN:association_has_object_property',
                             'CPT:has_possibly_included_panel_element',
                             'DRUGBANK:category',
                             'EFO:is_executed_in',
                             'HANCESTRO:0301',
                             'LOINC:has_action_guidance',
                             'LOINC:has_adjustment',
                             'LOINC:has_aggregation_view',
                             'LOINC:has_approach_guidance',
                             'LOINC:has_divisor',
                             'LOINC:has_exam',
                             'LOINC:has_method',
                             'LOINC:has_modality_subtype',
                             'LOINC:has_object_guidance',
                             'LOINC:has_scale',
                             'LOINC:has_suffix',
                             'LOINC:has_time_aspect',
                             'LOINC:has_time_modifier',
                             'LOINC:has_timing_of',
                             'NCIT:R88',
                             'NCIT:eo_disease_has_property_or_attribute',
                             'NCIT:has_data_element',
                             'NCIT:has_pharmaceutical_administration_method',
                             'NCIT:has_pharmaceutical_basic_dose_form',
                             'NCIT:has_pharmaceutical_intended_site',
                             'NCIT:has_pharmaceutical_release_characteristics',
                             'NCIT:has_pharmaceutical_state_of_matter',
                             'NCIT:has_pharmaceutical_transformation',
                             'NCIT:is_qualified_by',
                             'NCIT:qualifier_applies_to',
                             'NCIT:role_has_domain',
                             'NCIT:role_has_range',
                             'INO:0000154',
                             'HANCESTRO:0308',
                             'OMIM:has_inheritance_type',
                             'orphanet:C016',
                             'orphanet:C017',
                             'RO:0000053',
                             'RO:0000086',
                             'RO:0000087',
                             'SNOMED:has_access',
                             'SNOMED:has_clinical_course',
                             'SNOMED:has_count_of_base_of_active_ingredient',
                             'SNOMED:has_dose_form_administration_method',
                             'SNOMED:has_dose_form_release_characteristic',
                             'SNOMED:has_dose_form_transformation',
                             'SNOMED:has_finding_context',
                             'SNOMED:has_finding_informer',
                             'SNOMED:has_inherent_attribute',
                             'SNOMED:has_intent',
                             'SNOMED:has_interpretation',
                             'SNOMED:has_laterality',
                             'SNOMED:has_measurement_method',
                             'SNOMED:has_method',
                             'SNOMED:has_priority',
                             'SNOMED:has_procedure_context',
                             'SNOMED:has_process_duration',
                             'SNOMED:has_property',
                             'SNOMED:has_revision_status',
                             'SNOMED:has_scale_type',
                             'SNOMED:has_severity',
                             'SNOMED:has_specimen',
                             'SNOMED:has_state_of_matter',
                             'SNOMED:has_subject_relationship_context',
                             'SNOMED:has_surgical_approach',
                             'SNOMED:has_technique',
                             'SNOMED:has_temporal_context',
                             'SNOMED:has_time_aspect',
                             'SNOMED:has_units',
                             'UMLS:has_structural_class',
                             'UMLS:has_supported_concept_property',
                             'UMLS:has_supported_concept_relationship',
                             'UMLS:may_be_qualified_by'],
         'slot_uri': 'biolink:has_attribute'} })
    deprecated: Optional[bool] = Field(None, description="""A boolean flag indicating that an entity is no longer considered current or valid.""", json_schema_extra = { "linkml_meta": {'alias': 'deprecated',
         'definition_uri': 'https://w3id.org/biolink/vocab/deprecated',
         'domain_of': ['entity',
                       'attribute',
                       'named thing',
                       'organism taxon',
                       'information content entity',
                       'dataset',
                       'physical entity',
                       'activity',
                       'procedure',
                       'material sample',
                       'biological entity',
                       'gene',
                       'genome'],
         'exact_mappings': ['oboInOwl:ObsoleteClass'],
         'slot_uri': 'biolink:deprecated'} })
    provided_by: Optional[List[str]] = Field(None, description="""The value in this node property represents the knowledge provider that created or assembled the node and all of its attributes. Used internally to represent how a particular node made its way into a knowledge provider or graph.""", json_schema_extra = { "linkml_meta": {'alias': 'provided_by',
         'definition_uri': 'https://w3id.org/biolink/vocab/provided_by',
         'domain': 'named thing',
         'domain_of': ['named thing',
                       'attribute',
                       'organism taxon',
                       'information content entity',
                       'dataset',
                       'physical entity',
                       'activity',
                       'procedure',
                       'material sample',
                       'biological entity',
                       'gene',
                       'genome'],
         'is_a': 'node property',
         'slot_uri': 'biolink:provided_by'} })
    full_name: Optional[str] = Field(None, description="""a long-form human readable name for a thing""", json_schema_extra = { "linkml_meta": {'alias': 'full_name',
         'definition_uri': 'https://w3id.org/biolink/vocab/full_name',
         'domain': 'named thing',
         'domain_of': ['named thing',
                       'attribute',
                       'organism taxon',
                       'information content entity',
                       'dataset',
                       'physical entity',
                       'activity',
                       'procedure',
                       'material sample',
                       'biological entity',
                       'gene',
                       'genome'],
         'is_a': 'node property',
         'slot_uri': 'biolink:full_name'} })
    synonym: Optional[List[str]] = Field(None, description="""Alternate human-readable names for a thing""", json_schema_extra = { "linkml_meta": {'alias': 'synonym',
         'aliases': ['alias'],
         'definition_uri': 'https://w3id.org/biolink/vocab/synonym',
         'domain': 'named thing',
         'domain_of': ['named thing',
                       'attribute',
                       'organism taxon',
                       'information content entity',
                       'dataset',
                       'physical entity',
                       'activity',
                       'procedure',
                       'material sample',
                       'biological entity',
                       'gene',
                       'genome'],
         'in_subset': ['translator_minimal'],
         'is_a': 'node property',
         'narrow_mappings': ['skos:altLabel',
                             'gff3:Alias',
                             'AGRKB:synonyms',
                             'gpi:DB_Object_Synonyms',
                             'HANCESTRO:0330',
                             'IAO:0000136',
                             'RXNORM:has_tradename'],
         'slot_uri': 'biolink:synonym'} })
    in_taxon: Optional[List[str]] = Field(None, description="""connects an entity to its taxonomic classification. Only certain kinds of entities can be taxonomically classified; see 'thing with taxon'""", json_schema_extra = { "linkml_meta": {'alias': 'in_taxon',
         'aliases': ['instance of',
                     'is organism source of gene product',
                     'organism has gene',
                     'gene found in organism',
                     'gene product has organism source'],
         'annotations': {'canonical_predicate': {'tag': 'canonical_predicate',
                                                 'value': True}},
         'definition_uri': 'https://w3id.org/biolink/vocab/in_taxon',
         'domain': 'thing with taxon',
         'domain_of': ['thing with taxon', 'biological entity', 'gene', 'genome'],
         'exact_mappings': ['RO:0002162', 'WIKIDATA_PROPERTY:P703'],
         'in_subset': ['translator_minimal'],
         'inherited': True,
         'is_a': 'related to at instance level',
         'narrow_mappings': ['RO:0002160'],
         'slot_uri': 'biolink:in_taxon'} })
    in_taxon_label: Optional[str] = Field(None, description="""The human readable scientific name for the taxon of the entity.""", json_schema_extra = { "linkml_meta": {'alias': 'in_taxon_label',
         'annotations': {'denormalized': {'tag': 'denormalized', 'value': True}},
         'definition_uri': 'https://w3id.org/biolink/vocab/in_taxon_label',
         'domain': 'thing with taxon',
         'domain_of': ['thing with taxon', 'biological entity', 'gene', 'genome'],
         'exact_mappings': ['WIKIDATA_PROPERTY:P225'],
         'in_subset': ['translator_minimal'],
         'is_a': 'node property',
         'slot_uri': 'biolink:in_taxon_label'} })
    # The only re-declared slot here whose 'domain_of' is ['gene'] alone.
    symbol: Optional[str] = Field(None, description="""Symbol for a particular thing""", json_schema_extra = { "linkml_meta": {'alias': 'symbol',
         'definition_uri': 'https://w3id.org/biolink/vocab/symbol',
         'domain': 'named thing',
         'domain_of': ['gene'],
         'exact_mappings': ['AGRKB:symbol', 'gpi:DB_Object_Symbol'],
         'is_a': 'node property',
         'slot_uri': 'biolink:symbol'} })
    xref: Optional[List[str]] = Field(None, description="""A database cross reference or alternative identifier for a NamedThing or edge between two NamedThings. This property should point to a database record or webpage that supports the existence of the edge, or gives more detail about the edge. This property can be used on a node or edge to provide multiple URIs or CURIE cross references.""", json_schema_extra = { "linkml_meta": {'alias': 'xref',
         'aliases': ['dbxref', 'Dbxref', 'DbXref', 'record_url', 'source_record_urls'],
         'definition_uri': 'https://w3id.org/biolink/vocab/xref',
         'domain': 'named thing',
         'domain_of': ['named thing',
                       'gene',
                       'attribute',
                       'organism taxon',
                       'information content entity',
                       'dataset',
                       'physical entity',
                       'activity',
                       'procedure',
                       'material sample',
                       'biological entity',
                       'genome'],
         'in_subset': ['translator_minimal'],
         'narrow_mappings': ['gff3:Dbxref', 'gpi:DB_Xrefs'],
         'slot_uri': 'biolink:xref'} })
    has_biological_sequence: Optional[str] = Field(None, description="""connects a genomic feature to its sequence""", json_schema_extra = { "linkml_meta": {'alias': 'has_biological_sequence',
         'definition_uri': 'https://w3id.org/biolink/vocab/has_biological_sequence',
         'domain': 'named thing',
         'domain_of': ['genomic entity', 'gene', 'genome'],
         'is_a': 'node property',
         'slot_uri': 'biolink:has_biological_sequence'} })
+ + + +
+[docs] +class GenomeAnnotation(Genome): + """ + Location and nomenclature of genes and all of the coding regions in a genome assembly and the classification of genes and transcripts into types. + """ + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'https://identifiers.org/brain-bican/genome-annotation-schema'}) + + version: Optional[str] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'version', + 'broad_mappings': ['pav:version', 'owl:versionInfo'], + 'definition_uri': 'https://w3id.org/biolink/vocab/version', + 'domain': 'dataset', + 'domain_of': ['genome annotation', 'genome assembly'], + 'is_a': 'node property', + 'slot_uri': 'biolink:version'} }) + digest: Optional[List[Union[Checksum, str]]] = Field(None, description="""Stores checksum information.""", json_schema_extra = { "linkml_meta": {'alias': 'digest', + 'any_of': [{'range': 'checksum'}, {'range': 'string'}], + 'domain_of': ['genome annotation']} }) + content_url: Optional[List[str]] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'content_url', + 'domain_of': ['genome annotation'], + 'slot_uri': 'schema:url'} }) + authority: Optional[AuthorityType] = Field(None, description="""The organization responsible for publishing the data.""", json_schema_extra = { "linkml_meta": {'alias': 'authority', 'domain_of': ['genome annotation']} }) + reference_assembly: Union[GenomeAssembly, str] = Field(..., description="""The reference genome assembly that this genome annotation was created from.""", json_schema_extra = { "linkml_meta": {'alias': 'reference_assembly', + 'any_of': [{'range': 'genome assembly'}, {'range': 'string'}], + 'domain_of': ['genome annotation']} }) + id: str = Field(..., description="""A unique identifier for an entity. 
Must be either a CURIE shorthand for a URI or a complete URI""", json_schema_extra = { "linkml_meta": {'alias': 'id', + 'definition_uri': 'https://w3id.org/biolink/vocab/id', + 'domain': 'entity', + 'domain_of': ['ontology class', + 'entity', + 'attribute', + 'named thing', + 'taxonomic rank', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['AGRKB:primaryId', 'gff3:ID', 'gpi:DB_Object_ID'], + 'in_subset': ['translator_minimal'], + 'slot_uri': 'biolink:id'} }) + iri: Optional[str] = Field(None, description="""An IRI for an entity. This is determined by the id using expansion rules.""", json_schema_extra = { "linkml_meta": {'alias': 'iri', + 'definition_uri': 'https://w3id.org/biolink/vocab/iri', + 'domain_of': ['attribute', + 'entity', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['WIKIDATA_PROPERTY:P854'], + 'in_subset': ['translator_minimal', 'samples'], + 'slot_uri': 'biolink:iri'} }) + category: List[Literal["https://identifiers.org/brain-bican/vocab/GenomeAnnotation","bican:GenomeAnnotation"]] = Field(["bican:GenomeAnnotation"], description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. 
This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}""", json_schema_extra = { "linkml_meta": {'alias': 'category', + 'definition_uri': 'https://w3id.org/biolink/vocab/category', + 'designates_type': True, + 'domain': 'entity', + 'domain_of': ['entity', + 'attribute', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'in_subset': ['translator_minimal'], + 'is_a': 'type', + 'is_class_field': True, + 'slot_uri': 'biolink:category'} }) + type: Optional[List[str]] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'type', + 'definition_uri': 'https://w3id.org/biolink/vocab/type', + 'domain': 'entity', + 'domain_of': ['entity', + 'attribute', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['gff3:type', 'gpi:DB_Object_Type'], + 'mappings': ['rdf:type'], + 'slot_uri': 'rdf:type'} }) + name: Optional[str] = Field(None, description="""A human-readable name for an attribute or entity.""", json_schema_extra = { "linkml_meta": {'alias': 'name', + 'aliases': ['label', 'display name', 'title'], + 'definition_uri': 'https://w3id.org/biolink/vocab/name', + 'domain': 'entity', + 'domain_of': ['attribute', + 'entity', + 'macromolecular machine mixin', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene or gene 
product', + 'gene', + 'genome'], + 'exact_mappings': ['gff3:Name', 'gpi:DB_Object_Name'], + 'in_subset': ['translator_minimal', 'samples'], + 'mappings': ['rdfs:label'], + 'narrow_mappings': ['dct:title', 'WIKIDATA_PROPERTY:P1476'], + 'slot_uri': 'rdfs:label'} }) + description: Optional[str] = Field(None, description="""a human-readable description of an entity""", json_schema_extra = { "linkml_meta": {'alias': 'description', + 'aliases': ['definition'], + 'definition_uri': 'https://w3id.org/biolink/vocab/description', + 'domain_of': ['entity', + 'attribute', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['IAO:0000115', 'skos:definitions'], + 'in_subset': ['translator_minimal'], + 'mappings': ['dct:description'], + 'narrow_mappings': ['gff3:Description'], + 'slot_uri': 'dct:description'} }) + has_attribute: Optional[List[str]] = Field(None, description="""connects any entity to an attribute""", json_schema_extra = { "linkml_meta": {'alias': 'has_attribute', + 'close_mappings': ['OBI:0001927'], + 'definition_uri': 'https://w3id.org/biolink/vocab/has_attribute', + 'domain': 'entity', + 'domain_of': ['entity', + 'attribute', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['SIO:000008'], + 'in_subset': ['samples'], + 'narrow_mappings': ['OBAN:association_has_subject_property', + 'OBAN:association_has_object_property', + 'CPT:has_possibly_included_panel_element', + 'DRUGBANK:category', + 'EFO:is_executed_in', + 'HANCESTRO:0301', + 'LOINC:has_action_guidance', + 'LOINC:has_adjustment', + 'LOINC:has_aggregation_view', + 'LOINC:has_approach_guidance', + 'LOINC:has_divisor', + 'LOINC:has_exam', + 'LOINC:has_method', + 
'LOINC:has_modality_subtype', + 'LOINC:has_object_guidance', + 'LOINC:has_scale', + 'LOINC:has_suffix', + 'LOINC:has_time_aspect', + 'LOINC:has_time_modifier', + 'LOINC:has_timing_of', + 'NCIT:R88', + 'NCIT:eo_disease_has_property_or_attribute', + 'NCIT:has_data_element', + 'NCIT:has_pharmaceutical_administration_method', + 'NCIT:has_pharmaceutical_basic_dose_form', + 'NCIT:has_pharmaceutical_intended_site', + 'NCIT:has_pharmaceutical_release_characteristics', + 'NCIT:has_pharmaceutical_state_of_matter', + 'NCIT:has_pharmaceutical_transformation', + 'NCIT:is_qualified_by', + 'NCIT:qualifier_applies_to', + 'NCIT:role_has_domain', + 'NCIT:role_has_range', + 'INO:0000154', + 'HANCESTRO:0308', + 'OMIM:has_inheritance_type', + 'orphanet:C016', + 'orphanet:C017', + 'RO:0000053', + 'RO:0000086', + 'RO:0000087', + 'SNOMED:has_access', + 'SNOMED:has_clinical_course', + 'SNOMED:has_count_of_base_of_active_ingredient', + 'SNOMED:has_dose_form_administration_method', + 'SNOMED:has_dose_form_release_characteristic', + 'SNOMED:has_dose_form_transformation', + 'SNOMED:has_finding_context', + 'SNOMED:has_finding_informer', + 'SNOMED:has_inherent_attribute', + 'SNOMED:has_intent', + 'SNOMED:has_interpretation', + 'SNOMED:has_laterality', + 'SNOMED:has_measurement_method', + 'SNOMED:has_method', + 'SNOMED:has_priority', + 'SNOMED:has_procedure_context', + 'SNOMED:has_process_duration', + 'SNOMED:has_property', + 'SNOMED:has_revision_status', + 'SNOMED:has_scale_type', + 'SNOMED:has_severity', + 'SNOMED:has_specimen', + 'SNOMED:has_state_of_matter', + 'SNOMED:has_subject_relationship_context', + 'SNOMED:has_surgical_approach', + 'SNOMED:has_technique', + 'SNOMED:has_temporal_context', + 'SNOMED:has_time_aspect', + 'SNOMED:has_units', + 'UMLS:has_structural_class', + 'UMLS:has_supported_concept_property', + 'UMLS:has_supported_concept_relationship', + 'UMLS:may_be_qualified_by'], + 'slot_uri': 'biolink:has_attribute'} }) + deprecated: Optional[bool] = Field(None, description="""A 
boolean flag indicating that an entity is no longer considered current or valid.""", json_schema_extra = { "linkml_meta": {'alias': 'deprecated', + 'definition_uri': 'https://w3id.org/biolink/vocab/deprecated', + 'domain_of': ['entity', + 'attribute', + 'named thing', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'exact_mappings': ['oboInOwl:ObsoleteClass'], + 'slot_uri': 'biolink:deprecated'} }) + provided_by: Optional[List[str]] = Field(None, description="""The value in this node property represents the knowledge provider that created or assembled the node and all of its attributes. Used internally to represent how a particular node made its way into a knowledge provider or graph.""", json_schema_extra = { "linkml_meta": {'alias': 'provided_by', + 'definition_uri': 'https://w3id.org/biolink/vocab/provided_by', + 'domain': 'named thing', + 'domain_of': ['named thing', + 'attribute', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'is_a': 'node property', + 'slot_uri': 'biolink:provided_by'} }) + xref: Optional[List[str]] = Field(None, description="""A database cross reference or alternative identifier for a NamedThing or edge between two NamedThings. This property should point to a database record or webpage that supports the existence of the edge, or gives more detail about the edge. 
This property can be used on a node or edge to provide multiple URIs or CURIE cross references.""", json_schema_extra = { "linkml_meta": {'alias': 'xref', + 'aliases': ['dbxref', 'Dbxref', 'DbXref', 'record_url', 'source_record_urls'], + 'definition_uri': 'https://w3id.org/biolink/vocab/xref', + 'domain': 'named thing', + 'domain_of': ['named thing', + 'gene', + 'attribute', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'genome'], + 'in_subset': ['translator_minimal'], + 'narrow_mappings': ['gff3:Dbxref', 'gpi:DB_Xrefs'], + 'slot_uri': 'biolink:xref'} }) + full_name: Optional[str] = Field(None, description="""a long-form human readable name for a thing""", json_schema_extra = { "linkml_meta": {'alias': 'full_name', + 'definition_uri': 'https://w3id.org/biolink/vocab/full_name', + 'domain': 'named thing', + 'domain_of': ['named thing', + 'attribute', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'is_a': 'node property', + 'slot_uri': 'biolink:full_name'} }) + synonym: Optional[List[str]] = Field(None, description="""Alternate human-readable names for a thing""", json_schema_extra = { "linkml_meta": {'alias': 'synonym', + 'aliases': ['alias'], + 'definition_uri': 'https://w3id.org/biolink/vocab/synonym', + 'domain': 'named thing', + 'domain_of': ['named thing', + 'attribute', + 'organism taxon', + 'information content entity', + 'dataset', + 'physical entity', + 'activity', + 'procedure', + 'material sample', + 'biological entity', + 'gene', + 'genome'], + 'in_subset': ['translator_minimal'], + 'is_a': 'node property', + 'narrow_mappings': ['skos:altLabel', + 'gff3:Alias', + 'AGRKB:synonyms', + 'gpi:DB_Object_Synonyms', + 'HANCESTRO:0330', + 'IAO:0000136', + 'RXNORM:has_tradename'], + 'slot_uri': 'biolink:synonym'} }) 
+ in_taxon: Optional[List[str]] = Field(None, description="""connects an entity to its taxonomic classification. Only certain kinds of entities can be taxonomically classified; see 'thing with taxon'""", json_schema_extra = { "linkml_meta": {'alias': 'in_taxon', + 'aliases': ['instance of', + 'is organism source of gene product', + 'organism has gene', + 'gene found in organism', + 'gene product has organism source'], + 'annotations': {'canonical_predicate': {'tag': 'canonical_predicate', + 'value': True}}, + 'definition_uri': 'https://w3id.org/biolink/vocab/in_taxon', + 'domain': 'thing with taxon', + 'domain_of': ['thing with taxon', 'biological entity', 'gene', 'genome'], + 'exact_mappings': ['RO:0002162', 'WIKIDATA_PROPERTY:P703'], + 'in_subset': ['translator_minimal'], + 'inherited': True, + 'is_a': 'related to at instance level', + 'narrow_mappings': ['RO:0002160'], + 'slot_uri': 'biolink:in_taxon'} }) + in_taxon_label: Optional[str] = Field(None, description="""The human readable scientific name for the taxon of the entity.""", json_schema_extra = { "linkml_meta": {'alias': 'in_taxon_label', + 'annotations': {'denormalized': {'tag': 'denormalized', 'value': True}}, + 'definition_uri': 'https://w3id.org/biolink/vocab/in_taxon_label', + 'domain': 'thing with taxon', + 'domain_of': ['thing with taxon', 'biological entity', 'gene', 'genome'], + 'exact_mappings': ['WIKIDATA_PROPERTY:P225'], + 'in_subset': ['translator_minimal'], + 'is_a': 'node property', + 'slot_uri': 'biolink:in_taxon_label'} }) + has_biological_sequence: Optional[str] = Field(None, description="""connects a genomic feature to its sequence""", json_schema_extra = { "linkml_meta": {'alias': 'has_biological_sequence', + 'definition_uri': 'https://w3id.org/biolink/vocab/has_biological_sequence', + 'domain': 'named thing', + 'domain_of': ['genomic entity', 'gene', 'genome'], + 'is_a': 'node property', + 'slot_uri': 'biolink:has_biological_sequence'} })
+ + + +
class GenomeAssembly(ThingWithTaxon, NamedThing):
    """
    Genome assembly to contain version and label information
    """
    # NOTE(review): this class looks machine-generated from a LinkML schema (every field carries a
    # 'linkml_meta' payload) -- presumably it should be regenerated rather than edited by hand; confirm.
    # Class-level LinkML metadata: the source schema and the 'thing with taxon' mixin.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'https://identifiers.org/brain-bican/genome-annotation-schema',
         'mixins': ['thing with taxon']})

    # --- Slots whose 'domain_of' metadata names 'genome assembly' directly ---
    version: Optional[str] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'version',
         'broad_mappings': ['pav:version', 'owl:versionInfo'],
         'definition_uri': 'https://w3id.org/biolink/vocab/version',
         'domain': 'dataset',
         'domain_of': ['genome annotation', 'genome assembly'],
         'is_a': 'node property',
         'slot_uri': 'biolink:version'} })
    strain: Optional[str] = Field(None, description="""The genetic variant or subtype of a species or organism.""", json_schema_extra = { "linkml_meta": {'alias': 'strain', 'domain_of': ['genome assembly']} })
    # --- Taxon slots (metadata 'domain' is 'thing with taxon') ---
    in_taxon: Optional[List[str]] = Field(None, description="""connects an entity to its taxonomic classification. Only certain kinds of entities can be taxonomically classified; see 'thing with taxon'""", json_schema_extra = { "linkml_meta": {'alias': 'in_taxon',
         'aliases': ['instance of',
                     'is organism source of gene product',
                     'organism has gene',
                     'gene found in organism',
                     'gene product has organism source'],
         'annotations': {'canonical_predicate': {'tag': 'canonical_predicate',
                                                 'value': True}},
         'definition_uri': 'https://w3id.org/biolink/vocab/in_taxon',
         'domain': 'thing with taxon',
         'domain_of': ['thing with taxon', 'biological entity', 'gene', 'genome'],
         'exact_mappings': ['RO:0002162', 'WIKIDATA_PROPERTY:P703'],
         'in_subset': ['translator_minimal'],
         'inherited': True,
         'is_a': 'related to at instance level',
         'narrow_mappings': ['RO:0002160'],
         'slot_uri': 'biolink:in_taxon'} })
    in_taxon_label: Optional[str] = Field(None, description="""The human readable scientific name for the taxon of the entity.""", json_schema_extra = { "linkml_meta": {'alias': 'in_taxon_label',
         'annotations': {'denormalized': {'tag': 'denormalized', 'value': True}},
         'definition_uri': 'https://w3id.org/biolink/vocab/in_taxon_label',
         'domain': 'thing with taxon',
         'domain_of': ['thing with taxon', 'biological entity', 'gene', 'genome'],
         'exact_mappings': ['WIKIDATA_PROPERTY:P225'],
         'in_subset': ['translator_minimal'],
         'is_a': 'node property',
         'slot_uri': 'biolink:in_taxon_label'} })
    # --- Entity-level slots; `id` is the only field declared as required (Field(...)) ---
    id: str = Field(..., description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI""", json_schema_extra = { "linkml_meta": {'alias': 'id',
         'definition_uri': 'https://w3id.org/biolink/vocab/id',
         'domain': 'entity',
         'domain_of': ['ontology class',
                       'entity',
                       'attribute',
                       'named thing',
                       'taxonomic rank',
                       'organism taxon',
                       'information content entity',
                       'dataset',
                       'physical entity',
                       'activity',
                       'procedure',
                       'material sample',
                       'biological entity',
                       'gene',
                       'genome'],
         'exact_mappings': ['AGRKB:primaryId', 'gff3:ID', 'gpi:DB_Object_ID'],
         'in_subset': ['translator_minimal'],
         'slot_uri': 'biolink:id'} })
    iri: Optional[str] = Field(None, description="""An IRI for an entity. This is determined by the id using expansion rules.""", json_schema_extra = { "linkml_meta": {'alias': 'iri',
         'definition_uri': 'https://w3id.org/biolink/vocab/iri',
         'domain_of': ['attribute',
                       'entity',
                       'named thing',
                       'organism taxon',
                       'information content entity',
                       'dataset',
                       'physical entity',
                       'activity',
                       'procedure',
                       'material sample',
                       'biological entity',
                       'gene',
                       'genome'],
         'exact_mappings': ['WIKIDATA_PROPERTY:P854'],
         'in_subset': ['translator_minimal', 'samples'],
         'slot_uri': 'biolink:iri'} })
    # `category` only accepts the two GenomeAssembly identifiers listed in the Literal and
    # defaults to the CURIE form; its metadata flags it as the type designator.
    category: List[Literal["https://identifiers.org/brain-bican/vocab/GenomeAssembly","bican:GenomeAssembly"]] = Field(["bican:GenomeAssembly"], description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}""", json_schema_extra = { "linkml_meta": {'alias': 'category',
         'definition_uri': 'https://w3id.org/biolink/vocab/category',
         'designates_type': True,
         'domain': 'entity',
         'domain_of': ['entity',
                       'attribute',
                       'named thing',
                       'organism taxon',
                       'information content entity',
                       'dataset',
                       'physical entity',
                       'activity',
                       'procedure',
                       'material sample',
                       'biological entity',
                       'gene',
                       'genome'],
         'in_subset': ['translator_minimal'],
         'is_a': 'type',
         'is_class_field': True,
         'slot_uri': 'biolink:category'} })
    type: Optional[List[str]] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'type',
         'definition_uri': 'https://w3id.org/biolink/vocab/type',
         'domain': 'entity',
         'domain_of': ['entity',
                       'attribute',
                       'named thing',
                       'organism taxon',
                       'information content entity',
                       'dataset',
                       'physical entity',
                       'activity',
                       'procedure',
                       'material sample',
                       'biological entity',
                       'gene',
                       'genome'],
         'exact_mappings': ['gff3:type', 'gpi:DB_Object_Type'],
         'mappings': ['rdf:type'],
         'slot_uri': 'rdf:type'} })
    name: Optional[str] = Field(None, description="""A human-readable name for an attribute or entity.""", json_schema_extra = { "linkml_meta": {'alias': 'name',
         'aliases': ['label', 'display name', 'title'],
         'definition_uri': 'https://w3id.org/biolink/vocab/name',
         'domain': 'entity',
         'domain_of': ['attribute',
                       'entity',
                       'macromolecular machine mixin',
                       'named thing',
                       'organism taxon',
                       'information content entity',
                       'dataset',
                       'physical entity',
                       'activity',
                       'procedure',
                       'material sample',
                       'biological entity',
                       'gene or gene product',
                       'gene',
                       'genome'],
         'exact_mappings': ['gff3:Name', 'gpi:DB_Object_Name'],
         'in_subset': ['translator_minimal', 'samples'],
         'mappings': ['rdfs:label'],
         'narrow_mappings': ['dct:title', 'WIKIDATA_PROPERTY:P1476'],
         'slot_uri': 'rdfs:label'} })
    description: Optional[str] = Field(None, description="""a human-readable description of an entity""", json_schema_extra = { "linkml_meta": {'alias': 'description',
         'aliases': ['definition'],
         'definition_uri': 'https://w3id.org/biolink/vocab/description',
         'domain_of': ['entity',
                       'attribute',
                       'named thing',
                       'organism taxon',
                       'information content entity',
                       'dataset',
                       'physical entity',
                       'activity',
                       'procedure',
                       'material sample',
                       'biological entity',
                       'gene',
                       'genome'],
         'exact_mappings': ['IAO:0000115', 'skos:definitions'],
         'in_subset': ['translator_minimal'],
         'mappings': ['dct:description'],
         'narrow_mappings': ['gff3:Description'],
         'slot_uri': 'dct:description'} })
    has_attribute: Optional[List[str]] = Field(None, description="""connects any entity to an attribute""", json_schema_extra = { "linkml_meta": {'alias': 'has_attribute',
         'close_mappings': ['OBI:0001927'],
         'definition_uri': 'https://w3id.org/biolink/vocab/has_attribute',
         'domain': 'entity',
         'domain_of': ['entity',
                       'attribute',
                       'named thing',
                       'organism taxon',
                       'information content entity',
                       'dataset',
                       'physical entity',
                       'activity',
                       'procedure',
                       'material sample',
                       'biological entity',
                       'gene',
                       'genome'],
         'exact_mappings': ['SIO:000008'],
         'in_subset': ['samples'],
         'narrow_mappings': ['OBAN:association_has_subject_property',
                             'OBAN:association_has_object_property',
                             'CPT:has_possibly_included_panel_element',
                             'DRUGBANK:category',
                             'EFO:is_executed_in',
                             'HANCESTRO:0301',
                             'LOINC:has_action_guidance',
                             'LOINC:has_adjustment',
                             'LOINC:has_aggregation_view',
                             'LOINC:has_approach_guidance',
                             'LOINC:has_divisor',
                             'LOINC:has_exam',
                             'LOINC:has_method',
                             'LOINC:has_modality_subtype',
                             'LOINC:has_object_guidance',
                             'LOINC:has_scale',
                             'LOINC:has_suffix',
                             'LOINC:has_time_aspect',
                             'LOINC:has_time_modifier',
                             'LOINC:has_timing_of',
                             'NCIT:R88',
                             'NCIT:eo_disease_has_property_or_attribute',
                             'NCIT:has_data_element',
                             'NCIT:has_pharmaceutical_administration_method',
                             'NCIT:has_pharmaceutical_basic_dose_form',
                             'NCIT:has_pharmaceutical_intended_site',
                             'NCIT:has_pharmaceutical_release_characteristics',
                             'NCIT:has_pharmaceutical_state_of_matter',
                             'NCIT:has_pharmaceutical_transformation',
                             'NCIT:is_qualified_by',
                             'NCIT:qualifier_applies_to',
                             'NCIT:role_has_domain',
                             'NCIT:role_has_range',
                             'INO:0000154',
                             'HANCESTRO:0308',
                             'OMIM:has_inheritance_type',
                             'orphanet:C016',
                             'orphanet:C017',
                             'RO:0000053',
                             'RO:0000086',
                             'RO:0000087',
                             'SNOMED:has_access',
                             'SNOMED:has_clinical_course',
                             'SNOMED:has_count_of_base_of_active_ingredient',
                             'SNOMED:has_dose_form_administration_method',
                             'SNOMED:has_dose_form_release_characteristic',
                             'SNOMED:has_dose_form_transformation',
                             'SNOMED:has_finding_context',
                             'SNOMED:has_finding_informer',
                             'SNOMED:has_inherent_attribute',
                             'SNOMED:has_intent',
                             'SNOMED:has_interpretation',
                             'SNOMED:has_laterality',
                             'SNOMED:has_measurement_method',
                             'SNOMED:has_method',
                             'SNOMED:has_priority',
                             'SNOMED:has_procedure_context',
                             'SNOMED:has_process_duration',
                             'SNOMED:has_property',
                             'SNOMED:has_revision_status',
                             'SNOMED:has_scale_type',
                             'SNOMED:has_severity',
                             'SNOMED:has_specimen',
                             'SNOMED:has_state_of_matter',
                             'SNOMED:has_subject_relationship_context',
                             'SNOMED:has_surgical_approach',
                             'SNOMED:has_technique',
                             'SNOMED:has_temporal_context',
                             'SNOMED:has_time_aspect',
                             'SNOMED:has_units',
                             'UMLS:has_structural_class',
                             'UMLS:has_supported_concept_property',
                             'UMLS:has_supported_concept_relationship',
                             'UMLS:may_be_qualified_by'],
         'slot_uri': 'biolink:has_attribute'} })
    deprecated: Optional[bool] = Field(None, description="""A boolean flag indicating that an entity is no longer considered current or valid.""", json_schema_extra = { "linkml_meta": {'alias': 'deprecated',
         'definition_uri': 'https://w3id.org/biolink/vocab/deprecated',
         'domain_of': ['entity',
                       'attribute',
                       'named thing',
                       'organism taxon',
                       'information content entity',
                       'dataset',
                       'physical entity',
                       'activity',
                       'procedure',
                       'material sample',
                       'biological entity',
                       'gene',
                       'genome'],
         'exact_mappings': ['oboInOwl:ObsoleteClass'],
         'slot_uri': 'biolink:deprecated'} })
    # --- Slots whose metadata 'domain' is 'named thing' ---
    provided_by: Optional[List[str]] = Field(None, description="""The value in this node property represents the knowledge provider that created or assembled the node and all of its attributes. Used internally to represent how a particular node made its way into a knowledge provider or graph.""", json_schema_extra = { "linkml_meta": {'alias': 'provided_by',
         'definition_uri': 'https://w3id.org/biolink/vocab/provided_by',
         'domain': 'named thing',
         'domain_of': ['named thing',
                       'attribute',
                       'organism taxon',
                       'information content entity',
                       'dataset',
                       'physical entity',
                       'activity',
                       'procedure',
                       'material sample',
                       'biological entity',
                       'gene',
                       'genome'],
         'is_a': 'node property',
         'slot_uri': 'biolink:provided_by'} })
    xref: Optional[List[str]] = Field(None, description="""A database cross reference or alternative identifier for a NamedThing or edge between two NamedThings. This property should point to a database record or webpage that supports the existence of the edge, or gives more detail about the edge. This property can be used on a node or edge to provide multiple URIs or CURIE cross references.""", json_schema_extra = { "linkml_meta": {'alias': 'xref',
         'aliases': ['dbxref', 'Dbxref', 'DbXref', 'record_url', 'source_record_urls'],
         'definition_uri': 'https://w3id.org/biolink/vocab/xref',
         'domain': 'named thing',
         'domain_of': ['named thing',
                       'gene',
                       'attribute',
                       'organism taxon',
                       'information content entity',
                       'dataset',
                       'physical entity',
                       'activity',
                       'procedure',
                       'material sample',
                       'biological entity',
                       'genome'],
         'in_subset': ['translator_minimal'],
         'narrow_mappings': ['gff3:Dbxref', 'gpi:DB_Xrefs'],
         'slot_uri': 'biolink:xref'} })
    full_name: Optional[str] = Field(None, description="""a long-form human readable name for a thing""", json_schema_extra = { "linkml_meta": {'alias': 'full_name',
         'definition_uri': 'https://w3id.org/biolink/vocab/full_name',
         'domain': 'named thing',
         'domain_of': ['named thing',
                       'attribute',
                       'organism taxon',
                       'information content entity',
                       'dataset',
                       'physical entity',
                       'activity',
                       'procedure',
                       'material sample',
                       'biological entity',
                       'gene',
                       'genome'],
         'is_a': 'node property',
         'slot_uri': 'biolink:full_name'} })
    synonym: Optional[List[str]] = Field(None, description="""Alternate human-readable names for a thing""", json_schema_extra = { "linkml_meta": {'alias': 'synonym',
         'aliases': ['alias'],
         'definition_uri': 'https://w3id.org/biolink/vocab/synonym',
         'domain': 'named thing',
         'domain_of': ['named thing',
                       'attribute',
                       'organism taxon',
                       'information content entity',
                       'dataset',
                       'physical entity',
                       'activity',
                       'procedure',
                       'material sample',
                       'biological entity',
                       'gene',
                       'genome'],
         'in_subset': ['translator_minimal'],
         'is_a': 'node property',
         'narrow_mappings': ['skos:altLabel',
                             'gff3:Alias',
                             'AGRKB:synonyms',
                             'gpi:DB_Object_Synonyms',
                             'HANCESTRO:0330',
                             'IAO:0000136',
                             'RXNORM:has_tradename'],
         'slot_uri': 'biolink:synonym'} })
+ + + +
class AnnotationCollection(ConfiguredBaseModel):
    # Container model holding lists of the three annotation-related record types.
    # Its LinkML metadata marks it as the schema's tree root ('tree_root': True).
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'https://identifiers.org/brain-bican/genome-annotation-schema',
         'tree_root': True})

    # Every collection is optional and defaults to None (not to an empty list).
    annotations: Optional[List[GeneAnnotation]] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'annotations', 'domain_of': ['annotation collection']} })
    genome_annotations: Optional[List[GenomeAnnotation]] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'genome_annotations', 'domain_of': ['annotation collection']} })
    genome_assemblies: Optional[List[GenomeAssembly]] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'genome_assemblies', 'domain_of': ['annotation collection']} })
# Model rebuild
# see https://pydantic-docs.helpmanual.io/usage/models/#rebuilding-a-model
# model_rebuild() is called once on every model class of this module, after all
# of them have been defined.
# NOTE(review): presumably this resolves annotations that refer to classes
# declared later in the file -- confirm against the pydantic documentation.
OntologyClass.model_rebuild()
Annotation.model_rebuild()
QuantityValue.model_rebuild()
Entity.model_rebuild()
NamedThing.model_rebuild()
Attribute.model_rebuild()
TaxonomicRank.model_rebuild()
OrganismTaxon.model_rebuild()
InformationContentEntity.model_rebuild()
Dataset.model_rebuild()
PhysicalEssenceOrOccurrent.model_rebuild()
PhysicalEssence.model_rebuild()
PhysicalEntity.model_rebuild()
Occurrent.model_rebuild()
ActivityAndBehavior.model_rebuild()
Activity.model_rebuild()
Procedure.model_rebuild()
SubjectOfInvestigation.model_rebuild()
MaterialSample.model_rebuild()
ThingWithTaxon.model_rebuild()
BiologicalEntity.model_rebuild()
GenomicEntity.model_rebuild()
ChemicalEntityOrGeneOrGeneProduct.model_rebuild()
MacromolecularMachineMixin.model_rebuild()
GeneOrGeneProduct.model_rebuild()
Gene.model_rebuild()
Genome.model_rebuild()
Checksum.model_rebuild()
GeneAnnotation.model_rebuild()
GenomeAnnotation.model_rebuild()
GenomeAssembly.model_rebuild()
AnnotationCollection.model_rebuild()
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/bkbit/utils/get_ncbi_taxonomy.html b/_modules/bkbit/utils/get_ncbi_taxonomy.html new file mode 100644 index 0000000..a5e7a97 --- /dev/null +++ b/_modules/bkbit/utils/get_ncbi_taxonomy.html @@ -0,0 +1,318 @@ + + + + + + + + bkbit.utils.get_ncbi_taxonomy — bkbit documentation + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+
    +
  • + + +
  • +
  • +
+
+
+
+
+ +

Source code for bkbit.utils.get_ncbi_taxonomy

+"""
+This script downloads a zip file containing taxonomic data from a given URL, extracts and processes 
+the content of the 'names.dmp' file in memory, and saves the parsed data into JSON files. The script
+includes three main functions:
+
+1. download_and_extract_zip_in_memory(url):
+    Downloads a zip file from the given URL and extracts the content of the 'names.dmp' file in memory.
+
+2. parse_dmp_content(dmp_content):
+    Parses the content of a DMP file and extracts taxonomic information into dictionaries.
+
+3. process_and_save_taxdmp_in_memory(url, output_dir):
+    Downloads and processes the taxdump file from the given URL, and saves the parsed data into 
+    separate JSON files in the specified output directory.
+
+Usage:
+    The script can be executed as a standalone program. Modify the URL and output directory as needed.
+"""
+
+import json
+import zipfile
+import io
+import os
+import requests
+import pkg_resources
+import click
+
+# URL of the NCBI taxonomy dump archive that this module downloads.
+NCBI_TAXON_URL = "https://ftp.ncbi.nih.gov/pub/taxonomy/taxdmp.zip"
+# Name of the output directory; OUTPUT_DIR_PATH resolves it relative to this module via pkg_resources.
+OUTPUT_DIR_NAME = "ncbi_taxonomy"
+OUTPUT_DIR_PATH = pkg_resources.resource_filename(__name__, OUTPUT_DIR_NAME)
+# Resolved paths of the three JSON lookup tables; the file names match those written by
+# process_and_save_taxdmp_in_memory, and download_ncbi_taxonomy checks that each one exists.
+SCIENTIFIC_NAME_TO_TAXONID_PATH = pkg_resources.resource_filename(__name__, "ncbi_taxonomy/scientific_name_to_taxid.json")
+TAXON_SCIENTIFIC_NAME_PATH = pkg_resources.resource_filename(__name__, "ncbi_taxonomy/taxid_to_scientific_name.json")
+TAXON_COMMON_NAME_PATH = pkg_resources.resource_filename(__name__, "ncbi_taxonomy/taxid_to_common_name.json")
+
+
+
+
def download_and_extract_zip_in_memory(url):
    """
    Fetch a zip archive over HTTP and return the text of its 'names.dmp' member.

    The archive is held in memory only; nothing is written to disk.

    Args:
        url (str): The URL of the zip file to download.

    Returns:
        str: The content of the 'names.dmp' file, decoded as UTF-8.

    Raises:
        requests.exceptions.HTTPError: If the response status code is anything other than 200.
    """
    reply = requests.get(url, timeout=30)

    # Guard clause: bail out before touching the payload of a failed download.
    if reply.status_code != 200:
        raise requests.exceptions.HTTPError(
            f"Failed to download file, status code: {reply.status_code}"
        )

    # Wrap the downloaded bytes in a file-like object so zipfile can read them directly.
    archive_bytes = io.BytesIO(reply.content)
    with zipfile.ZipFile(archive_bytes) as archive, archive.open("names.dmp") as member:
        names_dmp_content = member.read().decode("utf-8")
    return names_dmp_content
+ + + +
def parse_dmp_content(dmp_content):
    """
    Parse the text of an NCBI 'names.dmp' file into three lookup dictionaries.

    Each record is expected to be one line of '|'-separated fields:
    tax_id | name_txt | unique name | name class | ...
    Lines with fewer than four fields (blank lines, truncated records, an
    empty input string) are skipped instead of raising IndexError.

    Args:
        dmp_content (str): The content of the DMP file.

    Returns:
        tuple: A tuple containing three dictionaries:
            - taxid_to_scientific_name: taxonomic ID -> scientific name.
            - taxid_to_common_name: taxonomic ID -> GenBank common name.
            - scientific_name_to_taxid: scientific name -> taxonomic ID.
    """
    taxid_to_scientific_name = {}
    taxid_to_common_name = {}
    scientific_name_to_taxid = {}

    for line in dmp_content.split("\n"):
        # Split on the field delimiter and trim the tab/space padding around each field.
        parts = [part.strip() for part in line.split("|")]
        if len(parts) < 4:
            # Not a complete record (e.g. blank line); nothing to extract.
            continue

        # tax_id      -- the id of node associated with this name
        # name_txt    -- name itself
        # unique name -- the unique variant of this name if name not unique
        # name class  -- (synonym, common name, ...)
        taxid, name, unique_name, name_class = parts[:4]

        if name_class == "scientific name":
            # Only the first scientific name seen for a tax_id is kept.
            if taxid not in taxid_to_scientific_name:
                # Prefer the disambiguated unique name when the record provides one.
                label = unique_name or name
                taxid_to_scientific_name[taxid] = label
                scientific_name_to_taxid[label] = taxid
        elif name_class == "genbank common name":
            # Likewise, the first common name seen for a tax_id wins.
            taxid_to_common_name.setdefault(taxid, name)

    return taxid_to_scientific_name, taxid_to_common_name, scientific_name_to_taxid
+ + + +
def process_and_save_taxdmp_in_memory(url, output_dir):
    """
    Download the taxdump archive, parse its 'names.dmp' member and save the
    resulting lookup tables as JSON files in ``output_dir``.

    Three files are written (existing files are overwritten):
    ``taxid_to_common_name.json``, ``taxid_to_scientific_name.json`` and
    ``scientific_name_to_taxid.json``.

    Args:
        url (str): The URL of the taxdump file to download and process.
        output_dir (str): The directory where the parsed data will be saved.
            It is created (including parents) when it does not exist.

    Returns:
        None
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists() test.
    os.makedirs(output_dir, exist_ok=True)

    # Step 1: Download and unzip the archive in memory
    names_dmp_content = download_and_extract_zip_in_memory(url)

    # Step 2: Parse the names.dmp content
    taxid_to_scientific_name, taxid_to_common_name, scientific_name_to_taxid = (
        parse_dmp_content(names_dmp_content)
    )

    # Step 3: Save each dictionary to its own JSON file
    outputs = {
        "taxid_to_common_name.json": taxid_to_common_name,
        "taxid_to_scientific_name.json": taxid_to_scientific_name,
        "scientific_name_to_taxid.json": scientific_name_to_taxid,
    }
    for file_name, mapping in outputs.items():
        with open(os.path.join(output_dir, file_name), "w", encoding="utf-8") as f:
            json.dump(mapping, f, indent=4)
+ + + + +
def load_json(file_path):
    """
    Read a UTF-8 encoded JSON file and return the decoded data.

    Args:
        file_path (str): The path to the JSON file.

    Returns:
        The decoded JSON document (a dict when the top-level JSON value is an object).
    """
    with open(file_path, "r", encoding="utf-8") as handle:
        raw_text = handle.read()
    return json.loads(raw_text)
@click.command()
@click.option("--reload", '-r', is_flag=True, help="Reload NCBI taxonomy data")
def download_ncbi_taxonomy(reload=False):
    """
    Download the NCBI taxonomy lookup tables unless they are already present.

    The taxonomy dump is downloaded and converted to JSON when --reload is
    given or when any of the three expected JSON files is missing; otherwise
    the existing files are left untouched.
    """
    # (This docstring doubles as the command's --help text, so it describes the command itself.)
    required_files = (
        SCIENTIFIC_NAME_TO_TAXONID_PATH,
        TAXON_SCIENTIFIC_NAME_PATH,
        TAXON_COMMON_NAME_PATH,
    )
    # Regenerate on request, or as soon as a single lookup table is missing.
    if reload or not all(os.path.exists(path) for path in required_files):
        process_and_save_taxdmp_in_memory(NCBI_TAXON_URL, OUTPUT_DIR_PATH)
    else:
        print("PRINT already downloaded")


if __name__ == "__main__":
    download_ncbi_taxonomy()
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/bkbit/utils/load_json.html b/_modules/bkbit/utils/load_json.html new file mode 100644 index 0000000..947349f --- /dev/null +++ b/_modules/bkbit/utils/load_json.html @@ -0,0 +1,141 @@ + + + + + + + + bkbit.utils.load_json — bkbit documentation + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for bkbit.utils.load_json

+import json
+
+
def load_json(file_path):
    """
    Read the JSON document stored at ``file_path`` (UTF-8) and return it decoded.

    Args:
        file_path (str): The path to the JSON file.

    Returns:
        The decoded contents of the file (a dict when the top-level JSON value is an object).

    Raises:
        FileNotFoundError: If the file does not exist.
        JSONDecodeError: If the file is not a valid JSON.

    """
    with open(file_path, "r", encoding="utf-8") as handle:
        raw_text = handle.read()
    return json.loads(raw_text)
+ +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/bkbit/utils/nimp_api_endpoints.html b/_modules/bkbit/utils/nimp_api_endpoints.html new file mode 100644 index 0000000..7bc952b --- /dev/null +++ b/_modules/bkbit/utils/nimp_api_endpoints.html @@ -0,0 +1,254 @@ + + + + + + + + bkbit.utils.nimp_api_endpoints — bkbit documentation + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+
    +
  • + + +
  • +
  • +
+
+
+
+
+ +

Source code for bkbit.utils.nimp_api_endpoints

+import inspect
+import requests
+
+# Base URL that every endpoint suffix below is appended to.
+API_URL_PREFIX = "https://brain-specimenportal.org/api/v1/nhash_ids/"
+# Endpoint suffixes; those ending in "?id=" are followed directly by the NHash ID.
+INFO_URL_SUFFIX = "info?id="
+ANCESTORS_URL_SUFFIX = "ancestors?id="
+DESCENDANTS_URL_SUFFIX = "descendants?id="
+PARENTS_URL_SUFFIX = "parents?id="
+# Query-string fragment appended after the ID by get_ancestors and get_descendants.
+NHASH_ONLY_SUFFIX = "&nhash_only="
+# Endpoint used by get_donor, which passes its filters as separate query parameters.
+DONORS_URL_SUFFIX = "donors"
+
+
+
def get_data(nhash_id, jwt_token):
    """
    Fetch the record stored under a given NHash ID.

    Parameters:
        nhash_id (str): The NHash ID of the record to retrieve.
        jwt_token (str): The JWT token sent as a Bearer token for authentication.

    Returns:
        dict: The decoded JSON body of the response.

    Raises:
        requests.exceptions.HTTPError: If the response status code is not 200.

    """
    endpoint = f"{API_URL_PREFIX}{INFO_URL_SUFFIX}{nhash_id}"
    auth_headers = {"Authorization": f"Bearer {jwt_token}"}

    reply = requests.get(endpoint, headers=auth_headers, timeout=10)

    # Guard clause: anything other than 200 is reported as an error.
    if reply.status_code != 200:
        raise requests.exceptions.HTTPError(
            f"Error getting data for NHash ID = {nhash_id}. Status Code: {reply.status_code}"
        )
    return reply.json()
+ + +
def get_ancestors(nhash_id, jwt_token, nhash_only=True, depth=None):
    """
    Retrieve information of all ancestors of a record with the given NHash ID.

    Parameters:
        nhash_id (str): The NHash ID of the record.
        jwt_token (str): The JWT token for authentication.
        nhash_only (bool): Flag indicating whether to retrieve only NHash IDs or
            complete record information. Default is True. The value is interpolated
            into the query string as-is, i.e. it is sent as the text "True" or "False".
        depth (int, optional): Accepted for interface compatibility but currently
            NOT sent to the API, so it has no effect on the request. Default is None.

    Returns:
        dict: The JSON response containing information of all ancestors.

    Raises:
        requests.exceptions.HTTPError: If the response status code is not 200.

    """
    headers = {"Authorization": f"Bearer {jwt_token}"}
    url = f"{API_URL_PREFIX}{ANCESTORS_URL_SUFFIX}{nhash_id}{NHASH_ONLY_SUFFIX}{nhash_only}"

    response = requests.get(url, headers=headers, timeout=10)

    # Guard clause: anything other than 200 is reported as an error.
    if response.status_code != 200:
        raise requests.exceptions.HTTPError(
            f"Error getting data for NHash ID = {nhash_id}. Status Code: {response.status_code}"
        )
    return response.json()
+ + +
def get_descendants(nhash_id, jwt_token, nhash_only=True, depth=None):
    """
    Retrieve information of all descendants of a record with the given NHash ID.

    Parameters:
        nhash_id (str): The NHash ID of the record.
        jwt_token (str): The JWT token for authentication.
        nhash_only (bool): Flag indicating whether to retrieve only NHash IDs or
            complete record information. Default is True. The value is interpolated
            into the query string as-is, i.e. it is sent as the text "True" or "False".
        depth (int, optional): Accepted for interface compatibility but currently
            NOT sent to the API, so it has no effect on the request. Default is None.

    Returns:
        dict: The JSON response containing information of all descendants.

    Raises:
        requests.exceptions.HTTPError: If the response status code is not 200.

    """
    headers = {"Authorization": f"Bearer {jwt_token}"}
    url = f"{API_URL_PREFIX}{DESCENDANTS_URL_SUFFIX}{nhash_id}{NHASH_ONLY_SUFFIX}{nhash_only}"

    # This call uses a longer timeout (30 s) than the other endpoints in this module.
    response = requests.get(url, headers=headers, timeout=30)

    # Guard clause: anything other than 200 is reported as an error.
    if response.status_code != 200:
        raise requests.exceptions.HTTPError(
            f"Error getting data for NHash ID = {nhash_id}. Status Code: {response.status_code}"
        )
    return response.json()
+ + +
def get_donor(jwt_token, donor_local_id=None, donor_nhash_id=None, age_of_death=None, ethnicity=None, race=None, sex=None, species=None):
    """
    Query the donors endpoint, filtering by whichever criteria are provided.

    Every optional argument that is not None is sent as a query parameter of
    the same name; arguments left as None are omitted from the request.

    Parameters:
        jwt_token (str): The JWT token for authentication.
        donor_local_id: Optional filter value sent as ``donor_local_id``.
        donor_nhash_id: Optional filter value sent as ``donor_nhash_id``.
        age_of_death: Optional filter value sent as ``age_of_death``.
        ethnicity: Optional filter value sent as ``ethnicity``.
        race: Optional filter value sent as ``race``.
        sex: Optional filter value sent as ``sex``.
        species: Optional filter value sent as ``species``.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.HTTPError: If the response status code is not 200.
    """
    headers = {"Authorization": f"Bearer {jwt_token}"}

    # Explicit name -> value table, instead of introspecting the signature and locals().
    filters = {
        "donor_local_id": donor_local_id,
        "donor_nhash_id": donor_nhash_id,
        "age_of_death": age_of_death,
        "ethnicity": ethnicity,
        "race": race,
        "sex": sex,
        "species": species,
    }
    # Only send the filters the caller actually supplied.
    params = {name: value for name, value in filters.items() if value is not None}

    response = requests.get(API_URL_PREFIX + DONORS_URL_SUFFIX, headers=headers, params=params, timeout=10)
    if response.status_code == 200:
        return response.json()

    raise requests.exceptions.HTTPError(
        f"Error getting donor data. Status Code: {response.status_code}"
    )
+ +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/bkbit/utils/setup_logger.html b/_modules/bkbit/utils/setup_logger.html new file mode 100644 index 0000000..c7e2afe --- /dev/null +++ b/_modules/bkbit/utils/setup_logger.html @@ -0,0 +1,202 @@ + + + + + + + + bkbit.utils.setup_logger — bkbit documentation + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for bkbit.utils.setup_logger

"""
Logger Setup Module

This module provides a utility function to configure and set up logging for an application.
The `setup_logger` function allows for customizable logging levels and output destinations,
either to a file or to the console.

Available log levels:
- DEBUG
- INFO
- WARNING
- ERROR
- CRITICAL

Example usage:
    from bkbit.utils.setup_logger import setup_logger

    # Set up the logger to log to the file "app.log" with INFO level
    logger = setup_logger("app.log", log_level="INFO", log_to_file=True)

    # Log some messages
    logger.info("This is an info message")
    logger.error("This is an error message")

Functions:
    setup_logger(file_name, log_level="WARNING", log_to_file=False):
        Configures and returns a logger with the specified log level and output destination.

Attributes:
    LOG_LEVELS (dict): A dictionary mapping log level names to their corresponding logging constants.
"""

import logging

LOG_LEVELS = {
    "DEBUG": logging.DEBUG,
    "INFO": logging.INFO,
    "WARNING": logging.WARNING,
    "ERROR": logging.ERROR,
    "CRITICAL": logging.CRITICAL,
}


def setup_logger(
    file_name,
    log_level="WARNING",
    log_to_file=False,
):
    """
    Set up a logger with the specified log level and log destination.

    Configuration is applied through ``logging.basicConfig``, so it affects the
    root logger; per the standard library, ``basicConfig`` does nothing if the
    root logger already has handlers configured.

    Args:
        file_name (str): Path of the log file. Only used when ``log_to_file``
            is true; otherwise it is ignored.
        log_level (str, optional): The desired log level name, case-insensitive
            (one of the keys of ``LOG_LEVELS``). Defaults to "WARNING".
        log_to_file (bool, optional): Whether to log to ``file_name`` instead of
            the console. Defaults to False.

    Returns:
        logger: The logger named after this module (``logging.getLogger(__name__)``).

    Raises:
        ValueError: If an invalid log level is provided (including non-string
            values such as ``None``).
    """
    # str() first so a non-string level is reported as the documented ValueError
    # rather than an AttributeError from calling .upper() on it.
    level_name = str(log_level).upper()
    if level_name not in LOG_LEVELS:
        raise ValueError(f"Invalid log level: {log_level}")

    # Options shared by both destinations; only the file name differs.
    config = {
        "format": "%(levelname)s: %(message)s (%(asctime)s)",
        "datefmt": "%m/%d/%Y %I:%M:%S %p",
        "level": LOG_LEVELS[level_name],
    }
    if log_to_file:
        config["filename"] = file_name
    logging.basicConfig(**config)

    logger = logging.getLogger(__name__)
    return logger
+ +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/index.html b/_modules/index.html new file mode 100644 index 0000000..96e3e06 --- /dev/null +++ b/_modules/index.html @@ -0,0 +1,130 @@ + + + + + + + + Overview: module code — bkbit documentation + + + + + + + + + + + + + + + +
+ + +
+ + +
+
+ + + + \ No newline at end of file diff --git a/_sources/bkbit.cli.rst.txt b/_sources/bkbit.cli.rst.txt new file mode 100644 index 0000000..3413b46 --- /dev/null +++ b/_sources/bkbit.cli.rst.txt @@ -0,0 +1,7 @@ +bkbit.cli module +================ + +.. automodule:: bkbit.cli + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/bkbit.data_translators.anatomical_structure_translator.rst.txt b/_sources/bkbit.data_translators.anatomical_structure_translator.rst.txt new file mode 100644 index 0000000..6ed25d6 --- /dev/null +++ b/_sources/bkbit.data_translators.anatomical_structure_translator.rst.txt @@ -0,0 +1,7 @@ +bkbit.data\_translators.anatomical\_structure\_translator module +================================================================ + +.. automodule:: bkbit.data_translators.anatomical_structure_translator + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/bkbit.data_translators.file_manifest_translator.rst.txt b/_sources/bkbit.data_translators.file_manifest_translator.rst.txt new file mode 100644 index 0000000..8b649cc --- /dev/null +++ b/_sources/bkbit.data_translators.file_manifest_translator.rst.txt @@ -0,0 +1,7 @@ +bkbit.data\_translators.file\_manifest\_translator module +========================================================= + +.. automodule:: bkbit.data_translators.file_manifest_translator + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/bkbit.data_translators.genome_annotation_translator.rst.txt b/_sources/bkbit.data_translators.genome_annotation_translator.rst.txt new file mode 100644 index 0000000..2a4bcca --- /dev/null +++ b/_sources/bkbit.data_translators.genome_annotation_translator.rst.txt @@ -0,0 +1,7 @@ +bkbit.data\_translators.genome\_annotation\_translator module +============================================================= + +.. 
automodule:: bkbit.data_translators.genome_annotation_translator + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/bkbit.data_translators.library_generation_translator.rst.txt b/_sources/bkbit.data_translators.library_generation_translator.rst.txt new file mode 100644 index 0000000..d950d00 --- /dev/null +++ b/_sources/bkbit.data_translators.library_generation_translator.rst.txt @@ -0,0 +1,7 @@ +bkbit.data\_translators.library\_generation\_translator module +============================================================== + +.. automodule:: bkbit.data_translators.library_generation_translator + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/bkbit.data_translators.rst.txt b/_sources/bkbit.data_translators.rst.txt new file mode 100644 index 0000000..e26fb62 --- /dev/null +++ b/_sources/bkbit.data_translators.rst.txt @@ -0,0 +1,21 @@ +bkbit.data\_translators package +=============================== + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + bkbit.data_translators.anatomical_structure_translator + bkbit.data_translators.file_manifest_translator + bkbit.data_translators.genome_annotation_translator + bkbit.data_translators.library_generation_translator + +Module contents +--------------- + +.. automodule:: bkbit.data_translators + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/bkbit.data_translators.specimen_metadata_translator.rst.txt b/_sources/bkbit.data_translators.specimen_metadata_translator.rst.txt new file mode 100644 index 0000000..6da37ad --- /dev/null +++ b/_sources/bkbit.data_translators.specimen_metadata_translator.rst.txt @@ -0,0 +1,7 @@ +bkbit.data\_translators.specimen\_metadata\_translator module +============================================================= + +.. 
automodule:: bkbit.data_translators.specimen_metadata_translator + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/bkbit.model_converters.rst.txt b/_sources/bkbit.model_converters.rst.txt new file mode 100644 index 0000000..f005a28 --- /dev/null +++ b/_sources/bkbit.model_converters.rst.txt @@ -0,0 +1,19 @@ +bkbit.model\_converters package +=============================== + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + bkbit.model_converters.sheets_converter + bkbit.model_converters.yaml2sheet_converter + +Module contents +--------------- + +.. automodule:: bkbit.model_converters + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/bkbit.model_converters.sheets_converter.rst.txt b/_sources/bkbit.model_converters.sheets_converter.rst.txt new file mode 100644 index 0000000..211fd1f --- /dev/null +++ b/_sources/bkbit.model_converters.sheets_converter.rst.txt @@ -0,0 +1,7 @@ +bkbit.model\_converters.sheets\_converter module +================================================ + +.. automodule:: bkbit.model_converters.sheets_converter + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/bkbit.model_converters.yaml2sheet_converter.rst.txt b/_sources/bkbit.model_converters.yaml2sheet_converter.rst.txt new file mode 100644 index 0000000..870e3e8 --- /dev/null +++ b/_sources/bkbit.model_converters.yaml2sheet_converter.rst.txt @@ -0,0 +1,7 @@ +bkbit.model\_converters.yaml2sheet\_converter module +==================================================== + +.. 
automodule:: bkbit.model_converters.yaml2sheet_converter + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/bkbit.model_editors.add_dunderMethods_genomeAnnotation.rst.txt b/_sources/bkbit.model_editors.add_dunderMethods_genomeAnnotation.rst.txt new file mode 100644 index 0000000..6c22288 --- /dev/null +++ b/_sources/bkbit.model_editors.add_dunderMethods_genomeAnnotation.rst.txt @@ -0,0 +1,7 @@ +bkbit.model\_editors.add\_dunderMethods\_genomeAnnotation module +================================================================ + +.. automodule:: bkbit.model_editors.add_dunderMethods_genomeAnnotation + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/bkbit.model_editors.linkml_trimmer.rst.txt b/_sources/bkbit.model_editors.linkml_trimmer.rst.txt new file mode 100644 index 0000000..6820857 --- /dev/null +++ b/_sources/bkbit.model_editors.linkml_trimmer.rst.txt @@ -0,0 +1,7 @@ +bkbit.model\_editors.linkml\_trimmer module +=========================================== + +.. automodule:: bkbit.model_editors.linkml_trimmer + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/bkbit.model_editors.rst.txt b/_sources/bkbit.model_editors.rst.txt new file mode 100644 index 0000000..51fa6ad --- /dev/null +++ b/_sources/bkbit.model_editors.rst.txt @@ -0,0 +1,19 @@ +bkbit.model\_editors package +============================ + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + bkbit.model_editors.add_dunderMethods_genomeAnnotation + bkbit.model_editors.linkml_trimmer + +Module contents +--------------- + +.. 
automodule:: bkbit.model_editors + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/bkbit.models.anatomical_structure.rst.txt b/_sources/bkbit.models.anatomical_structure.rst.txt new file mode 100644 index 0000000..eed3cca --- /dev/null +++ b/_sources/bkbit.models.anatomical_structure.rst.txt @@ -0,0 +1,7 @@ +bkbit.models.anatomical\_structure module +========================================= + +.. automodule:: bkbit.models.anatomical_structure + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/bkbit.models.genome_annotation.rst.txt b/_sources/bkbit.models.genome_annotation.rst.txt new file mode 100644 index 0000000..2f0c865 --- /dev/null +++ b/_sources/bkbit.models.genome_annotation.rst.txt @@ -0,0 +1,7 @@ +bkbit.models.genome\_annotation module +====================================== + +.. automodule:: bkbit.models.genome_annotation + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/bkbit.models.library_generation.rst.txt b/_sources/bkbit.models.library_generation.rst.txt new file mode 100644 index 0000000..6caacd4 --- /dev/null +++ b/_sources/bkbit.models.library_generation.rst.txt @@ -0,0 +1,7 @@ +bkbit.models.library\_generation module +======================================= + +.. automodule:: bkbit.models.library_generation + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/bkbit.models.rst.txt b/_sources/bkbit.models.rst.txt new file mode 100644 index 0000000..f5faced --- /dev/null +++ b/_sources/bkbit.models.rst.txt @@ -0,0 +1,20 @@ +bkbit.models package +==================== + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + bkbit.models.anatomical_structure + bkbit.models.genome_annotation + bkbit.models.library_generation + +Module contents +--------------- + +.. 
automodule:: bkbit.models + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/bkbit.rst.txt b/_sources/bkbit.rst.txt new file mode 100644 index 0000000..498dac0 --- /dev/null +++ b/_sources/bkbit.rst.txt @@ -0,0 +1,30 @@ +bkbit package +============= + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + bkbit.data_translators + bkbit.model_converters + bkbit.model_editors + bkbit.models + bkbit.utils + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + bkbit.cli + +Module contents +--------------- + +.. automodule:: bkbit + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/bkbit.utils.get_ncbi_taxonomy.rst.txt b/_sources/bkbit.utils.get_ncbi_taxonomy.rst.txt new file mode 100644 index 0000000..7447d04 --- /dev/null +++ b/_sources/bkbit.utils.get_ncbi_taxonomy.rst.txt @@ -0,0 +1,7 @@ +bkbit.utils.get\_ncbi\_taxonomy module +====================================== + +.. automodule:: bkbit.utils.get_ncbi_taxonomy + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/bkbit.utils.load_json.rst.txt b/_sources/bkbit.utils.load_json.rst.txt new file mode 100644 index 0000000..c8d5535 --- /dev/null +++ b/_sources/bkbit.utils.load_json.rst.txt @@ -0,0 +1,7 @@ +bkbit.utils.load\_json module +============================= + +.. automodule:: bkbit.utils.load_json + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/bkbit.utils.nimp_api_endpoints.rst.txt b/_sources/bkbit.utils.nimp_api_endpoints.rst.txt new file mode 100644 index 0000000..f7e9fb1 --- /dev/null +++ b/_sources/bkbit.utils.nimp_api_endpoints.rst.txt @@ -0,0 +1,7 @@ +bkbit.utils.nimp\_api\_endpoints module +======================================= + +.. 
automodule:: bkbit.utils.nimp_api_endpoints + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/bkbit.utils.rst.txt b/_sources/bkbit.utils.rst.txt new file mode 100644 index 0000000..a609e9a --- /dev/null +++ b/_sources/bkbit.utils.rst.txt @@ -0,0 +1,21 @@ +bkbit.utils package +=================== + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + bkbit.utils.get_ncbi_taxonomy + bkbit.utils.load_json + bkbit.utils.nimp_api_endpoints + bkbit.utils.setup_logger + +Module contents +--------------- + +.. automodule:: bkbit.utils + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/bkbit.utils.setup_logger.rst.txt b/_sources/bkbit.utils.setup_logger.rst.txt new file mode 100644 index 0000000..aaf1972 --- /dev/null +++ b/_sources/bkbit.utils.setup_logger.rst.txt @@ -0,0 +1,7 @@ +bkbit.utils.setup\_logger module +================================ + +.. automodule:: bkbit.utils.setup_logger + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/contributing.rst.txt b/_sources/contributing.rst.txt new file mode 100644 index 0000000..16f5c09 --- /dev/null +++ b/_sources/contributing.rst.txt @@ -0,0 +1,10 @@ +.. _contributing: + +Contributing +=========== + + + + + + diff --git a/_sources/genome_annotation.rst.txt b/_sources/genome_annotation.rst.txt new file mode 100644 index 0000000..7f2ab5d --- /dev/null +++ b/_sources/genome_annotation.rst.txt @@ -0,0 +1,79 @@ +.. _genome_annotation: + +Annotated Genome Data +---------------------- + +Overview +......... + +Generate JSON-LD files for annotated genes from a given GFF3 file. Currently GFF3 files from ENSEMBL and NCBI are supported. + +Each JSON-LD file will contain: + +- GeneAnnotation objects +- 1 GenomeAnnotation object +- 1 GenomeAssembly object +- 1 OrganismTaxon object +- 1 Checksum object + +Command Line +............. + +``bkbit gff2jsonld`` +,,,,,,,,,,,,,,,,,,,,, + + .. 
code-block:: bash + + $ bkbit gff2jsonld [OPTIONS] GFF3_URL + +Options +,,,,,,,, + + ``-a, --assembly_accession `` + ID assigned to the genomic assembly used in the GFF3 file. + + .. note:: + Must be provided when using ENSEMBL GFF3 files + + ``-s, --assembly_strain `` + Specific strain of the organism associated with the GFF3 file. + + ``-l, --log_level `` + Logging level. + + Default: + WARNING + Options: + DEBUG | INFO | WARNING | ERROR | CRITICAL + + ``-f, --log_to_file`` + Log to a file instead of the console. + + Default: + False + +Arguments +,,,,,,,,,,, + + ``GFF3_URL`` + Required argument + +Examples +......... + +Example 1: NCBI GFF3 file +,,,,,,,,,,,,,,,,,,,,,,,,,, + +.. code-block:: bash + + # Run gff2jsonld command + $ bkbit gff2jsonld 'https://ftp.ncbi.nlm.nih.gov/genomes/all/annotation_releases/9823/106/GCF_000003025.6_Sscrofa11.1/GCF_000003025.6_Sscrofa11.1_genomic.gff.gz' > output.jsonld + + +Example 2: ENSEMBL GFF3 file +,,,,,,,,,,,,,,,,,,,,,,,,,,,,, + +.. code-block:: bash + + # Run gff2jsonld command + $ bkbit gff2jsonld -a 'GCF_003339765.1' 'https://ftp.ensembl.org/pub/release-104/gff3/macaca_mulatta/Macaca_mulatta.Mmul_10.104.gff3.gz' > output.jsonld diff --git a/_sources/index.rst.txt b/_sources/index.rst.txt new file mode 100644 index 0000000..338c482 --- /dev/null +++ b/_sources/index.rst.txt @@ -0,0 +1,47 @@ +.. bkbit documentation master file, created by + sphinx-quickstart on Thu Oct 3 12:01:17 2024. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Brain Knowledge Base Interaction Toolkit Documentation +=============================================================== +This package contains tools to use the BICAN Knowledgebase Data Models. + +.. toctree:: + :maxdepth: 1 + :caption: GETTING STARTED + + install + +.. toctree:: + :maxdepth: 1 + :caption: DATA TRANSLATORS + + specimen_file_manifest + specimen_metadata + genome_annotation + +..
toctree:: + :maxdepth: 1 + :caption: MODEL CONVERTERS + + spreadsheet_converter + +.. toctree:: + :maxdepth: 1 + :caption: MODEL EDITORS + + linkml_trimmer + +.. toctree:: + :maxdepth: 1 + :caption: REFERENCE + + modules + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +.. * :ref:`search` diff --git a/_sources/install.rst.txt b/_sources/install.rst.txt new file mode 100644 index 0000000..238f224 --- /dev/null +++ b/_sources/install.rst.txt @@ -0,0 +1,19 @@ +.. _installation: + +Installation +============ + +You can install the latest version of ``bkbit`` directly from PyPI using pip: + +.. code-block:: bash + + $ pip install bkbit + +Alternatively, you can install the latest version of ``bkbit`` from the source code on GitHub: + +.. code-block:: bash + + $ git clone https://github.com/brain-bican/bkbit.git + $ cd bkbit + $ pip install . + diff --git a/_sources/linkml_trimmer.rst.txt b/_sources/linkml_trimmer.rst.txt new file mode 100644 index 0000000..48a3f8e --- /dev/null +++ b/_sources/linkml_trimmer.rst.txt @@ -0,0 +1,55 @@ +.. _linkml_trimmer: + +LinkML Schema Trimmer +---------------------- + +Overview +......... +Generate a trimmed version of a LinkML schema by only including a specific subset of classes, slots, and enums. + + +Command Line +............. + +``bkbit linkml-trimmer`` +,,,,,,,,,,,,,,,,,,,,,,,, + + .. code-block:: bash + + $ bkbit linkml-trimmer [OPTIONS] SCHEMA + +Options +,,,,,,, + + ``-c, --classes `` + **Required option** + + List of 'classes' to include in the trimmed schema. + + **Note**: Classes must be separated by commas and enclosed in quotes. + ``-s, --slots `` + List of 'slots' to include in the trimmed schema. + + **Note**: Slots must be separated by commas and enclosed in quotes. + + ``-e, --enums `` + List of 'enums' to include in the trimmed schema. + + **Note**: Enums must be separated by commas and enclosed in quotes. 
+ +Arguments +,,,,,,,,, + + ``SCHEMA`` + Required argument + + +Examples +......... + +Example 1: Trim `Biolink Schema `_ +,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, + +.. code-block:: bash + + $ bkbit linkml-trimmer --classes "gene, genome, organism taxon, thing with taxon, material sample, procedure, entity, activity, named thing" biolink.yaml > bican-biolink.yaml \ No newline at end of file diff --git a/_sources/modules.rst.txt b/_sources/modules.rst.txt new file mode 100644 index 0000000..bbaf377 --- /dev/null +++ b/_sources/modules.rst.txt @@ -0,0 +1,7 @@ +bkbit +===== + +.. toctree:: + :maxdepth: 4 + + bkbit diff --git a/_sources/quickstart.rst.txt b/_sources/quickstart.rst.txt new file mode 100644 index 0000000..b6986c0 --- /dev/null +++ b/_sources/quickstart.rst.txt @@ -0,0 +1,4 @@ +.. _quickstart: + +Quickstart +=========== \ No newline at end of file diff --git a/_sources/specimen_file_manifest.rst.txt b/_sources/specimen_file_manifest.rst.txt new file mode 100644 index 0000000..a4888f2 --- /dev/null +++ b/_sources/specimen_file_manifest.rst.txt @@ -0,0 +1,69 @@ +.. _specimen_file_manifest: + +Specimen File Manifest +---------------------- + +Overview +......... + +Generates a JSON-LD file containing specimen file data using the BICAN Library Generation Schema. + +The input file manifest must be in CSV format and contain the following columns: + + - Project ID + - Specimen ID + - File Name + - Checksum + - File Type + - Archive + - Archive URI + +Command Line +............. + +``bkbit filemanifest2jsonld`` +,,,,,,,,,,,,,,,,,,,,,,,,,,,,, + +.. code-block:: bash + + $ bkbit filemanifest2jsonld [OPTIONS] FILE_MANIFEST_CSV + +**Options** + + ``--list_library_aliquots`` + A boolean flag that, when provided, generates a list of unique library aliquots contained in the given file manifest and saves output in file called 'file_manifest_library_aliquots.txt'. + If this flag is not set (DEFAULT), then only the JSON-LD output will be generated. 
+ +**Arguments** + + ``FILE_MANIFEST_CSV`` + Required argument + +Examples +......... + +Example 1: Only generate JSON-LD output +,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, + +.. code-block:: bash + + # Run filemanifest2jsonld command + $ bkbit filemanifest2jsonld file_manifest.csv > output.jsonld + +Example 2: Generate JSON-LD output and list of library aliquots +,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, + +.. code-block:: bash + + # Run filemanifest2jsonld command + $ bkbit filemanifest2jsonld --list_library_aliquots file_manifest.csv > output.jsonld + + # Generated output files + $ ls . + output.jsonld + file_manifest_library_aliquots.txt + + # Contents of file_manifest_library_aliquots.txt + $ cat file_manifest_library_aliquots.txt + LP-123 + LP-345 \ No newline at end of file diff --git a/_sources/specimen_metadata.rst.txt b/_sources/specimen_metadata.rst.txt new file mode 100644 index 0000000..210dfb2 --- /dev/null +++ b/_sources/specimen_metadata.rst.txt @@ -0,0 +1,118 @@ +.. _specimen_metadata: + +Specimen Metadata +---------------------- + +Overview +......... + +Generate JSON-LD files for specimens, subjects, and their respective ancestors or descendants. Data is retrieved from the `BICAN Specimen Portal `_. + +Command Line +............. + +``bkbit specimen2jsonld`` +,,,,,,,,,,,,,,,,,,,,,,,,, + +.. code-block:: bash + + $ bkbit specimen2jsonld [OPTIONS] NHASH_ID_OR_FILE + +**Options** + + ``-d, --descendants`` + A boolean flag that, when provided, generates BICAN objects for the given NHASH_ID and all of its descendants. + If this flag is not set (DEFAULT), then the ancestors will be processed. + +**Arguments** + + ``NHASH_ID_OR_FILE`` + The NHASH_ID of the specimen or a file containing a list of NHASH_IDs. + If a file is provided, the file should contain one NHASH_ID per line. + +Environment Variables +..................... + +jwt_token +,,,,,,,,, + +Token is used to authenticate with the Specimen Portal API and retrieve the specimen metadata.
+ +.. note:: + You **must** set the Specimen Portal Personal API Token as an environment variable **before** running ``bkbit specimen2jsonld``. + +.. code-block:: bash + + $ export jwt_token=specimen_portal_personal_api_token + +Examples +......... + +Example 1: Parse a single record and its ancestors +,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, + +.. code-block:: bash + + # If first time running specimen2jsonld or if token is expired, set jwt_token environment variable + $ export jwt_token=specimen_portal_personal_api_token + + # Run specimen2jsonld command + $ bkbit specimen2jsonld 'LP-CVFLMQ819998' > output.jsonld + +Example 2: Parse a single record and its descendants +,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, + +.. code-block:: bash + + # If first time running specimen2jsonld or if token is expired, set jwt_token environment variable + $ export jwt_token=specimen_portal_personal_api_token + + # Run specimen2jsonld command. Important: include '--descendants' flag + $ bkbit specimen2jsonld -d 'DO-GICE7463' > output.jsonld + +Example 3: Parse a file containing record(s) and their respective ancestors +,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, + +.. code-block:: bash + + # If first time running specimen2jsonld or if token is expired, set jwt_token environment variable + $ export jwt_token=specimen_portal_personal_api_token + + # Contents of input file + $ cat input_nhash_ids.txt + LA-TZWCWB265559FVVNTS329147 + LA-IAXCCV360563HBFKKM103455 + LA-JFCEST535498UIPMOH349083 + + # Run specimen2jsonld command + $ bkbit specimen2jsonld input_nhash_ids.txt + + # Expected output + $ ls . + LA-TZWCWB265559FVVNTS329147.jsonld + LA-IAXCCV360563HBFKKM103455.jsonld + LA-JFCEST535498UIPMOH349083.jsonld + +Example 4: Parse a file containing record(s) and their respective descendants +,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, + +.. 
code-block:: bash + + # If first time running specimen2jsonld or if token is expired, set jwt_token environment variable + $ export jwt_token=specimen_portal_personal_api_token + + # Contents of input file + $ cat input_nhash_ids.txt + DO-XIQQ6047 + DO-WFFF3774 + DO-RMRL6873 + + # Run specimenjsonld command. Important: include '--descendants' flag + $ bkbit specimen2jsonld -d input_nhash_ids.txt + + # Expected output + $ ls . + DO-XIQQ6047.jsonld + DO-WFFF3774.jsonld + DO-RMRL6873.jsonld + diff --git a/_sources/spreadsheet_converter.rst.txt b/_sources/spreadsheet_converter.rst.txt new file mode 100644 index 0000000..581e960 --- /dev/null +++ b/_sources/spreadsheet_converter.rst.txt @@ -0,0 +1,89 @@ +.. _spreadsheet_converter: + +Spreadsheet to LinkML Schema +============================= + +Overview +......... +Create a yaml linkml model from set of spreadsheets. It can use either tsv files or Google Sheet as an input. + +The default behavior is to run the converter starting with tsv files, specifying their paths as arguments, for example, model_spreadsheets/*tsv. + +If ``--gsheet`` option is used, the converter starts from downloading spreadsheets from Google Sheets. +The argument must be a YAML file that has ``gsheet_id`` and a list of ``sheets`` with ``gid`` (a unique identifier for each individual sheet) +and ``name`` (optionally) that will be used as a name of the downloaded TSV file (if not available ``gid`` wil be used). + +Command Line +............. + +``bkbit schema2model`` +,,,,,,,,,,,,,,,,,,,,,,, + +.. code-block:: bash + + $ bkbit schema2model [OPTIONS] SPREADSHEETS + +**Options** + + ``-o, --output `` + Path for the yaml output file. + + ``-t, --template