
Commit

Merge pull request #46 from puja-trivedi/create_docs_20241003
Create docs 20241003
puja-trivedi authored Oct 8, 2024
2 parents c11011b + ff09711 commit 285f5a2
Showing 10 changed files with 269 additions and 24 deletions.
1 change: 0 additions & 1 deletion .gitattributes

This file was deleted.

2 changes: 2 additions & 0 deletions bkbit/cli.py
@@ -5,6 +5,7 @@
from bkbit.data_translators.file_manifest_translator import filemanifest2jsonld
from bkbit.data_translators.genome_annotation_translator import gff2jsonld
from bkbit.utils.get_ncbi_taxonomy import download_ncbi_taxonomy
from bkbit.model_editors.linkml_trimmer import linkml_trimmer

@click.group()
def cli():
@@ -18,6 +19,7 @@ def cli():
cli.add_command(filemanifest2jsonld)
cli.add_command(gff2jsonld)
cli.add_command(download_ncbi_taxonomy)
cli.add_command(linkml_trimmer)

if __name__ == '__main__':
cli()
81 changes: 78 additions & 3 deletions bkbit/model_editors/linkml_trimmer.py
@@ -1,15 +1,65 @@
"""
This script provides a utility for trimming a LinkML schema by retaining specified classes, slots, and enums, along with their dependencies.
It defines a `YamlTrimmer` class for schema manipulation and offers a command-line interface using Click for easy usage from the terminal.
Usage:
python script.py [OPTIONS] SCHEMA
Options:
--classes, -c TEXT Comma-separated list of classes to include in the trimmed schema (required).
--slots, -s TEXT Comma-separated list of slots to include in the trimmed schema.
--enums, -e TEXT Comma-separated list of enums to include in the trimmed schema.
Example:
python script.py schema.yaml -c Person,Organization -s name,age -e StatusEnum
The script performs the following steps:
1. Loads the specified LinkML schema.
2. Trims the schema by keeping only the specified classes, slots, and enums, along with their dependencies.
3. Serializes and prints the trimmed schema in YAML format.
Dependencies:
- click
- linkml-runtime
- linkml
"""

from dataclasses import dataclass
from typing import Union
from pathlib import Path
from linkml_runtime.linkml_model.meta import SchemaDefinition
from linkml_runtime.utils.schemaview import SchemaView

from linkml._version import __version__
from linkml.generators.yamlgen import YAMLGenerator

import click

@dataclass
class YamlTrimmer:
"""
A utility class for trimming a LinkML schema by retaining specified classes, slots, and enums, along with their dependencies.
This class helps in generating a simplified version of a LinkML schema by removing all elements that are not reachable from the specified classes, slots, and enums to keep.
Args:
schema (Union[str, Path, SchemaDefinition]): The LinkML schema to be trimmed. It can be a file path, URL, or a `SchemaDefinition` object.
Attributes:
schemaview (SchemaView): An object representing the loaded schema, used for manipulation and traversal.
Methods:
trim_model(keep_classes: list[str], keep_slots: list[str] = [], keep_enums: list[str] = []):
Trims the schema by keeping only the specified classes, slots, and enums, and their dependencies.
serialize():
Serializes and prints the trimmed schema in YAML format.
Example:
>>> yt = YamlTrimmer('path/to/schema.yaml')
>>> yt.trim_model(['Person', 'Organization'], keep_slots=['name'], keep_enums=['StatusEnum'])
>>> yt.serialize()
"""
def __init__(self, schema: Union[str, Path, SchemaDefinition]):
self.schemaview = SchemaView(schema)

@@ -113,5 +163,30 @@ def serialize(self):
print(YAMLGenerator(self.schemaview.schema).serialize())
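
The body of ``trim_model`` is collapsed in this view; conceptually, trimming is a reachability computation over the schema's dependency graph: every element not reachable from the kept classes, slots, and enums is dropped. A toy sketch of that idea (not the actual implementation, which traverses parents, mixins, and slot ranges via ``SchemaView``):

```python
def reachable(graph, roots):
    """Collect every element reachable from the given roots.

    graph maps an element name to the names it depends on
    (in the LinkML case: parent classes, mixins, slot ranges, ...).
    """
    keep, stack = set(), list(roots)
    while stack:
        node = stack.pop()
        if node in keep:
            continue  # already visited
        keep.add(node)
        stack.extend(graph.get(node, []))
    return keep


# Toy dependency graph: trimming to Person keeps its ancestors and slots,
# while Organization (unreachable from Person) is discarded.
graph = {
    "Person": ["NamedThing", "name"],
    "Organization": ["NamedThing"],
    "NamedThing": ["id"],
}
print(sorted(reachable(graph, ["Person"])))
```

Everything outside the returned set would be removed from the trimmed schema.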


@click.command()
## ARGUMENTS ##
# Argument #1: Schema file
@click.argument("schema", type=click.Path(exists=True))

## OPTIONS ##
# Option #1: Classes
@click.option('--classes', '-c', required=True, help='Comma-separated list of classes to include in trimmed schema')
# Option #2: Slots
@click.option('--slots', '-s', help='Comma-separated list of slots to include in trimmed schema')
# Option #3: Enums
@click.option('--enums', '-e', help='Comma-separated list of enums to include in trimmed schema')

def linkml_trimmer(schema, classes, slots, enums):
"""
Trim a LinkML schema based on a list of classes, slots, and enums to keep.
"""
classes = classes.split(',')
slots = slots.split(',') if slots else []
enums = enums.split(',') if enums else []

yt = YamlTrimmer(schema)
yt.trim_model(classes, slots, enums)
yt.serialize()

if __name__ == "__main__":
    linkml_trimmer()
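
Note that the command parses its list options with a bare ``split(',')``, so entries keep any whitespace around the commas. A hypothetical helper that also strips entries (an illustrative sketch, not part of the module):

```python
def parse_csv_option(value):
    """Split a comma-separated CLI option value into a clean list.

    Returns [] for None or empty input, and strips whitespace around
    each entry, so "gene, genome" yields ["gene", "genome"].
    """
    if not value:
        return []
    return [item.strip() for item in value.split(",") if item.strip()]


print(parse_csv_option("gene, genome, organism taxon"))
```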
10 changes: 9 additions & 1 deletion docs/conf.py
@@ -37,4 +37,12 @@
# other themes = 'sphinx_rtd_theme', 'classic', 'furo'
html_theme = 'sphinx_rtd_theme'
html_static_path = ['_static']
source_suffix = ['.rst', '.md']
html_show_sourcelink = False
html_context = {
"display_github": True, # Integrate GitHub
"github_user": "brain-bican", # Username
"github_repo": "bkbit", # Repo name
"github_version": "main", # Version
"conf_py_path": "/docs/", # Path in the checkout to the docs root
}
# source_suffix = ['.rst', '.md']
14 changes: 8 additions & 6 deletions docs/genome_annotation.rst
@@ -29,14 +29,16 @@ Command Line
Options
,,,,,,,,

``-a, --assembly_accession``
``-a, --assembly_accession <assembly_accession>``
ID assigned to the genomic assembly used in the GFF3 file.
**Note: Must be provided when using ENSEMBL GFF3 files**

``-s, --assembly_strain``
.. note::
   Must be provided when using ENSEMBL GFF3 files.

``-s, --assembly_strain <assembly_strain>``
Specific strain of the organism associated with the GFF3 file.

``-l, --log_level``
``-l, --log_level <log_level>``
Logging level.

Default:
@@ -48,13 +50,13 @@
Log to a file instead of the console.

Default:
FALSE
False

Arguments
,,,,,,,,,,,

``GFF3_URL``
URL to the GFF3 file.
Required argument

Examples
.........
12 changes: 12 additions & 0 deletions docs/index.rst
@@ -21,6 +21,18 @@ This package contains tools to use the BICAN Knowledgebase Data Models.
specimen_metadata
genome_annotation

.. toctree::
:maxdepth: 1
:caption: MODEL CONVERTERS

spreadsheet_converter

.. toctree::
:maxdepth: 1
:caption: MODEL EDITORS

linkml_trimmer

.. toctree::
:maxdepth: 1
:caption: REFERENCE
55 changes: 55 additions & 0 deletions docs/linkml_trimmer.rst
@@ -0,0 +1,55 @@
.. _linkml_trimmer:

LinkML Schema Trimmer
----------------------

Overview
.........
Generate a trimmed version of a LinkML schema by only including a specific subset of classes, slots, and enums.


Command Line
.............

``bkbit linkml-trimmer``
,,,,,,,,,,,,,,,,,,,,,,,,

.. code-block:: bash

   $ bkbit linkml-trimmer [OPTIONS] SCHEMA

Options
,,,,,,,

``-c, --classes <classes>``
**Required option**

List of 'classes' to include in the trimmed schema.

**Note**: Classes must be separated by commas and enclosed in quotes.

``-s, --slots <slots>``
List of 'slots' to include in the trimmed schema.

**Note**: Slots must be separated by commas and enclosed in quotes.

``-e, --enums <enums>``
List of 'enums' to include in the trimmed schema.

**Note**: Enums must be separated by commas and enclosed in quotes.

Arguments
,,,,,,,,,

``SCHEMA``
Required argument


Examples
.........

Example 1: Trim `Biolink Schema <https://biolink.github.io/biolink-model/>`_
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,

.. code-block:: bash

   $ bkbit linkml-trimmer --classes "gene, genome, organism taxon, thing with taxon, material sample, procedure, entity, activity, named thing" biolink.yaml > bican-biolink.yaml
21 changes: 11 additions & 10 deletions docs/specimen_file_manifest.rst
@@ -8,6 +8,16 @@ Overview

Generates a JSON-LD file containing specimen file data using the BICAN Library Generation Schema.

The input file manifest must be in CSV format and contain the following columns:

- Project ID
- Specimen ID
- File Name
- Checksum
- File Type
- Archive
- Archive URI
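
A minimal manifest illustrating these columns might look like this (all values are hypothetical, for illustration only):

```csv
Project ID,Specimen ID,File Name,Checksum,File Type,Archive,Archive URI
PRJ-001,SPC-0001,sample_0001.fastq.gz,d41d8cd98f00b204e9800998ecf8427e,fastq,NeMO,https://example.org/archive/sample_0001.fastq.gz
```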

Command Line
.............

@@ -27,16 +37,7 @@ Command Line
**Arguments**

``FILE_MANIFEST_CSV``
Required argument.
FILE_MANIFEST_CSV can be obtained from the Brain Knowledge Platform and **must** contain the following columns:

- Project ID
- Specimen ID
- File Name
- Checksum
- File Type
- Archive
- Archive URI
Required argument

Examples
.........
8 changes: 5 additions & 3 deletions docs/specimen_metadata.rst
@@ -36,12 +36,14 @@ Environment Variables
jwt_token
,,,,,,,,,

The token is used to authenticate with the Specimen Portal API and retrieve the specimen metadata.

.. note::
   You **must** set the Specimen Portal Personal API Token as an environment variable **before** running ``bkbit specimen2jsonld``.

.. code-block:: bash

   $ export jwt_token=specimen_portal_personal_api_token

Examples
.........
89 changes: 89 additions & 0 deletions docs/spreadsheet_converter.rst
@@ -0,0 +1,89 @@
.. _spreadsheet_converter:

Spreadsheet to LinkML Schema
=============================

Overview
.........
Create a YAML LinkML model from a set of spreadsheets. The converter accepts either TSV files or a Google Sheet as input.

The default behavior is to run the converter starting from TSV files, specifying their paths as arguments, for example ``model_spreadsheets/*.tsv``.
If the ``--gsheet`` option is used, the converter starts by downloading the spreadsheets from Google Sheets.
The argument must then be a YAML file that has a ``gsheet_id`` and a list of ``sheets``, each with a ``gid`` (a unique identifier for the individual sheet)
and optionally a ``name`` that will be used as the name of the downloaded TSV file (if not available, the ``gid`` will be used).
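
Under the key layout described above, such a configuration file might look like the following sketch (the sheet IDs and names are hypothetical; the exact format is defined by the converter):

```yaml
gsheet_id: 1AbCdEfGhIjKlMnOpQrStUvWxYz0123456789  # hypothetical spreadsheet ID
sheets:
  - gid: 0
    name: classes        # downloaded as classes.tsv
  - gid: 123456789
    name: slots          # downloaded as slots.tsv
  - gid: 987654321       # no name given; the gid is used as the file name
```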

Command Line
.............

``bkbit schema2model``
,,,,,,,,,,,,,,,,,,,,,,,

.. code-block:: bash

   $ bkbit schema2model [OPTIONS] SPREADSHEETS

**Options**

``-o, --output <output>``
Path for the yaml output file.

``-t, --template <template>``
Optional template yaml file with standard classes that will be added to the model.

``--gsheet, --no-gsheet``
Use a Google Sheet as the source. If set, the argument MUST be a YAML file with the ``gsheet_id`` and the ``gid`` of all the sheets.

Default:
False

``--gsheet-download-dir <gsheet_download_dir>``
Path used to download the Google Sheets. If not specified, a default directory will be created.

``--fix_tsv, --no-fix_tsv``
Fix known issues in TSV files downloaded from Google Sheets.

Default:
True

``--fix_tsv_save, --no-fix_tsv_save``
Keep the fixed files; relevant only if ``--fix_tsv`` is True.

Default:
False

``--repair, --no-repair``
Run the standard LinkML schema auto-repair.

Default:
True

``--fix_bican_model, --no-fix_bican_model``
Automated repair specifically for the BICAN YAML model.

Default:
True

**Arguments**

``SPREADSHEETS``
Required argument

Examples
.........

Example 1: Schema defined in tsv files
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,

.. code-block:: bash

   # Run the schema2model command
   $ bkbit schema2model -o model.yaml source_model/spreadsheets/*.tsv

Example 2: Schema defined in Google Sheets
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,

.. code-block:: bash

   # Run the schema2model command
   $ bkbit schema2model -o model.yaml --gsheet --gsheet-download-dir source_model/spreadsheets source_model/gsheet.yaml
