Skip to content

Commit

Permalink
Merge pull request #47 from brain-bican/add_cli_library_generation
Browse files Browse the repository at this point in the history
Add functionality to parse data catalog's large file metadata manifest
  • Loading branch information
puja-trivedi authored Nov 7, 2024
2 parents 5294835 + 92706e9 commit a95145d
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 3 deletions.
6 changes: 4 additions & 2 deletions bkbit/cli.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import click
from bkbit.model_converters.sheets_converter import schema2model
from bkbit.data_translators.library_generation_translator import specimen2jsonld
from bkbit.model_converters.yaml2sheet_converter import yaml2cvs
from bkbit.data_translators.library_generation_translator import specimen2jsonld
from bkbit.data_translators.file_manifest_translator import filemanifest2jsonld
from bkbit.data_translators.specimen_metadata_translator import list_library_aliquot
from bkbit.data_translators.genome_annotation_translator import gff2jsonld
from bkbit.utils.get_ncbi_taxonomy import download_ncbi_taxonomy
from bkbit.model_editors.linkml_trimmer import linkml_trimmer
Expand All @@ -14,9 +15,10 @@ def cli():

# Add commands to the CLI group
# Register each sub-command exactly once on the top-level CLI group.
# (specimen2jsonld previously appeared twice; one registration is sufficient.)
cli.add_command(schema2model)
cli.add_command(yaml2cvs)
cli.add_command(specimen2jsonld)
cli.add_command(filemanifest2jsonld)
cli.add_command(list_library_aliquot)
cli.add_command(gff2jsonld)
cli.add_command(download_ncbi_taxonomy)
cli.add_command(linkml_trimmer)
Expand Down
10 changes: 9 additions & 1 deletion bkbit/data_translators/file_manifest_translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,17 @@ def serialize_to_jsonld(objects):

##OPTIONS##
# Option #1: List all library aliquots in the file manifest.
@click.option('--list_library_aliquots', is_flag=True, help='List all library aliquots in the file manifest.')
@click.option('-l', '--list_library_aliquots', is_flag=True, help='List all library aliquots in the file manifest.')

def filemanifest2jsonld(file_manifest_path: str, list_library_aliquots: bool):
"""
Generates a JSON-LD representation of the digital objects and checksums in the given file manifest.
Args:
file_manifest_path (str): The path to the file manifest CSV file.
list_library_aliquots (bool): List all library aliquots in the file manifest.
"""

digital_and_checksum_objects, specimen_ids = process_csv(file_manifest_path)
if list_library_aliquots:
with open('file_manifest_library_aliquots.txt', 'w') as f:
Expand Down
37 changes: 37 additions & 0 deletions bkbit/data_translators/specimen_metadata_translator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import csv
import click

def extract_specimen_ids(csv_file_path):
    """
    Reads a CSV file and prints the 'Specimen ID' values that start with "LA".

    Matching values are printed one per line, in file order. Values that do
    not start with "LA", and rows with no 'Specimen ID' cell, are skipped.

    Parameters:
    - csv_file_path: str, path to the input CSV file.

    Raises:
    - ValueError: if the file is empty or its header row has no
      'Specimen ID' column.
    """
    # newline='' lets the csv module handle line endings itself (needed for
    # quoted fields with embedded newlines); utf-8-sig reads plain UTF-8
    # unchanged but drops a leading BOM so the first header still matches.
    with open(csv_file_path, 'r', encoding='utf-8-sig', newline='') as csv_file:
        reader = csv.DictReader(csv_file)

        # Ensure 'Specimen ID' column exists in the CSV.
        # fieldnames is None for an empty file; treat that as "column missing".
        if 'Specimen ID' not in (reader.fieldnames or ()):
            raise ValueError("The CSV file does not contain the 'Specimen ID' column.")

        # Print matching 'Specimen ID' values to the command line
        for row in reader:
            specimen_id = row['Specimen ID']
            # A row shorter than the header yields None for the missing cell.
            if specimen_id and specimen_id.startswith("LA"):
                print(specimen_id)

# Call the decorator factory explicitly: bare `@click.command` is only accepted
# by click >= 8.1, while `@click.command()` works on every click release.
@click.command()
# click.Path makes click reject a missing path or a directory with a usage
# error before the command body runs, instead of surfacing a raw traceback.
@click.argument(
    'specimen_metadata_file_path',
    type=click.Path(exists=True, dir_okay=False),
)
def list_library_aliquot(specimen_metadata_file_path):
    """
    Extracts and prints all the Library Aliquot NHash IDs from Data Catalog's specimen metadata file.
    Args:
    specimen_metadata_file_path (str): Path to the specimen metadata file.
    """
    # Reading, filtering and printing are all delegated to extract_specimen_ids.
    extract_specimen_ids(specimen_metadata_file_path)


# Allow running this module directly as a script, outside the bkbit CLI group.
if __name__ == '__main__':
    list_library_aliquot()

0 comments on commit a95145d

Please sign in to comment.