Skip to content

Commit

Permalink
.
Browse files Browse the repository at this point in the history
  • Loading branch information
B3rse committed Feb 14, 2024
1 parent 8038605 commit b6c4003
Show file tree
Hide file tree
Showing 10 changed files with 200 additions and 24 deletions.
1 change: 1 addition & 0 deletions pipeline_utils/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def main(args=None):
pipeline_deploy_parser.add_argument('--post-software', action='store_true', help='POST|PATCH Software objects')
pipeline_deploy_parser.add_argument('--post-file-format', action='store_true', help='POST|PATCH FileFormat objects')
pipeline_deploy_parser.add_argument('--post-file-reference', action='store_true', help='POST|PATCH ReferenceFile objects')
pipeline_deploy_parser.add_argument('--post-reference-genome', action='store_true', help='POST|PATCH ReferenceGenome objects')
pipeline_deploy_parser.add_argument('--post-workflow', action='store_true', help='POST|PATCH Workflow objects')
pipeline_deploy_parser.add_argument('--post-metaworkflow', action='store_true', help='POST|PATCH MetaWorkflow objects')
pipeline_deploy_parser.add_argument('--post-wfl', action='store_true', help='Upload Workflow Description files (.cwl, .wdl)')
Expand Down
82 changes: 72 additions & 10 deletions pipeline_utils/lib/yaml_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from pipeline_utils.schemas.yaml_software import yaml_software_schema
from pipeline_utils.schemas.yaml_reference_file import yaml_reference_file_schema
from pipeline_utils.schemas.yaml_file_format import yaml_file_format_schema
from pipeline_utils.schemas.yaml_reference_genome import yaml_reference_genome_schema


###############################################################
Expand Down Expand Up @@ -118,8 +119,11 @@ class YAMLTemplate(object):
METAWORKFLOW_TYPE_SCHEMA = 'MetaWorkflow'
FILEFORMAT_TYPE_SCHEMA = 'FileFormat'
REFERENCEFILE_TYPE_SCHEMA = 'ReferenceFile'
REFERENCEGENOME_TYPE_SCHEMA = 'ReferenceGenome'
SOFTWARE_TYPE_SCHEMA = 'Software'
VARIANT_TYPE_SCHEMA = "variant_type"
CODE_SCHEMA = 'code'
IDENTIFIER_SCHEMA = 'identifier'

def __init__(self, data, schema):
"""Constructor method.
Expand All @@ -145,17 +149,23 @@ def _clean_newline(self, line):
line = line.replace('|', '')
return line

def _link_title(self, name, version):
def _link_title(self, name, version=None):
"""Helper to create a "title" field.
"""
title = getattr(self, self.TITLE_SCHEMA, None)
if title:
if version in title:
return title
if version:
if version in title:
return title
else:
return f'{title} [{version}]'
else:
return f'{title} [{version}]'
return title
else:
return f'{name.replace("_", " ")} [{version}]'
if version:
return f'{name.replace("_", " ")} [{version}]'
else:
return f'{name.replace("_", " ")}'

def _string_consortia(self, consortia):
"""Helper to create a string from "consortia" field.
Expand Down Expand Up @@ -549,7 +559,7 @@ def to_json(
if getattr(self, self.SOURCE_URL_SCHEMA, None):
sftwr_json[self.SOURCE_URL_SCHEMA] = self.source_url

sftwr_json[self.TITLE_SCHEMA] = self._link_title(self.name, version)
sftwr_json[self.TITLE_SCHEMA] = self._link_title(self.name)
sftwr_json[self.ALIASES_SCHEMA] = [f'{self._string_consortia(consortia)}:{self.SOFTWARE_TYPE_SCHEMA}-{self.name}_{version}']

# uuid, accession if specified
Expand All @@ -562,6 +572,10 @@ def to_json(
if getattr(self, self.LICENSE_SCHEMA, None):
sftwr_json[self.LICENSE_SCHEMA] = self.license

# code
if getattr(self, self.CODE_SCHEMA, None):
sftwr_json[self.CODE_SCHEMA] = self.code

return sftwr_json


Expand Down Expand Up @@ -638,11 +652,10 @@ class YAMLFileFormat(YAMLTemplate):
"""

# schema constants
IDENTIFIER_SCHEMA = 'identifier'
STANDARD_FILE_EXTENSION_SCHEMA = 'standard_file_extension'
# VALID_ITEM_TYPES_SCHEMA = 'valid_item_types'
VALID_ITEM_TYPES_SCHEMA = 'valid_item_types'
EXTRA_FILE_FORMATS_SCHEMA = 'extra_file_formats'
# FILE_TYPES_SCHEMA = 'file_types'
FILE_TYPES_SCHEMA = 'file_types'

def __init__(self, data):
"""Constructor method.
Expand Down Expand Up @@ -672,7 +685,7 @@ def to_json(
frmt_json[self.CONSORTIA_SCHEMA] = consortia
frmt_json[self.DESCRIPTION_SCHEMA] = self.description
frmt_json[self.STANDARD_FILE_EXTENSION_SCHEMA] = self.extension
# frmt_json[self.VALID_ITEM_TYPES_SCHEMA] = getattr(self, self.FILE_TYPES_SCHEMA, ['ReferenceFile', 'FileProcessed'])
frmt_json[self.VALID_ITEM_TYPES_SCHEMA] = getattr(self, self.FILE_TYPES_SCHEMA, ['ReferenceFile', 'OutputFile'])
# check for secondary formats
if getattr(self, self.SECONDARY_FORMATS_SCHEMA, None):
frmt_json[self.EXTRA_FILE_FORMATS_SCHEMA] = getattr(self, self.SECONDARY_FORMATS_SCHEMA)
Expand All @@ -685,3 +698,52 @@ def to_json(
frmt_json[self.ACCESSION_SCHEMA] = self.accession

return frmt_json

###############################################################
# YAMLReferenceGenome, YAML ReferenceGenome
###############################################################
class YAMLReferenceGenome(YAMLTemplate):
    """Wrapper around a YAML document that describes a ReferenceGenome object.

    The document is validated against ``yaml_reference_genome_schema`` and
    every top-level key is then exposed as an instance attribute.
    """

    def __init__(self, data):
        """Validate ``data`` and copy its keys onto the instance.

        :param data: Mapping holding the content of the YAML document
        """
        super().__init__(data, yaml_reference_genome_schema)
        # validation runs before any attribute is set
        self._validate()
        # expose each key of the document as an attribute
        for attr_name, attr_value in data.items():
            setattr(self, attr_name, attr_value)

    def to_json(
                self,
                submission_centers, # alias list
                consortia # alias list
                ):
        """Build the JSON representation of the ReferenceGenome.

        Identifier, aliases, submission centers, consortia, title and code
        are always set. ``uuid``, ``accession`` and the linked files are
        added only when the corresponding attribute is present and truthy.

        :param submission_centers: Stored unchanged under the
            submission centers key
        :param consortia: Stored unchanged under the consortia key and
            passed to ``_string_consortia`` to build the alias prefixes
        :return: Dictionary describing the object
        """
        genome_alias = (
            f'{self._string_consortia(consortia)}:'
            f'{self.REFERENCEGENOME_TYPE_SCHEMA}-{self.name}_{self.version}'
        )

        # common metadata, keys kept in their original insertion order
        gen_json = {
            self.IDENTIFIER_SCHEMA: self.name.lower(),
            self.ALIASES_SCHEMA: [genome_alias],
            self.SUBMISSION_CENTERS_SCHEMA: submission_centers,
            self.CONSORTIA_SCHEMA: consortia,
            self.TITLE_SCHEMA: self._link_title(self.name, self.version),
            self.CODE_SCHEMA: self.code,
        }

        # uuid, accession if specified
        if getattr(self, self.UUID_SCHEMA, None):
            gen_json[self.UUID_SCHEMA] = self.uuid
        if getattr(self, self.ACCESSION_SCHEMA, None):
            gen_json[self.ACCESSION_SCHEMA] = self.accession

        # linked files become ReferenceFile aliases, "@" is replaced by "_"
        if getattr(self, self.FILES_SCHEMA, None):
            gen_json[self.FILES_SCHEMA] = [
                f'{self._string_consortia(consortia)}:{self.REFERENCEFILE_TYPE_SCHEMA}-{linked_file.replace("@", "_")}'
                for linked_file in self.files
            ]

        return gen_json
9 changes: 8 additions & 1 deletion pipeline_utils/pipeline_deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ def __init__(self, args, repo, version='VERSION', pipeline='PIPELINE'):
'Software': yaml_parser.YAMLSoftware,
'FileFormat': yaml_parser.YAMLFileFormat,
'ReferenceFile': yaml_parser.YAMLReferenceFile,
'ReferenceGenome': yaml_parser.YAMLReferenceGenome,
'Workflow': yaml_parser.YAMLWorkflow,
'MetaWorkflow': yaml_parser.YAMLMetaWorkflow
}
Expand All @@ -84,10 +85,12 @@ def __init__(self, args, repo, version='VERSION', pipeline='PIPELINE'):
'Software': 'portal_objects/software.yaml',
'FileFormat': 'portal_objects/file_format.yaml',
'ReferenceFile': 'portal_objects/file_reference.yaml',
'ReferenceGenome': 'portal_objects/reference_genome.yaml',
# .yml files
'Software_yml': 'portal_objects/software.yml',
'FileFormat_yml': 'portal_objects/file_format.yml',
'ReferenceFile_yml': 'portal_objects/file_reference.yml',
'ReferenceGenome_yml': 'portal_objects/reference_genome.yml',
# folders
'Workflow': 'portal_objects/workflows',
'MetaWorkflow': 'portal_objects/metaworkflows',
Expand Down Expand Up @@ -206,7 +209,7 @@ def _yaml_to_json(self, data_yaml, YAMLClass, **kwargs):

def _post_patch_file(self, type):
"""
'Software', 'FileFormat', 'ReferenceFile'
'Software', 'FileFormat', 'ReferenceFile', 'ReferenceGenome'
"""
logger.info(f'@ {type}...')

Expand Down Expand Up @@ -414,6 +417,10 @@ def run_post_patch(self):
if self.post_file_reference:
self._post_patch_file('ReferenceFile')

# ReferenceGenome
if self.post_reference_genome:
self._post_patch_file('ReferenceGenome')

# Workflow
if self.post_workflow:
self._post_patch_folder('Workflow')
Expand Down
16 changes: 8 additions & 8 deletions pipeline_utils/schemas/yaml_file_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,14 @@
schema.DESCRIPTION: 'Extension of the FileFormat',
schema.TYPE: schema.STRING
},
# 'file_types': {
# schema.DESCRIPTION: 'File types that can use the FileFormat',
# schema.TYPE: schema.ARRAY,
# schema.ITEMS: {
# schema.TYPE: schema.STRING,
# schema.PATTERN: 'ReferenceFile|FileProcessed|FileSubmitted|FileFastq'
# }
# },
'file_types': {
schema.DESCRIPTION: 'File types that can use the FileFormat',
schema.TYPE: schema.ARRAY,
schema.ITEMS: {
schema.TYPE: schema.STRING,
schema.PATTERN: 'ReferenceFile|FileProcessed|FileSubmitted|FileFastq'
}
},
'status': {
schema.TYPE: schema.STRING
},
Expand Down
32 changes: 32 additions & 0 deletions pipeline_utils/schemas/yaml_reference_genome.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from pipeline_utils.schemas import schema

# Schema describing the YAML document for a ReferenceGenome object.
# It is the schema handed to the base class by YAMLReferenceGenome.__init__.
# NOTE(review): YAMLReferenceGenome.to_json also reads the optional 'uuid' and
#   'accession' attributes and the test fixture sets 'uuid', but neither key
#   is declared below; presumably undeclared keys are accepted by the
#   validator - confirm.
yaml_reference_genome_schema = {
    ## Schema #########################
    schema.SCHEMA: 'https://json-schema.org/draft/2020-12/schema',
    schema.ID: '/schemas/YAMLReferenceGenome',
    schema.TITLE: 'YAMLReferenceGenome',
    schema.DESCRIPTION: 'Schema to validate a YAML description of a ReferenceGenome',
    schema.TYPE: schema.OBJECT,
    schema.PROPERTIES: {
        # lower-cased by to_json to build the "identifier" field
        'name': {
            schema.DESCRIPTION: 'Name of the ReferenceGenome',
            schema.TYPE: schema.STRING
        },
        # combined with 'name' by to_json to build the alias and the title
        'version': {
            schema.DESCRIPTION: 'Version of the ReferenceGenome',
            schema.TYPE: schema.STRING
        },
        # copied unchanged into the JSON by to_json
        'code': {
            schema.DESCRIPTION: 'Code for the ReferenceGenome',
            schema.TYPE: schema.STRING
        },
        # optional list of strings; to_json replaces "@" with "_" in each
        # entry to build a ReferenceFile alias
        'files': {
            schema.DESCRIPTION: 'Associated reference files',
            schema.TYPE: schema.ARRAY,
            schema.ITEMS: {
                schema.TYPE: schema.STRING
            }
        }
    },
    # 'files' is the only declared property that is not required
    schema.REQUIRED: ['name', 'version', 'code']
}
12 changes: 12 additions & 0 deletions tests/repo_correct/portal_objects/reference_genome.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
################################################################
# GRCh38 Genome Reference
################################################################
name: GRCh38
version: GCA_000001405.15
files:
- complete-reference-fasta-no-alt@GCA_000001405.15_GRCh38_no_decoy
- complete-reference-bwt-no-alt@GCA_000001405.15_GRCh38_no_decoy
# Required for displaying in the file name
code: GRCh38
# This is required to sync with a previously generated object
uuid: e89937e6-80d3-4605-8dea-4a74c7981a9f
12 changes: 12 additions & 0 deletions tests/repo_correct/portal_objects/software.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ source_url: 'http:/broad'
description: gatk software package
category:
- Aligner
code: pippo

---

Expand All @@ -19,3 +20,14 @@ category:
commit: 324ePT
uuid: efdac7ec-7da3-4f23-9056-7a04abbc5e8b
accession: GAPMKF1LL29K

---

# Sentieon
name: Sentieon_BWA-MEM
code: sentieon_bwamem
version: '202308.01'
source_url: https://www.sentieon.com
category:
- Alignment
uuid: b42e44e5-a829-4687-aeff-65cd040b1528
6 changes: 3 additions & 3 deletions tests/test_yaml_file_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ def test_file_format():
"submission_centers": ["hms-dbmi"],
"consortia": ["cgap-core"],
"standard_file_extension": "bam",
"status": "obsolete"
# "valid_item_types": ["ReferenceFile", "FileProcessed"]
"status": "obsolete",
"valid_item_types": ["ReferenceFile", "OutputFile"]
},
{
"accession": 'GAPFIXRDPDK1',
Expand All @@ -32,7 +32,7 @@ def test_file_format():
"consortia": ["cgap-core"],
"standard_file_extension": "bam.bai",
"status": "released",
# "valid_item_types": ["ReferenceFile", "FileProcessed"],
"valid_item_types": ["ReferenceFile", "OutputFile"],
"uuid": '1936f246-22e1-45dc-bb5c-9cfd55537fe9'
}
]
Expand Down
36 changes: 36 additions & 0 deletions tests/test_yaml_reference_genome.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#################################################################
# Libraries
#################################################################
import sys, os
import pytest
from pipeline_utils.lib import yaml_parser

#################################################################
# Tests
#################################################################
def test_software():
    """Check the JSON built by YAMLReferenceGenome for the reference genome fixture.

    Every document in ``reference_genome.yaml`` is converted with
    ``to_json`` and compared with the expected dictionary at the same
    position.

    NOTE(review): the name ``test_software`` looks like a leftover from the
    Software test module; it is kept here so the pytest node id does not
    change - consider renaming to ``test_reference_genome``.
    """
    expected = [
        {
            "code": "GRCh38",
            "title": "GRCh38 [GCA_000001405.15]",
            "consortia": ["cgap-core"],
            "identifier": "grch38",
            "submission_centers": ["hms-dbmi"],
            "uuid": "e89937e6-80d3-4605-8dea-4a74c7981a9f",
            "files": [
                "cgap-core:ReferenceFile-complete-reference-fasta-no-alt_GCA_000001405.15_GRCh38_no_decoy",
                "cgap-core:ReferenceFile-complete-reference-bwt-no-alt_GCA_000001405.15_GRCh38_no_decoy"
            ],
            "aliases": ["cgap-core:ReferenceGenome-GRCh38_GCA_000001405.15"]
        },
    ]

    documents = list(yaml_parser.load_yaml('tests/repo_correct/portal_objects/reference_genome.yaml'))

    # without this check the loop below would pass with an empty fixture
    # and fail with a bare IndexError if the fixture grew
    assert len(documents) == len(expected)

    for document, want in zip(documents, expected):
        # creating JSON object
        got = yaml_parser.YAMLReferenceGenome(document).to_json(
            submission_centers=["hms-dbmi"],
            consortia=["cgap-core"]
        )
        # check
        assert got == want
18 changes: 16 additions & 2 deletions tests/test_yaml_software.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ def test_software():
"source_url": "http:/broad",
"title": "gatk 4.1.2",
"version": "4.1.2",
"category": ["Aligner"]
"category": ["Aligner"],
"code": "pippo"
},
{
"accession": "GAPMKF1LL29K",
Expand All @@ -30,10 +31,23 @@ def test_software():
"submission_centers": ["hms-dbmi"],
"name": "picard",
"consortia": ["cgap-core"],
"title": "picard [324ePT]",
"title": "picard",
"uuid": "efdac7ec-7da3-4f23-9056-7a04abbc5e8b",
"category": ["Variant Caller"]
},
{
"aliases": ["cgap-core:Software-Sentieon_BWA-MEM_202308.01"],
"version": "202308.01",
"submission_centers": ["hms-dbmi"],
"name": "Sentieon_BWA-MEM",
"consortia": ["cgap-core"],
"title": "Sentieon BWA-MEM",
"uuid": "b42e44e5-a829-4687-aeff-65cd040b1528",
"category": ["Alignment"],
"code": "sentieon_bwamem",
"source_url": "https://www.sentieon.com",
}

]

for i, d in enumerate(yaml_parser.load_yaml('tests/repo_correct/portal_objects/software.yaml')):
Expand Down

0 comments on commit b6c4003

Please sign in to comment.