Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

. #2

Merged
merged 3 commits into from
Feb 27, 2024
Merged

. #2

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pipeline_utils/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def main(args=None):
pipeline_deploy_parser.add_argument('--post-software', action='store_true', help='POST|PATCH Software objects')
pipeline_deploy_parser.add_argument('--post-file-format', action='store_true', help='POST|PATCH FileFormat objects')
pipeline_deploy_parser.add_argument('--post-file-reference', action='store_true', help='POST|PATCH ReferenceFile objects')
pipeline_deploy_parser.add_argument('--post-reference-genome', action='store_true', help='POST|PATCH ReferenceGenome objects')
pipeline_deploy_parser.add_argument('--post-workflow', action='store_true', help='POST|PATCH Workflow objects')
pipeline_deploy_parser.add_argument('--post-metaworkflow', action='store_true', help='POST|PATCH MetaWorkflow objects')
pipeline_deploy_parser.add_argument('--post-wfl', action='store_true', help='Upload Workflow Description files (.cwl, .wdl)')
Expand Down
82 changes: 72 additions & 10 deletions pipeline_utils/lib/yaml_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from pipeline_utils.schemas.yaml_software import yaml_software_schema
from pipeline_utils.schemas.yaml_reference_file import yaml_reference_file_schema
from pipeline_utils.schemas.yaml_file_format import yaml_file_format_schema
from pipeline_utils.schemas.yaml_reference_genome import yaml_reference_genome_schema


###############################################################
Expand Down Expand Up @@ -118,8 +119,11 @@ class YAMLTemplate(object):
METAWORKFLOW_TYPE_SCHEMA = 'MetaWorkflow'
FILEFORMAT_TYPE_SCHEMA = 'FileFormat'
REFERENCEFILE_TYPE_SCHEMA = 'ReferenceFile'
REFERENCEGENOME_TYPE_SCHEMA = 'ReferenceGenome'
SOFTWARE_TYPE_SCHEMA = 'Software'
VARIANT_TYPE_SCHEMA = "variant_type"
CODE_SCHEMA = 'code'
IDENTIFIER_SCHEMA = 'identifier'

def __init__(self, data, schema):
"""Constructor method.
Expand All @@ -145,17 +149,23 @@ def _clean_newline(self, line):
line = line.replace('|', '')
return line

def _link_title(self, name, version):
def _link_title(self, name, version=None):
"""Helper to create a "title" field.
"""
title = getattr(self, self.TITLE_SCHEMA, None)
if title:
if version in title:
return title
if version:
if version in title:
return title
else:
return f'{title} [{version}]'
else:
return f'{title} [{version}]'
return title
else:
return f'{name.replace("_", " ")} [{version}]'
if version:
return f'{name.replace("_", " ")} [{version}]'
else:
return f'{name.replace("_", " ")}'

def _string_consortia(self, consortia):
"""Helper to create a string from "consortia" field.
Expand Down Expand Up @@ -549,7 +559,7 @@ def to_json(
if getattr(self, self.SOURCE_URL_SCHEMA, None):
sftwr_json[self.SOURCE_URL_SCHEMA] = self.source_url

sftwr_json[self.TITLE_SCHEMA] = self._link_title(self.name, version)
sftwr_json[self.TITLE_SCHEMA] = self._link_title(self.name)
sftwr_json[self.ALIASES_SCHEMA] = [f'{self._string_consortia(consortia)}:{self.SOFTWARE_TYPE_SCHEMA}-{self.name}_{version}']

# uuid, accession if specified
Expand All @@ -562,6 +572,10 @@ def to_json(
if getattr(self, self.LICENSE_SCHEMA, None):
sftwr_json[self.LICENSE_SCHEMA] = self.license

# code
if getattr(self, self.CODE_SCHEMA, None):
sftwr_json[self.CODE_SCHEMA] = self.code

return sftwr_json


Expand Down Expand Up @@ -638,11 +652,10 @@ class YAMLFileFormat(YAMLTemplate):
"""

# schema constants
IDENTIFIER_SCHEMA = 'identifier'
STANDARD_FILE_EXTENSION_SCHEMA = 'standard_file_extension'
# VALID_ITEM_TYPES_SCHEMA = 'valid_item_types'
VALID_ITEM_TYPES_SCHEMA = 'valid_item_types'
EXTRA_FILE_FORMATS_SCHEMA = 'extra_file_formats'
# FILE_TYPES_SCHEMA = 'file_types'
FILE_TYPES_SCHEMA = 'file_types'

def __init__(self, data):
"""Constructor method.
Expand Down Expand Up @@ -672,7 +685,7 @@ def to_json(
frmt_json[self.CONSORTIA_SCHEMA] = consortia
frmt_json[self.DESCRIPTION_SCHEMA] = self.description
frmt_json[self.STANDARD_FILE_EXTENSION_SCHEMA] = self.extension
# frmt_json[self.VALID_ITEM_TYPES_SCHEMA] = getattr(self, self.FILE_TYPES_SCHEMA, ['ReferenceFile', 'FileProcessed'])
frmt_json[self.VALID_ITEM_TYPES_SCHEMA] = getattr(self, self.FILE_TYPES_SCHEMA, ['ReferenceFile', 'OutputFile'])
# check for secondary formats
if getattr(self, self.SECONDARY_FORMATS_SCHEMA, None):
frmt_json[self.EXTRA_FILE_FORMATS_SCHEMA] = getattr(self, self.SECONDARY_FORMATS_SCHEMA)
Expand All @@ -685,3 +698,52 @@ def to_json(
frmt_json[self.ACCESSION_SCHEMA] = self.accession

return frmt_json

###############################################################
# YAMLReferenceGenome, YAML ReferenceGenome
###############################################################
class YAMLReferenceGenome(YAMLTemplate):
    """Class to work with YAML documents representing ReferenceGenome objects.

    The document is validated against ``yaml_reference_genome_schema`` when
    the object is created, and every top-level key of the document is
    exposed as an instance attribute.
    """

    def __init__(self, data):
        """Constructor method.

        :param data: Parsed YAML document describing one ReferenceGenome;
            its key/value pairs become attributes of the instance
        """
        super().__init__(data, yaml_reference_genome_schema)
        # validate data with schema before loading any attribute
        self._validate()
        # load attributes
        for attr_name in data:
            setattr(self, attr_name, data[attr_name])

    def to_json(
                self,
                submission_centers,  # alias list
                consortia  # alias list
                ):
        """Function to build the corresponding object in JSON format.

        :param submission_centers: Value stored as-is under the
            submission centers key
        :param consortia: Value stored as-is under the consortia key and
            also used to build the alias prefix
        :return: Dictionary with the ReferenceGenome metadata
        """
        # common metadata, always present
        genome = {
            self.IDENTIFIER_SCHEMA: self.name,
            self.ALIASES_SCHEMA: [
                f'{self._string_consortia(consortia)}:{self.REFERENCEGENOME_TYPE_SCHEMA}-{self.name}_{self.version}'
            ],
            self.SUBMISSION_CENTERS_SCHEMA: submission_centers,
            self.CONSORTIA_SCHEMA: consortia,
            self.TITLE_SCHEMA: self._link_title(self.name, self.version),
            self.CODE_SCHEMA: self.code,
        }

        # uuid, accession only if specified in the document
        if getattr(self, self.UUID_SCHEMA, None):
            genome[self.UUID_SCHEMA] = self.uuid
        if getattr(self, self.ACCESSION_SCHEMA, None):
            genome[self.ACCESSION_SCHEMA] = self.accession

        # linked files are turned into ReferenceFile aliases,
        # with "@" in each entry replaced by "_"
        if getattr(self, self.FILES_SCHEMA, None):
            genome[self.FILES_SCHEMA] = [
                f'{self._string_consortia(consortia)}:{self.REFERENCEFILE_TYPE_SCHEMA}-{linked.replace("@", "_")}'
                for linked in self.files
            ]

        return genome
9 changes: 8 additions & 1 deletion pipeline_utils/pipeline_deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ def __init__(self, args, repo, version='VERSION', pipeline='PIPELINE'):
'Software': yaml_parser.YAMLSoftware,
'FileFormat': yaml_parser.YAMLFileFormat,
'ReferenceFile': yaml_parser.YAMLReferenceFile,
'ReferenceGenome': yaml_parser.YAMLReferenceGenome,
'Workflow': yaml_parser.YAMLWorkflow,
'MetaWorkflow': yaml_parser.YAMLMetaWorkflow
}
Expand All @@ -84,10 +85,12 @@ def __init__(self, args, repo, version='VERSION', pipeline='PIPELINE'):
'Software': 'portal_objects/software.yaml',
'FileFormat': 'portal_objects/file_format.yaml',
'ReferenceFile': 'portal_objects/file_reference.yaml',
'ReferenceGenome': 'portal_objects/reference_genome.yaml',
# .yml files
'Software_yml': 'portal_objects/software.yml',
'FileFormat_yml': 'portal_objects/file_format.yml',
'ReferenceFile_yml': 'portal_objects/file_reference.yml',
'ReferenceGenome_yml': 'portal_objects/reference_genome.yml',
# folders
'Workflow': 'portal_objects/workflows',
'MetaWorkflow': 'portal_objects/metaworkflows',
Expand Down Expand Up @@ -206,7 +209,7 @@ def _yaml_to_json(self, data_yaml, YAMLClass, **kwargs):

def _post_patch_file(self, type):
"""
'Software', 'FileFormat', 'ReferenceFile'
'Software', 'FileFormat', 'ReferenceFile', 'ReferenceGenome'
"""
logger.info(f'@ {type}...')

Expand Down Expand Up @@ -414,6 +417,10 @@ def run_post_patch(self):
if self.post_file_reference:
self._post_patch_file('ReferenceFile')

# ReferenceGenome
if self.post_reference_genome:
self._post_patch_file('ReferenceGenome')

# Workflow
if self.post_workflow:
self._post_patch_folder('Workflow')
Expand Down
16 changes: 8 additions & 8 deletions pipeline_utils/schemas/yaml_file_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,14 @@
schema.DESCRIPTION: 'Extension of the FileFormat',
schema.TYPE: schema.STRING
},
# 'file_types': {
# schema.DESCRIPTION: 'File types that can use the FileFormat',
# schema.TYPE: schema.ARRAY,
# schema.ITEMS: {
# schema.TYPE: schema.STRING,
# schema.PATTERN: 'ReferenceFile|FileProcessed|FileSubmitted|FileFastq'
# }
# },
'file_types': {
schema.DESCRIPTION: 'File types that can use the FileFormat',
schema.TYPE: schema.ARRAY,
schema.ITEMS: {
schema.TYPE: schema.STRING,
schema.PATTERN: 'ReferenceFile|OutputFile|AlignedReads|UnalignedReads|VariantCalls'
}
},
'status': {
schema.TYPE: schema.STRING
},
Expand Down
32 changes: 32 additions & 0 deletions pipeline_utils/schemas/yaml_reference_genome.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from pipeline_utils.schemas import schema

# Schema used to validate the YAML description of a ReferenceGenome.
# 'name', 'version' and 'code' are mandatory; 'files' is optional.
yaml_reference_genome_schema = {
    # --- schema header ---
    schema.SCHEMA: 'https://json-schema.org/draft/2020-12/schema',
    schema.ID: '/schemas/YAMLReferenceGenome',
    schema.TITLE: 'YAMLReferenceGenome',
    schema.DESCRIPTION: 'Schema to validate a YAML description of a ReferenceGenome',
    schema.TYPE: schema.OBJECT,
    # --- properties ---
    schema.PROPERTIES: {
        'name': {
            schema.DESCRIPTION: 'Name of the ReferenceGenome',
            schema.TYPE: schema.STRING,
        },
        'version': {
            schema.DESCRIPTION: 'Version of the ReferenceGenome',
            schema.TYPE: schema.STRING,
        },
        'code': {
            schema.DESCRIPTION: 'Code for the ReferenceGenome',
            schema.TYPE: schema.STRING,
        },
        # list of strings, one per associated reference file
        'files': {
            schema.DESCRIPTION: 'Associated reference files',
            schema.TYPE: schema.ARRAY,
            schema.ITEMS: {
                schema.TYPE: schema.STRING,
            },
        },
    },
    # --- required keys ---
    schema.REQUIRED: ['name', 'version', 'code'],
}
12 changes: 12 additions & 0 deletions tests/repo_correct/portal_objects/reference_genome.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
################################################################
# GRCh38 Genome Reference
################################################################
name: GRCh38
version: GCA_000001405.15
files:
- complete-reference-fasta-no-alt@GCA_000001405.15_GRCh38_no_decoy
- complete-reference-bwt-no-alt@GCA_000001405.15_GRCh38_no_decoy
# Required for displaying in the file name
code: GRCh38
# This is required to sync with a previously generated object
uuid: e89937e6-80d3-4605-8dea-4a74c7981a9f
12 changes: 12 additions & 0 deletions tests/repo_correct/portal_objects/software.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ source_url: 'http:/broad'
description: gatk software package
category:
- Aligner
code: pippo

---

Expand All @@ -19,3 +20,14 @@ category:
commit: 324ePT
uuid: efdac7ec-7da3-4f23-9056-7a04abbc5e8b
accession: GAPMKF1LL29K

---

# Sentieon
name: Sentieon_BWA-MEM
code: sentieon_bwamem
version: '202308.01'
source_url: https://www.sentieon.com
category:
- Alignment
uuid: b42e44e5-a829-4687-aeff-65cd040b1528
6 changes: 3 additions & 3 deletions tests/test_yaml_file_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ def test_file_format():
"submission_centers": ["hms-dbmi"],
"consortia": ["cgap-core"],
"standard_file_extension": "bam",
"status": "obsolete"
# "valid_item_types": ["ReferenceFile", "FileProcessed"]
"status": "obsolete",
"valid_item_types": ["ReferenceFile", "OutputFile"]
},
{
"accession": 'GAPFIXRDPDK1',
Expand All @@ -32,7 +32,7 @@ def test_file_format():
"consortia": ["cgap-core"],
"standard_file_extension": "bam.bai",
"status": "released",
# "valid_item_types": ["ReferenceFile", "FileProcessed"],
"valid_item_types": ["ReferenceFile", "OutputFile"],
"uuid": '1936f246-22e1-45dc-bb5c-9cfd55537fe9'
}
]
Expand Down
36 changes: 36 additions & 0 deletions tests/test_yaml_reference_genome.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#################################################################
# Libraries
#################################################################
import sys, os
import pytest
from pipeline_utils.lib import yaml_parser

#################################################################
# Tests
#################################################################
def test_software():
    """Check the JSON built by YAMLReferenceGenome against the expected objects.

    Every document loaded from the reference_genome.yaml fixture is converted
    with ``to_json`` and compared with the expected dictionary at the same
    position. The number of documents is asserted explicitly so the test
    cannot pass without comparing anything.

    NOTE(review): the function name looks carried over from the Software
    tests; it is kept unchanged here.
    """
    expected = [
        {"code": "GRCh38",
         "title": "GRCh38 [GCA_000001405.15]",
         "consortia": ["cgap-core"],
         "identifier": "GRCh38",
         "submission_centers": ["hms-dbmi"],
         "uuid": "e89937e6-80d3-4605-8dea-4a74c7981a9f",
         "files": [
             "cgap-core:ReferenceFile-complete-reference-fasta-no-alt_GCA_000001405.15_GRCh38_no_decoy",
             "cgap-core:ReferenceFile-complete-reference-bwt-no-alt_GCA_000001405.15_GRCh38_no_decoy"
         ],
         "aliases": ["cgap-core:ReferenceGenome-GRCh38_GCA_000001405.15"]}
    ]

    documents = list(yaml_parser.load_yaml('tests/repo_correct/portal_objects/reference_genome.yaml'))

    # a mismatch in count would otherwise go unnoticed (too few documents)
    # or surface as an IndexError instead of a test failure (too many)
    assert len(documents) == len(expected)

    for document, expected_json in zip(documents, expected):
        # creating JSON object
        actual_json = yaml_parser.YAMLReferenceGenome(document).to_json(
            submission_centers=["hms-dbmi"],
            consortia=["cgap-core"]
        )
        # check
        assert actual_json == expected_json
18 changes: 16 additions & 2 deletions tests/test_yaml_software.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ def test_software():
"source_url": "http:/broad",
"title": "gatk 4.1.2",
"version": "4.1.2",
"category": ["Aligner"]
"category": ["Aligner"],
"code": "pippo"
},
{
"accession": "GAPMKF1LL29K",
Expand All @@ -30,10 +31,23 @@ def test_software():
"submission_centers": ["hms-dbmi"],
"name": "picard",
"consortia": ["cgap-core"],
"title": "picard [324ePT]",
"title": "picard",
"uuid": "efdac7ec-7da3-4f23-9056-7a04abbc5e8b",
"category": ["Variant Caller"]
},
{
"aliases": ["cgap-core:Software-Sentieon_BWA-MEM_202308.01"],
"version": "202308.01",
"submission_centers": ["hms-dbmi"],
"name": "Sentieon_BWA-MEM",
"consortia": ["cgap-core"],
"title": "Sentieon BWA-MEM",
"uuid": "b42e44e5-a829-4687-aeff-65cd040b1528",
"category": ["Alignment"],
"code": "sentieon_bwamem",
"source_url": "https://www.sentieon.com",
}

]

for i, d in enumerate(yaml_parser.load_yaml('tests/repo_correct/portal_objects/software.yaml')):
Expand Down
Loading