diff --git a/janis_pipelines/wgs_germline_gatk/wgsgermlinegatk.py b/janis_pipelines/wgs_germline_gatk/wgsgermlinegatk.py
index 667ff2a..b37507a 100644
--- a/janis_pipelines/wgs_germline_gatk/wgsgermlinegatk.py
+++ b/janis_pipelines/wgs_germline_gatk/wgsgermlinegatk.py
@@ -1,5 +1,6 @@
 import operator
 import os
+from datetime import date
 from typing import Optional, List
 
 from janis_bioinformatics.data_types import FastqGzPair, Bam, Vcf, CompressedVcf, BamBai
@@ -7,7 +8,7 @@ from janis_bioinformatics.tools.babrahambioinformatics import FastQC_0_11_8
 from janis_bioinformatics.tools.common import BwaAligner, MergeAndMarkBams_4_1_3
 from janis_bioinformatics.tools.pmac import ParseFastqcAdaptors
 
-from janis_core import String, Array, File
+from janis_core import String, Array, File, WorkflowMetadata
 from janis_core.tool.test_classes import (
     TTestCase,
     TTestExpectedOutput,
@@ -26,7 +27,7 @@ def id(self):
         return "WGSGermlineGATK"
 
     def friendly_name(self):
-        return "WGS Germline (GATK)"
+        return "Janis Germline Variant-Calling Workflow (GATK)"
 
     def constructor(self):
         self.add_inputs()
@@ -148,6 +149,57 @@ def tests(self) -> Optional[List[TTestCase]]:
             )
         ]
 
+    def bind_metadata(self):
+        """Fill in the descriptive metadata for this workflow.
+
+        Sets keywords, contributors, creation/update dates, the short and
+        long documentation, and sample input overrides on ``self.metadata``.
+        Nothing is returned; the metadata object is updated in place.
+        """
+        # Short alias for the metadata object populated below.
+        meta: WorkflowMetadata = self.metadata
+
+        meta.keywords = ["wgs", "cancer", "germline", "variants", "gatk"]
+        meta.contributors = ["Richard Lupat", "Michael Franklin", "Jiaan Yu"]
+        meta.dateCreated = date(2018, 12, 24)
+        meta.dateUpdated = date(2021, 11, 10)
+        meta.short_documentation = "A variant-calling pipeline using the GATK HaplotypeCaller"
+        # The long documentation uses Markdown-like markup and is written
+        # flush-left because any indentation would become part of the string.
+        # The backslash after the opening quotes avoids a leading blank line, and
+        # the backslash ending the "storage" line joins it with the next line.
+        meta.documentation = """\
+This is a genomics pipeline to do a single germline sample variant-calling, adapted from GATK Best Practice Workflow.
+
+This workflow is a reference pipeline for using the Janis Python framework (pipelines assistant).
+- Alignment: bwa-mem
+- Variant-Calling: GATK HaplotypeCaller
+- Outputs the final variants in the VCF format. 
+
+**Resources**
+
+This pipeline has been tested using the HG38 reference set, available on Google Cloud Storage through:
+
+- https://console.cloud.google.com/storage/browser/genomics-public-data/references/hg38/v0/
+
+This pipeline expects the assembly references to be as they appear in that storage \
+ (".fai", ".amb", ".ann", ".bwt", ".pac", ".sa", "^.dict").
+The known sites (snps_dbsnp, snps_1000gp, known_indels, mills_indels) should be gzipped and tabix indexed.
+"""
+        # Example file names for several inputs; "fastqs" holds two read pairs.
+        # NOTE(review): presumably keyed by workflow input name and substituted
+        # when sample inputs are generated; confirm against WorkflowMetadata.
+        meta.sample_input_overrides = {
+            "fastqs": [
+                ["sample1_R1.fastq.gz", "sample1_R2.fastq.gz"],
+                ["sample1_R1-TOPUP.fastq.gz", "sample1_R2-TOPUP.fastq.gz"],
+            ],
+            "reference": "Homo_sapiens_assembly38.fasta",
+            "snps_dbsnp": "Homo_sapiens_assembly38.dbsnp138.vcf.gz",
+            "snps_1000gp": "1000G_phase1.snps.high_confidence.hg38.vcf.gz",
+            "known_indels": "Homo_sapiens_assembly38.known_indels.vcf.gz",
+            "mills_indels": "Mills_and_1000G_gold_standard.indels.hg38.vcf.gz",
+        }
 
 if __name__ == "__main__":
     # from toolbuilder.runtest.runner import run_test_case, EngineType