Skip to content

Commit

Permalink
Update WGS Germline GATK Janis metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
rlupat committed Nov 12, 2021
1 parent 9736ac9 commit f7f887b
Showing 1 changed file with 40 additions and 2 deletions.
42 changes: 40 additions & 2 deletions janis_pipelines/wgs_germline_gatk/wgsgermlinegatk.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import operator
import os
from datetime import date
from typing import Optional, List

from janis_bioinformatics.data_types import FastqGzPair, Bam, Vcf, CompressedVcf, BamBai
from janis_bioinformatics.tools import BioinformaticsTool
from janis_bioinformatics.tools.babrahambioinformatics import FastQC_0_11_8
from janis_bioinformatics.tools.common import BwaAligner, MergeAndMarkBams_4_1_3
from janis_bioinformatics.tools.pmac import ParseFastqcAdaptors
from janis_core import String, Array, File
from janis_core import String, Array, File, WorkflowMetadata
from janis_core.tool.test_classes import (
TTestCase,
TTestExpectedOutput,
Expand All @@ -26,7 +27,7 @@ def id(self):
return "WGSGermlineGATK"

def friendly_name(self) -> str:
    """Return the human-readable display name of this workflow.

    The previous short title ("WGS Germline (GATK)") was still returned
    first, which made the updated title unreachable. Only the current
    title is returned now.
    """
    return "Janis Germline Variant-Calling Workflow (GATK)"

def constructor(self):
self.add_inputs()
Expand Down Expand Up @@ -148,6 +149,43 @@ def tests(self) -> Optional[List[TTestCase]]:
)
]

def bind_metadata(self):
    """Fill in the descriptive metadata attached to this workflow.

    Writes keywords, contributors, creation/update dates, the short and
    long documentation strings, and the sample input overrides onto
    ``self.metadata``. Nothing is returned.
    """
    md: WorkflowMetadata = self.metadata

    # Long-form description; kept flush-left so no indentation leaks into
    # the stored text.
    long_description = """\
This is a genomics pipeline to do a single germline sample variant-calling, adapted from GATK Best Practice Workflow.
This workflow is a reference pipeline for using the Janis Python framework (pipelines assistant).
- Alignment: bwa-mem
- Variant-Calling: GATK HaplotypeCaller
- Outputs the final variants in the VCF format.
**Resources**
This pipeline has been tested using the HG38 reference set, available on Google Cloud Storage through:
- https://console.cloud.google.com/storage/browser/genomics-public-data/references/hg38/v0/
This pipeline expects the assembly references to be as they appear in that storage \
(".fai", ".amb", ".ann", ".bwt", ".pac", ".sa", "^.dict").
The known sites (snps_dbsnp, snps_1000gp, known_indels, mills_indels) should be gzipped and tabix indexed.
"""

    # Example FASTQ pairs: the primary pair followed by a top-up pair.
    example_fastqs = [
        ["sample1_R1.fastq.gz", "sample1_R2.fastq.gz"],
        ["sample1_R1-TOPUP.fastq.gz", "sample1_R2-TOPUP.fastq.gz"],
    ]

    # Example file names for the reference and known-sites inputs.
    example_resources = {
        "reference": "Homo_sapiens_assembly38.fasta",
        "snps_dbsnp": "Homo_sapiens_assembly38.dbsnp138.vcf.gz",
        "snps_1000gp": "1000G_phase1.snps.high_confidence.hg38.vcf.gz",
        "known_indels": "Homo_sapiens_assembly38.known_indels.vcf.gz",
        "mills_indels": "Mills_and_1000G_gold_standard.indels.hg38.vcf.gz",
    }

    md.keywords = ["wgs", "cancer", "germline", "variants", "gatk"]
    md.contributors = ["Richard Lupat", "Michael Franklin", "Jiaan Yu"]
    md.dateCreated = date(2018, 12, 24)
    md.dateUpdated = date(2021, 11, 10)
    md.short_documentation = (
        "A variant-calling pipeline using the GATK HaplotypeCaller"
    )
    md.documentation = long_description
    md.sample_input_overrides = {"fastqs": example_fastqs, **example_resources}

if __name__ == "__main__":
# from toolbuilder.runtest.runner import run_test_case, EngineType
Expand Down

0 comments on commit f7f887b

Please sign in to comment.