From 094e68f838ec4fa45385e2bddc5275cf72d41d9e Mon Sep 17 00:00:00 2001 From: "Juan E. Arango Ossa" Date: Tue, 23 Jan 2024 13:37:37 -0500 Subject: [PATCH 01/20] =?UTF-8?q?=F0=9F=94=A7=20first=20attempt=20to=20add?= =?UTF-8?q?=20conditional=20matched?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main.nf | 39 ++++++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/main.nf b/main.nf index 9660060..db731df 100644 --- a/main.nf +++ b/main.nf @@ -17,6 +17,8 @@ log.info """\ ---------------------------------------- tumor : ${params.tumor} tumorBam : ${params.tumorBam} + normal : ${params.normal} + normalBam : ${params.normalBam} outdir : ${params.outdir} cores : ${params.cores} ======================================== @@ -38,17 +40,23 @@ process runAmber { input: val tumor path tumorBam + optional path normal + optional path normalBam output: path "${tumor}.amber.baf.tsv.gz", emit: amber_baf_tsv path "${tumor}.amber.baf.pcf", emit: amber_baf_pcf path "${tumor}.amber.qc", emit: amber_qc + optional path "${tumor}.amber.contamination.vcf.gz", emit: amber_contamination_vcf + optional path "${normal}.amber.snp.vcf.gz", emit: amber_normal_snp_vcf + optional path "${normal}.amber.homozygousregion.tsv", emit: amber_normal_homozygousregion_tsv script: """ amber \ -tumor ${tumor} \ -tumor_bam ${tumorBam} \ + ${normal ? "-reference " + normal + " \\\n-reference_bam " + normalBam : ""} \ -output_dir \$PWD \ -threads ${params.cores} \ -loci ${params.loci} \ @@ -66,20 +74,23 @@ process runCobalt { input: val tumor path tumorBam + optional path normal + optional path normalBam output: path "${tumor}.cobalt.ratio.tsv.gz", emit: cobalt_ratio_tsv path "${tumor}.cobalt.ratio.pcf", emit: cobalt_ratio_pcf + optional path "${reference}.cobalt.ratio.pcf", emit: cobalt_normal_ratio_pcf script: """ cobalt \ -tumor ${tumor} \ -tumor_bam ${tumorBam} \ + ${normal ? "-reference " + normal + " \\\n-reference_bam " + normalBam : "-tumor_only_diploid_bed * params.diploidRegions} \ -output_dir \$PWD \ -threads ${params.cores} \ - -gc_profile ${params.gcProfile} \ - -tumor_only_diploid_bed ${params.diploidRegions} + -gc_profile ${params.gcProfile} """.stripIndent() } @@ -92,11 +103,17 @@ process runPurple { input: val tumor + optional path normal path amber_baf_tsv path amber_baf_pcf path amber_qc + optional path amber_contamination_vcf + optional path amber_normal_snp_vcf path cobalt_ratio_tsv path cobalt_ratio_pcf + optional path amber_normal_homozygousregion_tsv + optional path cobalt_normal_ratio_pcf + output: path "${tumor}.purple.purity.tsv", emit: purple_purity_tsv @@ -132,7 +149,19 @@ workflow { tumor = Channel.value(params.tumor) tumorBam = Channel.fromPath(params.tumorBam) - runAmber(tumor, tumorBam) - runCobalt(tumor, tumorBam) - runPurple(tumor, runAmber.out, runCobalt.out) + normal = params.normal ? Channel.fromPath(params.normal) : Channel.empty() + normalBam = params.normal_bam ? Channel.fromPath(params.normal_bam) : Channel.empty() + + runAmber(tumor, tumorBam, normal, normalBam) + runCobalt(tumor, tumorBam, normal, normalBam) + runPurple(tumor, normal, runAmber.out, runCobalt.out) } + + +workflow.onComplete { + log.info ( + workflow.success + ? "\nDone! Purple ran successfully. See the results in: ${params.outdir}\n" + : "\nOops .. something went wrong\n" + ) +} \ No newline at end of file From dc1d0189f2aeed80fc3831dde4fda4bae3b837b0 Mon Sep 17 00:00:00 2001 From: "Juan E. Arango Ossa" Date: Tue, 23 Jan 2024 17:23:51 -0500 Subject: [PATCH 02/20] =?UTF-8?q?=E2=9C=85=20works=20for=20matched,=20but?= =?UTF-8?q?=20not=20unmatched?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 10 ++++ main.nf | 52 +++++++++--------- nextflow.config | 2 +- tests/main.nf.test | 4 +- tests/main.runamber.nf.test | 8 ++- tests/main.runamber.nf.test.snap | 32 +++++++++--- tests/main.runcobalt.nf.test | 8 ++- tests/main.runcobalt.nf.test.snap | 16 ++++-- tests/main.runpurple.nf.test | 21 ++++---- tests/main.runpurple.nf.test.snap | 87 ------------------------------- 10 files changed, 102 insertions(+), 138 deletions(-) delete mode 100644 tests/main.runpurple.nf.test.snap diff --git a/README.md b/README.md index c8489bc..31c2c5c 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,16 @@ You need Nextflow installed. ```bash module load java/jdk-11.0.11 +# To run matched pipeline +nextflow papaemmelab/nf-purple \ + --tumor $tumor \ + --tumor_bam $TUMOR_BAM \ + --normal $normal \ + --normal_bam $NORMAL_BAM \ + --outdir $OUTDIR \ + ...refargs + +# To run unmatched tumor-only nextflow papaemmelab/nf-purple \ --tumor $tumor \ --tumor_bam $TUMOR_BAM \ diff --git a/main.nf b/main.nf index db731df..358cbbf 100644 --- a/main.nf +++ b/main.nf @@ -8,6 +8,8 @@ params.loci = "/data/copy_number/GermlineHetPon.37.vcf.gz" params.gcProfile = "/data/copy_number/GC_profile.1000bp.37.cnp" params.ensemblDataDir = "/data/common/ensembl_data" params.diploidRegions = "/data/copy_number/DiploidRegions.37.bed.gz" +params.normal = null +params.normalBam = null log.info """\ @@ -31,7 +33,7 @@ log.info """\ process runAmber { - tag "AMBER on ${params.tumor}" + tag "AMBER on ${params.tumor}" + (params.normal ? " vs ${params.normal}" : "") publishDir "${params.outdir}/amber", mode: 'copy' cpus params.cores memory '4 GB' @@ -39,24 +41,25 @@ process runAmber { input: val tumor + val normal path tumorBam - optional path normal - optional path normalBam + path normalBam output: path "${tumor}.amber.baf.tsv.gz", emit: amber_baf_tsv path "${tumor}.amber.baf.pcf", emit: amber_baf_pcf path "${tumor}.amber.qc", emit: amber_qc - optional path "${tumor}.amber.contamination.vcf.gz", emit: amber_contamination_vcf - optional path "${normal}.amber.snp.vcf.gz", emit: amber_normal_snp_vcf - optional path "${normal}.amber.homozygousregion.tsv", emit: amber_normal_homozygousregion_tsv + path "${tumor}.amber.contamination.vcf.gz", emit: amber_contamination_vcf + path "${normal}.amber.snp.vcf.gz", emit: amber_normal_snp_vcf, optional: true + path "${normal}.amber.homozygousregion.tsv", emit: amber_normal_homozygousregion_tsv, optional: true script: + def reference_args = normal ? "-reference ${normal} \\\n -reference_bam ${normalBam}" : "" """ amber \ -tumor ${tumor} \ -tumor_bam ${tumorBam} \ - ${normal ? "-reference " + normal + " \\\n-reference_bam " + normalBam : ""} \ + ${reference_args} \ -output_dir \$PWD \ -threads ${params.cores} \ -loci ${params.loci} \ @@ -65,7 +68,7 @@ process runAmber { } process runCobalt { - tag "COBALT on ${params.tumor}" + tag "COBALT on ${params.tumor}" + (params.normal ? " vs ${params.normal}" : "") publishDir "${params.outdir}/cobalt", mode: 'copy' cpus params.cores memory '4 GB' @@ -73,21 +76,22 @@ process runCobalt { input: val tumor + val normal path tumorBam - optional path normal - optional path normalBam + path normalBam output: path "${tumor}.cobalt.ratio.tsv.gz", emit: cobalt_ratio_tsv path "${tumor}.cobalt.ratio.pcf", emit: cobalt_ratio_pcf - optional path "${reference}.cobalt.ratio.pcf", emit: cobalt_normal_ratio_pcf + path "${normal}.cobalt.ratio.pcf", emit: cobalt_normal_ratio_pcf, optional: true script: + def reference_args = normal ? "-reference ${normal} \\\n -reference_bam ${normalBam}" : "-tumor_only_diploid_bed ${params.diploidRegions}" """ cobalt \ -tumor ${tumor} \ -tumor_bam ${tumorBam} \ - ${normal ? "-reference " + normal + " \\\n-reference_bam " + normalBam : "-tumor_only_diploid_bed * params.diploidRegions} \ + ${reference_args} \ -output_dir \$PWD \ -threads ${params.cores} \ -gc_profile ${params.gcProfile} @@ -95,7 +99,7 @@ process runCobalt { } process runPurple { - tag "PURPLE on ${params.tumor}" + tag "PURPLE on ${params.tumor}" + (params.normal ? " vs ${params.normal}" : "") publishDir "${params.outdir}/purple", mode: 'copy' cpus params.cores memory '4 GB' @@ -103,17 +107,16 @@ process runPurple { input: val tumor - optional path normal + val normal path amber_baf_tsv path amber_baf_pcf path amber_qc - optional path amber_contamination_vcf - optional path amber_normal_snp_vcf + path amber_contamination_vcf + path amber_normal_snp_vcf + path amber_normal_homozygousregion_tsv path cobalt_ratio_tsv path cobalt_ratio_pcf - optional path amber_normal_homozygousregion_tsv - optional path cobalt_normal_ratio_pcf - + path cobalt_normal_ratio_pcf output: path "${tumor}.purple.purity.tsv", emit: purple_purity_tsv @@ -131,9 +134,11 @@ process runPurple { path "plot/${tumor}.purity.range.png", emit: purple_purity_range_png script: + def reference_args = normal ? "-reference ${normal}" : "" """ purple \ -tumor ${tumor} \ + ${reference_args} \ -amber ${params.outdir}/amber \ -cobalt ${params.outdir}/cobalt \ -output_dir \$PWD \ @@ -147,13 +152,12 @@ process runPurple { workflow { tumor = Channel.value(params.tumor) + normal = Channel.value(params.normal) tumorBam = Channel.fromPath(params.tumorBam) + normalBam = Channel.fromPath(params.normalBam) - normal = params.normal ? Channel.fromPath(params.normal) : Channel.empty() - normalBam = params.normal_bam ? Channel.fromPath(params.normal_bam) : Channel.empty() - - runAmber(tumor, tumorBam, normal, normalBam) - runCobalt(tumor, tumorBam, normal, normalBam) + runAmber(tumor, normal, tumorBam, normalBam) + runCobalt(tumor, normal, tumorBam, normalBam) runPurple(tumor, normal, runAmber.out, runCobalt.out) } diff --git a/nextflow.config b/nextflow.config index 6077dc5..2a1364b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -33,7 +33,7 @@ profiles { cloud { docker { enabled = true - runOptions = '--entrypoint ""' + runOptions = "--entrypoint ''" } process { executor = 'local' diff --git a/tests/main.nf.test b/tests/main.nf.test index 27f968c..7a3f397 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -6,8 +6,10 @@ nextflow_pipeline { test("Should run Main.nf with failures on the purple step") { when { params { - tumor = "TEST" + tumor = "TEST_TUMOR" + normal = "TEST_NORMAL" tumorBam = "${projectDir}/tests/data/tumor.bam" + normalBam = "${projectDir}/tests/data/normal.bam" outdir = "${projectDir}/tests/outdir" refGenome = "${projectDir}/tests/data/reference.fasta" } diff --git a/tests/main.runamber.nf.test b/tests/main.runamber.nf.test index c63e290..cbce26a 100644 --- a/tests/main.runamber.nf.test +++ b/tests/main.runamber.nf.test @@ -8,14 +8,18 @@ nextflow_process { when { params { - tumor = "TEST" + tumor = "TEST_TUMOR" + normal = "TEST_NORMAL" tumorBam = "${projectDir}/tests/data/tumor.bam" + normalBam = "${projectDir}/tests/data/normal.bam" outdir = "${projectDir}/tests/outdir" } process { """ input[0] = Channel.value(params.tumor) - input[1] = Channel.fromPath(params.tumorBam) + input[1] = Channel.value(params.normal) + input[2] = Channel.fromPath(params.tumorBam) + input[3] = Channel.fromPath(params.normalBam) """ } } diff --git a/tests/main.runamber.nf.test.snap b/tests/main.runamber.nf.test.snap index ce90db2..7c5521e 100644 --- a/tests/main.runamber.nf.test.snap +++ b/tests/main.runamber.nf.test.snap @@ -3,25 +3,43 @@ "content": [ { "0": [ - "TEST.amber.baf.tsv.gz:md5,0c55ac207db6eb8e1f138a044990f4d1" + "TEST_TUMOR.amber.baf.tsv.gz:md5,582a86aee1eb90035f3a5d22cf6cdbd9" ], "1": [ - "TEST.amber.baf.pcf:md5,c889d8a8b2668045d35eed1fd8e2c7ce" + "TEST_TUMOR.amber.baf.pcf:md5,c8d8a68055028ceea067b3c53d55aae4" ], "2": [ - "TEST.amber.qc:md5,9547e68ba672e08d6bd49e1e6f654ee8" + "TEST_TUMOR.amber.qc:md5,9547e68ba672e08d6bd49e1e6f654ee8" + ], + "3": [ + "TEST_TUMOR.amber.contamination.vcf.gz:md5,941abd5cb8324ae2eabe82d9d0ebb6ee" + ], + "4": [ + "TEST_NORMAL.amber.snp.vcf.gz:md5,baa474a32f1fd4dd54989d490470924c" + ], + "5": [ + "TEST_NORMAL.amber.homozygousregion.tsv:md5,1d396d14d89860c41106934d2b5e16ac" ], "amber_baf_pcf": [ - "TEST.amber.baf.pcf:md5,c889d8a8b2668045d35eed1fd8e2c7ce" + "TEST_TUMOR.amber.baf.pcf:md5,c8d8a68055028ceea067b3c53d55aae4" ], "amber_baf_tsv": [ - "TEST.amber.baf.tsv.gz:md5,0c55ac207db6eb8e1f138a044990f4d1" + "TEST_TUMOR.amber.baf.tsv.gz:md5,582a86aee1eb90035f3a5d22cf6cdbd9" + ], + "amber_contamination_vcf": [ + "TEST_TUMOR.amber.contamination.vcf.gz:md5,941abd5cb8324ae2eabe82d9d0ebb6ee" + ], + "amber_normal_homozygousregion_tsv": [ + "TEST_NORMAL.amber.homozygousregion.tsv:md5,1d396d14d89860c41106934d2b5e16ac" + ], + "amber_normal_snp_vcf": [ + "TEST_NORMAL.amber.snp.vcf.gz:md5,baa474a32f1fd4dd54989d490470924c" ], "amber_qc": [ - "TEST.amber.qc:md5,9547e68ba672e08d6bd49e1e6f654ee8" + "TEST_TUMOR.amber.qc:md5,9547e68ba672e08d6bd49e1e6f654ee8" ] } ], - "timestamp": "2024-01-23T10:49:32.477756" + "timestamp": "2024-01-23T17:24:43.748894" } } \ No newline at end of file diff --git a/tests/main.runcobalt.nf.test b/tests/main.runcobalt.nf.test index 65671d7..93d734c 100644 --- a/tests/main.runcobalt.nf.test +++ b/tests/main.runcobalt.nf.test @@ -8,14 +8,18 @@ nextflow_process { when { params { - tumor = "TEST" + tumor = "TEST_TUMOR" + normal = "TEST_NORMAL" tumorBam = "${projectDir}/tests/data/tumor.bam" + normalBam = "${projectDir}/tests/data/normal.bam" outdir = "${projectDir}/tests/outdir" } process { """ input[0] = Channel.value(params.tumor) - input[1] = Channel.fromPath(params.tumorBam) + input[1] = Channel.value(params.normal) + input[2] = Channel.fromPath(params.tumorBam) + input[3] = Channel.fromPath(params.normalBam) """ } } diff --git a/tests/main.runcobalt.nf.test.snap b/tests/main.runcobalt.nf.test.snap index 9542102..9ce1d55 100644 --- a/tests/main.runcobalt.nf.test.snap +++ b/tests/main.runcobalt.nf.test.snap @@ -3,19 +3,25 @@ "content": [ { "0": [ - "TEST.cobalt.ratio.tsv.gz:md5,87cf8451b04e4373fc4dd7ccda3e6afe" + "TEST_TUMOR.cobalt.ratio.tsv.gz:md5,f235d46690097a000fdf7ac31bb53108" ], "1": [ - "TEST.cobalt.ratio.pcf:md5,7fa4142d0d102e1f3b633d61d154d9c0" + "TEST_TUMOR.cobalt.ratio.pcf:md5,7fa4142d0d102e1f3b633d61d154d9c0" + ], + "2": [ + "TEST_NORMAL.cobalt.ratio.pcf:md5,4f035e0ee8e4290d64d24152ea5387b8" + ], + "cobalt_normal_ratio_pcf": [ + "TEST_NORMAL.cobalt.ratio.pcf:md5,4f035e0ee8e4290d64d24152ea5387b8" ], "cobalt_ratio_pcf": [ - "TEST.cobalt.ratio.pcf:md5,7fa4142d0d102e1f3b633d61d154d9c0" + "TEST_TUMOR.cobalt.ratio.pcf:md5,7fa4142d0d102e1f3b633d61d154d9c0" ], "cobalt_ratio_tsv": [ - "TEST.cobalt.ratio.tsv.gz:md5,87cf8451b04e4373fc4dd7ccda3e6afe" + "TEST_TUMOR.cobalt.ratio.tsv.gz:md5,f235d46690097a000fdf7ac31bb53108" ] } ], - "timestamp": "2024-01-23T10:50:58.292541" + "timestamp": "2024-01-23T17:25:25.678659" } } \ No newline at end of file diff --git a/tests/main.runpurple.nf.test b/tests/main.runpurple.nf.test index ed0271f..99ef48e 100644 --- a/tests/main.runpurple.nf.test +++ b/tests/main.runpurple.nf.test @@ -5,45 +5,48 @@ nextflow_process { process "runPurple" test("Should run Purple with failures") { - setup { run("runAmber") { script "main.nf" process { """ input[0] = Channel.value(params.tumor) - input[1] = Channel.fromPath(params.tumorBam) + input[1] = Channel.value(params.normal) + input[2] = Channel.fromPath(params.tumorBam) + input[3] = Channel.fromPath(params.normalBam) """ } } - run("runCobalt") { script "main.nf" process { """ input[0] = Channel.value(params.tumor) - input[1] = Channel.fromPath(params.tumorBam) + input[1] = Channel.value(params.normal) + input[2] = Channel.fromPath(params.tumorBam) + input[3] = Channel.fromPath(params.normalBam) """ } } } - when { params { - tumor = "TEST" + tumor = "TEST_TUMOR" + normal = "TEST_NORMAL" tumorBam = "${projectDir}/tests/data/tumor.bam" + normalBam = "${projectDir}/tests/data/normal.bam" outdir = "${projectDir}/tests/outdir" refGenome = "${projectDir}/tests/data/reference.fasta" } process { """ input[0] = Channel.value(params.tumor) - input[1] = runAmber.out - input[2] = runCobalt.out + input[1] = Channel.value(params.normal) + input[2] = runAmber.out + input[3] = runCobalt.out """ } } - then { // Purple process should fail when trying to create the segmentation plots assert process.failed diff --git a/tests/main.runpurple.nf.test.snap b/tests/main.runpurple.nf.test.snap deleted file mode 100644 index 1ac5693..0000000 --- a/tests/main.runpurple.nf.test.snap +++ /dev/null @@ -1,87 +0,0 @@ -{ - "Should run Purple with failures": { - "content": [ - { - "0": [ - - ], - "1": [ - - ], - "10": [ - - ], - "11": [ - - ], - "12": [ - - ], - "2": [ - - ], - "3": [ - - ], - "4": [ - - ], - "5": [ - - ], - "6": [ - - ], - "7": [ - - ], - "8": [ - - ], - "9": [ - - ], - "purple_circos_png": [ - - ], - "purple_cnv_gene_tsv": [ - - ], - "purple_cnv_somatic_tsv": [ - - ], - "purple_copynumber_png": [ - - ], - "purple_input_png": [ - - ], - "purple_map_png": [ - - ], - "purple_purity_range_png": [ - - ], - "purple_purity_range_tsv": [ - - ], - "purple_purity_tsv": [ - - ], - "purple_qc": [ - - ], - "purple_segment_png": [ - - ], - "purple_segment_tsv": [ - - ], - "purple_somatic_clonality_tsv": [ - - ] - } - ], - "timestamp": "2024-01-23T11:18:57.811454" - } -} \ No newline at end of file From 1e6fe20582d7d94fe7ba1e48ffb02d212dba8b9e Mon Sep 17 00:00:00 2001 From: "Juan E. Arango" Date: Fri, 8 Nov 2024 11:09:47 -0500 Subject: [PATCH 03/20] =?UTF-8?q?=F0=9F=A7=AC=20improve=20script=20to=20ru?= =?UTF-8?q?n=20matched=20TvsN=20setting?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 1 + README.md | 15 +++- bin/bin_cobalt.py | 99 ++++++++++++++++++++++ main.nf | 212 +++++++++++++++++++++++----------------------- nf-test.config | 11 ++- run.sh | 31 +++++-- 6 files changed, 251 insertions(+), 118 deletions(-) create mode 100755 bin/bin_cobalt.py diff --git a/.gitignore b/.gitignore index 2c1a94c..7ed863f 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ tmp tests/outdir/* outdir plugins +slurm*.out \ No newline at end of file diff --git a/README.md b/README.md index dd959f8..ae08e24 100644 --- a/README.md +++ b/README.md @@ -3,12 +3,14 @@ [![nf-purple CI](https://github.com/papaemmelab/nf-purple/actions/workflows/ci.yml/badge.svg)](https://github.com/papaemmelab/nf-purple/actions/workflows/ci.yml) [![nf-test](https://img.shields.io/badge/tested_with-nf--test-337ab7.svg)](https://github.com/askimed/nf-test) -Nextflow Pipeline to run [Purple](https://github.com/hartwigmedical/hmftools/blob/master/purple/README.md#tumor-only-mode) in *Tumor-Only* mode, uses [Amber](https://github.com/hartwigmedical/hmftools/tree/master/amber#tumor-only-mode) and [Cobalt](https://github.com/hartwigmedical/hmftools/tree/master/cobalt#tumor-only-mode) from HMFTools suite, of the Hartwig Foundation. +Nextflow Pipeline to run [Purple](https://github.com/hartwigmedical/hmftools/blob/master/purple/README.md) in *Tumor-Only* mode, uses [Amber](https://github.com/hartwigmedical/hmftools/tree/master/amber) and [Cobalt](https://github.com/hartwigmedical/hmftools/tree/master/cobalt) from HMFTools suite, of the Hartwig Foundation. ## 🚀 Run Pipeline You need Nextflow installed. +### Tumor-Normal matched: + ```bash module load java/jdk-11.0.11 @@ -20,6 +22,14 @@ nextflow papaemmelab/nf-purple \ --normal_bam $NORMAL_BAM \ --outdir $OUTDIR \ ...refargs +``` + +- See more info: [Purple](https://github.com/hartwigmedical/hmftools/blob/master/purple/README.md#arguments), [Amber](https://github.com/hartwigmedical/hmftools/tree/master/amber#paired-normaltumor-mode), [Cobalt](https://github.com/hartwigmedical/hmftools/tree/master/cobalt#mandatory-arguments) + +### Tumor only mode: + +```bash +module load java/jdk-11.0.11 # To run unmatched tumor-only nextflow papaemmelab/nf-purple \ @@ -29,6 +39,9 @@ nextflow papaemmelab/nf-purple \ ...refargs ``` +- See more info: [Purple](https://github.com/hartwigmedical/hmftools/blob/master/purple/README.md#tumor-only-mode), [Amber](https://github.com/hartwigmedical/hmftools/tree/master/amber#tumor-only-mode), [Cobalt](https://github.com/hartwigmedical/hmftools/tree/master/cobalt#tumor-only-mode) + + ## 🧬 Get Reference Data Downloaded from [Purple Ref Data](https://console.cloud.google.com/storage/browser/hmf-public/HMFtools-Resources/dna_pipeline) for genome version 37. diff --git a/bin/bin_cobalt.py b/bin/bin_cobalt.py new file mode 100755 index 0000000..6ed8886 --- /dev/null +++ b/bin/bin_cobalt.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python3 + +import pandas as pd +import numpy as np +import argparse + +parser = argparse.ArgumentParser( + description=( + "Bin cobalt probes with similar LogR values " + "together to decrease oversegmentation." + ) +) +parser.add_argument( + "--in_pcf", + type=str, + required=True, + help="Path to the input cobalt ratio .pcf file.", +) +parser.add_argument( + "--out_pcf", + type=str, + required=True, + help="Path to ouput the binned cobalt ratios.", +) +parser.add_argument( + "--bin_probes", + type=int, + required=True, + help="Max probe bin size." +) +parser.add_argument( + "--bin_log_r", + type=float, + required=True, + help="Max probe logR difference to bin." +) +args = parser.parse_args() + +cobalt_ratio_pcf = pd.read_csv(args.in_pcf, sep="\t") +cobalt_ratio_pcf_probes = pd.DataFrame(columns=cobalt_ratio_pcf.columns) + +# First bin by probes +chrom_arm = None +last_idx = None +for idx, seg in cobalt_ratio_pcf.iterrows(): + if chrom_arm != "_".join(seg[["chrom", "arm"]].astype(str)): + chrom_arm = "_".join(seg[["chrom", "arm"]].astype(str)) + cobalt_ratio_pcf_probes = pd.concat( + [cobalt_ratio_pcf_probes, seg.to_frame().T], ignore_index=True + ) + last_idx = cobalt_ratio_pcf_probes.index[-1] + continue + if ( + cobalt_ratio_pcf_probes.loc[last_idx, "n.probes"] <= args.bin_probes + or seg["n.probes"] <= args.bin_probes + ): + means = [ + cobalt_ratio_pcf_probes.loc[last_idx, "mean"] + ] * cobalt_ratio_pcf_probes.loc[last_idx, "n.probes"] + means.extend([seg["mean"]] * seg["n.probes"]) + cobalt_ratio_pcf_probes.loc[last_idx, "mean"] = np.mean(means) + cobalt_ratio_pcf_probes.loc[last_idx, "n.probes"] += seg["n.probes"] + cobalt_ratio_pcf_probes.loc[last_idx, "end.pos"] = seg["end.pos"] + else: + cobalt_ratio_pcf_probes = pd.concat( + [cobalt_ratio_pcf_probes, seg.to_frame().T], ignore_index=True + ) + last_idx = cobalt_ratio_pcf_probes.index[-1] + +# Then bin by logR mean +cobalt_ratio_pcf_probes = cobalt_ratio_pcf_probes.reset_index().drop(columns="index") +cobalt_ratio_pcf_probes_logR = pd.DataFrame(columns=cobalt_ratio_pcf_probes.columns) +chrom_arm = None +for idx, seg in cobalt_ratio_pcf_probes.iterrows(): + if chrom_arm != "_".join(seg[["chrom", "arm"]].astype(str)): + chrom_arm = "_".join(seg[["chrom", "arm"]].astype(str)) + cobalt_ratio_pcf_probes_logR = pd.concat( + [cobalt_ratio_pcf_probes_logR, seg.to_frame().T], ignore_index=True + ) + last_idx = cobalt_ratio_pcf_probes_logR.index[-1] + continue + if ( + abs(cobalt_ratio_pcf_probes.loc[last_idx, "mean"] - seg["mean"]) + <= args.bin_log_r + ): + means = [ + cobalt_ratio_pcf_probes_logR.loc[last_idx, "mean"] + ] * cobalt_ratio_pcf_probes_logR.loc[last_idx, "n.probes"] + means.extend([seg["mean"]] * seg["n.probes"]) + cobalt_ratio_pcf_probes_logR.loc[last_idx, "mean"] = np.mean(means) + cobalt_ratio_pcf_probes_logR.loc[last_idx, "n.probes"] += seg["n.probes"] + cobalt_ratio_pcf_probes_logR.loc[last_idx, "end.pos"] = seg["end.pos"] + else: + cobalt_ratio_pcf_probes_logR = pd.concat( + [cobalt_ratio_pcf_probes_logR, seg.to_frame().T], ignore_index=True + ) + last_idx = cobalt_ratio_pcf_probes_logR.index[-1] + +cobalt_ratio_pcf_probes_logR.to_csv(args.out_pcf, sep="\t", index=False) diff --git a/main.nf b/main.nf index df62358..5d19c01 100644 --- a/main.nf +++ b/main.nf @@ -17,15 +17,23 @@ params.minPurity = 0.08 params.maxPurity = 1.0 -log.info """\ +def logMessage = """\ HMFTOOLS - PURPLE ======================================== + Running Mode: ${params.normal ? 'Matched' : 'Unmatched'} + ---------------------------------------- Params: ---------------------------------------- tumor : ${params.tumor} tumorBam : ${params.tumorBam} +""" +logMessage += (params.normal && params.normalBam) ? """\ normal : ${params.normal} normalBam : ${params.normalBam} +""" : "" +logMessage += """\ + somaticVcf : ${params.somaticVcf} + germlineVcf : ${params.germlineVcf} outdir : ${params.outdir} cores : ${params.cores} memory : ${params.memory} @@ -38,9 +46,9 @@ log.info """\ ---------------------------------------- Project : ${workflow.projectDir} Cmd line : ${workflow.commandLine} - """ - .stripIndent() +""" +log.info(logMessage.stripIndent()) process runAmber { tag "AMBER on ${params.tumor}" + (params.normal ? " vs ${params.normal}" : "") @@ -64,23 +72,25 @@ process runAmber { path "${normal}.amber.homozygousregion.tsv", emit: amber_normal_homozygousregion_tsv, optional: true script: - def reference_args = normal ? "-reference ${normal} \\\n -reference_bam ${normalBam}" : "" + def reference_args = normal ? """-reference ${normal} \\\n -reference_bam ${normalBam}""" : "" """ - if [ -f "${params.outdir}/amber/${tumor}.amber.baf.tsv.gz" ] && \ - [ -f "${params.outdir}/amber/${tumor}.amber.baf.pcf" ] && \ - [ -f "${params.outdir}/amber/${tumor}.amber.qc" ]; then + if [ -f "${params.outdir}/amber/${tumor}.amber.baf.tsv.gz" ] && \\ + [ -f "${params.outdir}/amber/${tumor}.amber.baf.pcf" ] && \\ + [ -f "${params.outdir}/amber/${tumor}.amber.qc" ] && \\ + [ -f "${params.outdir}/amber/${tumor}.amber.contamination.vcf.gz" ]; then echo "Output files already exist. Skipping amber execution." - ln -s ${params.outdir}/amber/${tumor}.amber.baf.tsv.gz ${tumor}.amber.baf.tsv.gz - ln -s ${params.outdir}/amber/${tumor}.amber.baf.pcf ${tumor}.amber.baf.pcf - ln -s ${params.outdir}/amber/${tumor}.amber.qc ${tumor}.amber.qc + ln -fs ${params.outdir}/amber/${tumor}.amber.baf.tsv.gz ${tumor}.amber.baf.tsv.gz + ln -fs ${params.outdir}/amber/${tumor}.amber.baf.pcf ${tumor}.amber.baf.pcf + ln -fs ${params.outdir}/amber/${tumor}.amber.qc ${tumor}.amber.qc + ln -fs ${params.outdir}/amber/${tumor}.amber.contamination.vcf.gz ${tumor}.amber.contamination.vcf.gz else - amber \ - -tumor ${tumor} \ - -tumor_bam ${tumorBam} \ - ${reference_args} \ - -output_dir \$PWD \ - -threads ${params.cores} \ - -loci ${params.loci} \ + amber \\ + -tumor ${tumor} \\ + -tumor_bam ${tumorBam} \\ + ${reference_args} \\ + -output_dir \$PWD \\ + -threads ${params.cores} \\ + -loci ${params.loci} \\ -ref_genome_version ${params.genomeVersion} fi """.stripIndent() @@ -100,27 +110,27 @@ process runCobalt { path normalBam output: - path "${tumor}.cobalt.ratio.tsv.gz", emit: cobalt_ratio_tsv - path "${tumor}.cobalt.ratio.pcf", emit: cobalt_ratio_pcf + path "${tumor}.cobalt.ratio.pcf", emit: cobalt_tumor_ratio_pcf path "${normal}.cobalt.ratio.pcf", emit: cobalt_normal_ratio_pcf, optional: true script: def reference_args = normal ? "-reference ${normal} \\\n -reference_bam ${normalBam}" : "-tumor_only_diploid_bed ${params.diploidRegions}" """ - if [ -f "${params.outdir}/cobalt/${tumor}.cobalt.ratio.tsv.gz" ] && \ - [ -f "${params.outdir}/cobalt/${tumor}.cobalt.ratio.pcf" ]; then + if [ -f "${params.outdir}/cobalt/${tumor}.cobalt.ratio.pcf" ]; then echo "Output files already exist. Skipping cobalt execution." - ln -s ${params.outdir}/cobalt/${tumor}.cobalt.ratio.tsv.gz ${tumor}.cobalt.ratio.tsv.gz ln -s ${params.outdir}/cobalt/${tumor}.cobalt.ratio.pcf ${tumor}.cobalt.ratio.pcf + + if [ -f "${params.outdir}/cobalt/${normal}.cobalt.ratio.pcf" ]; then + ln -s ${params.outdir}/cobalt/${normal}.cobalt.ratio.pcf ${normal}.cobalt.ratio.pcf + fi else - cobalt \ - -tumor ${tumor} \ - -tumor_bam ${tumorBam} \ - ${reference_args} \ - -output_dir \$PWD \ - -threads ${params.cores} \ - -gc_profile ${params.gcProfile} \ - -tumor_only_diploid_bed ${params.diploidRegions} + cobalt \\ + -tumor ${tumor} \\ + -tumor_bam ${tumorBam} \\ + ${reference_args} \\ + -output_dir \$PWD \\ + -threads ${params.cores} \\ + -gc_profile ${params.gcProfile} fi """.stripIndent() } @@ -128,74 +138,39 @@ process runCobalt { process binCobalt { tag "COBALT BIN on ${params.tumor}" publishDir "${params.outdir}/cobalt/binned_${params.binProbes}_probes_${params.binLogR}_LogR", mode: 'copy' - cpus params.cores - memory params.memory + cpus 1 + memory '4G' time '1h' input: val tumor + val normal val binProbes val binLogR - path cobalt_ratio_pcf - path cobalt_ratio_tsv + path cobalt_tumor_ratio_pcf + path cobalt_normal_ratio_pcf output: - path "${tumor}.cobalt.ratio.tsv.gz", emit: cobalt_ratio_tsv - path "${tumor}.cobalt.ratio.pcf", emit: cobalt_ratio_pcf + path "${tumor}.cobalt.ratio.binned.pcf", emit: cobalt_tumor_ratio_pcf + path "${normal}.cobalt.ratio.binned.pcf", emit: cobalt_normal_ratio_pcf script: """ - #!/usr/bin/env python - import pandas as pd - import numpy as np - - cobalt_ratio_pcf = pd.read_csv('${cobalt_ratio_pcf}', sep='\\t') - cobalt_ratio_pcf_probes = pd.DataFrame(columns=cobalt_ratio_pcf.columns) - - # First bin by probes - chrom_arm = None - last_idx = None - for idx, seg in cobalt_ratio_pcf.iterrows(): - if chrom_arm != '_'.join(seg[['chrom','arm']].astype(str)): - chrom_arm = '_'.join(seg[['chrom','arm']].astype(str)) - cobalt_ratio_pcf_probes = pd.concat([cobalt_ratio_pcf_probes, seg.to_frame().T], ignore_index=True) - last_idx = cobalt_ratio_pcf_probes.index[-1] - continue - if ( - cobalt_ratio_pcf_probes.loc[last_idx, 'n.probes'] <= ${binProbes} - ) or ( - seg['n.probes'] <= ${binProbes} - ): - means = [cobalt_ratio_pcf_probes.loc[last_idx, 'mean']] * cobalt_ratio_pcf_probes.loc[last_idx, 'n.probes'] - means.extend([seg['mean']] * seg['n.probes']) - cobalt_ratio_pcf_probes.loc[last_idx, 'mean'] = np.mean(means) - cobalt_ratio_pcf_probes.loc[last_idx, 'n.probes'] += seg['n.probes'] - cobalt_ratio_pcf_probes.loc[last_idx, 'end.pos'] = seg['end.pos'] - else: - cobalt_ratio_pcf_probes = pd.concat([cobalt_ratio_pcf_probes, seg.to_frame().T], ignore_index=True) - last_idx = cobalt_ratio_pcf_probes.index[-1] + # Bin Cobalt Tumor Probes + bin_cobalt.py \\ + --in_pcf ${cobalt_tumor_ratio_pcf} \\ + --out_pcf ${tumor}.cobalt.ratio.binned.pcf \\ + --bin_probes ${binProbes} \\ + --bin_log_r ${binLogR} - # Then bin by logR mean - cobalt_ratio_pcf_probes = cobalt_ratio_pcf_probes.reset_index().drop(columns="index") - cobalt_ratio_pcf_probes_logR = pd.DataFrame(columns=cobalt_ratio_pcf_probes.columns) - chrom_arm = None - for idx, seg in cobalt_ratio_pcf_probes.iterrows(): - if chrom_arm != '_'.join(seg[['chrom','arm']].astype(str)): - chrom_arm = '_'.join(seg[['chrom','arm']].astype(str)) - cobalt_ratio_pcf_probes_logR = pd.concat([cobalt_ratio_pcf_probes_logR, seg.to_frame().T], ignore_index=True) - last_idx = cobalt_ratio_pcf_probes_logR.index[-1] - continue - if abs(cobalt_ratio_pcf_probes.loc[last_idx, 'mean'] - seg['mean']) <= ${binLogR}: - means = [cobalt_ratio_pcf_probes_logR.loc[last_idx, 'mean']] * cobalt_ratio_pcf_probes_logR.loc[last_idx, 'n.probes'] - means.extend([seg['mean']] * seg['n.probes']) - cobalt_ratio_pcf_probes_logR.loc[last_idx, 'mean'] = np.mean(means) - cobalt_ratio_pcf_probes_logR.loc[last_idx, 'n.probes'] += seg['n.probes'] - cobalt_ratio_pcf_probes_logR.loc[last_idx, 'end.pos'] = seg['end.pos'] - else: - cobalt_ratio_pcf_probes_logR = pd.concat([cobalt_ratio_pcf_probes_logR, seg.to_frame().T], ignore_index=True) - last_idx = cobalt_ratio_pcf_probes_logR.index[-1] - - cobalt_ratio_pcf_probes_logR.to_csv("${tumor}.cobalt.ratio.pcf", sep='\\t', index=False) + # Bin Cobalt Normal probes + if [ -f "${cobalt_normal_ratio_pcf}" ]; then + bin_cobalt.py \\ + --in_pcf ${cobalt_normal_ratio_pcf} \\ + --out_pcf ${normal}.cobalt.ratio.binned.pcf \\ + --bin_probes ${binProbes} \\ + --bin_log_r ${binLogR} + fi """.stripIndent() } @@ -209,14 +184,15 @@ process runPurple { input: val tumor val normal + path somatic_vcf + path germline_vcf path amber_baf_tsv path amber_baf_pcf path amber_qc path amber_contamination_vcf path amber_normal_snp_vcf path amber_normal_homozygousregion_tsv - path cobalt_ratio_tsv - path cobalt_ratio_pcf + path cobalt_tumor_ratio_pcf path cobalt_normal_ratio_pcf path cobalt_path @@ -236,20 +212,24 @@ process runPurple { path "plot/${tumor}.purity.range.png", emit: purple_purity_range_png script: - def reference_args = normal ? "-reference ${normal}" : "" + def reference_args = normal ? "-reference ${normal}" : "" + def somatic_vcf_args = somatic_vcf ? "-somatic_vcf ${somatic_vcf}" : "" + def germline_vcf_args = germline ? "-germline_vcf ${germline_vcf}" : "" """ - purple \ - -tumor ${tumor} \ - ${reference_args} \ - -amber ${params.outdir}/amber \ - -cobalt ${cobalt_path} \ - -output_dir \$PWD \ - -gc_profile ${params.gcProfile} \ - -ref_genome ${params.refGenome} \ - -ref_genome_version ${params.genomeVersion} \ - -ensembl_data_dir ${params.ensemblDataDir} \ - -circos ${params.circos} \ - -min_purity ${params.minPurity} \ + purple \\ + -tumor ${tumor} \\ + ${reference_args} \\ + ${somatic_vcf_args} \\ + ${germline_vcf_args} \\ + -amber ${params.outdir}/amber \\ + -cobalt ${cobalt_path} \\ + -output_dir \$PWD \\ + -gc_profile ${params.gcProfile} \\ + -ref_genome ${params.refGenome} \\ + -ref_genome_version ${params.genomeVersion} \\ + -ensembl_data_dir ${params.ensemblDataDir} \\ + -circos ${params.circos} \\ + -min_purity ${params.minPurity} \\ -max_purity ${params.maxPurity} rsync -a --no-links \$PWD/ ${params.outdir}/purple/ @@ -257,23 +237,39 @@ process runPurple { } workflow { + // Arguments tumor = Channel.value(params.tumor) normal = Channel.value(params.normal) tumorBam = Channel.fromPath(params.tumorBam) normalBam = Channel.fromPath(params.normalBam) binProbes = Channel.value(params.binProbes) binLogR = Channel.value(params.binLogR) + somaticVcf = Channel.value(params.somaticVcf) + germlineVcf = Channel.value(params.germlineVcf) + // Run Amber and Cobalt amberOutput = runAmber(tumor, normal, tumorBam, normalBam) cobaltOutput = runCobalt(tumor, normal, tumorBam, normalBam) - if (binProbes != 0 || binLogR != 0) { - binCobaltOutput = binCobalt(tumor, binProbes, binLogR, cobaltOutput.cobalt_ratio_pcf, cobaltOutput.cobalt_ratio_tsv) - - runPurple(tumor, normal, amberOutput, binCobaltOutput, "${params.outdir}/cobalt/binned_${params.binProbes}_probes_${params.binLogR}_LogR") - } else { - runPurple(tumor, normal, amberOutput, cobaltOutput, "${params.outdir}/cobalt") - } + // Bin Cobalt if expected + postCobaltOutput = (binProbes != 0 || binLogR != 0) + ? binCobalt(tumor, normal, binProbes, binLogR, cobaltOutput.cobalt_tumor_ratio_pcf, cobaltOutput.cobalt_normal_ratio_pcf) + : cobaltOutput + + cobaltPath = (binProbes != 0 || binLogR != 0) + ? "${params.outdir}/cobalt/binned_${params.binProbes}_probes_${params.binLogR}_LogR" + : "${params.outdir}/cobalt" + + // Run Purple + runPurple( + tumor, + normal, + somaticVcf, + germlineVcf, + amberOutput, + postCobaltOutput, + cobaltPath, + ) } workflow.onComplete { diff --git a/nf-test.config b/nf-test.config index 9ec7110..5bcf7a7 100644 --- a/nf-test.config +++ b/nf-test.config @@ -1,8 +1,15 @@ config { - testsDir "tests" workDir ".nf-test" configFile "tests/nextflow.config" profile "cloud" - } + +params { + memory = '4 GB' + tumor = "TEST" + binProbes = 100 + binLogR = 0.5 + cobalt_ratio_pcf = "${projectDir}/tests/outdir/cobalt/TEST.cobalt.ratio.pcf" + cobalt_ratio_tsv = "${projectDir}/tests/outdir/cobalt/TEST.cobalt.ratio.tsv.gz" +} \ No newline at end of file diff --git a/run.sh b/run.sh index d17950a..10063da 100755 --- a/run.sh +++ b/run.sh @@ -1,12 +1,20 @@ #!/bin/sh -TUMOR=IID_H208153_T01_01_WG01 +ROOT=/data1/papaemme + +TUMOR=IID_H211025_T01_01_WG01 +NORMAL=IID_H211025_N01_01_WG01 TUMOR_BAM=`isabl get-bams ${TUMOR}` -OUTDIR=/work/isabl/home/arangooj/run/purple/${TUMOR} +NORMAL_BAM=`isabl get-bams ${NORMAL}` +SOMATIC_VCF=/data1/papaemme/isabl/data/analyses/18/97/541897/merged/IID_H211025_T01_01_WG01_vs_IID_H211025_N01_01_WG01.snvs.pass.flagged.vcf.gz +GERMLINE_VCF=/data1/papaemme/isabl/data/analyses/17/51/541751/merged/IID_H211025_N01_01_WG01.snvs.vcf.gz -REFGENOME=/work/isabl/ref/homo_sapiens/GRCh37d5/gr37.fasta +NF_PURPLE=/data1/papaemme/isabl/home/svc_papaemme_bot/dev/nf-purple/main.nf +OUTDIR=/data1/papaemme/isabl/home/svc_papaemme_bot/tmp/purple_matched +REFGENOME=/data1/papaemme/isabl/ref/homo_sapiens/GRCh37d5/gr37.fasta GENOMEVERSION=V37 -REFDIR=/work/isabl/ref/homo_sapiens/37/hmftools + +REFDIR=/data1/papaemme/isabl/ref/homo_sapiens/37/hmftools LOCI=${REFDIR}/copy_number/GermlineHetPon.37.vcf.gz GCPROFILE=${REFDIR}/copy_number/GC_profile.1000bp.37.cnp DIPLOIDREGIONS=${REFDIR}/copy_number/DiploidRegions.37.bed.gz @@ -14,10 +22,12 @@ ENSEMBLDATADIR=${REFDIR}/common/ensembl_data CIRCOS=/opt/circos-0.69-2/bin/circos nextflow run \ - -profile hpc \ - /work/isabl/home/arangooj/dev/nf-purple/main.nf \ + -profile hpc_slurm \ + ${NF_PURPLE} \ --tumor ${TUMOR} \ --tumorBam ${TUMOR_BAM} \ + --normal ${NORMAL} \ + --normalBam ${NORMAL_BAM} \ --outdir ${OUTDIR} \ --loci ${LOCI} \ --gcProfile ${GCPROFILE} \ @@ -25,4 +35,11 @@ nextflow run \ --ensemblDataDir ${ENSEMBLDATADIR} \ --genomeVersion ${GENOMEVERSION} \ --refGenome ${REFGENOME} \ - --circos ${CIRCOS} -resume + --circos ${CIRCOS} \ + --cores 8 \ + --memory '64G' \ + --somaticVcf $SOMATIC_VCF \ + --germlineVcf $GERMLINE_VCF \ + --binProbes 100 \ + --binLogR 0.5 \ + -resume From 14a817b4018eea163df094574465cbc59c47bd01 Mon Sep 17 00:00:00 2001 From: "Juan E. Arango" Date: Thu, 14 Nov 2024 16:42:11 -0500 Subject: [PATCH 04/20] =?UTF-8?q?=F0=9F=94=A7=20fix=20binCobalt=20and=20ma?= =?UTF-8?q?in=20workflow?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- bin/bin_cobalt.py | 13 +++++-------- main.nf | 46 +++++++++++++++++++++++++++------------------- nextflow.config | 14 +++++++------- run.sh | 4 ++-- run_sage.sh | 32 ++++++++++++++++++++++++++++++++ 5 files changed, 73 insertions(+), 36 deletions(-) create mode 100755 run_sage.sh diff --git a/bin/bin_cobalt.py b/bin/bin_cobalt.py index 6ed8886..1a22a9d 100755 --- a/bin/bin_cobalt.py +++ b/bin/bin_cobalt.py @@ -1,8 +1,9 @@ #!/usr/bin/env python3 +import argparse +import shutil import pandas as pd import numpy as np -import argparse parser = argparse.ArgumentParser( description=( @@ -16,12 +17,6 @@ required=True, help="Path to the input cobalt ratio .pcf file.", ) -parser.add_argument( - "--out_pcf", - type=str, - required=True, - help="Path to ouput the binned cobalt ratios.", -) parser.add_argument( "--bin_probes", type=int, @@ -96,4 +91,6 @@ ) last_idx = cobalt_ratio_pcf_probes_logR.index[-1] -cobalt_ratio_pcf_probes_logR.to_csv(args.out_pcf, sep="\t", index=False) +# store input with another name to replace original +shutil.move(args.in_pcf, args.in_pcf.replace(".pcf", ".original.pcf")) +cobalt_ratio_pcf_probes_logR.to_csv(args.in_pcf, sep="\t", index=False) diff --git a/main.nf b/main.nf index 5d19c01..06ba099 100644 --- a/main.nf +++ b/main.nf @@ -3,7 +3,7 @@ params.memory = '4 GB' // Params Defaults in juno params.refGenome = "/work/isabl/ref/homo_sapiens/GRCh37d5/gr37.fasta" -params.genomeVersion = "V37" +params.genomeVersion = "37" params.circos = "/opt/circos-0.69-2/bin/circos" params.loci = "/data/copy_number/GermlineHetPon.37.vcf.gz" params.gcProfile = "/data/copy_number/GC_profile.1000bp.37.cnp" @@ -50,6 +50,7 @@ logMessage += """\ log.info(logMessage.stripIndent()) +// See https://github.com/hartwigmedical/hmftools/tree/master/amber process runAmber { tag "AMBER on ${params.tumor}" + (params.normal ? " vs ${params.normal}" : "") publishDir "${params.outdir}/amber", mode: 'copy' @@ -68,8 +69,6 @@ process runAmber { path "${tumor}.amber.baf.pcf", emit: amber_baf_pcf path "${tumor}.amber.qc", emit: amber_qc path "${tumor}.amber.contamination.vcf.gz", emit: amber_contamination_vcf - path "${normal}.amber.snp.vcf.gz", emit: amber_normal_snp_vcf, optional: true - path "${normal}.amber.homozygousregion.tsv", emit: amber_normal_homozygousregion_tsv, optional: true script: def reference_args = normal ? """-reference ${normal} \\\n -reference_bam ${normalBam}""" : "" @@ -91,11 +90,12 @@ process runAmber { -output_dir \$PWD \\ -threads ${params.cores} \\ -loci ${params.loci} \\ - -ref_genome_version ${params.genomeVersion} + -ref_genome_version V${params.genomeVersion} fi """.stripIndent() } +// See https://github.com/hartwigmedical/hmftools/tree/master/cobalt#mandatory-arguments process runCobalt { tag "COBALT on ${params.tumor}" + (params.normal ? " vs ${params.normal}" : "") publishDir "${params.outdir}/cobalt", mode: 'copy' @@ -110,14 +110,17 @@ process runCobalt { path normalBam output: + path "${tumor}.cobalt.ratio.tsv.gz", emit: cobalt_tumor_ratio_tsv path "${tumor}.cobalt.ratio.pcf", emit: cobalt_tumor_ratio_pcf path "${normal}.cobalt.ratio.pcf", emit: cobalt_normal_ratio_pcf, optional: true script: - def reference_args = normal ? "-reference ${normal} \\\n -reference_bam ${normalBam}" : "-tumor_only_diploid_bed ${params.diploidRegions}" + def reference_args = normal ? "\t\t\t-reference ${normal} \\\n -reference_bam ${normalBam}" : "\t\t\t-tumor_only_diploid_bed ${params.diploidRegions}" """ - if [ -f "${params.outdir}/cobalt/${tumor}.cobalt.ratio.pcf" ]; then + if [ -f "${params.outdir}/cobalt/${tumor}.cobalt.ratio.tsv.gz" ] && \ + [ -f "${params.outdir}/cobalt/${tumor}.cobalt.ratio.pcf" ]; then echo "Output files already exist. Skipping cobalt execution." + ln -s ${params.outdir}/cobalt/${tumor}.cobalt.ratio.tsv.gz ${tumor}.cobalt.ratio.tsv.gz ln -s ${params.outdir}/cobalt/${tumor}.cobalt.ratio.pcf ${tumor}.cobalt.ratio.pcf if [ -f "${params.outdir}/cobalt/${normal}.cobalt.ratio.pcf" ]; then @@ -136,7 +139,7 @@ process runCobalt { } process binCobalt { - tag "COBALT BIN on ${params.tumor}" + tag "COBALT BIN on ${params.tumor}" + (params.normal ? " and ${params.normal}" : "") publishDir "${params.outdir}/cobalt/binned_${params.binProbes}_probes_${params.binLogR}_LogR", mode: 'copy' cpus 1 memory '4G' @@ -147,19 +150,20 @@ process binCobalt { val normal val binProbes val binLogR + path cobalt_tumor_ratio_tsv path cobalt_tumor_ratio_pcf path cobalt_normal_ratio_pcf output: - path "${tumor}.cobalt.ratio.binned.pcf", emit: cobalt_tumor_ratio_pcf - path "${normal}.cobalt.ratio.binned.pcf", emit: cobalt_normal_ratio_pcf + path "${tumor}.cobalt.ratio.tsv.gz", emit: cobalt_tumor_ratio_tsv + path "${tumor}.cobalt.ratio.pcf", emit: cobalt_tumor_ratio_pcf + path "${normal}.cobalt.ratio.pcf", emit: cobalt_normal_ratio_pcf script: """ # Bin Cobalt Tumor Probes bin_cobalt.py \\ --in_pcf ${cobalt_tumor_ratio_pcf} \\ - --out_pcf ${tumor}.cobalt.ratio.binned.pcf \\ --bin_probes ${binProbes} \\ --bin_log_r ${binLogR} @@ -167,13 +171,13 @@ process binCobalt { if [ -f "${cobalt_normal_ratio_pcf}" ]; then bin_cobalt.py \\ --in_pcf ${cobalt_normal_ratio_pcf} \\ - --out_pcf ${normal}.cobalt.ratio.binned.pcf \\ --bin_probes ${binProbes} \\ --bin_log_r ${binLogR} fi """.stripIndent() } +// See https://github.com/hartwigmedical/hmftools/blob/master/purple/README.md#arguments process runPurple { tag "PURPLE on ${params.tumor}" + (params.normal ? " vs ${params.normal}" : "") publishDir "${params.outdir}/purple/purple_${params.minPurity}_${params.maxPurity}", mode: 'copy' @@ -190,8 +194,7 @@ process runPurple { path amber_baf_pcf path amber_qc path amber_contamination_vcf - path amber_normal_snp_vcf - path amber_normal_homozygousregion_tsv + path cobalt_tumor_ratio_tsv path cobalt_tumor_ratio_pcf path cobalt_normal_ratio_pcf path cobalt_path @@ -214,7 +217,7 @@ process runPurple { script: def reference_args = normal ? "-reference ${normal}" : "" def somatic_vcf_args = somatic_vcf ? "-somatic_vcf ${somatic_vcf}" : "" - def germline_vcf_args = germline ? "-germline_vcf ${germline_vcf}" : "" + def germline_vcf_args = germline_vcf ? "-germline_vcf ${germline_vcf}" : "" """ purple \\ -tumor ${tumor} \\ @@ -253,10 +256,10 @@ workflow { // Bin Cobalt if expected postCobaltOutput = (binProbes != 0 || binLogR != 0) - ? binCobalt(tumor, normal, binProbes, binLogR, cobaltOutput.cobalt_tumor_ratio_pcf, cobaltOutput.cobalt_normal_ratio_pcf) + ? binCobalt(tumor, normal, binProbes, binLogR, cobaltOutput.cobalt_tumor_ratio_tsv, cobaltOutput.cobalt_tumor_ratio_pcf, cobaltOutput.cobalt_normal_ratio_pcf) : cobaltOutput - cobaltPath = (binProbes != 0 || binLogR != 0) + cobaltOutdir = (binProbes != 0 || binLogR != 0) ? "${params.outdir}/cobalt/binned_${params.binProbes}_probes_${params.binLogR}_LogR" : "${params.outdir}/cobalt" @@ -266,9 +269,14 @@ workflow { normal, somaticVcf, germlineVcf, - amberOutput, - postCobaltOutput, - cobaltPath, + amberOutput.amber_baf_tsv, + amberOutput.amber_baf_pcf, + amberOutput.amber_qc, + amberOutput.amber_contamination_vcf, + postCobaltOutput.cobalt_tumor_ratio_tsv, + postCobaltOutput.cobalt_tumor_ratio_pcf, + postCobaltOutput.cobalt_normal_ratio_pcf, + cobaltOutdir, ) } diff --git a/nextflow.config b/nextflow.config index cb45ec4..056b371 100644 --- a/nextflow.config +++ b/nextflow.config @@ -18,6 +18,12 @@ report { overwrite = true } +// Executor properties +executor { + name = 'lsf' + perJobMemLimit = true +} + // Profiles profiles { standard { @@ -51,10 +57,4 @@ profiles { container = 'papaemmelab/purple:v0.1.1' } } -} - -// Executor properties -executor { - name = 'lsf' - perJobMemLimit = true -} +} \ No newline at end of file diff --git a/run.sh b/run.sh index 10063da..909a1d8 100755 --- a/run.sh +++ b/run.sh @@ -28,6 +28,8 @@ nextflow run \ --tumorBam ${TUMOR_BAM} \ --normal ${NORMAL} \ --normalBam ${NORMAL_BAM} \ + --somaticVcf $SOMATIC_VCF \ + --germlineVcf $GERMLINE_VCF \ --outdir ${OUTDIR} \ --loci ${LOCI} \ --gcProfile ${GCPROFILE} \ @@ -38,8 +40,6 @@ nextflow run \ --circos ${CIRCOS} \ --cores 8 \ --memory '64G' \ - --somaticVcf $SOMATIC_VCF \ - --germlineVcf $GERMLINE_VCF \ --binProbes 100 \ --binLogR 0.5 \ -resume diff --git a/run_sage.sh b/run_sage.sh new file mode 100755 index 0000000..70b7419 --- /dev/null +++ b/run_sage.sh @@ -0,0 +1,32 @@ +#!/bin/sh + +TUMOR=IID_H211025_T01_01_WG01 +NORMAL=IID_H211025_N01_01_WG01 +TUMOR_BAM=`isabl get-bams ${TUMOR}` +NORMAL_BAM=`isabl get-bams ${NORMAL}` +REFGENOME=/data1/papaemme/isabl/ref/homo_sapiens/GRCh37d5/gr37.fasta +GENOMEVERSION=V37 +REFDIR=/data1/papaemme/isabl/home/liosisk/benchmarking/callers/sage +OUTDIR=/data1/papaemme/isabl/home/svc_papaemme_bot/tmp/purple_matched/sage_run +CORES=16 + +mkdir -p ${OUTDIR} + +singularity run \ + --bind /data1:/data1 \ + --bind /scratch:/scratch \ + --bind /usersoftware:/usersoftware \ + /data1/papaemme/isabl/home/liosisk/images/sage.sif \ + java -Xms4G -Xmx64G -cp /sage_v3.0_beta.jar com.hartwig.hmftools.sage.SageApplication \ + -threads ${CORES} \ + -tumor ${TUMOR} \ + -tumor_bam ${TUMOR_BAM} \ + -reference ${NORMAL} \ + -reference_bam ${NORMAL_BAM} \ + -ref_genome_version ${GENOMEVERSION} \ + -ref_genome ${REFGENOME} \ + -hotspots ${REFDIR}/KnownHotspots.somatic.37.vcf.gz \ + -panel_bed ${REFDIR}/ActionableCodingPanel.somatic.37.bed.gz \ + -high_confidence_bed ${REFDIR}/GIAB-High-Conf/37/NA12878_GIAB_highconf_IllFB-IllGATKHC-CG-Ion-Solid_ALLCHROM_v3.2.2_highconf.bed \ + -ensembl_data_dir ${REFDIR}/Ensembl-Data-Cache \ + -out "${REFDIR}/${TNAME}.sage.vcf.gz" From 8e7de21028796982949cd627960d619f1e6a8466 Mon Sep 17 00:00:00 2001 From: "Juan E. Arango" Date: Fri, 15 Nov 2024 19:19:24 -0500 Subject: [PATCH 05/20] =?UTF-8?q?=F0=9F=A7=AC=20add=20sage=20for=20somatic?= =?UTF-8?q?=20enrichment?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main.nf | 181 ++++++++++++++++++++++++++---------------------- nextflow.config | 2 - run.sh | 10 ++- run_sage.sh | 27 ++++++-- 4 files changed, 124 insertions(+), 96 deletions(-) diff --git a/main.nf b/main.nf index 06ba099..d7d9486 100644 --- a/main.nf +++ b/main.nf @@ -3,7 +3,7 @@ params.memory = '4 GB' // Params Defaults in juno params.refGenome = "/work/isabl/ref/homo_sapiens/GRCh37d5/gr37.fasta" -params.genomeVersion = "37" +params.genomeVersion = 37 params.circos = "/opt/circos-0.69-2/bin/circos" params.loci = "/data/copy_number/GermlineHetPon.37.vcf.gz" params.gcProfile = "/data/copy_number/GC_profile.1000bp.37.cnp" @@ -33,7 +33,6 @@ logMessage += (params.normal && params.normalBam) ? """\ """ : "" logMessage += """\ somaticVcf : ${params.somaticVcf} - germlineVcf : ${params.germlineVcf} outdir : ${params.outdir} cores : ${params.cores} memory : ${params.memory} @@ -59,32 +58,34 @@ process runAmber { time '1h' input: - val tumor - val normal path tumorBam path normalBam output: - path "${tumor}.amber.baf.tsv.gz", emit: amber_baf_tsv - path "${tumor}.amber.baf.pcf", emit: amber_baf_pcf - path "${tumor}.amber.qc", emit: amber_qc - path "${tumor}.amber.contamination.vcf.gz", emit: amber_contamination_vcf + path "${params.tumor}.amber.baf.tsv.gz", emit: amber_baf_tsv + path "${params.tumor}.amber.baf.pcf", emit: amber_baf_pcf + path "${params.tumor}.amber.qc", emit: amber_qc + path "${params.tumor}.amber.contamination.vcf.gz", emit: amber_contamination_vcf script: - def reference_args = normal ? """-reference ${normal} \\\n -reference_bam ${normalBam}""" : "" + def reference_args = params.normal ? """ + -reference ${params.normal} \\ + -reference_bam ${normalBam} \\ + """ : "" + """ - if [ -f "${params.outdir}/amber/${tumor}.amber.baf.tsv.gz" ] && \\ - [ -f "${params.outdir}/amber/${tumor}.amber.baf.pcf" ] && \\ - [ -f "${params.outdir}/amber/${tumor}.amber.qc" ] && \\ - [ -f "${params.outdir}/amber/${tumor}.amber.contamination.vcf.gz" ]; then + if [ -f "${params.outdir}/amber/${params.tumor}.amber.baf.tsv.gz" ] && \\ + [ -f "${params.outdir}/amber/${params.tumor}.amber.baf.pcf" ] && \\ + [ -f "${params.outdir}/amber/${params.tumor}.amber.qc" ] && \\ + [ -f "${params.outdir}/amber/${params.tumor}.amber.contamination.vcf.gz" ]; then echo "Output files already exist. Skipping amber execution." - ln -fs ${params.outdir}/amber/${tumor}.amber.baf.tsv.gz ${tumor}.amber.baf.tsv.gz - ln -fs ${params.outdir}/amber/${tumor}.amber.baf.pcf ${tumor}.amber.baf.pcf - ln -fs ${params.outdir}/amber/${tumor}.amber.qc ${tumor}.amber.qc - ln -fs ${params.outdir}/amber/${tumor}.amber.contamination.vcf.gz ${tumor}.amber.contamination.vcf.gz + ln -fs ${params.outdir}/amber/${params.tumor}.amber.baf.tsv.gz ${params.tumor}.amber.baf.tsv.gz + ln -fs ${params.outdir}/amber/${params.tumor}.amber.baf.pcf ${params.tumor}.amber.baf.pcf + ln -fs ${params.outdir}/amber/${params.tumor}.amber.qc ${params.tumor}.amber.qc + ln -fs ${params.outdir}/amber/${params.tumor}.amber.contamination.vcf.gz ${params.tumor}.amber.contamination.vcf.gz else amber \\ - -tumor ${tumor} \\ + -tumor ${params.tumor} \\ -tumor_bam ${tumorBam} \\ ${reference_args} \\ -output_dir \$PWD \\ @@ -104,31 +105,33 @@ process runCobalt { time '1h' input: - val tumor - val normal path tumorBam path normalBam output: - path "${tumor}.cobalt.ratio.tsv.gz", emit: cobalt_tumor_ratio_tsv - path "${tumor}.cobalt.ratio.pcf", emit: cobalt_tumor_ratio_pcf - path "${normal}.cobalt.ratio.pcf", emit: cobalt_normal_ratio_pcf, optional: true + path "${params.tumor}.cobalt.ratio.tsv.gz", emit: cobalt_tumor_ratio_tsv + path "${params.tumor}.cobalt.ratio.pcf", emit: cobalt_tumor_ratio_pcf + path "${params.normal}.cobalt.ratio.pcf", emit: cobalt_normal_ratio_pcf, optional: true script: - def reference_args = normal ? "\t\t\t-reference ${normal} \\\n -reference_bam ${normalBam}" : "\t\t\t-tumor_only_diploid_bed ${params.diploidRegions}" + def reference_args = params.normal ? """ + -reference ${params.normal} \\ + -reference_bam ${normalBam}""" : """ + -tumor_only_diploid_bed ${params.diploidRegions}""" + """ - if [ -f "${params.outdir}/cobalt/${tumor}.cobalt.ratio.tsv.gz" ] && \ - [ -f "${params.outdir}/cobalt/${tumor}.cobalt.ratio.pcf" ]; then + if [ -f "${params.outdir}/cobalt/${params.tumor}.cobalt.ratio.tsv.gz" ] && \ + [ -f "${params.outdir}/cobalt/${params.tumor}.cobalt.ratio.pcf" ]; then echo "Output files already exist. Skipping cobalt execution." - ln -s ${params.outdir}/cobalt/${tumor}.cobalt.ratio.tsv.gz ${tumor}.cobalt.ratio.tsv.gz - ln -s ${params.outdir}/cobalt/${tumor}.cobalt.ratio.pcf ${tumor}.cobalt.ratio.pcf + ln -s ${params.outdir}/cobalt/${params.tumor}.cobalt.ratio.tsv.gz ${params.tumor}.cobalt.ratio.tsv.gz + ln -s ${params.outdir}/cobalt/${params.tumor}.cobalt.ratio.pcf ${params.tumor}.cobalt.ratio.pcf - if [ -f "${params.outdir}/cobalt/${normal}.cobalt.ratio.pcf" ]; then - ln -s ${params.outdir}/cobalt/${normal}.cobalt.ratio.pcf ${normal}.cobalt.ratio.pcf + if [ -f "${params.outdir}/cobalt/${params.normal}.cobalt.ratio.pcf" ]; then + ln -s ${params.outdir}/cobalt/${params.normal}.cobalt.ratio.pcf ${params.normal}.cobalt.ratio.pcf fi else cobalt \\ - -tumor ${tumor} \\ + -tumor ${params.tumor} \\ -tumor_bam ${tumorBam} \\ ${reference_args} \\ -output_dir \$PWD \\ @@ -146,37 +149,64 @@ process binCobalt { time '1h' input: - val tumor - val normal - val binProbes - val binLogR path cobalt_tumor_ratio_tsv path cobalt_tumor_ratio_pcf path cobalt_normal_ratio_pcf output: - path "${tumor}.cobalt.ratio.tsv.gz", emit: cobalt_tumor_ratio_tsv - path "${tumor}.cobalt.ratio.pcf", emit: cobalt_tumor_ratio_pcf - path "${normal}.cobalt.ratio.pcf", emit: cobalt_normal_ratio_pcf + path "${params.tumor}.cobalt.ratio.tsv.gz", emit: cobalt_tumor_ratio_tsv + path "${params.tumor}.cobalt.ratio.pcf", emit: cobalt_tumor_ratio_pcf + path "${params.normal}.cobalt.ratio.pcf", emit: cobalt_normal_ratio_pcf script: """ # Bin Cobalt Tumor Probes bin_cobalt.py \\ --in_pcf ${cobalt_tumor_ratio_pcf} \\ - --bin_probes ${binProbes} \\ - --bin_log_r ${binLogR} + --bin_probes ${params.binProbes} \\ + --bin_log_r ${params.binLogR} # Bin Cobalt Normal probes if [ -f "${cobalt_normal_ratio_pcf}" ]; then bin_cobalt.py \\ --in_pcf ${cobalt_normal_ratio_pcf} \\ - --bin_probes ${binProbes} \\ - --bin_log_r ${binLogR} + --bin_probes ${params.binProbes} \\ + --bin_log_r ${params.binLogR} fi """.stripIndent() } +// See https://github.com/hartwigmedical/hmftools/blob/master/sage/README.md#usage +process runSage { + tag "SAGE on ${params.tumor}" + (params.normal ? " vs ${params.normal}" : "") + publishDir "${params.outdir}/sage", mode: 'copy' + cpus params.cores + memory params.memory + time '4h' + + input: + path tumorBam + path normalBam + + output: + path "${params.tumor}_vs_${params.normal}.vcf.gz", emit: sage_vcf + + script: + """ + sage \\ + -tumor ${params.tumor} \\ + -tumor_bam ${tumorBam} \\ + -reference ${params.normal} \\ + -reference_bam ${normalBam} \\ + -ref_genome ${params.refGenome} \\ + -ref_genome_version ${params.genomeVersion} \\ + -output_vcf \$PWD/${params.tumor}_vs_${params.normal}.vcf.gz \\ + -threads ${params.cores} \\ + -ensembl_data_dir ${params.ensemblDataDir} + """.stripIndent() +} + + // See https://github.com/hartwigmedical/hmftools/blob/master/purple/README.md#arguments process runPurple { tag "PURPLE on ${params.tumor}" + (params.normal ? " vs ${params.normal}" : "") @@ -186,10 +216,6 @@ process runPurple { time '1h' input: - val tumor - val normal - path somatic_vcf - path germline_vcf path amber_baf_tsv path amber_baf_pcf path amber_qc @@ -198,32 +224,31 @@ process runPurple { path cobalt_tumor_ratio_pcf path cobalt_normal_ratio_pcf path cobalt_path + path sage_vcf output: - path "${tumor}.purple.purity.tsv", emit: purple_purity_tsv - path "${tumor}.purple.qc", emit: purple_qc - path "${tumor}.purple.purity.range.tsv", emit: purple_purity_range_tsv - path "${tumor}.purple.cnv.somatic.tsv", emit: purple_cnv_somatic_tsv - path "${tumor}.purple.cnv.gene.tsv", emit: purple_cnv_gene_tsv - path "${tumor}.purple.segment.tsv", emit: purple_segment_tsv - path "${tumor}.purple.somatic.clonality.tsv", emit: purple_somatic_clonality_tsv - path "plot/${tumor}.segment.png", emit: purple_segment_png - path "plot/${tumor}.copynumber.png", emit: purple_copynumber_png - path "plot/${tumor}.circos.png", emit: purple_circos_png - path "plot/${tumor}.map.png", emit: purple_map_png - path "plot/${tumor}.input.png", emit: purple_input_png - path "plot/${tumor}.purity.range.png", emit: purple_purity_range_png + path "${params.tumor}.purple.purity.tsv", emit: purple_purity_tsv + path "${params.tumor}.purple.qc", emit: purple_qc + path "${params.tumor}.purple.purity.range.tsv", emit: purple_purity_range_tsv + path "${params.tumor}.purple.cnv.somatic.tsv", emit: purple_cnv_somatic_tsv + path "${params.tumor}.purple.cnv.gene.tsv", emit: purple_cnv_gene_tsv + path "${params.tumor}.purple.segment.tsv", emit: purple_segment_tsv + path "${params.tumor}.purple.somatic.clonality.tsv", emit: purple_somatic_clonality_tsv + path "plot/${params.tumor}.segment.png", emit: purple_segment_png + path "plot/${params.tumor}.copynumber.png", emit: purple_copynumber_png + path "plot/${params.tumor}.circos.png", emit: purple_circos_png + path "plot/${params.tumor}.map.png", emit: purple_map_png + path "plot/${params.tumor}.input.png", emit: purple_input_png + path "plot/${params.tumor}.purity.range.png", emit: purple_purity_range_png script: - def reference_args = normal ? "-reference ${normal}" : "" - def somatic_vcf_args = somatic_vcf ? "-somatic_vcf ${somatic_vcf}" : "" - def germline_vcf_args = germline_vcf ? "-germline_vcf ${germline_vcf}" : "" + def reference_args = params.normal ? """-reference ${params.normal}""" : "" + def somatic_vcf_args = params.normal && sage_vcf ? """-somatic_vcf ${sage_vcf}""" : "" + """ purple \\ - -tumor ${tumor} \\ + -tumor ${params.tumor} \\ ${reference_args} \\ - ${somatic_vcf_args} \\ - ${germline_vcf_args} \\ -amber ${params.outdir}/amber \\ -cobalt ${cobalt_path} \\ -output_dir \$PWD \\ @@ -240,35 +265,26 @@ process runPurple { } workflow { - // Arguments - tumor = Channel.value(params.tumor) - normal = Channel.value(params.normal) + // Input Bams tumorBam = Channel.fromPath(params.tumorBam) normalBam = Channel.fromPath(params.normalBam) - binProbes = Channel.value(params.binProbes) - binLogR = Channel.value(params.binLogR) - somaticVcf = Channel.value(params.somaticVcf) - germlineVcf = Channel.value(params.germlineVcf) - // Run Amber and Cobalt - amberOutput = runAmber(tumor, normal, tumorBam, normalBam) - cobaltOutput = runCobalt(tumor, normal, tumorBam, normalBam) + // Run Amber, Cobalt and Sage + amberOutput = runAmber(tumorBam, normalBam) + cobaltOutput = runCobalt(tumorBam, normalBam) + sageOutput = runSage(tumorBam, normalBam) // Bin Cobalt if expected - postCobaltOutput = (binProbes != 0 || binLogR != 0) - ? binCobalt(tumor, normal, binProbes, binLogR, cobaltOutput.cobalt_tumor_ratio_tsv, cobaltOutput.cobalt_tumor_ratio_pcf, cobaltOutput.cobalt_normal_ratio_pcf) + postCobaltOutput = (params.binProbes != 0 || params.binLogR != 0) + ? binCobalt(cobaltOutput.cobalt_tumor_ratio_tsv, cobaltOutput.cobalt_tumor_ratio_pcf, cobaltOutput.cobalt_normal_ratio_pcf) : cobaltOutput - cobaltOutdir = (binProbes != 0 || binLogR != 0) + cobaltOutdir = (params.binProbes != 0 || params.binLogR != 0) ? "${params.outdir}/cobalt/binned_${params.binProbes}_probes_${params.binLogR}_LogR" : "${params.outdir}/cobalt" // Run Purple runPurple( - tumor, - normal, - somaticVcf, - germlineVcf, amberOutput.amber_baf_tsv, amberOutput.amber_baf_pcf, amberOutput.amber_qc, @@ -277,6 +293,7 @@ workflow { postCobaltOutput.cobalt_tumor_ratio_pcf, postCobaltOutput.cobalt_normal_ratio_pcf, cobaltOutdir, + sageOutput.sage_vcf, ) } diff --git a/nextflow.config b/nextflow.config index 056b371..e9afded 100644 --- a/nextflow.config +++ b/nextflow.config @@ -39,8 +39,6 @@ profiles { } hpc_slurm { - singularity.enabled = true - singularity.autoMounts = true process { executor = 'slurm' queue = 'componc_cpu' diff --git a/run.sh b/run.sh index 909a1d8..564add9 100755 --- a/run.sh +++ b/run.sh @@ -6,15 +6,14 @@ TUMOR=IID_H211025_T01_01_WG01 NORMAL=IID_H211025_N01_01_WG01 TUMOR_BAM=`isabl get-bams ${TUMOR}` NORMAL_BAM=`isabl get-bams ${NORMAL}` -SOMATIC_VCF=/data1/papaemme/isabl/data/analyses/18/97/541897/merged/IID_H211025_T01_01_WG01_vs_IID_H211025_N01_01_WG01.snvs.pass.flagged.vcf.gz -GERMLINE_VCF=/data1/papaemme/isabl/data/analyses/17/51/541751/merged/IID_H211025_N01_01_WG01.snvs.vcf.gz +SOMATIC_VCF=/data1/papaemme/isabl/home/liosisk/benchmarking/callers/sage/.sage.vcf.gz NF_PURPLE=/data1/papaemme/isabl/home/svc_papaemme_bot/dev/nf-purple/main.nf OUTDIR=/data1/papaemme/isabl/home/svc_papaemme_bot/tmp/purple_matched REFGENOME=/data1/papaemme/isabl/ref/homo_sapiens/GRCh37d5/gr37.fasta -GENOMEVERSION=V37 +GENOMEVERSION=37 -REFDIR=/data1/papaemme/isabl/ref/homo_sapiens/37/hmftools +REFDIR=/data1/papaemme/isabl/ref/homo_sapiens/37/hmftools/v6_0/ref/37 LOCI=${REFDIR}/copy_number/GermlineHetPon.37.vcf.gz GCPROFILE=${REFDIR}/copy_number/GC_profile.1000bp.37.cnp DIPLOIDREGIONS=${REFDIR}/copy_number/DiploidRegions.37.bed.gz @@ -29,7 +28,6 @@ nextflow run \ --normal ${NORMAL} \ --normalBam ${NORMAL_BAM} \ --somaticVcf $SOMATIC_VCF \ - --germlineVcf $GERMLINE_VCF \ --outdir ${OUTDIR} \ --loci ${LOCI} \ --gcProfile ${GCPROFILE} \ @@ -38,7 +36,7 @@ nextflow run \ --genomeVersion ${GENOMEVERSION} \ --refGenome ${REFGENOME} \ --circos ${CIRCOS} \ - --cores 8 \ + --cores 16 \ --memory '64G' \ --binProbes 100 \ --binLogR 0.5 \ diff --git a/run_sage.sh b/run_sage.sh index 70b7419..f0c529c 100755 --- a/run_sage.sh +++ b/run_sage.sh @@ -12,21 +12,36 @@ CORES=16 mkdir -p ${OUTDIR} +# singularity run \ +# --bind /data1:/data1 \ +# --bind /scratch:/scratch \ +# --bind /usersoftware:/usersoftware \ +# /data1/papaemme/isabl/home/liosisk/images/sage.sif \ +# java -Xms4G -Xmx64G -cp /sage_v3.0_beta.jar com.hartwig.hmftools.sage.SageApplication \ +# -threads ${CORES} \ +# -tumor ${TUMOR} \ +# -tumor_bam ${TUMOR_BAM} \ +# -reference ${NORMAL} \ +# -reference_bam ${NORMAL_BAM} \ +# -ref_genome_version ${GENOMEVERSION} \ +# -ref_genome ${REFGENOME} \ +# -hotspots ${REFDIR}/KnownHotspots.somatic.37.vcf.gz \ +# -panel_bed ${REFDIR}/ActionableCodingPanel.somatic.37.bed.gz \ +# -high_confidence_bed ${REFDIR}/GIAB-High-Conf/37/NA12878_GIAB_highconf_IllFB-IllGATKHC-CG-Ion-Solid_ALLCHROM_v3.2.2_highconf.bed \ +# -ensembl_data_dir ${REFDIR}/Ensembl-Data-Cache \ +# -out "${REFDIR}/${TNAME}.sage.vcf.gz" + + singularity run \ --bind /data1:/data1 \ --bind /scratch:/scratch \ --bind /usersoftware:/usersoftware \ /data1/papaemme/isabl/home/liosisk/images/sage.sif \ java -Xms4G -Xmx64G -cp /sage_v3.0_beta.jar com.hartwig.hmftools.sage.SageApplication \ - -threads ${CORES} \ -tumor ${TUMOR} \ -tumor_bam ${TUMOR_BAM} \ -reference ${NORMAL} \ -reference_bam ${NORMAL_BAM} \ -ref_genome_version ${GENOMEVERSION} \ -ref_genome ${REFGENOME} \ - -hotspots ${REFDIR}/KnownHotspots.somatic.37.vcf.gz \ - -panel_bed ${REFDIR}/ActionableCodingPanel.somatic.37.bed.gz \ - -high_confidence_bed ${REFDIR}/GIAB-High-Conf/37/NA12878_GIAB_highconf_IllFB-IllGATKHC-CG-Ion-Solid_ALLCHROM_v3.2.2_highconf.bed \ - -ensembl_data_dir ${REFDIR}/Ensembl-Data-Cache \ - -out "${REFDIR}/${TNAME}.sage.vcf.gz" + -output_vcf "${OUTDIR}/${TUMOR}.sage.vcf.gz" \ No newline at end of file From e744b5ab58930cbfb91ea237af7ab46d461d4106 Mon Sep 17 00:00:00 2001 From: "Juan E. Arango" Date: Fri, 15 Nov 2024 19:22:11 -0500 Subject: [PATCH 06/20] =?UTF-8?q?=E2=9C=85=20modify=20test.config?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- nf-test.config | 18 +++++++++--------- run_sage.sh | 20 -------------------- 2 files changed, 9 insertions(+), 29 deletions(-) diff --git a/nf-test.config b/nf-test.config index 5bcf7a7..b197744 100644 --- a/nf-test.config +++ b/nf-test.config @@ -3,13 +3,13 @@ config { workDir ".nf-test" configFile "tests/nextflow.config" profile "cloud" -} - -params { - memory = '4 GB' - tumor = "TEST" - binProbes = 100 - binLogR = 0.5 - cobalt_ratio_pcf = "${projectDir}/tests/outdir/cobalt/TEST.cobalt.ratio.pcf" - cobalt_ratio_tsv = "${projectDir}/tests/outdir/cobalt/TEST.cobalt.ratio.tsv.gz" + + params { + memory = '4 GB' + tumor = "TEST" + binProbes = 100 + binLogR = 0.5 + cobalt_ratio_pcf = "${projectDir}/tests/outdir/cobalt/TEST.cobalt.ratio.pcf" + cobalt_ratio_tsv = "${projectDir}/tests/outdir/cobalt/TEST.cobalt.ratio.tsv.gz" + } } \ No newline at end of file diff --git a/run_sage.sh b/run_sage.sh index f0c529c..ce2c330 100755 --- a/run_sage.sh +++ b/run_sage.sh @@ -12,26 +12,6 @@ CORES=16 mkdir -p ${OUTDIR} -# singularity run \ -# --bind /data1:/data1 \ -# --bind /scratch:/scratch \ -# --bind /usersoftware:/usersoftware \ -# /data1/papaemme/isabl/home/liosisk/images/sage.sif \ -# java -Xms4G -Xmx64G -cp /sage_v3.0_beta.jar com.hartwig.hmftools.sage.SageApplication \ -# -threads ${CORES} \ -# -tumor ${TUMOR} \ -# -tumor_bam ${TUMOR_BAM} \ -# -reference ${NORMAL} \ -# -reference_bam ${NORMAL_BAM} \ -# -ref_genome_version ${GENOMEVERSION} \ -# -ref_genome ${REFGENOME} \ -# -hotspots ${REFDIR}/KnownHotspots.somatic.37.vcf.gz \ -# -panel_bed ${REFDIR}/ActionableCodingPanel.somatic.37.bed.gz \ -# -high_confidence_bed ${REFDIR}/GIAB-High-Conf/37/NA12878_GIAB_highconf_IllFB-IllGATKHC-CG-Ion-Solid_ALLCHROM_v3.2.2_highconf.bed \ -# -ensembl_data_dir ${REFDIR}/Ensembl-Data-Cache \ -# -out "${REFDIR}/${TNAME}.sage.vcf.gz" - - singularity run \ --bind /data1:/data1 \ --bind /scratch:/scratch \ From 79e94d9a1c417a62ee2db73b43c28c78cfb07019 Mon Sep 17 00:00:00 2001 From: "Juan E. Arango" Date: Fri, 15 Nov 2024 19:29:54 -0500 Subject: [PATCH 07/20] =?UTF-8?q?=E2=9C=85=20modify=20test.config?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- nf-test.config | 9 --------- tests/nextflow.config | 9 +++++++++ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/nf-test.config b/nf-test.config index b197744..b279792 100644 --- a/nf-test.config +++ b/nf-test.config @@ -3,13 +3,4 @@ config { workDir ".nf-test" configFile "tests/nextflow.config" profile "cloud" - - params { - memory = '4 GB' - tumor = "TEST" - binProbes = 100 - binLogR = 0.5 - cobalt_ratio_pcf = "${projectDir}/tests/outdir/cobalt/TEST.cobalt.ratio.pcf" - cobalt_ratio_tsv = "${projectDir}/tests/outdir/cobalt/TEST.cobalt.ratio.tsv.gz" - } } \ No newline at end of file diff --git a/tests/nextflow.config b/tests/nextflow.config index c19b1ad..adc318a 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -3,3 +3,12 @@ Nextflow config file for running tests ======================================================================================== */ + +params { + memory = '4 GB' + tumor = "TEST" + binProbes = 100 + binLogR = 0.5 + cobalt_ratio_pcf = "${projectDir}/tests/outdir/cobalt/TEST.cobalt.ratio.pcf" + cobalt_ratio_tsv = "${projectDir}/tests/outdir/cobalt/TEST.cobalt.ratio.tsv.gz" +} \ No newline at end of file From d74dc5c61f78985876e63c726cafb7f013ab4867 Mon Sep 17 00:00:00 2001 From: "Juan E. Arango" Date: Fri, 20 Dec 2024 13:21:08 -0500 Subject: [PATCH 08/20] =?UTF-8?q?=F0=9F=94=A7=20only=20run=20cobalt=20binn?= =?UTF-8?q?ing=20for=20matched?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main.nf | 4 ++-- nextflow.config | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/main.nf b/main.nf index d7d9486..bf48191 100644 --- a/main.nf +++ b/main.nf @@ -275,11 +275,11 @@ workflow { sageOutput = runSage(tumorBam, normalBam) // Bin Cobalt if expected - postCobaltOutput = (params.binProbes != 0 || params.binLogR != 0) + postCobaltOutput = (params.normalBam) && (params.binProbes != 0 || params.binLogR != 0) ? binCobalt(cobaltOutput.cobalt_tumor_ratio_tsv, cobaltOutput.cobalt_tumor_ratio_pcf, cobaltOutput.cobalt_normal_ratio_pcf) : cobaltOutput - cobaltOutdir = (params.binProbes != 0 || params.binLogR != 0) + cobaltOutdir = (params.normalBam) && (params.binProbes != 0 || params.binLogR != 0) ? "${params.outdir}/cobalt/binned_${params.binProbes}_probes_${params.binLogR}_LogR" : "${params.outdir}/cobalt" diff --git a/nextflow.config b/nextflow.config index e9afded..fb19a0a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,5 +1,3 @@ - - // Metrics Files dag { enabled = true From 4f7b7fe33e8375bf99a8aaa7c2b6763b5e86adef Mon Sep 17 00:00:00 2001 From: "Juan E. Arango" Date: Fri, 20 Dec 2024 19:51:44 -0500 Subject: [PATCH 09/20] =?UTF-8?q?=E2=9C=85=20add=20e2e=20tests=20for=20mat?= =?UTF-8?q?ched?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/ci.yml | 7 +- .gitignore | 2 + main.nf | 60 +++++++++--------- nextflow.config | 49 +++++++------- run.sh | 2 +- tests/data/README.md | 40 ++++++++++++ tests/data/amber/TEST_TUMOR.amber.baf.pcf | 4 ++ tests/data/amber/TEST_TUMOR.amber.baf.tsv.gz | Bin 0 -> 565 bytes .../TEST_TUMOR.amber.contamination.vcf.gz | Bin 0 -> 321 bytes tests/data/amber/TEST_TUMOR.amber.qc | 4 ++ .../data/cobalt/TEST_NORMAL.cobalt.ratio.pcf | 3 + .../TEST_TUMOR.cobalt.ratio.pcf} | 2 +- .../cobalt/TEST_TUMOR.cobalt.ratio.tsv.gz | Bin 0 -> 3055 bytes .../TEST_NORMAL.cobalt.ratio.pcf | 3 + .../TEST_TUMOR.cobalt.ratio.pcf | 3 + .../TEST_TUMOR.cobalt.ratio.tsv.gz} | Bin tests/data/{ => input}/normal.bam | Bin tests/data/{ => input}/normal.bam.bai | Bin tests/data/{ => input}/tumor.bam | Bin tests/data/{ => input}/tumor.bam.bai | Bin .../ref/ensembl_data/ensemble_gene_data.csv | 4 ++ tests/data/{ => ref}/reference.fasta | 0 tests/data/{ => ref}/reference.fasta.amb | 0 tests/data/{ => ref}/reference.fasta.ann | 0 tests/data/{ => ref}/reference.fasta.bwt | Bin tests/data/{ => ref}/reference.fasta.dict | 0 tests/data/{ => ref}/reference.fasta.fai | 0 tests/data/{ => ref}/reference.fasta.pac | Bin tests/data/{ => ref}/reference.fasta.sa | Bin .../sage/TEST_TUMOR_vs_TEST_NORMAL.vcf.gz | Bin 0 -> 3997 bytes .../sage/TEST_TUMOR_vs_TEST_NORMAL.vcf.gz.tbi | Bin 0 -> 185 bytes tests/main.binCobalt.nf.test | 38 +++++++++++ tests/main.binCobalt.nf.test.snap | 31 +++++++++ tests/main.bincobalt.nf.test | 41 ------------ tests/main.bincobalt.nf.test.snap | 25 -------- tests/main.nf.test | 16 ++--- ...runamber.nf.test => main.runAmber.nf.test} | 14 +--- ...f.test.snap => main.runAmber.nf.test.snap} | 18 ++---- tests/main.runCobalt.nf.test | 33 ++++++++++ ....test.snap => main.runCobalt.nf.test.snap} | 10 +-- tests/main.runPurple.nf.test | 58 +++++++++++++++++ tests/main.runSage.nf.test | 35 ++++++++++ tests/main.runcobalt.nf.test | 40 ------------ tests/main.runpurple.nf.test | 60 ------------------ tests/nextflow.config | 10 ++- 45 files changed, 343 insertions(+), 269 deletions(-) create mode 100644 tests/data/README.md create mode 100644 tests/data/amber/TEST_TUMOR.amber.baf.pcf create mode 100644 tests/data/amber/TEST_TUMOR.amber.baf.tsv.gz create mode 100644 tests/data/amber/TEST_TUMOR.amber.contamination.vcf.gz create mode 100644 tests/data/amber/TEST_TUMOR.amber.qc create mode 100644 tests/data/cobalt/TEST_NORMAL.cobalt.ratio.pcf rename tests/data/{TEST.cobalt.ratio.pcf => cobalt/TEST_TUMOR.cobalt.ratio.pcf} (66%) create mode 100644 tests/data/cobalt/TEST_TUMOR.cobalt.ratio.tsv.gz create mode 100644 tests/data/cobalt/binned_100_probes_0.5_LogR/TEST_NORMAL.cobalt.ratio.pcf create mode 100644 tests/data/cobalt/binned_100_probes_0.5_LogR/TEST_TUMOR.cobalt.ratio.pcf rename tests/data/{TEST.cobalt.ratio.tsv.gz => cobalt/binned_100_probes_0.5_LogR/TEST_TUMOR.cobalt.ratio.tsv.gz} (100%) rename tests/data/{ => input}/normal.bam (100%) rename tests/data/{ => input}/normal.bam.bai (100%) rename tests/data/{ => input}/tumor.bam (100%) rename tests/data/{ => input}/tumor.bam.bai (100%) create mode 100644 tests/data/ref/ensembl_data/ensemble_gene_data.csv rename tests/data/{ => ref}/reference.fasta (100%) rename tests/data/{ => ref}/reference.fasta.amb (100%) rename tests/data/{ => ref}/reference.fasta.ann (100%) rename tests/data/{ => ref}/reference.fasta.bwt (100%) rename tests/data/{ => ref}/reference.fasta.dict (100%) rename tests/data/{ => ref}/reference.fasta.fai (100%) rename tests/data/{ => ref}/reference.fasta.pac (100%) rename tests/data/{ => ref}/reference.fasta.sa (100%) create mode 100644 tests/data/sage/TEST_TUMOR_vs_TEST_NORMAL.vcf.gz create mode 100644 tests/data/sage/TEST_TUMOR_vs_TEST_NORMAL.vcf.gz.tbi create mode 100644 tests/main.binCobalt.nf.test create mode 100644 tests/main.binCobalt.nf.test.snap delete mode 100644 tests/main.bincobalt.nf.test delete mode 100644 tests/main.bincobalt.nf.test.snap rename tests/{main.runamber.nf.test => main.runAmber.nf.test} (55%) rename tests/{main.runamber.nf.test.snap => main.runAmber.nf.test.snap} (65%) create mode 100644 tests/main.runCobalt.nf.test rename tests/{main.runcobalt.nf.test.snap => main.runCobalt.nf.test.snap} (81%) create mode 100644 tests/main.runPurple.nf.test create mode 100644 tests/main.runSage.nf.test delete mode 100644 tests/main.runcobalt.nf.test delete mode 100644 tests/main.runpurple.nf.test diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2f5dcce..2dded6f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,12 +20,9 @@ jobs: - name: Pull Docker image and cache run: | docker pull papaemmelab/purple:v0.1.1 - - name: Run unit tests of each process for Amber, Cobalt, Purple + - name: Run unit tests of each process for Amber, Cobalt, binCobalt, Sage, Purple run: | - nf-test test tests/main.runamber.nf.test - nf-test test tests/main.runcobalt.nf.test - nf-test test tests/main.bincobalt.nf.test - nf-test test tests/main.runpurple.nf.test + nf-test test tests/main.*.nf.test - name: Run pipeline end-to-end test run: | nf-test test tests/main.nf.test diff --git a/.gitignore b/.gitignore index 7ed863f..8bff4fe 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ # Nextflow run files nextflow +nf-test work capsule framework @@ -10,6 +11,7 @@ tmp # Tests tests/outdir/* +tests/data/ref/ensembl_data_original outdir plugins slurm*.out \ No newline at end of file diff --git a/main.nf b/main.nf index bf48191..b490b73 100644 --- a/main.nf +++ b/main.nf @@ -1,8 +1,7 @@ params.cores = 1 params.memory = '4 GB' -// Params Defaults in juno -params.refGenome = "/work/isabl/ref/homo_sapiens/GRCh37d5/gr37.fasta" +// Params Defaults params.genomeVersion = 37 params.circos = "/opt/circos-0.69-2/bin/circos" params.loci = "/data/copy_number/GermlineHetPon.37.vcf.gz" @@ -20,31 +19,32 @@ params.maxPurity = 1.0 def logMessage = """\ HMFTOOLS - PURPLE ======================================== - Running Mode: ${params.normal ? 'Matched' : 'Unmatched'} + Running Mode : ${params.normal ? 'Matched' : 'Unmatched'} ---------------------------------------- Params: ---------------------------------------- - tumor : ${params.tumor} - tumorBam : ${params.tumorBam} + tumor : ${params.tumor} + tumorBam : ${params.tumorBam} """ logMessage += (params.normal && params.normalBam) ? """\ - normal : ${params.normal} - normalBam : ${params.normalBam} + normal : ${params.normal} + normalBam : ${params.normalBam} """ : "" logMessage += """\ - somaticVcf : ${params.somaticVcf} - outdir : ${params.outdir} - cores : ${params.cores} - memory : ${params.memory} - binProbes : ${params.binProbes} - binLogR : ${params.binLogR} - minPurity : ${params.minPurity} - maxPurity : ${params.maxPurity} + somaticVcf : ${params.somaticVcf} + outdir : ${params.outdir} + cores : ${params.cores} + memory : ${params.memory} + binProbes : ${params.binProbes} + binLogR : ${params.binLogR} + minPurity : ${params.minPurity} + maxPurity : ${params.maxPurity} + ensemblDataDir : ${params.ensemblDataDir} ======================================== Workflow: ---------------------------------------- - Project : ${workflow.projectDir} - Cmd line : ${workflow.commandLine} + Project : ${workflow.projectDir} + Cmd line : ${workflow.commandLine} """ log.info(logMessage.stripIndent()) @@ -68,10 +68,9 @@ process runAmber { path "${params.tumor}.amber.contamination.vcf.gz", emit: amber_contamination_vcf script: - def reference_args = params.normal ? """ + def reference_args = params.normal ? """\\ -reference ${params.normal} \\ - -reference_bam ${normalBam} \\ - """ : "" + -reference_bam ${normalBam} """ : "" """ if [ -f "${params.outdir}/amber/${params.tumor}.amber.baf.tsv.gz" ] && \\ @@ -86,8 +85,7 @@ process runAmber { else amber \\ -tumor ${params.tumor} \\ - -tumor_bam ${tumorBam} \\ - ${reference_args} \\ + -tumor_bam ${tumorBam} ${reference_args} \\ -output_dir \$PWD \\ -threads ${params.cores} \\ -loci ${params.loci} \\ @@ -114,9 +112,9 @@ process runCobalt { path "${params.normal}.cobalt.ratio.pcf", emit: cobalt_normal_ratio_pcf, optional: true script: - def reference_args = params.normal ? """ + def reference_args = params.normal ? """\\ -reference ${params.normal} \\ - -reference_bam ${normalBam}""" : """ + -reference_bam ${normalBam}""" : """\\ -tumor_only_diploid_bed ${params.diploidRegions}""" """ @@ -132,8 +130,7 @@ process runCobalt { else cobalt \\ -tumor ${params.tumor} \\ - -tumor_bam ${tumorBam} \\ - ${reference_args} \\ + -tumor_bam ${tumorBam} ${reference_args} \\ -output_dir \$PWD \\ -threads ${params.cores} \\ -gc_profile ${params.gcProfile} @@ -242,16 +239,17 @@ process runPurple { path "plot/${params.tumor}.purity.range.png", emit: purple_purity_range_png script: - def reference_args = params.normal ? """-reference ${params.normal}""" : "" - def somatic_vcf_args = params.normal && sage_vcf ? """-somatic_vcf ${sage_vcf}""" : "" + def reference_args = params.normal ? """\\ + -reference ${params.normal}""" : "" + def somatic_vcf_args = params.normal && sage_vcf ? """\\ + -somatic_vcf ${sage_vcf}""" : "" """ purple \\ - -tumor ${params.tumor} \\ - ${reference_args} \\ + -tumor ${params.tumor} ${reference_args} \\ -amber ${params.outdir}/amber \\ -cobalt ${cobalt_path} \\ - -output_dir \$PWD \\ + -output_dir \$PWD ${somatic_vcf_args} \\ -gc_profile ${params.gcProfile} \\ -ref_genome ${params.refGenome} \\ -ref_genome_version ${params.genomeVersion} \\ diff --git a/nextflow.config b/nextflow.config index fb19a0a..bd78ed8 100644 --- a/nextflow.config +++ b/nextflow.config @@ -16,41 +16,46 @@ report { overwrite = true } -// Executor properties -executor { - name = 'lsf' - perJobMemLimit = true +singularity { + enabled = true + autoCleanUp = true + autoMounts = true + cacheDir = '/data1/papaemme/isabl/opt/singularity' + envWhitelist = 'SINGULARITY_BINDPATH,SINGULARITYENV_LD_LIBRARY_PATH,SINGULARITYENV_LD_PRELOAD' } // Profiles profiles { - standard { - process.executor = 'local' + // Computing Environment + cloud { + singularity.enabled = false + docker { + enabled = true + runOptions = "--entrypoint ''" + } + process { + executor = 'local' + container = 'papaemmelab/purple:v0.1.1' + } } - - hpc_lsf { - singularity.enabled = true - singularity.autoMounts = true + + // Executors + local { + process.executor = 'local' + } + + lsf { process { executor = 'lsf' + perJobMemLimit = true } } - hpc_slurm { + slurm { process { executor = 'slurm' queue = 'componc_cpu' } } - - cloud { - docker { - enabled = true - runOptions = "--entrypoint ''" - } - process { - executor = 'local' - container = 'papaemmelab/purple:v0.1.1' - } - } + } \ No newline at end of file diff --git a/run.sh b/run.sh index 564add9..6567068 100755 --- a/run.sh +++ b/run.sh @@ -21,7 +21,7 @@ ENSEMBLDATADIR=${REFDIR}/common/ensembl_data CIRCOS=/opt/circos-0.69-2/bin/circos nextflow run \ - -profile hpc_slurm \ + -profile slurm \ ${NF_PURPLE} \ --tumor ${TUMOR} \ --tumorBam ${TUMOR_BAM} \ diff --git a/tests/data/README.md b/tests/data/README.md new file mode 100644 index 0000000..fc4e0e2 --- /dev/null +++ b/tests/data/README.md @@ -0,0 +1,40 @@ +# Create Test Files + +Using `reference.fasta` regions: + +```bash +$ cat tests/data/reference.fasta | grep "^>" +>1:100000-200000 +>2:300000-400000 +``` + +## Sage and Purple Ensembl Data Files + +```bash +DIR_IN=tests/data/ensembl_data_original +DIR_OUT=tests/data/ensembl_data + +# For +awk -F, 'NR==1 || ($3 == "1" && $5 >= 100000 && $6 <= 200000) || ($3 == "2" && $5 >= 300000 && $6 <= 400000)' $DIR_IN/ensemble_gene_data.csv > $DIR_OUT/ensemble_gene_data.csv +``` + +# Sage VCF: + +```bash +## Define the input and output files +input_vcf="/data1/papaemme/isabl/home/svc_papaemme_bot/tmp/purple_matched/sage/IID_H211025_T01_01_WG01_vs_IID_H211025_N01_01_WG01.vcf.gz" +output_vcf="tests/data/TEST_TUMOR_vs_TEST_NORMAL.vcf" + +## Extract tumor and normal sample names from the filename +filename=$(basename "$input_vcf" .vcf.gz) +tumor_sample=$(echo "$filename" | cut -d'_' -f1-5) +normal_sample=$(echo "$filename" | cut -d'_' -f7-11) + +## Filter the VCF and replace sample names +zcat "$input_vcf" | \ +awk 'BEGIN {FS=OFS="\t"} /^#/ {print; next} ($1 == "1" && $2 >= 100000 && $2 <= 200000) || ($1 == "2" && $2 >= 300000 && $2 <= 400000) {print}' | \ +sed "s/$tumor_sample/TEST_TUMOR/g; s/$normal_sample/TEST_NORMAL/g" > "$output_vcf" + +bgzip -f $output_vcf +tabix -f -p vcf $output_vcf.gz +``` \ No newline at end of file diff --git a/tests/data/amber/TEST_TUMOR.amber.baf.pcf b/tests/data/amber/TEST_TUMOR.amber.baf.pcf new file mode 100644 index 0000000..577836a --- /dev/null +++ b/tests/data/amber/TEST_TUMOR.amber.baf.pcf @@ -0,0 +1,4 @@ +sampleID chrom arm start.pos end.pos n.probes mean +tumorModifiedBAF 2 p 301051 301051 1 0.5826 +tumorModifiedBAF 2 p 301751 301751 1 0.5684 +tumorModifiedBAF 2 p 301784 384236 28 0.5805 diff --git a/tests/data/amber/TEST_TUMOR.amber.baf.tsv.gz b/tests/data/amber/TEST_TUMOR.amber.baf.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..7bbf636567be5c275052611d3bd1465a204eecbd GIT binary patch literal 565 zcmV-50?Pd#iwFP!000000BuxDZXGcUTRVp-WLw{5i=ew6qG(d&0ezVuN$+2odLE=e zRy1smERpp5dU`#s$J=pzY%j;#<^6IzxA(u-yrOz4^3Hb*%#* zZ0qWoWmj)vIR?s&(qw@^Esj3tXMXXls(NONLlF%2@~fv`!(Ijg*KGRJ_3{wn5HBX9~j#L9dyum?L z?J%3mwG2TPiJIpGy-~T-r&Uww4fPJxZK5hEhKYF#tT^-r7>qS4N9VvpRGZSmk#eUE z9z~U+DzBQ8x#O2}d%78y z(HyCaS-+W_p`NUh!KM)aqpVd=)<@+hbrl7b$E>*2A1-XYdkAW@LHO<;>dU)6ZUq1U D&&m~D literal 0 HcmV?d00001 diff --git a/tests/data/amber/TEST_TUMOR.amber.contamination.vcf.gz b/tests/data/amber/TEST_TUMOR.amber.contamination.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..cbcdea8885e099aa56e4b77e991e20c669074bcf GIT binary patch literal 321 zcmb2|=3rp}f&Xj_PR>jWDvY%gPG=o95OBSIDxBxzGS=XXjIvP2BV}11w6|T#EnHNn zRFI;$a{d1GGTXb=F0?-A+F5??42z?B^&7V0EZN#f)%X16-H`$+na3W_vRfv&HsW%N z;H|wAZ=H0!X!*MOgJ%1&ro6SThV@d$sdJ6%&TY`px#wqXvW4&GbhlYr?d&GuwHu~a z3#_<%u}Plkj^M!r_O(J6zMZqrcjOM5>)kAvv0(bOSmVX7rdR%#56(8O@!J2EBkL$j z`|QBz9~0u9OYe%%v(5hXZcpZx=nUR}Z+ahn6Waew{KqlLm3;LtvqPFcA2r-lweIws zkW}YO9ajpSB95I*J8fhnJAa95(4iaQ^!CtbO@zn3Izr;-UF)}d7qXm*Q K12Z_xKm-6)tc$1s literal 0 HcmV?d00001 diff --git a/tests/data/amber/TEST_TUMOR.amber.qc b/tests/data/amber/TEST_TUMOR.amber.qc new file mode 100644 index 0000000..8f2b6b2 --- /dev/null +++ b/tests/data/amber/TEST_TUMOR.amber.qc @@ -0,0 +1,4 @@ +QCStatus PASS +Contamination 0.0000 +ConsanguinityProportion 0.0000 +UniparentalDisomy NONE diff --git a/tests/data/cobalt/TEST_NORMAL.cobalt.ratio.pcf b/tests/data/cobalt/TEST_NORMAL.cobalt.ratio.pcf new file mode 100644 index 0000000..834eeef --- /dev/null +++ b/tests/data/cobalt/TEST_NORMAL.cobalt.ratio.pcf @@ -0,0 +1,3 @@ +sampleID chrom arm start.pos end.pos n.probes mean +S1 2 p 14001 397001 340 -7.2411 +S1 17 p 1 1 1 -9.96578428466209 diff --git a/tests/data/TEST.cobalt.ratio.pcf b/tests/data/cobalt/TEST_TUMOR.cobalt.ratio.pcf similarity index 66% rename from tests/data/TEST.cobalt.ratio.pcf rename to tests/data/cobalt/TEST_TUMOR.cobalt.ratio.pcf index f07a0b7..e81095b 100644 --- a/tests/data/TEST.cobalt.ratio.pcf +++ b/tests/data/cobalt/TEST_TUMOR.cobalt.ratio.pcf @@ -1,3 +1,3 @@ sampleID chrom arm start.pos end.pos n.probes mean S1 2 p 14001 397001 340 -7.24 -S1 17 p 1 397001 341 -8.813389396729216 +S1 17 p 1 1 1 -9.96578428466209 diff --git a/tests/data/cobalt/TEST_TUMOR.cobalt.ratio.tsv.gz b/tests/data/cobalt/TEST_TUMOR.cobalt.ratio.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d213e08ca3e0f0952eaca21754a6f04da49d338 GIT binary patch literal 3055 zcmZ|P2{=^i9|v%$8yS~Ok!5ft#TZ7}cZwlYMwziS_OWISA?u(l<)%i-Rxy@LX2@FB zp~gN!W4|g)wy}*#LyiC3d;k5Pp8tLBdEV!9zVCV8_dL%zzw;jHbY8B%k1)snyEF{} zy}0knx+>*+q1-F_rPJ=e_qp+mo#(mVy$gMlHJzP~Ij~^;$qNYwrq|@puy^U-mv^1l zoDEyqH}{r%x1nosO)W8O%ICnsy{~izv@q`b_tNdwy|3&Y_V(y@pfqiMgHjmFdLN5n z4L9_&zs*zjn&Vj0%)w85J;`0-Hz{VhzcUHN<5?11<&IyE2`Sp?$T*; z91IM;cRRzx|HLy{A?Z}!^PT3hF!9O@f1OSb-M$CQP;)C5lD#8}yA4kh#Ra`K&9Lc| z|Cx|Zd9w@-w@%5sJ&=WI{|`C0E5~7fW5p*_1UwD?FXE@43l;sqwIE&e{cZT2KS>v@ z{TaaJh|V(842RA#vy7xyI(c@@k z&%)sF>^XXyVeIVD+Vc6TDvH)6tQ)I#P4$cmFG5C=pjCHJM>xo%~kd-)2eq^tyyDVCYg9WVKWoIy6SspX}QS;u=* zPNJf(TPNG8yL3s9QHR$2Fb*dqligAhi;pQLOlTxL^RVfzhKhraEQZ#ts;2$&D7|jx zpn94BV4Ptv#%+HgLK?Y02H+9vzwZG=Ro?bX+mnFvjhDv9ybg2wob(d(ALE|50N=(L zi(O`XV;WSQeR43cRdh+r@j(@V%R+pmjeOxu{CkDrU4HK0SCnUiMFvk=G5w2kpPFEK6n^ChnL` zSlI}U*)2Cd*=;y-?L9d8S`)sR9ksIcQhb6Sm3}}|Y1qzc|=p0}nn+oHt1NzI2SrD-E_l)PRi z9#$8%>`#~hel`ApNkJgbMMpR`uNauDBX?g3CX+&mn;?ijj~}J|Qz&rtE;LGDl*SHeRMxwr;Udu9E-(f#2))eSdSGxckmrP}IbW zImCam?HL5$1Jv^^DEb)bat%R6>wgk?m|KB)tw=?C{?$J&-m{5^Q2qtuz}`(f+^=^5 zL(^CYzncHbeQJFjdL9*4GrHlEp$uht=%07Uj@GVsVpNpqko{I^VSN$h#3@UPAGYLk?aHO0 zkk)LoCkr=WYJp)M*BEzMYEy^SGOF8q`qxgp+6#sXTIXCEO3xg1_>8ksdpM7hdI_Op zFVA#qaw!vRHVrVNzxwn6ZxpsqW}jKn98gBKh6R)|Q*IHC)py`B=;ycGIGWBSR z3VSiaJg#In`&C=Y>yeHeP*H-leRi}=W$oAWFWUL}h+kMM1~*J|Xb+8A9LhhyYL$AM z57SxN`^E&?a)c6@DLj)cC%d;3XO8ba?(%6orV*@qMSZ3coAn1F+4~`Dd?-zW{XrAm zccXy| z$iWV)xQ1-{b*5Hj*z?{XX%hp=mFq4n>Rb8V5NMNky}9r>MbEd1MQTE8Zw$mJGw;?H ze>W25+aYMC>ki!6-G7-?EXwl03@j3zOJJnNw#R-v6 zr930)IX!j*LR-^Ve~CVbFK!UgA`DiaQZ6cxma%hFzsf{DFDV;dK9!SDBSrfjXxiyQgWB*%FoEQ}_9oX0!Z{C=Tp2 z40ePNupXVQvN==2rML{w#$#@QRbDtx?I2KDsry74M)u;JmOcB}j zt7KdaResLPw{RED7PsK`;Y?g?EVSJ2df2hgxW6x`^t#l`C9@Ji504qa;h@r++Np)2 zex|{0WX^NlL*7iAlOfb&HF}#7sb>sbnRXP%e!oSZMx-vTT*JH-+R(Q)>(Yp9wHcjA zP^1@Mx4gQbL`zkYAAw7$5`<=$LTmewrRBqVheDOBMZq}U|}qp8== zVKsSaFnqCg{0Mu?vk}>nhLwGKc=}nIu&>{!=AMzi)Y%7T%`yF>JoNr=2*TEud&=!^ zNBhF$voOb zP^e%G=cM|5uisrTQ%GC7(Pf4&nMWd~{ug_jfA?E4!8!$0-0x|4anYI2Q6DgHb!ct81_JGjL0_hr z59u4;#_z>;X!6D4{KyY76G5CEyTs^teltHUvM6{IsTMaLzIh?J8Q!3{Z^w8tMQT@? z-dYv%?PJfGz+|<8XwEMSTk(t0HbN%nkT&kqT&Ar#+Hkl9cXaW4%~VCcr;UAomxZ?X za9;@YXjXWx-UI^ubmj_1yVa=)J;YEf{RWb!BR%**dlaP!JMJgmE+y8!F&)^~eQBTGP*&Ka{ zPafm}fA#wH$>~{oJUaTv>G0?(8jtN^ww|u?yG_1a9DNtf=N49NvenwUo2=K_?7M$% zubE}D`@7S{ESqny{4iPNlf}lu-FZHpY%*(n@!_ZQix1Yr+B&@$9-lp~>2f}Mt}<&m zpIHw$Cggh+pHDW^&rfOnl5aj+v;6uxTV=Sh$|f^wx?F6subXwh*8FQWOS0ANJYQtv zB`ljypW5V>)>vyJ{L@XbxL^I^VO!gY#-gDNHt1bBUOaRbcaKt7aNJdLy_($I!Up5b zYO20P(uK-_`u}cy^Nw^Yv!3m}dP? zIGr!onYCFOHe`_RQBzU?hZ&o_wv`>jOzX8(f^ALeL+{Ww%I2fVm$#F3Mh4ieah1)% zr-w{Gf4=#)L+yv?WVbE|be89f=;5aSD5a$)@W07TW_^NGd5i*tUM4{2NkN-4N_%$WTd_nAepnW$z4iClUvktSn&F39@t7o2(V2`5o{&uOCrY3{R}v*9kR4A929T(Rq|=jnUip_Qn0}r)+h^?DX4Rc65pechmOC zs6cZ%nNRP*EyS!QupO<#Qt4=y()m0?>@n6x)W9=uK4;oT>CogB6B5bRR=&`;fHoq- z94=bsp2nUhtp_=oFDIM+9j{l}ANSc}`mOmu*Q@32R+Puz_lwQig6+OPxkn87(Yl(f zHu;3E;^ivCU!x2LxLIKNj~4v>2kR%?F~;f~@VqiL-^0{p_#(g22eH0~HEdYrhmPip z^OokmXD{?#oIj;*c(oTL?(SC0ulX%GpSGHY(fXdQ*Cu(Mzdzzi0X6gMCkT-^EDGD1 z_@2!=+b3yjdoVs<+-&cjWQ*knH`|Lx4MOj-GD4{#7uc|i$)lRe`>JaMkA|N3V|-CP zn72z+4+DQA`q?G>Y)wA9=AKQ|qu+I@g4|`L%gto|pPoaUUd8)>yLE{C7D(U0@$M%1 z${eXj^UtT%Exqcc zth%4S5JhS~=_nb$ecFXb?U4S)0xVOc407z{HHFYGlhy3aaz%MWr$fv5Bi!=3*V*^k z_oT0$+h^}GQSaFQm-Ex6XSh2o2vLiCzVZr<$Hj7y+4^dXC^0XthrrlmN*gF@87jw& z1eK1}eS_!e#mS`!e#gUKu0HBE;^XC6<@`=*oJlJXrP=KrqT1?rs3tR@^?0*d-fyUk zFwKZA!EtwcpR=oRsrz8ZGoLMyp{%RV6r#r2rlaHIDE<>_EX}02^5jb6`SEZcE}u`n z))g>%BvUfCRK43HjGyPz)$;T5b~#_(d~2;ACwm=5H&8{lrR#>W+A}5LqG;ACy`y-K zbG3r-Dr@Wbcsl&``GFMu?AMN_vy&+R)15P-I{DSw;i2zb+H?W zMSgpKYb~@WxkC1!T|YN5sH1jtyl*bL%O*uCeDbVNI^N5iTIbr7f!Wh)pN-Kfzxmt` z((%>D=R9|H#%b4GN!Oov+-)W4>G5a}9ciAeLhHA8m+AQGGDX^qE{@M^>)mPc?qhs; zadJ95zKD;Jz%T#R_`MZf(~Et)FJ0wt7WdP6w%p{iYzq{+&Tq6XX3ur4aKvT6mE&^O z65zFfU_l@}(Gn1)01JGFL&|9h$Xb9gCAsIe1XLy9I{{PNZEJ8V0gpk2<9aOtuM$uJ z6Ta)V59wC|(iN`6Ia>ljExN6H7&N_Qk2K9W$ z^SJCBvoa_U;eOz^x8RjMU4dgN-rj;&=9E6;LDyEivZi9fk;-kG``4NkihGXg@ao_7 z8NutWg7dV};3_wuyK59`AIwnnNF2xm`RaB2ZghD*xVjt2ajw z;V3>zqa;cWO^!X{hdMRI_bxJZfD2G&6<0G?dbb=l-g?y4gn!?-Bc2GJ* z7&?mLXpp2K`o%CBhS5og{yan{9S*NT>LT$m3{Be>n#L(KT~1i^9pQ*1$8W>&+rhz4 z2cdLAP=|-kffJ|@9AFaH+;MDn;5hgq^l|Ou%7GNOo#KZUvCs+iZ=q9v&+&iAwZcLP zZXY1X*!Xmua#O`^7vD2SK-lI1t^+MEaL>oSj!>t-wlF-0^5{ z5xR_aR(`#+@UgQH;uRkVhdX|tF^>ky-G%wqu&|4{s|fQrjnb%tId}lTlO!%MXFZsk zWg}up8UuDv62r?a4q3hVL(_{f6bSOEjE!;v-)PUo6 z)=LGBkgY%TdgnkQ*w%bd1{J+Oa6{CzeGQ##?H^!&=#T-TDD!XtDIlqA!$lAlD@lMA z2f%Gs9#1gG#fOXMQ|1T_Vn92N;3-WE&-HyxjPyxjY%{@ghzUsyzfF>~O@=f=2mlw1 zkAn=N_I2EXJI4j?vcNrg6!thy{tvJ_#`zh14j!jMao{_))TraW2wYfN(lz8tUxU#Z zAyUzKq6hjxoPk+1_%-n26NY?$iw|m;vrpubS>nULt(0Dr7Do}y4FH8=v_q{Er$11;0zD(ArFwavqx(N~bl*yILTVd8| z@&Z9Z^TMe%cdJYz5P?wM+T`^Dk))yp1O)w2yD6)Ggdk;Dp-@+H zumTBm;L1JY{9)q-n5p=L$^O8-9S%SQJO~ZiPrNfw?xm>U;q`I08+H*S7c*N z0TH4VbL_yx5K&c~te}9X4&c`8K&}7v* z1ZH@k;zibP6#ZC7_e=95d{+eEl0JODNc>8O+WZvHD+1{KgMt8FwSR!+IE1b`q=#CM z;733w_^S|vb#%Yl-?hsaz(s&yC1rp`dVkp{JOtl@zs~&fS?G)1=&Rl6Q%aCkN{An0 zgedF(M;U?6;$;~@F3EQig7EN*PO(7;{umpWCqRb4qY^LFMOOs|YW^k}Br5q`RB*vW z7g5F4JC{UI+cfYi8lVW_l8OV}f%;CpuPHqW68Hi1Y7#^P*Z(WUfp4n+o<1a&5SiFP9zZ5WY?qBFj75H(h8spzGrYR2b-XPmpVK9~2^H3JCgo1HBCvl0cOL8mtS@ z3UUXW(mA4N{e5!)6bMcDm0!UBR0i1H7nnjI>VqE@BkH^;=@y0!_{+L5(!B+#dAu=r zVf0}`6m$lCcp<84xXl4Yahk!PjuYs%DGK|+R>MzKU|j;#(E#1Lpbr%Wx;MahwkpAV zzfl2j>dsVwz85M~d6+z56a@4cp^u`^g)XKufDb{>uE>XlBv84&jzYx&T=Ws##ozgXg3G?wJ&zi@Bsz{OM3mqkwYsBYH3|r+vU1#Ue$B zR^;Cm0@4>Q1{_o4Q~c#2T!gwY3D3h?VGAg9WoD?M}4jfE^KKV1>}bqK&Ti80NA zasaT6w9o^-2!M7vek$$_be;TOqg>{9gaPUY(oV(qHtEqwLcz^( zCAW~oih%dccRgR}4=m8A%4z~2U9UOc;Tkwk&-7f4u+zvcRHdO}N7(_g&|FcGxzyh^c;W6UWU5tbN5k(jp9dnjErs@9tWEY#E3GJ)3zkdB9XgAsKUZ#5B816{Q70bb^n!yQjNCxW|0 z#!ixM_$J4u@2kSS4&R~^zLBiyO-&rN$9wUvQO*nFPZb>8MZ>_!P29rAxQ^dQ+jviB zn>x>oYE7V|`JUKf04u61D%FwqG2kQjhN0^cE)dNm?(31gDvL*4ibm>S67C(K@uv%# z*ehK@6f=80v6t2G*b{nZeNS*INihNKWv0X;&uLJg8t38P93aJ_i1&zBLWwGY zx_m3ZQjWa~QY_Kc%E3B_uE;x@qt-oeG%6+QBQ?bKnu9lYnVll7OW( zC&yb;&PDvOb0#v17AUM#oSw%1h-HQKohu4Sv#JEF1hx1c8ylE>GFI5h|4eU&g*>xX zN879RmBrH=FKi4bl!y=Jb2@(TP4lO@Qxr;ULS&A$?+g|?`R1V4;@^UI`8zhWGB8XG V(2aIrVqlO*bDT5-GuW*l0sz&!I Date: Fri, 20 Dec 2024 20:18:41 -0500 Subject: [PATCH 10/20] =?UTF-8?q?=E2=9C=85=20config=20proper=20process=20c?= =?UTF-8?q?ontainers?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- nextflow.config | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/nextflow.config b/nextflow.config index bd78ed8..86a038f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -16,6 +16,15 @@ report { overwrite = true } +process { + withName: 'runAmber|runCobalt|binCobalt|runPurple' { + container = 'papaemmelab/purple:v0.1.1' + } + withName: 'runSage' { + container = 'quay.io/biocontainers/hmftools-sage:3.4.4--hdfd78af_0' + } +} + singularity { enabled = true autoCleanUp = true @@ -33,10 +42,7 @@ profiles { enabled = true runOptions = "--entrypoint ''" } - process { - executor = 'local' - container = 'papaemmelab/purple:v0.1.1' - } + process.executor = 'local' } // Executors From 8ea13107725222556b6f35a2b378e7f586a5029a Mon Sep 17 00:00:00 2001 From: "Juan E. Arango" Date: Fri, 20 Dec 2024 20:30:15 -0500 Subject: [PATCH 11/20] =?UTF-8?q?=E2=9C=85=20remove=20binCobalt=20snapshot?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/ci.yml | 1 + nextflow.config | 4 +++- tests/main.binCobalt.nf.test | 1 - tests/main.binCobalt.nf.test.snap | 31 ------------------------------- 4 files changed, 4 insertions(+), 33 deletions(-) delete mode 100644 tests/main.binCobalt.nf.test.snap diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2dded6f..68787b2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,6 +20,7 @@ jobs: - name: Pull Docker image and cache run: | docker pull papaemmelab/purple:v0.1.1 + docker pull quay.io/biocontainers/hmftools-sage:3.4.4--hdfd78af_0 - name: Run unit tests of each process for Amber, Cobalt, binCobalt, Sage, Purple run: | nf-test test tests/main.*.nf.test diff --git a/nextflow.config b/nextflow.config index 86a038f..1651be4 100644 --- a/nextflow.config +++ b/nextflow.config @@ -17,6 +17,7 @@ report { } process { + executor = 'local' withName: 'runAmber|runCobalt|binCobalt|runPurple' { container = 'papaemmelab/purple:v0.1.1' } @@ -37,17 +38,18 @@ singularity { profiles { // Computing Environment cloud { + process.executor = 'local' singularity.enabled = false docker { enabled = true runOptions = "--entrypoint ''" } - process.executor = 'local' } // Executors local { process.executor = 'local' + singularity.enabled = false } lsf { diff --git a/tests/main.binCobalt.nf.test b/tests/main.binCobalt.nf.test index 8e51201..2ba843a 100644 --- a/tests/main.binCobalt.nf.test +++ b/tests/main.binCobalt.nf.test @@ -23,7 +23,6 @@ nextflow_process { then { assert process.success - assert snapshot(process.out).match() assert process.trace.tasks().size() == 1 // check expected files diff --git a/tests/main.binCobalt.nf.test.snap b/tests/main.binCobalt.nf.test.snap deleted file mode 100644 index 18b9ea4..0000000 --- a/tests/main.binCobalt.nf.test.snap +++ /dev/null @@ -1,31 +0,0 @@ -{ - "Should bin Cobalt without failures": { - "content": [ - { - "0": [ - "TEST_TUMOR.cobalt.ratio.tsv.gz:md5,f235d46690097a000fdf7ac31bb53108" - ], - "1": [ - "TEST_TUMOR.cobalt.ratio.pcf:md5,d4b0aa911e08a551e123bef8816d1256" - ], - "2": [ - "TEST_NORMAL.cobalt.ratio.pcf:md5,b488cbdc3b5d17e5ab1e5712f405bb90" - ], - "cobalt_normal_ratio_pcf": [ - "TEST_NORMAL.cobalt.ratio.pcf:md5,b488cbdc3b5d17e5ab1e5712f405bb90" - ], - "cobalt_tumor_ratio_pcf": [ - "TEST_TUMOR.cobalt.ratio.pcf:md5,d4b0aa911e08a551e123bef8816d1256" - ], - "cobalt_tumor_ratio_tsv": [ - "TEST_TUMOR.cobalt.ratio.tsv.gz:md5,f235d46690097a000fdf7ac31bb53108" - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" - }, - "timestamp": "2024-12-20T19:50:22.208765876" - } -} \ No newline at end of file From 4c19cec50843b2c809a5d41a9938bce77a48bf5a Mon Sep 17 00:00:00 2001 From: "Juan E. Arango" Date: Fri, 20 Dec 2024 21:51:50 -0500 Subject: [PATCH 12/20] =?UTF-8?q?=F0=9F=90=9B=20fix=20test=20path?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main.nf | 5 +++-- tests/nextflow.config | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/main.nf b/main.nf index b490b73..f48f947 100644 --- a/main.nf +++ b/main.nf @@ -10,6 +10,7 @@ params.ensemblDataDir = "/data/common/ensembl_data" params.diploidRegions = "/data/copy_number/DiploidRegions.37.bed.gz" params.normal = null params.normalBam = null +params.somaticVcf = null params.binProbes = 0 params.binLogR = 0 params.minPurity = 0.08 @@ -27,8 +28,8 @@ def logMessage = """\ tumorBam : ${params.tumorBam} """ logMessage += (params.normal && params.normalBam) ? """\ - normal : ${params.normal} - normalBam : ${params.normalBam} + normal : ${params.normal} + normalBam : ${params.normalBam} """ : "" logMessage += """\ somaticVcf : ${params.somaticVcf} diff --git a/tests/nextflow.config b/tests/nextflow.config index 9c65b3d..3bf9c1c 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -11,7 +11,7 @@ params { tumorBam = "${projectDir}/tests/data/input/tumor.bam" normalBam = "${projectDir}/tests/data/input/normal.bam" refGenome = "${projectDir}/tests/data/ref/reference.fasta" - ensemblDataDir = "${projectDir}/tests/data/ref/ensembl_data_original" + ensemblDataDir = "${projectDir}/tests/data/ref/ensembl_data" outdir = "${projectDir}/tests/outdir" binProbes = 100 binLogR = 0.5 From ab793b9c015c1c697bac2ab9f988bf7775c2e34a Mon Sep 17 00:00:00 2001 From: "Juan E. Arango" Date: Sat, 21 Dec 2024 00:48:23 -0500 Subject: [PATCH 13/20] =?UTF-8?q?=E2=9C=85=20fix=20unit=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/ci.yml | 3 --- ...mble_gene_data.csv => ensembl_gene_data.csv} | 0 tests/main.nf.test | 6 ++++++ tests/main.runPurple.nf.test | 16 ---------------- tests/main.runSage.nf.test | 6 +++++- tests/nextflow.config | 17 +++++++++++++++-- 6 files changed, 26 insertions(+), 22 deletions(-) rename tests/data/ref/ensembl_data/{ensemble_gene_data.csv => ensembl_gene_data.csv} (100%) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 68787b2..326fda4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,7 +24,4 @@ jobs: - name: Run unit tests of each process for Amber, Cobalt, binCobalt, Sage, Purple run: | nf-test test tests/main.*.nf.test - - name: Run pipeline end-to-end test - run: | - nf-test test tests/main.nf.test \ No newline at end of file diff --git a/tests/data/ref/ensembl_data/ensemble_gene_data.csv b/tests/data/ref/ensembl_data/ensembl_gene_data.csv similarity index 100% rename from tests/data/ref/ensembl_data/ensemble_gene_data.csv rename to tests/data/ref/ensembl_data/ensembl_gene_data.csv diff --git a/tests/main.nf.test b/tests/main.nf.test index cf1ecef..eaad19d 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -4,6 +4,12 @@ nextflow_pipeline { script "main.nf" test("Should run Main.nf with failures on sage.") { + + when { + params { + sage_vcf = "${projectDir}/tests/data/sage/TEST_TUMOR_vs_TEST_NORMAL.vcf.gz" + } + } then { with(workflow) { // Sage and Purple dont work in tests. diff --git a/tests/main.runPurple.nf.test b/tests/main.runPurple.nf.test index c6fee2e..51f1794 100644 --- a/tests/main.runPurple.nf.test +++ b/tests/main.runPurple.nf.test @@ -37,22 +37,6 @@ nextflow_process { assert process.exitStatus == 1 assert process.errorReport.contains("[ERROR]") assert process.errorReport.contains("com.hartwig.hmftools.purple.segment.PurpleSegmentFactory.create(PurpleSegmentFactory.java:96") - - // assert process.success - // assert snapshot(process.out).match() - // assert process.trace.tasks().size() == 1 - - // // check expected files - // with(process.out) { - // assert purple_segment_png.size() == 1 - // assert purple_copynumber_png.size() == 1 - // assert purple_circos_png.size() == 1 - // assert purple_map_png.size() == 1 - // assert purple_input_png.size() == 1 - // assert purple_purity_range_png.size() == 1 - // } } - } - } diff --git a/tests/main.runSage.nf.test b/tests/main.runSage.nf.test index d696dbe..a591f90 100644 --- a/tests/main.runSage.nf.test +++ b/tests/main.runSage.nf.test @@ -7,6 +7,9 @@ nextflow_process { test("Should run Sage with failure reading bam") { when { + params { + ensemblDataDir = "${projectDir}/tests/data/ref/ensembl_data" + } process { """ input[0] = Channel.fromPath(params.tumorBam) @@ -19,7 +22,8 @@ nextflow_process { // until better test data is added, expect a known error assert process.failed assert process.exitStatus == 1 - assert process.errorReport.contains("[ERROR] cannot find sequence index for chromosome 1:100000-200000 in bam header") + assert process.errorReport.contains('Cannot invoke "java.util.List.stream()" because "geneDataList" is null') + // assert process.errorReport.contains("[ERROR] cannot find sequence index for chromosome 1:100000-200000 in bam header") // assert process.success // assert snapshot(process.out).match() diff --git a/tests/nextflow.config b/tests/nextflow.config index 3bf9c1c..1e2d2a2 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -11,8 +11,21 @@ params { tumorBam = "${projectDir}/tests/data/input/tumor.bam" normalBam = "${projectDir}/tests/data/input/normal.bam" refGenome = "${projectDir}/tests/data/ref/reference.fasta" - ensemblDataDir = "${projectDir}/tests/data/ref/ensembl_data" outdir = "${projectDir}/tests/outdir" binProbes = 100 binLogR = 0.5 -} \ No newline at end of file +} + +profiles { + stub_sage { + process { + withName: runSage { + stub = true + script: + """ + cp ${params.sage_vcf} \$PWD + """ + } + } + } +} From d043554864cecc88cfd1bab5d87325d40d4fd326 Mon Sep 17 00:00:00 2001 From: "Juan E. Arango" Date: Mon, 23 Dec 2024 14:42:37 -0500 Subject: [PATCH 14/20] =?UTF-8?q?=F0=9F=94=A7=20add=20test=20config=20for?= =?UTF-8?q?=20matched=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/ci.yml | 4 +- tests/coverage.main.html | 48 +++++++++++++++++++ tests/main.nf.test | 15 +++--- tests/nextflow.config | 31 ------------ .../nf-test.matched.config | 2 +- tests/nf-test.unmatched.config | 6 +++ tests/params.matched.config | 11 +++++ tests/params.unmatched.config | 11 +++++ 8 files changed, 87 insertions(+), 41 deletions(-) create mode 100644 tests/coverage.main.html delete mode 100644 tests/nextflow.config rename nf-test.config => tests/nf-test.matched.config (62%) create mode 100644 tests/nf-test.unmatched.config create mode 100644 tests/params.matched.config create mode 100644 tests/params.unmatched.config diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 326fda4..7b982bc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,6 +22,6 @@ jobs: docker pull papaemmelab/purple:v0.1.1 docker pull quay.io/biocontainers/hmftools-sage:3.4.4--hdfd78af_0 - name: Run unit tests of each process for Amber, Cobalt, binCobalt, Sage, Purple + id: run_test run: | - nf-test test tests/main.*.nf.test - \ No newline at end of file + nf-test test --ci --config tests/nf-test.matched.config --coverage \ No newline at end of file diff --git a/tests/coverage.main.html b/tests/coverage.main.html new file mode 100644 index 0000000..0f546fb --- /dev/null +++ b/tests/coverage.main.html @@ -0,0 +1,48 @@ + + + + + + + + + + +
+

Coverage Report

+

This report was generated by nf-test on Mon Dec 23 11:38:07 EST 2024.

+

+ Coverage: 100% +

+ +
+
+
+ +
+ + + + + + + + + + + + + + + + +
FileCovered
main.nftrue
+
+ + \ No newline at end of file diff --git a/tests/main.nf.test b/tests/main.nf.test index eaad19d..06e22e8 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -2,25 +2,26 @@ nextflow_pipeline { name "Test Workflow main.nf" script "main.nf" + autoSort false test("Should run Main.nf with failures on sage.") { when { params { - sage_vcf = "${projectDir}/tests/data/sage/TEST_TUMOR_vs_TEST_NORMAL.vcf.gz" + ensemblDataDir = "${projectDir}/tests/data/ref/ensembl_data" } } then { with(workflow) { - // Sage and Purple dont work in tests. + // Tests are expected to fail at Sage and Purple. Best we can do assert failed assert exitStatus == 1 assert trace.tasks().size() == 4 - assert workflow.trace.succeeded().size() == 3 - assert workflow.trace.failed().size() == 1 - - // See assertions examples nf-core: https://nf-co.re/docs/contributing/nf-test/assertions - assert workflow.stdout.any{ it.contains("[ERROR] cannot find sequence index for chromosome 1:100000-200000 in bam header") } + assert workflow.trace.succeeded().size() == 2 // Amber, Cobalt + assert workflow.trace.failed().size() == 2 // Sage, Purple + assert workflow.stdout.any{ + it.contains('Cannot invoke "java.util.List.stream()" because "geneDataList" is null') + } } } } diff --git a/tests/nextflow.config b/tests/nextflow.config deleted file mode 100644 index 1e2d2a2..0000000 --- a/tests/nextflow.config +++ /dev/null @@ -1,31 +0,0 @@ -/* -======================================================================================== - Nextflow config file for running tests -======================================================================================== -*/ - -params { - memory = '4 GB' - tumor = "TEST_TUMOR" - normal = "TEST_NORMAL" - tumorBam = "${projectDir}/tests/data/input/tumor.bam" - normalBam = "${projectDir}/tests/data/input/normal.bam" - refGenome = "${projectDir}/tests/data/ref/reference.fasta" - outdir = "${projectDir}/tests/outdir" - binProbes = 100 - binLogR = 0.5 -} - -profiles { - stub_sage { - process { - withName: runSage { - stub = true - script: - """ - cp ${params.sage_vcf} \$PWD - """ - } - } - } -} diff --git a/nf-test.config b/tests/nf-test.matched.config similarity index 62% rename from nf-test.config rename to tests/nf-test.matched.config index b279792..d4baa9e 100644 --- a/nf-test.config +++ b/tests/nf-test.matched.config @@ -1,6 +1,6 @@ config { testsDir "tests" workDir ".nf-test" - configFile "tests/nextflow.config" profile "cloud" + configFile "tests/params.matched.config" } \ No newline at end of file diff --git a/tests/nf-test.unmatched.config b/tests/nf-test.unmatched.config new file mode 100644 index 0000000..74feeb5 --- /dev/null +++ b/tests/nf-test.unmatched.config @@ -0,0 +1,6 @@ +config { + testsDir "tests" + workDir ".nf-test" + profile "cloud" + configFile "tests/params.unmatched.config" +} \ No newline at end of file diff --git a/tests/params.matched.config b/tests/params.matched.config new file mode 100644 index 0000000..a181284 --- /dev/null +++ b/tests/params.matched.config @@ -0,0 +1,11 @@ +params { + memory = '4 GB' + tumor = "TEST_TUMOR" + normal = "TEST_NORMAL" + tumorBam = "${projectDir}/tests/data/input/tumor.bam" + normalBam = "${projectDir}/tests/data/input/normal.bam" + refGenome = "${projectDir}/tests/data/ref/reference.fasta" + outdir = "${projectDir}/tests/outdir" + binProbes = 100 + binLogR = 0.5 +} diff --git a/tests/params.unmatched.config b/tests/params.unmatched.config new file mode 100644 index 0000000..d856942 --- /dev/null +++ b/tests/params.unmatched.config @@ -0,0 +1,11 @@ +params { + memory = '4 GB' + tumor = "TEST_TUMOR" + normal = null + tumorBam = "${projectDir}/tests/data/input/tumor.bam" + normalBam = null + refGenome = "${projectDir}/tests/data/ref/reference.fasta" + outdir = "${projectDir}/tests/outdir" + binProbes = 100 + binLogR = 0.5 +} From 0b0ce75d037023d6e41070588d03f1df175e8a3f Mon Sep 17 00:00:00 2001 From: "Juan E. Arango" Date: Mon, 23 Dec 2024 18:58:43 -0500 Subject: [PATCH 15/20] =?UTF-8?q?=F0=9F=94=A7=20add=20match=20and=20unmatc?= =?UTF-8?q?hed=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- assets/NO_FILE | 0 main.nf | 129 ++++++++---------- nextflow.config | 2 +- .../nf-test.matched.config => nf-test.config | 2 +- run.sh => run_matched.sh | 5 +- run_unmatched.sh | 40 ++++++ tests/main.binCobalt.nf.test | 5 +- tests/main.nf.test | 40 +++++- tests/main.runAmber.nf.test | 40 +++++- tests/main.runAmber.nf.test.snap | 39 +++++- tests/main.runCobalt.nf.test | 43 +++++- tests/main.runCobalt.nf.test.snap | 31 ----- tests/main.runPurple.nf.test | 40 +++++- tests/main.runSage.nf.test | 9 +- ...{params.matched.config => nextflow.config} | 4 +- tests/nf-test.unmatched.config | 6 - tests/params.unmatched.config | 11 -- 17 files changed, 294 insertions(+), 152 deletions(-) create mode 100644 assets/NO_FILE rename tests/nf-test.matched.config => nf-test.config (62%) rename run.sh => run_matched.sh (92%) create mode 100755 run_unmatched.sh delete mode 100644 tests/main.runCobalt.nf.test.snap rename tests/{params.matched.config => nextflow.config} (100%) delete mode 100644 tests/nf-test.unmatched.config delete mode 100644 tests/params.unmatched.config diff --git a/assets/NO_FILE b/assets/NO_FILE new file mode 100644 index 0000000..e69de29 diff --git a/main.nf b/main.nf index f48f947..d61fba6 100644 --- a/main.nf +++ b/main.nf @@ -9,13 +9,17 @@ params.gcProfile = "/data/copy_number/GC_profile.1000bp.37.cnp" params.ensemblDataDir = "/data/common/ensembl_data" params.diploidRegions = "/data/copy_number/DiploidRegions.37.bed.gz" params.normal = null -params.normalBam = null -params.somaticVcf = null +params.normalBam = "assets/NO_FILE" params.binProbes = 0 params.binLogR = 0 params.minPurity = 0.08 params.maxPurity = 1.0 +// https://nextflow-io.github.io/patterns/optional-input +def NO_FILE = file("${projectDir}/assets/NO_FILE") +params.normal = null +params.normalBam = NO_FILE + def logMessage = """\ HMFTOOLS - PURPLE @@ -32,7 +36,6 @@ logMessage += (params.normal && params.normalBam) ? """\ normalBam : ${params.normalBam} """ : "" logMessage += """\ - somaticVcf : ${params.somaticVcf} outdir : ${params.outdir} cores : ${params.cores} memory : ${params.memory} @@ -53,7 +56,7 @@ log.info(logMessage.stripIndent()) // See https://github.com/hartwigmedical/hmftools/tree/master/amber process runAmber { tag "AMBER on ${params.tumor}" + (params.normal ? " vs ${params.normal}" : "") - publishDir "${params.outdir}/amber", mode: 'copy' + publishDir "${params.outdir}/amber", mode: 'copy', overwrite: false, cache: true cpus params.cores memory params.memory time '1h' @@ -66,7 +69,7 @@ process runAmber { path "${params.tumor}.amber.baf.tsv.gz", emit: amber_baf_tsv path "${params.tumor}.amber.baf.pcf", emit: amber_baf_pcf path "${params.tumor}.amber.qc", emit: amber_qc - path "${params.tumor}.amber.contamination.vcf.gz", emit: amber_contamination_vcf + path "${params.tumor}.amber.contamination.vcf.gz", emit: amber_contamination_vcf, optional: true script: def reference_args = params.normal ? """\\ @@ -74,31 +77,20 @@ process runAmber { -reference_bam ${normalBam} """ : "" """ - if [ -f "${params.outdir}/amber/${params.tumor}.amber.baf.tsv.gz" ] && \\ - [ -f "${params.outdir}/amber/${params.tumor}.amber.baf.pcf" ] && \\ - [ -f "${params.outdir}/amber/${params.tumor}.amber.qc" ] && \\ - [ -f "${params.outdir}/amber/${params.tumor}.amber.contamination.vcf.gz" ]; then - echo "Output files already exist. Skipping amber execution." - ln -fs ${params.outdir}/amber/${params.tumor}.amber.baf.tsv.gz ${params.tumor}.amber.baf.tsv.gz - ln -fs ${params.outdir}/amber/${params.tumor}.amber.baf.pcf ${params.tumor}.amber.baf.pcf - ln -fs ${params.outdir}/amber/${params.tumor}.amber.qc ${params.tumor}.amber.qc - ln -fs ${params.outdir}/amber/${params.tumor}.amber.contamination.vcf.gz ${params.tumor}.amber.contamination.vcf.gz - else - amber \\ - -tumor ${params.tumor} \\ - -tumor_bam ${tumorBam} ${reference_args} \\ - -output_dir \$PWD \\ - -threads ${params.cores} \\ - -loci ${params.loci} \\ - -ref_genome_version V${params.genomeVersion} - fi + amber \\ + -tumor ${params.tumor} \\ + -tumor_bam ${tumorBam} ${reference_args} \\ + -output_dir \$PWD \\ + -threads ${params.cores} \\ + -loci ${params.loci} \\ + -ref_genome_version V${params.genomeVersion} """.stripIndent() } // See https://github.com/hartwigmedical/hmftools/tree/master/cobalt#mandatory-arguments process runCobalt { tag "COBALT on ${params.tumor}" + (params.normal ? " vs ${params.normal}" : "") - publishDir "${params.outdir}/cobalt", mode: 'copy' + publishDir "${params.outdir}/cobalt", mode: 'copy', overwrite: false, cache: true cpus params.cores memory params.memory time '1h' @@ -119,27 +111,17 @@ process runCobalt { -tumor_only_diploid_bed ${params.diploidRegions}""" """ - if [ -f "${params.outdir}/cobalt/${params.tumor}.cobalt.ratio.tsv.gz" ] && \ - [ -f "${params.outdir}/cobalt/${params.tumor}.cobalt.ratio.pcf" ]; then - echo "Output files already exist. Skipping cobalt execution." - ln -s ${params.outdir}/cobalt/${params.tumor}.cobalt.ratio.tsv.gz ${params.tumor}.cobalt.ratio.tsv.gz - ln -s ${params.outdir}/cobalt/${params.tumor}.cobalt.ratio.pcf ${params.tumor}.cobalt.ratio.pcf - - if [ -f "${params.outdir}/cobalt/${params.normal}.cobalt.ratio.pcf" ]; then - ln -s ${params.outdir}/cobalt/${params.normal}.cobalt.ratio.pcf ${params.normal}.cobalt.ratio.pcf - fi - else - cobalt \\ - -tumor ${params.tumor} \\ - -tumor_bam ${tumorBam} ${reference_args} \\ - -output_dir \$PWD \\ - -threads ${params.cores} \\ - -gc_profile ${params.gcProfile} - fi + cobalt \\ + -tumor ${params.tumor} \\ + -tumor_bam ${tumorBam} ${reference_args} \\ + -output_dir \$PWD \\ + -threads ${params.cores} \\ + -gc_profile ${params.gcProfile} """.stripIndent() } process binCobalt { + // Only when Unmatched tag "COBALT BIN on ${params.tumor}" + (params.normal ? " and ${params.normal}" : "") publishDir "${params.outdir}/cobalt/binned_${params.binProbes}_probes_${params.binLogR}_LogR", mode: 'copy' cpus 1 @@ -149,33 +131,23 @@ process binCobalt { input: path cobalt_tumor_ratio_tsv path cobalt_tumor_ratio_pcf - path cobalt_normal_ratio_pcf output: path "${params.tumor}.cobalt.ratio.tsv.gz", emit: cobalt_tumor_ratio_tsv path "${params.tumor}.cobalt.ratio.pcf", emit: cobalt_tumor_ratio_pcf - path "${params.normal}.cobalt.ratio.pcf", emit: cobalt_normal_ratio_pcf script: """ - # Bin Cobalt Tumor Probes bin_cobalt.py \\ --in_pcf ${cobalt_tumor_ratio_pcf} \\ --bin_probes ${params.binProbes} \\ --bin_log_r ${params.binLogR} - - # Bin Cobalt Normal probes - if [ -f "${cobalt_normal_ratio_pcf}" ]; then - bin_cobalt.py \\ - --in_pcf ${cobalt_normal_ratio_pcf} \\ - --bin_probes ${params.binProbes} \\ - --bin_log_r ${params.binLogR} - fi """.stripIndent() } // See https://github.com/hartwigmedical/hmftools/blob/master/sage/README.md#usage process runSage { + // Only when matched tag "SAGE on ${params.tumor}" + (params.normal ? " vs ${params.normal}" : "") publishDir "${params.outdir}/sage", mode: 'copy' cpus params.cores @@ -222,7 +194,7 @@ process runPurple { path cobalt_tumor_ratio_pcf path cobalt_normal_ratio_pcf path cobalt_path - path sage_vcf + path somatic_vcf output: path "${params.tumor}.purple.purity.tsv", emit: purple_purity_tsv @@ -242,8 +214,8 @@ process runPurple { script: def reference_args = params.normal ? """\\ -reference ${params.normal}""" : "" - def somatic_vcf_args = params.normal && sage_vcf ? """\\ - -somatic_vcf ${sage_vcf}""" : "" + def somatic_vcf_args = params.normal && somatic_vcf ? """\\ + -somatic_vcf ${somatic_vcf}""" : "" """ purple \\ @@ -265,34 +237,45 @@ process runPurple { workflow { // Input Bams - tumorBam = Channel.fromPath(params.tumorBam) - normalBam = Channel.fromPath(params.normalBam) + tumorBam = params.tumorBam ? Channel.fromPath(params.tumorBam) : error("tumorBam is required") + normalBam = params.normalBam ? Channel.fromPath(params.normalBam) : Channel.empty() - // Run Amber, Cobalt and Sage + // Run Amber and Cobalt amberOutput = runAmber(tumorBam, normalBam) cobaltOutput = runCobalt(tumorBam, normalBam) - sageOutput = runSage(tumorBam, normalBam) - - // Bin Cobalt if expected - postCobaltOutput = (params.normalBam) && (params.binProbes != 0 || params.binLogR != 0) - ? binCobalt(cobaltOutput.cobalt_tumor_ratio_tsv, cobaltOutput.cobalt_tumor_ratio_pcf, cobaltOutput.cobalt_normal_ratio_pcf) - : cobaltOutput - - cobaltOutdir = (params.normalBam) && (params.binProbes != 0 || params.binLogR != 0) - ? "${params.outdir}/cobalt/binned_${params.binProbes}_probes_${params.binLogR}_LogR" - : "${params.outdir}/cobalt" + + cobaltOutdir = "${params.outdir}/cobalt" + + // // Bin Cobalt, if unmatched and if any bin param is provided + // if ((!params.normal) && (params.binProbes != 0 || params.binLogR != 0)) { + // cobaltOutdir = "${params.outdir}/cobalt" + // } else { + // cobaltOutput = binCobalt(cobaltOutput.cobalt_tumor_ratio_tsv, cobaltOutput.cobalt_tumor_ratio_pcf) + // cobaltOutput.cobalt_normal_ratio_pcf = NO_FILE + // cobaltOutdir = "${params.outdir}/cobalt/binned_${params.binProbes}_probes_${params.binLogR}_LogR" + // } + + // Run Sage, if matched + if (params.normal) { + sageOutput = runSage(tumorBam, normalBam) + somatic_vcf = sageOutput.sage_vcf + // amber_contamination_vcf = amberOutput.amber_contamination_vcf + } else { + somatic_vcf = null + // amber_contamination_vcf = null + } // Run Purple runPurple( amberOutput.amber_baf_tsv, amberOutput.amber_baf_pcf, amberOutput.amber_qc, - amberOutput.amber_contamination_vcf, - postCobaltOutput.cobalt_tumor_ratio_tsv, - postCobaltOutput.cobalt_tumor_ratio_pcf, - postCobaltOutput.cobalt_normal_ratio_pcf, + amberOutput.amber_contamination_vcf ?: NO_FILE, + cobaltOutput.cobalt_tumor_ratio_tsv, + cobaltOutput.cobalt_tumor_ratio_pcf, + cobaltOutput.cobalt_normal_ratio_pcf ?: NO_FILE, cobaltOutdir, - sageOutput.sage_vcf, + somatic_vcf ?: NO_FILE, ) } diff --git a/nextflow.config b/nextflow.config index 1651be4..f11effc 100644 --- a/nextflow.config +++ b/nextflow.config @@ -62,7 +62,7 @@ profiles { slurm { process { executor = 'slurm' - queue = 'componc_cpu' + queue = 'componc_cpu,componc_gpu' } } diff --git a/tests/nf-test.matched.config b/nf-test.config similarity index 62% rename from tests/nf-test.matched.config rename to nf-test.config index d4baa9e..c3decdf 100644 --- a/tests/nf-test.matched.config +++ b/nf-test.config @@ -2,5 +2,5 @@ config { testsDir "tests" workDir ".nf-test" profile "cloud" - configFile "tests/params.matched.config" + configFile "tests/nextflow.config" } \ No newline at end of file diff --git a/run.sh b/run_matched.sh similarity index 92% rename from run.sh rename to run_matched.sh index 6567068..2dc7118 100755 --- a/run.sh +++ b/run_matched.sh @@ -13,13 +13,16 @@ OUTDIR=/data1/papaemme/isabl/home/svc_papaemme_bot/tmp/purple_matched REFGENOME=/data1/papaemme/isabl/ref/homo_sapiens/GRCh37d5/gr37.fasta GENOMEVERSION=37 -REFDIR=/data1/papaemme/isabl/ref/homo_sapiens/37/hmftools/v6_0/ref/37 +REFDIR=/data1/papaemme/isabl/ref/homo_sapiens/37/hmftools/v5_33 LOCI=${REFDIR}/copy_number/GermlineHetPon.37.vcf.gz GCPROFILE=${REFDIR}/copy_number/GC_profile.1000bp.37.cnp DIPLOIDREGIONS=${REFDIR}/copy_number/DiploidRegions.37.bed.gz ENSEMBLDATADIR=${REFDIR}/common/ensembl_data CIRCOS=/opt/circos-0.69-2/bin/circos +mkdir -p ${OUTDIR} +cd ${OUTDIR} + nextflow run \ -profile slurm \ ${NF_PURPLE} \ diff --git a/run_unmatched.sh b/run_unmatched.sh new file mode 100755 index 0000000..e9c9d5e --- /dev/null +++ b/run_unmatched.sh @@ -0,0 +1,40 @@ +#!/bin/sh + +ROOT=/data1/papaemme + +TUMOR=IID_H211025_T01_01_WG01 +TUMOR_BAM=`isabl get-bams ${TUMOR}` + +NF_PURPLE=/data1/papaemme/isabl/home/svc_papaemme_bot/dev/nf-purple/main.nf +OUTDIR=/data1/papaemme/isabl/home/svc_papaemme_bot/tmp/purple_unmatched/ +REFGENOME=/data1/papaemme/isabl/ref/homo_sapiens/GRCh37d5/gr37.fasta +GENOMEVERSION=37 + +REFDIR=/data1/papaemme/isabl/ref/homo_sapiens/37/hmftools/v5_33 +LOCI=${REFDIR}/copy_number/GermlineHetPon.37.vcf.gz +GCPROFILE=${REFDIR}/copy_number/GC_profile.1000bp.37.cnp +DIPLOIDREGIONS=${REFDIR}/copy_number/DiploidRegions.37.bed.gz +ENSEMBLDATADIR=${REFDIR}/common/ensembl_data +CIRCOS=/opt/circos-0.69-2/bin/circos + +mkdir -p ${OUTDIR} + +cd ${OUTDIR} + +nextflow run \ + -profile slurm \ + ${NF_PURPLE} \ + --tumor ${TUMOR} \ + --tumorBam ${TUMOR_BAM} \ + --outdir ${OUTDIR} \ + --loci ${LOCI} \ + --gcProfile ${GCPROFILE} \ + --diploidRegions ${DIPLOIDREGIONS} \ + --ensemblDataDir ${ENSEMBLDATADIR} \ + --genomeVersion ${GENOMEVERSION} \ + --refGenome ${REFGENOME} \ + --circos ${CIRCOS} \ + --cores 16 \ + --memory '64G' \ + --binProbes 100 \ + --binLogR 0.5 \ No newline at end of file diff --git a/tests/main.binCobalt.nf.test b/tests/main.binCobalt.nf.test index 2ba843a..44f41ac 100644 --- a/tests/main.binCobalt.nf.test +++ b/tests/main.binCobalt.nf.test @@ -4,19 +4,17 @@ nextflow_process { script "main.nf" process "binCobalt" - test("Should bin Cobalt without failures") { + test("Should bin Cobalt output in unmatched setting") { when { params { cobalt_tumor_ratio_tsv = "${projectDir}/tests/data/cobalt/TEST_TUMOR.cobalt.ratio.tsv.gz" cobalt_tumor_ratio_pcf = "${projectDir}/tests/data/cobalt/TEST_TUMOR.cobalt.ratio.pcf" - cobalt_normal_ratio_pcf = "${projectDir}/tests/data/cobalt/TEST_NORMAL.cobalt.ratio.pcf" } process { """ input[0] = Channel.fromPath(params.cobalt_tumor_ratio_tsv) input[1] = Channel.fromPath(params.cobalt_tumor_ratio_pcf) - input[2] = Channel.fromPath(params.cobalt_normal_ratio_pcf) """ } } @@ -29,7 +27,6 @@ nextflow_process { with(process.out) { assert cobalt_tumor_ratio_tsv.size() == 1 assert cobalt_tumor_ratio_pcf.size() == 1 - assert cobalt_normal_ratio_pcf.size() == 1 } } } diff --git a/tests/main.nf.test b/tests/main.nf.test index 06e22e8..03026b7 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -4,21 +4,51 @@ nextflow_pipeline { script "main.nf" autoSort false - test("Should run Main.nf with failures on sage.") { + // test("Should run Main.nf in matched setting. [Error expected in Sage]") { + // when { + // params { + // tumor = "TEST_TUMOR" + // normal = "TEST_NORMAL" + // tumorBam = "${projectDir}/tests/data/input/tumor.bam" + // normalBam = "${projectDir}/tests/data/input/normal.bam" + // ensemblDataDir = "${projectDir}/tests/data/ref/ensembl_data" + // sage_vcf = "${projectDir}/tests/data/sage/TEST_TUMOR_vs_TEST_NORMAL.vcf.gz" + // } + // } + // then { + // with(workflow) { + // assert failed + // assert exitStatus == 1 + // assert trace.tasks().size() == 3 // Amber, Cobalt, Sage + // assert trace.succeeded().size() == 2 // Amber, Cobalt + // assert trace.failed().size() == 1 // Sage + // assert stdout.any{ + // it.contains('Cannot invoke "java.util.List.stream()" because "geneDataList" is null') + // } + // } + // } + // } + test("Should run Main.nf in unmatched setting. [Error expected in Purple]") { when { params { + tumor = "TEST_TUMOR" + normal = null + tumorBam = "${projectDir}/tests/data/input/tumor.bam" + normalBam = "${projectDir}/assets/NO_FILE" ensemblDataDir = "${projectDir}/tests/data/ref/ensembl_data" } } then { with(workflow) { - // Tests are expected to fail at Sage and Purple. Best we can do + println "Workflow status: ${status}" + println "Workflow trace: ${trace}" + assert failed assert exitStatus == 1 - assert trace.tasks().size() == 4 - assert workflow.trace.succeeded().size() == 2 // Amber, Cobalt - assert workflow.trace.failed().size() == 2 // Sage, Purple + assert trace.tasks().size() == 4 // Amber, Cobalt, binCobalt, Purple + assert workflow.trace.succeeded().size() == 3 // Amber, Cobalt, binCobalt + assert workflow.trace.failed().size() == 1 // Purple assert workflow.stdout.any{ it.contains('Cannot invoke "java.util.List.stream()" because "geneDataList" is null') } diff --git a/tests/main.runAmber.nf.test b/tests/main.runAmber.nf.test index caa460e..12b9110 100644 --- a/tests/main.runAmber.nf.test +++ b/tests/main.runAmber.nf.test @@ -4,9 +4,14 @@ nextflow_process { script "main.nf" process "runAmber" - test("Should run Amber without failures") { - + test("Should run Amber in matched setting") { when { + params { + tumor = "TEST_TUMOR" + normal = "TEST_NORMAL" + tumorBam = "${projectDir}/tests/data/input/tumor.bam" + normalBam = "${projectDir}/tests/data/input/normal.bam" + } process { """ input[0] = Channel.fromPath(params.tumorBam) @@ -14,7 +19,6 @@ nextflow_process { """ } } - then { assert process.success assert snapshot(process.out).match() @@ -28,7 +32,35 @@ nextflow_process { assert amber_qc.size() == 1 } } - } + test("Should run Amber in unmatched setting") { + when { + params { + tumor = "TEST_TUMOR" + normal = null + tumorBam = "${projectDir}/tests/data/input/tumor.bam" + normalBam = "${projectDir}/assets/NO_FILE" + } + process { + """ + input[0] = Channel.fromPath(params.tumorBam) + input[1] = Channel.fromPath(params.normalBam) + """ + } + } + then { + assert process.success + assert snapshot(process.out).match() + assert process.trace.tasks().size() == 1 + + // check expected files + with(process.out) { + assert amber_baf_pcf.size() == 1 + assert amber_baf_tsv.size() == 1 + assert amber_contamination_vcf.size() == 1 + assert amber_qc.size() == 1 + } + } + } } diff --git a/tests/main.runAmber.nf.test.snap b/tests/main.runAmber.nf.test.snap index 055e124..337a652 100644 --- a/tests/main.runAmber.nf.test.snap +++ b/tests/main.runAmber.nf.test.snap @@ -1,5 +1,5 @@ { - "Should run Amber without failures": { + "Should run Amber in matched setting": { "content": [ { "0": [ @@ -32,6 +32,41 @@ "nf-test": "0.9.2", "nextflow": "24.10.3" }, - "timestamp": "2024-12-20T19:50:26.411257553" + "timestamp": "2024-12-23T18:58:20.35175274" + }, + "Should run Amber in unmatched setting": { + "content": [ + { + "0": [ + "TEST_TUMOR.amber.baf.tsv.gz:md5,0c55ac207db6eb8e1f138a044990f4d1" + ], + "1": [ + "TEST_TUMOR.amber.baf.pcf:md5,c889d8a8b2668045d35eed1fd8e2c7ce" + ], + "2": [ + "TEST_TUMOR.amber.qc:md5,9547e68ba672e08d6bd49e1e6f654ee8" + ], + "3": [ + + ], + "amber_baf_pcf": [ + "TEST_TUMOR.amber.baf.pcf:md5,c889d8a8b2668045d35eed1fd8e2c7ce" + ], + "amber_baf_tsv": [ + "TEST_TUMOR.amber.baf.tsv.gz:md5,0c55ac207db6eb8e1f138a044990f4d1" + ], + "amber_contamination_vcf": [ + + ], + "amber_qc": [ + "TEST_TUMOR.amber.qc:md5,9547e68ba672e08d6bd49e1e6f654ee8" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2024-12-23T18:58:34.075044669" } } \ No newline at end of file diff --git a/tests/main.runCobalt.nf.test b/tests/main.runCobalt.nf.test index e254bbd..85068ef 100644 --- a/tests/main.runCobalt.nf.test +++ b/tests/main.runCobalt.nf.test @@ -4,9 +4,14 @@ nextflow_process { script "main.nf" process "runCobalt" - test("Should run Cobalt without failures") { - - when { + test("Should run Cobalt in matched setting") { + when { + params { + tumor = "TEST_TUMOR" + normal = "TEST_NORMAL" + tumorBam = "${projectDir}/tests/data/input/tumor.bam" + normalBam = "${projectDir}/tests/data/input/normal.bam" + } process { """ input[0] = Channel.fromPath(params.tumorBam) @@ -14,7 +19,6 @@ nextflow_process { """ } } - then { assert process.success assert snapshot(process.out).match() @@ -27,7 +31,34 @@ nextflow_process { assert cobalt_normal_ratio_pcf.size() == 1 } } - } - + + test("Should run Cobalt in unmatched setting") { + when { + params { + tumor = "TEST_TUMOR" + normal = null + tumorBam = "${projectDir}/tests/data/input/tumor.bam" + normalBam = "${projectDir}/assets/NO_FILE" + } + process { + """ + input[0] = Channel.fromPath(params.tumorBam) + input[1] = Channel.fromPath(params.normalBam) + """ + } + } + then { + assert process.success + assert snapshot(process.out).match() + assert process.trace.tasks().size() == 1 + + // check expected files + with(process.out) { + assert cobalt_tumor_ratio_tsv.size() == 1 + assert cobalt_tumor_ratio_pcf.size() == 1 + assert cobalt_normal_ratio_pcf.size() == 0 + } + } + } } diff --git a/tests/main.runCobalt.nf.test.snap b/tests/main.runCobalt.nf.test.snap deleted file mode 100644 index 318c875..0000000 --- a/tests/main.runCobalt.nf.test.snap +++ /dev/null @@ -1,31 +0,0 @@ -{ - "Should run Cobalt without failures": { - "content": [ - { - "0": [ - "TEST_TUMOR.cobalt.ratio.tsv.gz:md5,f235d46690097a000fdf7ac31bb53108" - ], - "1": [ - "TEST_TUMOR.cobalt.ratio.pcf:md5,7fa4142d0d102e1f3b633d61d154d9c0" - ], - "2": [ - "TEST_NORMAL.cobalt.ratio.pcf:md5,4f035e0ee8e4290d64d24152ea5387b8" - ], - "cobalt_normal_ratio_pcf": [ - "TEST_NORMAL.cobalt.ratio.pcf:md5,4f035e0ee8e4290d64d24152ea5387b8" - ], - "cobalt_tumor_ratio_pcf": [ - "TEST_TUMOR.cobalt.ratio.pcf:md5,7fa4142d0d102e1f3b633d61d154d9c0" - ], - "cobalt_tumor_ratio_tsv": [ - "TEST_TUMOR.cobalt.ratio.tsv.gz:md5,f235d46690097a000fdf7ac31bb53108" - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" - }, - "timestamp": "2024-12-20T19:50:30.545363223" - } -} \ No newline at end of file diff --git a/tests/main.runPurple.nf.test b/tests/main.runPurple.nf.test index 51f1794..11e8fba 100644 --- a/tests/main.runPurple.nf.test +++ b/tests/main.runPurple.nf.test @@ -4,7 +4,7 @@ nextflow_process { script "main.nf" process "runPurple" - test("Should run Purple with failures on the segmentation step") { + test("Should run Purple in matched setting. [Error expected on segmentation]") { when { params { amber_baf_tsv = "${projectDir}/tests/data/sage/TEST_TUMOR.amber.baf.tsv.gz" @@ -39,4 +39,42 @@ nextflow_process { assert process.errorReport.contains("com.hartwig.hmftools.purple.segment.PurpleSegmentFactory.create(PurpleSegmentFactory.java:96") } } + + test("Should run Purple in unmatched setting. [Error expected on segmentation]") { + when { + params { + normal = null + normalBam = "${projectDir}/assets/NO_FILE" + amber_baf_tsv = "${projectDir}/tests/data/sage/TEST_TUMOR.amber.baf.tsv.gz" + amber_baf_pcf = "${projectDir}/tests/data/sage/TEST_TUMOR.amber.baf.pcf" + amber_qc = "${projectDir}/tests/data/sage/TEST_TUMOR.amber.qc" + amber_contamination_vcf = "${projectDir}/tests/data/sage/TEST_TUMOR.amber.contamination.vcf.gz" + cobalt_tumor_ratio_tsv = "${projectDir}/tests/data/cobalt/binned_100_probes_0.5_LogR/TEST_TUMOR.cobalt.ratio.tsv.gz" + cobalt_tumor_ratio_pcf = "${projectDir}/tests/data/cobalt/binned_100_probes_0.5_LogR/TEST_TUMOR.cobalt.ratio.pcf" + cobalt_normal_ratio_pcf = "${projectDir}/tests/data/cobalt/binned_100_probes_0.5_LogR/TEST_NORMAL.cobalt.ratio.pcf" + cobalt_path = "${projectDir}/tests/data/cobalt/binned_100_probes_0.5_LogR" + sage_vcf = "${projectDir}/tests/data/sage/TEST_TUMOR_vs_TEST_NORMAL.vcf.gz" + } + process { + """ + input[0] = Channel.fromPath(params.amber_baf_tsv) + input[1] = Channel.fromPath(params.amber_baf_pcf) + input[2] = Channel.fromPath(params.amber_qc) + input[3] = Channel.fromPath(params.amber_contamination_vcf) + input[4] = Channel.fromPath(params.cobalt_tumor_ratio_tsv) + input[5] = Channel.fromPath(params.cobalt_tumor_ratio_pcf) + input[6] = Channel.fromPath(params.cobalt_normal_ratio_pcf) + input[7] = Channel.fromPath(params.cobalt_path) + input[8] = Channel.fromPath(params.sage_vcf) + """ + } + } + then { + // Best we can do. Purple fails with test data when trying to create the segmentation plots. + assert process.failed + assert process.exitStatus == 1 + assert process.errorReport.contains("[ERROR]") + assert process.errorReport.contains("com.hartwig.hmftools.purple.segment.PurpleSegmentFactory.create(PurpleSegmentFactory.java:96") + } + } } diff --git a/tests/main.runSage.nf.test b/tests/main.runSage.nf.test index a591f90..2ef1e34 100644 --- a/tests/main.runSage.nf.test +++ b/tests/main.runSage.nf.test @@ -4,10 +4,13 @@ nextflow_process { script "main.nf" process "runSage" - test("Should run Sage with failure reading bam") { - + test("Should run Sage in matched setting [Error expected]") { when { params { + tumor = "TEST_TUMOR" + normal = "TEST_NORMAL" + tumorBam = "${projectDir}/tests/data/input/tumor.bam" + normalBam = "${projectDir}/tests/data/input/normal.bam" ensemblDataDir = "${projectDir}/tests/data/ref/ensembl_data" } process { @@ -17,7 +20,6 @@ nextflow_process { """ } } - then { // until better test data is added, expect a known error assert process.failed @@ -35,5 +37,4 @@ nextflow_process { // } } } - } diff --git a/tests/params.matched.config b/tests/nextflow.config similarity index 100% rename from tests/params.matched.config rename to tests/nextflow.config index a181284..49940af 100644 --- a/tests/params.matched.config +++ b/tests/nextflow.config @@ -1,11 +1,11 @@ params { - memory = '4 GB' tumor = "TEST_TUMOR" normal = "TEST_NORMAL" tumorBam = "${projectDir}/tests/data/input/tumor.bam" normalBam = "${projectDir}/tests/data/input/normal.bam" - refGenome = "${projectDir}/tests/data/ref/reference.fasta" outdir = "${projectDir}/tests/outdir" + refGenome = "${projectDir}/tests/data/ref/reference.fasta" binProbes = 100 binLogR = 0.5 + memory = '4 GB' } diff --git a/tests/nf-test.unmatched.config b/tests/nf-test.unmatched.config deleted file mode 100644 index 74feeb5..0000000 --- a/tests/nf-test.unmatched.config +++ /dev/null @@ -1,6 +0,0 @@ -config { - testsDir "tests" - workDir ".nf-test" - profile "cloud" - configFile "tests/params.unmatched.config" -} \ No newline at end of file diff --git a/tests/params.unmatched.config b/tests/params.unmatched.config deleted file mode 100644 index d856942..0000000 --- a/tests/params.unmatched.config +++ /dev/null @@ -1,11 +0,0 @@ -params { - memory = '4 GB' - tumor = "TEST_TUMOR" - normal = null - tumorBam = "${projectDir}/tests/data/input/tumor.bam" - normalBam = null - refGenome = "${projectDir}/tests/data/ref/reference.fasta" - outdir = "${projectDir}/tests/outdir" - binProbes = 100 - binLogR = 0.5 -} From 323499898e9e41d5c73d8d5d36786e2ef69bc6f1 Mon Sep 17 00:00:00 2001 From: "Juan E. Arango" Date: Mon, 23 Dec 2024 19:00:45 -0500 Subject: [PATCH 16/20] =?UTF-8?q?=F0=9F=94=A7=20update=20amber=20unmatched?= =?UTF-8?q?=20test=20with=20no=20contamination=20vcf?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/main.runAmber.nf.test | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/main.runAmber.nf.test b/tests/main.runAmber.nf.test index 12b9110..063f484 100644 --- a/tests/main.runAmber.nf.test +++ b/tests/main.runAmber.nf.test @@ -28,8 +28,8 @@ nextflow_process { with(process.out) { assert amber_baf_pcf.size() == 1 assert amber_baf_tsv.size() == 1 - assert amber_contamination_vcf.size() == 1 assert amber_qc.size() == 1 + assert amber_contamination_vcf.size() == 1 } } } @@ -58,8 +58,8 @@ nextflow_process { with(process.out) { assert amber_baf_pcf.size() == 1 assert amber_baf_tsv.size() == 1 - assert amber_contamination_vcf.size() == 1 assert amber_qc.size() == 1 + assert amber_contamination_vcf.size() == 0 } } } From aeac31faa52ee75061c8c048de8d8e9675ccad66 Mon Sep 17 00:00:00 2001 From: "Juan E. Arango" Date: Tue, 24 Dec 2024 00:04:14 -0500 Subject: [PATCH 17/20] =?UTF-8?q?=E2=9A=A1=EF=B8=8F=20modules=20and=20work?= =?UTF-8?q?flow=20working=20for=20matched/unmatched?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main.nf | 258 +++--------------- modules/amber.nf | 33 +++ modules/cobalt.nf | 57 ++++ modules/purple.nf | 60 ++++ modules/sage.nf | 31 +++ nextflow.config | 20 ++ run_unmatched.sh | 5 +- tests/main.binCobalt.nf.test | 2 +- tests/main.runCobalt.nf.test | 2 +- tests/main.runCobalt.nf.test.snap | 60 ++++ tests/main.runPurple.nf.test | 2 +- tests/main.runSage.nf.test | 2 +- ...nAmber.nf.test => module.runAmber.nf.test} | 2 +- ...test.snap => module.runAmber.nf.test.snap} | 0 14 files changed, 307 insertions(+), 227 deletions(-) create mode 100644 modules/amber.nf create mode 100644 modules/cobalt.nf create mode 100644 modules/purple.nf create mode 100644 modules/sage.nf create mode 100644 tests/main.runCobalt.nf.test.snap rename tests/{main.runAmber.nf.test => module.runAmber.nf.test} (98%) rename tests/{main.runAmber.nf.test.snap => module.runAmber.nf.test.snap} (100%) diff --git a/main.nf b/main.nf index d61fba6..7cf9594 100644 --- a/main.nf +++ b/main.nf @@ -1,25 +1,8 @@ -params.cores = 1 -params.memory = '4 GB' - -// Params Defaults -params.genomeVersion = 37 -params.circos = "/opt/circos-0.69-2/bin/circos" -params.loci = "/data/copy_number/GermlineHetPon.37.vcf.gz" -params.gcProfile = "/data/copy_number/GC_profile.1000bp.37.cnp" -params.ensemblDataDir = "/data/common/ensembl_data" -params.diploidRegions = "/data/copy_number/DiploidRegions.37.bed.gz" -params.normal = null -params.normalBam = "assets/NO_FILE" -params.binProbes = 0 -params.binLogR = 0 -params.minPurity = 0.08 -params.maxPurity = 1.0 - -// https://nextflow-io.github.io/patterns/optional-input -def NO_FILE = file("${projectDir}/assets/NO_FILE") -params.normal = null -params.normalBam = NO_FILE - +include { runAmber } from './modules/amber' +include { runCobalt } from './modules/cobalt' +include { binCobalt } from './modules/cobalt' +include { runSage } from './modules/sage' +include { runPurple } from './modules/purple' def logMessage = """\ HMFTOOLS - PURPLE @@ -53,216 +36,53 @@ logMessage += """\ log.info(logMessage.stripIndent()) -// See https://github.com/hartwigmedical/hmftools/tree/master/amber -process runAmber { - tag "AMBER on ${params.tumor}" + (params.normal ? " vs ${params.normal}" : "") - publishDir "${params.outdir}/amber", mode: 'copy', overwrite: false, cache: true - cpus params.cores - memory params.memory - time '1h' - - input: - path tumorBam - path normalBam - - output: - path "${params.tumor}.amber.baf.tsv.gz", emit: amber_baf_tsv - path "${params.tumor}.amber.baf.pcf", emit: amber_baf_pcf - path "${params.tumor}.amber.qc", emit: amber_qc - path "${params.tumor}.amber.contamination.vcf.gz", emit: amber_contamination_vcf, optional: true - - script: - def reference_args = params.normal ? """\\ - -reference ${params.normal} \\ - -reference_bam ${normalBam} """ : "" - - """ - amber \\ - -tumor ${params.tumor} \\ - -tumor_bam ${tumorBam} ${reference_args} \\ - -output_dir \$PWD \\ - -threads ${params.cores} \\ - -loci ${params.loci} \\ - -ref_genome_version V${params.genomeVersion} - """.stripIndent() -} - -// See https://github.com/hartwigmedical/hmftools/tree/master/cobalt#mandatory-arguments -process runCobalt { - tag "COBALT on ${params.tumor}" + (params.normal ? " vs ${params.normal}" : "") - publishDir "${params.outdir}/cobalt", mode: 'copy', overwrite: false, cache: true - cpus params.cores - memory params.memory - time '1h' - - input: - path tumorBam - path normalBam - - output: - path "${params.tumor}.cobalt.ratio.tsv.gz", emit: cobalt_tumor_ratio_tsv - path "${params.tumor}.cobalt.ratio.pcf", emit: cobalt_tumor_ratio_pcf - path "${params.normal}.cobalt.ratio.pcf", emit: cobalt_normal_ratio_pcf, optional: true - - script: - def reference_args = params.normal ? """\\ - -reference ${params.normal} \\ - -reference_bam ${normalBam}""" : """\\ - -tumor_only_diploid_bed ${params.diploidRegions}""" - - """ - cobalt \\ - -tumor ${params.tumor} \\ - -tumor_bam ${tumorBam} ${reference_args} \\ - -output_dir \$PWD \\ - -threads ${params.cores} \\ - -gc_profile ${params.gcProfile} - """.stripIndent() -} - -process binCobalt { - // Only when Unmatched - tag "COBALT BIN on ${params.tumor}" + (params.normal ? " and ${params.normal}" : "") - publishDir "${params.outdir}/cobalt/binned_${params.binProbes}_probes_${params.binLogR}_LogR", mode: 'copy' - cpus 1 - memory '4G' - time '1h' - - input: - path cobalt_tumor_ratio_tsv - path cobalt_tumor_ratio_pcf - - output: - path "${params.tumor}.cobalt.ratio.tsv.gz", emit: cobalt_tumor_ratio_tsv - path "${params.tumor}.cobalt.ratio.pcf", emit: cobalt_tumor_ratio_pcf +def printHelpMessage() { + helpMessage = """\ + PURPLE is a purity ploidy estimator primarily designed for whole genome + sequenced (WGS) data. It combines B-allele frequency (BAF) from AMBER, + read depth ratios from COBALT, somatic variants and structural variants + to estimate the purity and copy number profile of a tumor sample. PURPLE + supports both grch 37 and 38 reference assemblies. - script: + For more info please see: + https://github.com/hartwigmedical/hmftools/blob/master/purple/README.md """ - bin_cobalt.py \\ - --in_pcf ${cobalt_tumor_ratio_pcf} \\ - --bin_probes ${params.binProbes} \\ - --bin_log_r ${params.binLogR} - """.stripIndent() + log.info(helpMessage.stripIndent()) } -// See https://github.com/hartwigmedical/hmftools/blob/master/sage/README.md#usage -process runSage { - // Only when matched - tag "SAGE on ${params.tumor}" + (params.normal ? " vs ${params.normal}" : "") - publishDir "${params.outdir}/sage", mode: 'copy' - cpus params.cores - memory params.memory - time '4h' - - input: - path tumorBam - path normalBam - - output: - path "${params.tumor}_vs_${params.normal}.vcf.gz", emit: sage_vcf - - script: - """ - sage \\ - -tumor ${params.tumor} \\ - -tumor_bam ${tumorBam} \\ - -reference ${params.normal} \\ - -reference_bam ${normalBam} \\ - -ref_genome ${params.refGenome} \\ - -ref_genome_version ${params.genomeVersion} \\ - -output_vcf \$PWD/${params.tumor}_vs_${params.normal}.vcf.gz \\ - -threads ${params.cores} \\ - -ensembl_data_dir ${params.ensemblDataDir} - """.stripIndent() -} - - -// See https://github.com/hartwigmedical/hmftools/blob/master/purple/README.md#arguments -process runPurple { - tag "PURPLE on ${params.tumor}" + (params.normal ? " vs ${params.normal}" : "") - publishDir "${params.outdir}/purple/purple_${params.minPurity}_${params.maxPurity}", mode: 'copy' - cpus params.cores - memory params.memory - time '1h' - - input: - path amber_baf_tsv - path amber_baf_pcf - path amber_qc - path amber_contamination_vcf - path cobalt_tumor_ratio_tsv - path cobalt_tumor_ratio_pcf - path cobalt_normal_ratio_pcf - path cobalt_path - path somatic_vcf - - output: - path "${params.tumor}.purple.purity.tsv", emit: purple_purity_tsv - path "${params.tumor}.purple.qc", emit: purple_qc - path "${params.tumor}.purple.purity.range.tsv", emit: purple_purity_range_tsv - path "${params.tumor}.purple.cnv.somatic.tsv", emit: purple_cnv_somatic_tsv - path "${params.tumor}.purple.cnv.gene.tsv", emit: purple_cnv_gene_tsv - path "${params.tumor}.purple.segment.tsv", emit: purple_segment_tsv - path "${params.tumor}.purple.somatic.clonality.tsv", emit: purple_somatic_clonality_tsv - path "plot/${params.tumor}.segment.png", emit: purple_segment_png - path "plot/${params.tumor}.copynumber.png", emit: purple_copynumber_png - path "plot/${params.tumor}.circos.png", emit: purple_circos_png - path "plot/${params.tumor}.map.png", emit: purple_map_png - path "plot/${params.tumor}.input.png", emit: purple_input_png - path "plot/${params.tumor}.purity.range.png", emit: purple_purity_range_png - - script: - def reference_args = params.normal ? """\\ - -reference ${params.normal}""" : "" - def somatic_vcf_args = params.normal && somatic_vcf ? """\\ - -somatic_vcf ${somatic_vcf}""" : "" - - """ - purple \\ - -tumor ${params.tumor} ${reference_args} \\ - -amber ${params.outdir}/amber \\ - -cobalt ${cobalt_path} \\ - -output_dir \$PWD ${somatic_vcf_args} \\ - -gc_profile ${params.gcProfile} \\ - -ref_genome ${params.refGenome} \\ - -ref_genome_version ${params.genomeVersion} \\ - -ensembl_data_dir ${params.ensemblDataDir} \\ - -circos ${params.circos} \\ - -min_purity ${params.minPurity} \\ - -max_purity ${params.maxPurity} - - rsync -a --no-links \$PWD/ ${params.outdir}/purple/ - """.stripIndent() -} +def NO_FILE = "${projectDir}/assets/NO_FILE" workflow { + if ( + params.help == true + || params.tumor == false + || params.tumorBam == false + || params.refGenome == false + ){ + printHelpMessage() + exit 1 + } // Input Bams - tumorBam = params.tumorBam ? Channel.fromPath(params.tumorBam) : error("tumorBam is required") - normalBam = params.normalBam ? Channel.fromPath(params.normalBam) : Channel.empty() + tumorTuple = tuple(params.tumor, params.tumorBam, params.tumorBam + ".bai") + normalTuple = tuple(params.normal, params.normalBam, params.normalBam + ".bai") // Run Amber and Cobalt - amberOutput = runAmber(tumorBam, normalBam) - cobaltOutput = runCobalt(tumorBam, normalBam) - - cobaltOutdir = "${params.outdir}/cobalt" + amberOutput = runAmber(tumorTuple, normalTuple) + cobaltOutput = runCobalt(tumorTuple, normalTuple) - // // Bin Cobalt, if unmatched and if any bin param is provided - // if ((!params.normal) && (params.binProbes != 0 || params.binLogR != 0)) { - // cobaltOutdir = "${params.outdir}/cobalt" - // } else { - // cobaltOutput = binCobalt(cobaltOutput.cobalt_tumor_ratio_tsv, cobaltOutput.cobalt_tumor_ratio_pcf) - // cobaltOutput.cobalt_normal_ratio_pcf = NO_FILE - // cobaltOutdir = "${params.outdir}/cobalt/binned_${params.binProbes}_probes_${params.binLogR}_LogR" - // } + // Bin Cobalt, if unmatched and if any bin param is provided + cobaltOutdir = "${params.outdir}/cobalt" + if ((!params.normal) && (params.binProbes != 0 || params.binLogR != 0)) { + cobaltOutput = binCobalt(cobaltOutput.cobalt_tumor_ratio_tsv, cobaltOutput.cobalt_tumor_ratio_pcf) + cobaltOutdir = "${params.outdir}/cobalt/binned_${params.binProbes}_probes_${params.binLogR}_LogR" + } // Run Sage, if matched if (params.normal) { - sageOutput = runSage(tumorBam, normalBam) + sageOutput = runSage(tumorTuple, normalTuple) somatic_vcf = sageOutput.sage_vcf - // amber_contamination_vcf = amberOutput.amber_contamination_vcf } else { - somatic_vcf = null - // amber_contamination_vcf = null + somatic_vcf = file(NO_FILE) } // Run Purple @@ -270,12 +90,10 @@ workflow { amberOutput.amber_baf_tsv, amberOutput.amber_baf_pcf, amberOutput.amber_qc, - amberOutput.amber_contamination_vcf ?: NO_FILE, cobaltOutput.cobalt_tumor_ratio_tsv, cobaltOutput.cobalt_tumor_ratio_pcf, - cobaltOutput.cobalt_normal_ratio_pcf ?: NO_FILE, cobaltOutdir, - somatic_vcf ?: NO_FILE, + somatic_vcf, ) } diff --git a/modules/amber.nf b/modules/amber.nf new file mode 100644 index 0000000..53c3832 --- /dev/null +++ b/modules/amber.nf @@ -0,0 +1,33 @@ +// See https://github.com/hartwigmedical/hmftools/tree/master/amber +process runAmber { + tag "AMBER on ${params.tumor}" + (params.normal ? " vs ${params.normal}" : "") + publishDir "${params.outdir}", mode: 'copy' + cpus params.cores + memory params.memory + time '1h' + + input: + tuple val(tumor), path(tumorBam), path(tumorBai) + tuple val(normal), path(normalBam), path(normalBai) + + output: + path "amber/${tumor}.amber.baf.tsv.gz", emit: amber_baf_tsv + path "amber/${tumor}.amber.baf.pcf", emit: amber_baf_pcf + path "amber/${tumor}.amber.qc", emit: amber_qc + path "amber/${tumor}.amber.contamination.vcf.gz", emit: amber_contamination_vcf, optional: true + + script: + def reference_args = normal ? """\\ + -reference ${normal} \\ + -reference_bam ${normalBam} """ : "" + + """ + amber \\ + -tumor ${tumor} \\ + -tumor_bam ${tumorBam} ${reference_args} \\ + -output_dir amber \\ + -threads ${params.cores} \\ + -loci ${params.loci} \\ + -ref_genome_version V${params.genomeVersion} + """.stripIndent() +} \ No newline at end of file diff --git a/modules/cobalt.nf b/modules/cobalt.nf new file mode 100644 index 0000000..67982a5 --- /dev/null +++ b/modules/cobalt.nf @@ -0,0 +1,57 @@ +// See https://github.com/hartwigmedical/hmftools/tree/master/cobalt#mandatory-arguments +process runCobalt { + tag "COBALT on ${params.tumor}" + (params.normal ? " vs ${params.normal}" : "") + publishDir "${params.outdir}", mode: 'copy' + cpus params.cores + memory params.memory + time '1h' + + input: + tuple val(tumor), path(tumorBam), path(tumorBai) + tuple val(normal), path(normalBam), path(normalBai) + + output: + path "cobalt/${tumor}.cobalt.ratio.tsv.gz", emit: cobalt_tumor_ratio_tsv + path "cobalt/${tumor}.cobalt.ratio.pcf", emit: cobalt_tumor_ratio_pcf + path "cobalt/${normal}.cobalt.ratio.pcf", emit: cobalt_normal_ratio_pcf, optional: true + + script: + def reference_args = normal ? """\\ + -reference ${normal} \\ + -reference_bam ${normalBam}""" : """\\ + -tumor_only_diploid_bed ${params.diploidRegions}""" + + """ + cobalt \\ + -tumor ${tumor} \\ + -tumor_bam ${tumorBam} ${reference_args} \\ + -output_dir cobalt \\ + -threads ${params.cores} \\ + -gc_profile ${params.gcProfile} + """.stripIndent() +} + +process binCobalt { + // Only when Unmatched + tag "COBALT BIN on ${params.tumor}" + (params.normal ? " and ${params.normal}" : "") + publishDir "${params.outdir}/cobalt/binned_${params.binProbes}_probes_${params.binLogR}_LogR", mode: 'copy' + cpus 1 + memory '4G' + time '1h' + + input: + path cobalt_tumor_ratio_tsv + path cobalt_tumor_ratio_pcf + + output: + path "${params.tumor}.cobalt.ratio.tsv.gz", emit: cobalt_tumor_ratio_tsv + path "${params.tumor}.cobalt.ratio.pcf", emit: cobalt_tumor_ratio_pcf + + script: + """ + bin_cobalt.py \\ + --in_pcf ${cobalt_tumor_ratio_pcf} \\ + --bin_probes ${params.binProbes} \\ + --bin_log_r ${params.binLogR} + """.stripIndent() +} diff --git a/modules/purple.nf b/modules/purple.nf new file mode 100644 index 0000000..7e9678a --- /dev/null +++ b/modules/purple.nf @@ -0,0 +1,60 @@ +// See https://github.com/hartwigmedical/hmftools/blob/master/purple/README.md#arguments +process runPurple { + tag "PURPLE on ${params.tumor}" + (params.normal ? " vs ${params.normal}" : "") + publishDir "${params.outdir}/purple/purple_${params.minPurity}_${params.maxPurity}", mode: 'copy' + cpus params.cores + memory params.memory + time '1h' + + input: + path amber_baf_tsv + path amber_baf_pcf + path amber_qc + path cobalt_tumor_ratio_tsv + path cobalt_tumor_ratio_pcf + path cobalt_path + path somatic_vcf + + output: + path "${params.tumor}.purple.cnv.gene.tsv" + path "${params.tumor}.purple.cnv.somatic.tsv" + path "${params.tumor}.purple.germline.deletion.tsv", optional: true + path "${params.tumor}.purple.purity.range.tsv" + path "${params.tumor}.purple.purity.tsv" + path "${params.tumor}.purple.qc" + path "${params.tumor}.purple.segment.tsv" + path "${params.tumor}.purple.somatic.clonality.tsv" + path "${params.tumor}.purple.somatic.hist.tsv", optional: true + path "${params.tumor}.purple.somatic.vcf.gz", optional: true + path "${params.tumor}.purple.somatic.vcf.gz.tbi", optional: true + path "plot/${params.tumor}.circos.png" + path "plot/${params.tumor}.copynumber.png" + path "plot/${params.tumor}.input.png" + path "plot/${params.tumor}.map.png" + path "plot/${params.tumor}.purity.range.png" + path "plot/${params.tumor}.segment.png" + path "plot/${params.tumor}.somatic.clonality.png", optional: true + path "plot/${params.tumor}.somatic.png", optional: true + path "plot/${params.tumor}.somatic.rainfall.png", optional: true + + script: + def reference_args = params.normal ? """\\ + -reference ${params.normal}""" : "" + def somatic_vcf_args = params.normal && somatic_vcf ? """\\ + -somatic_vcf ${somatic_vcf}""" : "" + + """ + purple \\ + -tumor ${params.tumor} ${reference_args} ${somatic_vcf_args} \\ + -amber ${params.outdir}/amber \\ + -cobalt ${cobalt_path} \\ + -output_dir \$PWD \\ + -gc_profile ${params.gcProfile} \\ + -ref_genome ${params.refGenome} \\ + -ref_genome_version ${params.genomeVersion} \\ + -ensembl_data_dir ${params.ensemblDataDir} \\ + -circos ${params.circos} \\ + -min_purity ${params.minPurity} \\ + -max_purity ${params.maxPurity} + """.stripIndent() +} diff --git a/modules/sage.nf b/modules/sage.nf new file mode 100644 index 0000000..18a4643 --- /dev/null +++ b/modules/sage.nf @@ -0,0 +1,31 @@ +// See https://github.com/hartwigmedical/hmftools/blob/master/sage/README.md#usage +process runSage { + tag "SAGE on ${params.tumor}" + (params.normal ? " vs ${params.normal}" : "") + publishDir "${params.outdir}", mode: 'copy' + cpus params.cores + memory params.memory + time '4h' + + input: + tuple val(tumor), path(tumorBam), path(tumorBai) + tuple val(normal), path(normalBam), path(normalBai) + + output: + path "sage/${tumor}_vs_${normal}.vcf.gz", emit: sage_vcf + + script: + """ + mkdir -p sage + + sage \\ + -tumor ${params.tumor} \\ + -tumor_bam ${tumorBam} \\ + -reference ${params.normal} \\ + -reference_bam ${normalBam} \\ + -ref_genome ${params.refGenome} \\ + -ref_genome_version ${params.genomeVersion} \\ + -output_vcf sage/${tumor}_vs_${normal}.vcf.gz \\ + -threads ${params.cores} \\ + -ensembl_data_dir ${params.ensemblDataDir} + """.stripIndent() +} diff --git a/nextflow.config b/nextflow.config index f11effc..55f25fc 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,3 +1,23 @@ +// Params Defaults +params { + cores = 1 + memory = '4 GB' + genomeVersion = 37 + circos = "/opt/circos-0.69-2/bin/circos" + loci = "/data/copy_number/GermlineHetPon.37.vcf.gz" + gcProfile = "/data/copy_number/GC_profile.1000bp.37.cnp" + ensemblDataDir = "/data/common/ensembl_data" + diploidRegions = "/data/copy_number/DiploidRegions.37.bed.gz" + binProbes = 0 + binLogR = 0 + minPurity = 0.08 + maxPurity = 1.0 + help = false + normal = null + // https://nextflow-io.github.io/patterns/optional-input + normalBam = "${projectDir}/assets/NO_FILE" +} + // Metrics Files dag { enabled = true diff --git a/run_unmatched.sh b/run_unmatched.sh index e9c9d5e..bed32ed 100755 --- a/run_unmatched.sh +++ b/run_unmatched.sh @@ -6,7 +6,7 @@ TUMOR=IID_H211025_T01_01_WG01 TUMOR_BAM=`isabl get-bams ${TUMOR}` NF_PURPLE=/data1/papaemme/isabl/home/svc_papaemme_bot/dev/nf-purple/main.nf -OUTDIR=/data1/papaemme/isabl/home/svc_papaemme_bot/tmp/purple_unmatched/ +OUTDIR=/data1/papaemme/isabl/home/svc_papaemme_bot/tmp/purple_unmatched REFGENOME=/data1/papaemme/isabl/ref/homo_sapiens/GRCh37d5/gr37.fasta GENOMEVERSION=37 @@ -37,4 +37,5 @@ nextflow run \ --cores 16 \ --memory '64G' \ --binProbes 100 \ - --binLogR 0.5 \ No newline at end of file + --binLogR 0.5 \ + -resume diff --git a/tests/main.binCobalt.nf.test b/tests/main.binCobalt.nf.test index 44f41ac..8dc5f51 100644 --- a/tests/main.binCobalt.nf.test +++ b/tests/main.binCobalt.nf.test @@ -1,7 +1,7 @@ nextflow_process { name "Test Process binCobalt" - script "main.nf" + script "modules/cobalt.nf" process "binCobalt" test("Should bin Cobalt output in unmatched setting") { diff --git a/tests/main.runCobalt.nf.test b/tests/main.runCobalt.nf.test index 85068ef..ce478b5 100644 --- a/tests/main.runCobalt.nf.test +++ b/tests/main.runCobalt.nf.test @@ -1,7 +1,7 @@ nextflow_process { name "Test Process runCobalt" - script "main.nf" + script "modules/cobalt.nf" process "runCobalt" test("Should run Cobalt in matched setting") { diff --git a/tests/main.runCobalt.nf.test.snap b/tests/main.runCobalt.nf.test.snap new file mode 100644 index 0000000..f7d2f8e --- /dev/null +++ b/tests/main.runCobalt.nf.test.snap @@ -0,0 +1,60 @@ +{ + "Should run Cobalt in matched setting": { + "content": [ + { + "0": [ + "TEST_TUMOR.cobalt.ratio.tsv.gz:md5,f235d46690097a000fdf7ac31bb53108" + ], + "1": [ + "TEST_TUMOR.cobalt.ratio.pcf:md5,7fa4142d0d102e1f3b633d61d154d9c0" + ], + "2": [ + "TEST_NORMAL.cobalt.ratio.pcf:md5,4f035e0ee8e4290d64d24152ea5387b8" + ], + "cobalt_normal_ratio_pcf": [ + "TEST_NORMAL.cobalt.ratio.pcf:md5,4f035e0ee8e4290d64d24152ea5387b8" + ], + "cobalt_tumor_ratio_pcf": [ + "TEST_TUMOR.cobalt.ratio.pcf:md5,7fa4142d0d102e1f3b633d61d154d9c0" + ], + "cobalt_tumor_ratio_tsv": [ + "TEST_TUMOR.cobalt.ratio.tsv.gz:md5,f235d46690097a000fdf7ac31bb53108" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2024-12-23T18:58:50.156199847" + }, + "Should run Cobalt in unmatched setting": { + "content": [ + { + "0": [ + "TEST_TUMOR.cobalt.ratio.tsv.gz:md5,87cf8451b04e4373fc4dd7ccda3e6afe" + ], + "1": [ + "TEST_TUMOR.cobalt.ratio.pcf:md5,7fa4142d0d102e1f3b633d61d154d9c0" + ], + "2": [ + + ], + "cobalt_normal_ratio_pcf": [ + + ], + "cobalt_tumor_ratio_pcf": [ + "TEST_TUMOR.cobalt.ratio.pcf:md5,7fa4142d0d102e1f3b633d61d154d9c0" + ], + "cobalt_tumor_ratio_tsv": [ + "TEST_TUMOR.cobalt.ratio.tsv.gz:md5,87cf8451b04e4373fc4dd7ccda3e6afe" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2024-12-23T18:59:18.731393807" + } +} \ No newline at end of file diff --git a/tests/main.runPurple.nf.test b/tests/main.runPurple.nf.test index 11e8fba..5ff1290 100644 --- a/tests/main.runPurple.nf.test +++ b/tests/main.runPurple.nf.test @@ -1,7 +1,7 @@ nextflow_process { name "Test Process runPurple" - script "main.nf" + script "modules/purple.nf" process "runPurple" test("Should run Purple in matched setting. [Error expected on segmentation]") { diff --git a/tests/main.runSage.nf.test b/tests/main.runSage.nf.test index 2ef1e34..a9f7385 100644 --- a/tests/main.runSage.nf.test +++ b/tests/main.runSage.nf.test @@ -1,7 +1,7 @@ nextflow_process { name "Test Process runSage" - script "main.nf" + script "modules/sage.nf" process "runSage" test("Should run Sage in matched setting [Error expected]") { diff --git a/tests/main.runAmber.nf.test b/tests/module.runAmber.nf.test similarity index 98% rename from tests/main.runAmber.nf.test rename to tests/module.runAmber.nf.test index 063f484..5138a9b 100644 --- a/tests/main.runAmber.nf.test +++ b/tests/module.runAmber.nf.test @@ -1,7 +1,7 @@ nextflow_process { name "Test Process runAmber" - script "main.nf" + script "modules/amber.nf" process "runAmber" test("Should run Amber in matched setting") { diff --git a/tests/main.runAmber.nf.test.snap b/tests/module.runAmber.nf.test.snap similarity index 100% rename from tests/main.runAmber.nf.test.snap rename to tests/module.runAmber.nf.test.snap From ad5ff86f691d9745bffbff20c3c236eeecf445ee Mon Sep 17 00:00:00 2001 From: "Juan E. Arango" Date: Tue, 24 Dec 2024 00:07:07 -0500 Subject: [PATCH 18/20] =?UTF-8?q?=F0=9F=9F=A3=20define=20purple=20files=20?= =?UTF-8?q?with=20glob=20wildcards?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- modules/purple.nf | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/modules/purple.nf b/modules/purple.nf index 7e9678a..c26329b 100644 --- a/modules/purple.nf +++ b/modules/purple.nf @@ -16,26 +16,8 @@ process runPurple { path somatic_vcf output: - path "${params.tumor}.purple.cnv.gene.tsv" - path "${params.tumor}.purple.cnv.somatic.tsv" - path "${params.tumor}.purple.germline.deletion.tsv", optional: true - path "${params.tumor}.purple.purity.range.tsv" - path "${params.tumor}.purple.purity.tsv" - path "${params.tumor}.purple.qc" - path "${params.tumor}.purple.segment.tsv" - path "${params.tumor}.purple.somatic.clonality.tsv" - path "${params.tumor}.purple.somatic.hist.tsv", optional: true - path "${params.tumor}.purple.somatic.vcf.gz", optional: true - path "${params.tumor}.purple.somatic.vcf.gz.tbi", optional: true - path "plot/${params.tumor}.circos.png" - path "plot/${params.tumor}.copynumber.png" - path "plot/${params.tumor}.input.png" - path "plot/${params.tumor}.map.png" - path "plot/${params.tumor}.purity.range.png" - path "plot/${params.tumor}.segment.png" - path "plot/${params.tumor}.somatic.clonality.png", optional: true - path "plot/${params.tumor}.somatic.png", optional: true - path "plot/${params.tumor}.somatic.rainfall.png", optional: true + path "${params.tumor}.purple.*", emit: purple_outfiles + path "plot/${params.tumor}.*.png", emit: purple_plots script: def reference_args = params.normal ? """\\ From 1b47c29bc2b4cd75b7a2b38eae70c40d378916f3 Mon Sep 17 00:00:00 2001 From: "Juan E. Arango" Date: Tue, 24 Dec 2024 00:36:31 -0500 Subject: [PATCH 19/20] =?UTF-8?q?=E2=9C=85=20fix=20unit=20and=20e2e=20test?= =?UTF-8?q?s=20for=20matched/unmatched=20cases?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/ci.yml | 5 +- tests/coverage.main.html | 48 --------------- tests/main.nf.test | 58 ------------------- ...obalt.nf.test => module.binCobalt.nf.test} | 0 tests/module.runAmber.nf.test | 9 ++- ...obalt.nf.test => module.runCobalt.nf.test} | 8 +-- ...est.snap => module.runCobalt.nf.test.snap} | 0 ...urple.nf.test => module.runPurple.nf.test} | 26 +++------ ...runSage.nf.test => module.runSage.nf.test} | 4 +- tests/workflow.main.nf.test | 53 +++++++++++++++++ 10 files changed, 74 insertions(+), 137 deletions(-) delete mode 100644 tests/coverage.main.html delete mode 100644 tests/main.nf.test rename tests/{main.binCobalt.nf.test => module.binCobalt.nf.test} (100%) rename tests/{main.runCobalt.nf.test => module.runCobalt.nf.test} (82%) rename tests/{main.runCobalt.nf.test.snap => module.runCobalt.nf.test.snap} (100%) rename tests/{main.runPurple.nf.test => module.runPurple.nf.test} (72%) rename tests/{main.runSage.nf.test => module.runSage.nf.test} (87%) create mode 100644 tests/workflow.main.nf.test diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7b982bc..304f988 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,7 +21,6 @@ jobs: run: | docker pull papaemmelab/purple:v0.1.1 docker pull quay.io/biocontainers/hmftools-sage:3.4.4--hdfd78af_0 - - name: Run unit tests of each process for Amber, Cobalt, binCobalt, Sage, Purple - id: run_test + - name: Run unit tests for process module and main workflow run: | - nf-test test --ci --config tests/nf-test.matched.config --coverage \ No newline at end of file + nf-test test --ci --coverage \ No newline at end of file diff --git a/tests/coverage.main.html b/tests/coverage.main.html deleted file mode 100644 index 0f546fb..0000000 --- a/tests/coverage.main.html +++ /dev/null @@ -1,48 +0,0 @@ - - - - - - - - - - -
-

Coverage Report

-

This report was generated by nf-test on Mon Dec 23 11:38:07 EST 2024.

-

- Coverage: 100% -

- -
-
-
- -
- - - - - - - - - - - - - - - - -
FileCovered
main.nftrue
-
- - \ No newline at end of file diff --git a/tests/main.nf.test b/tests/main.nf.test deleted file mode 100644 index 03026b7..0000000 --- a/tests/main.nf.test +++ /dev/null @@ -1,58 +0,0 @@ -nextflow_pipeline { - - name "Test Workflow main.nf" - script "main.nf" - autoSort false - - // test("Should run Main.nf in matched setting. [Error expected in Sage]") { - // when { - // params { - // tumor = "TEST_TUMOR" - // normal = "TEST_NORMAL" - // tumorBam = "${projectDir}/tests/data/input/tumor.bam" - // normalBam = "${projectDir}/tests/data/input/normal.bam" - // ensemblDataDir = "${projectDir}/tests/data/ref/ensembl_data" - // sage_vcf = "${projectDir}/tests/data/sage/TEST_TUMOR_vs_TEST_NORMAL.vcf.gz" - // } - // } - // then { - // with(workflow) { - // assert failed - // assert exitStatus == 1 - // assert trace.tasks().size() == 3 // Amber, Cobalt, Sage - // assert trace.succeeded().size() == 2 // Amber, Cobalt - // assert trace.failed().size() == 1 // Sage - // assert stdout.any{ - // it.contains('Cannot invoke "java.util.List.stream()" because "geneDataList" is null') - // } - // } - // } - // } - - test("Should run Main.nf in unmatched setting. [Error expected in Purple]") { - when { - params { - tumor = "TEST_TUMOR" - normal = null - tumorBam = "${projectDir}/tests/data/input/tumor.bam" - normalBam = "${projectDir}/assets/NO_FILE" - ensemblDataDir = "${projectDir}/tests/data/ref/ensembl_data" - } - } - then { - with(workflow) { - println "Workflow status: ${status}" - println "Workflow trace: ${trace}" - - assert failed - assert exitStatus == 1 - assert trace.tasks().size() == 4 // Amber, Cobalt, binCobalt, Purple - assert workflow.trace.succeeded().size() == 3 // Amber, Cobalt, binCobalt - assert workflow.trace.failed().size() == 1 // Purple - assert workflow.stdout.any{ - it.contains('Cannot invoke "java.util.List.stream()" because "geneDataList" is null') - } - } - } - } -} diff --git a/tests/main.binCobalt.nf.test b/tests/module.binCobalt.nf.test similarity index 100% rename from tests/main.binCobalt.nf.test rename to tests/module.binCobalt.nf.test diff --git a/tests/module.runAmber.nf.test b/tests/module.runAmber.nf.test index 5138a9b..6d10036 100644 --- a/tests/module.runAmber.nf.test +++ b/tests/module.runAmber.nf.test @@ -14,8 +14,8 @@ nextflow_process { } process { """ - input[0] = Channel.fromPath(params.tumorBam) - input[1] = Channel.fromPath(params.normalBam) + input[0] = [params.tumor, "${params.tumorBam}", "${params.tumorBam}.bai"] + input[1] = [params.normal, "${params.normalBam}", "${params.normalBam}.bai"] """ } } @@ -33,7 +33,6 @@ nextflow_process { } } } - test("Should run Amber in unmatched setting") { when { params { @@ -44,8 +43,8 @@ nextflow_process { } process { """ - input[0] = Channel.fromPath(params.tumorBam) - input[1] = Channel.fromPath(params.normalBam) + input[0] = [params.tumor, "${params.tumorBam}", "${params.tumorBam}.bai"] + input[1] = [params.normal, "${params.normalBam}", "${params.normalBam}.bai"] """ } } diff --git a/tests/main.runCobalt.nf.test b/tests/module.runCobalt.nf.test similarity index 82% rename from tests/main.runCobalt.nf.test rename to tests/module.runCobalt.nf.test index ce478b5..898219b 100644 --- a/tests/main.runCobalt.nf.test +++ b/tests/module.runCobalt.nf.test @@ -14,8 +14,8 @@ nextflow_process { } process { """ - input[0] = Channel.fromPath(params.tumorBam) - input[1] = Channel.fromPath(params.normalBam) + input[0] = [params.tumor, "${params.tumorBam}", "${params.tumorBam}.bai"] + input[1] = [params.normal, "${params.normalBam}", "${params.normalBam}.bai"] """ } } @@ -43,8 +43,8 @@ nextflow_process { } process { """ - input[0] = Channel.fromPath(params.tumorBam) - input[1] = Channel.fromPath(params.normalBam) + input[0] = [params.tumor, "${params.tumorBam}", "${params.tumorBam}.bai"] + input[1] = [params.normal, "${params.normalBam}", "${params.normalBam}.bai"] """ } } diff --git a/tests/main.runCobalt.nf.test.snap b/tests/module.runCobalt.nf.test.snap similarity index 100% rename from tests/main.runCobalt.nf.test.snap rename to tests/module.runCobalt.nf.test.snap diff --git a/tests/main.runPurple.nf.test b/tests/module.runPurple.nf.test similarity index 72% rename from tests/main.runPurple.nf.test rename to tests/module.runPurple.nf.test index 5ff1290..80047a0 100644 --- a/tests/main.runPurple.nf.test +++ b/tests/module.runPurple.nf.test @@ -10,10 +10,8 @@ nextflow_process { amber_baf_tsv = "${projectDir}/tests/data/sage/TEST_TUMOR.amber.baf.tsv.gz" amber_baf_pcf = "${projectDir}/tests/data/sage/TEST_TUMOR.amber.baf.pcf" amber_qc = "${projectDir}/tests/data/sage/TEST_TUMOR.amber.qc" - amber_contamination_vcf = "${projectDir}/tests/data/sage/TEST_TUMOR.amber.contamination.vcf.gz" cobalt_tumor_ratio_tsv = "${projectDir}/tests/data/cobalt/binned_100_probes_0.5_LogR/TEST_TUMOR.cobalt.ratio.tsv.gz" cobalt_tumor_ratio_pcf = "${projectDir}/tests/data/cobalt/binned_100_probes_0.5_LogR/TEST_TUMOR.cobalt.ratio.pcf" - cobalt_normal_ratio_pcf = "${projectDir}/tests/data/cobalt/binned_100_probes_0.5_LogR/TEST_NORMAL.cobalt.ratio.pcf" cobalt_path = "${projectDir}/tests/data/cobalt/binned_100_probes_0.5_LogR" sage_vcf = "${projectDir}/tests/data/sage/TEST_TUMOR_vs_TEST_NORMAL.vcf.gz" } @@ -22,12 +20,10 @@ nextflow_process { input[0] = Channel.fromPath(params.amber_baf_tsv) input[1] = Channel.fromPath(params.amber_baf_pcf) input[2] = Channel.fromPath(params.amber_qc) - input[3] = Channel.fromPath(params.amber_contamination_vcf) - input[4] = Channel.fromPath(params.cobalt_tumor_ratio_tsv) - input[5] = Channel.fromPath(params.cobalt_tumor_ratio_pcf) - input[6] = Channel.fromPath(params.cobalt_normal_ratio_pcf) - input[7] = Channel.fromPath(params.cobalt_path) - input[8] = Channel.fromPath(params.sage_vcf) + input[3] = Channel.fromPath(params.cobalt_tumor_ratio_tsv) + input[4] = Channel.fromPath(params.cobalt_tumor_ratio_pcf) + input[5] = Channel.fromPath(params.cobalt_path) + input[6] = Channel.fromPath(params.sage_vcf) """ } } @@ -48,24 +44,20 @@ nextflow_process { amber_baf_tsv = "${projectDir}/tests/data/sage/TEST_TUMOR.amber.baf.tsv.gz" amber_baf_pcf = "${projectDir}/tests/data/sage/TEST_TUMOR.amber.baf.pcf" amber_qc = "${projectDir}/tests/data/sage/TEST_TUMOR.amber.qc" - amber_contamination_vcf = "${projectDir}/tests/data/sage/TEST_TUMOR.amber.contamination.vcf.gz" cobalt_tumor_ratio_tsv = "${projectDir}/tests/data/cobalt/binned_100_probes_0.5_LogR/TEST_TUMOR.cobalt.ratio.tsv.gz" cobalt_tumor_ratio_pcf = "${projectDir}/tests/data/cobalt/binned_100_probes_0.5_LogR/TEST_TUMOR.cobalt.ratio.pcf" - cobalt_normal_ratio_pcf = "${projectDir}/tests/data/cobalt/binned_100_probes_0.5_LogR/TEST_NORMAL.cobalt.ratio.pcf" cobalt_path = "${projectDir}/tests/data/cobalt/binned_100_probes_0.5_LogR" - sage_vcf = "${projectDir}/tests/data/sage/TEST_TUMOR_vs_TEST_NORMAL.vcf.gz" + sage_vcf = "${projectDir}/assets/NO_FILE" } process { """ input[0] = Channel.fromPath(params.amber_baf_tsv) input[1] = Channel.fromPath(params.amber_baf_pcf) input[2] = Channel.fromPath(params.amber_qc) - input[3] = Channel.fromPath(params.amber_contamination_vcf) - input[4] = Channel.fromPath(params.cobalt_tumor_ratio_tsv) - input[5] = Channel.fromPath(params.cobalt_tumor_ratio_pcf) - input[6] = Channel.fromPath(params.cobalt_normal_ratio_pcf) - input[7] = Channel.fromPath(params.cobalt_path) - input[8] = Channel.fromPath(params.sage_vcf) + input[3] = Channel.fromPath(params.cobalt_tumor_ratio_tsv) + input[4] = Channel.fromPath(params.cobalt_tumor_ratio_pcf) + input[5] = Channel.fromPath(params.cobalt_path) + input[6] = Channel.fromPath(params.sage_vcf) """ } } diff --git a/tests/main.runSage.nf.test b/tests/module.runSage.nf.test similarity index 87% rename from tests/main.runSage.nf.test rename to tests/module.runSage.nf.test index a9f7385..f4214ee 100644 --- a/tests/main.runSage.nf.test +++ b/tests/module.runSage.nf.test @@ -15,8 +15,8 @@ nextflow_process { } process { """ - input[0] = Channel.fromPath(params.tumorBam) - input[1] = Channel.fromPath(params.normalBam) + input[0] = [params.tumor, "${params.tumorBam}", "${params.tumorBam}.bai"] + input[1] = [params.normal, "${params.normalBam}", "${params.normalBam}.bai"] """ } } diff --git a/tests/workflow.main.nf.test b/tests/workflow.main.nf.test new file mode 100644 index 0000000..9fe8dd5 --- /dev/null +++ b/tests/workflow.main.nf.test @@ -0,0 +1,53 @@ +nextflow_pipeline { + + name "Test Workflow main.nf" + script "main.nf" + autoSort false + + test("Should run Main.nf in matched setting. [Error expected in Sage]") { + when { + params { + tumor = "TEST_TUMOR" + normal = "TEST_NORMAL" + tumorBam = "${projectDir}/tests/data/input/tumor.bam" + normalBam = "${projectDir}/tests/data/input/normal.bam" + ensemblDataDir = "${projectDir}/tests/data/ref/ensembl_data" + } + } + then { + with(workflow) { + assert failed + assert exitStatus == 1 + assert trace.tasks().size() == 3 // Amber, Cobalt, Sage + assert trace.succeeded().size() == 0 + assert trace.failed().size() == 3 + assert stdout.any{ + it.contains('Cannot invoke "java.util.List.stream()" because "geneDataList" is null') + } + } + } + } + + test("Should run Main.nf in unmatched setting. [Error expected in Purple]") { + when { + params { + tumor = "TEST_TUMOR" + normal = null + tumorBam = "${projectDir}/tests/data/input/tumor.bam" + normalBam = "${projectDir}/assets/NO_FILE" + } + } + then { + with(workflow) { + assert failed + assert exitStatus == 1 + assert trace.tasks().size() == 4 // Amber, Cobalt, binCobalt, Purple + assert workflow.trace.succeeded().size() == 3 // Amber, Cobalt, binCobalt + assert workflow.trace.failed().size() == 1 // Purple + assert workflow.stdout.any{ + it.contains('[ERROR] - failed processing sample(TEST_TUMOR): java.lang.NullPointerException') + } + } + } + } +} From de5b15e7309433dc1e6b8f67280e1738432abd90 Mon Sep 17 00:00:00 2001 From: "Juan E. Arango" Date: Tue, 24 Dec 2024 00:55:33 -0500 Subject: [PATCH 20/20] =?UTF-8?q?=F0=9F=94=A7=20small=20typo=20in=20test?= =?UTF-8?q?=20data=20readme?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile | 1 + tests/data/README.md | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index fa277a3..6d37b34 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,3 +1,4 @@ +# Dockerfile use for papaemme/purple:v0.1.1 FROM papaemmelab/docker-hmftools:v1.0.0 # Clean up to free space diff --git a/tests/data/README.md b/tests/data/README.md index fc4e0e2..a6c9e19 100644 --- a/tests/data/README.md +++ b/tests/data/README.md @@ -15,7 +15,7 @@ DIR_IN=tests/data/ensembl_data_original DIR_OUT=tests/data/ensembl_data # For -awk -F, 'NR==1 || ($3 == "1" && $5 >= 100000 && $6 <= 200000) || ($3 == "2" && $5 >= 300000 && $6 <= 400000)' $DIR_IN/ensemble_gene_data.csv > $DIR_OUT/ensemble_gene_data.csv +awk -F, 'NR==1 || ($3 == "1" && $5 >= 100000 && $6 <= 200000) || ($3 == "2" && $5 >= 300000 && $6 <= 400000)' $DIR_IN/ensembl_gene_data.csv > $DIR_OUT/ensembl_gene_data.csv ``` # Sage VCF: