Skip to content

Commit

Permalink
🧬 add sage for somatic enrichment
Browse files Browse the repository at this point in the history
  • Loading branch information
juanesarango committed Nov 16, 2024
1 parent 14a817b commit 8e7de21
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 96 deletions.
181 changes: 99 additions & 82 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ params.memory = '4 GB'

// Params Defaults in juno
params.refGenome = "/work/isabl/ref/homo_sapiens/GRCh37d5/gr37.fasta"
params.genomeVersion = "37"
params.genomeVersion = 37
params.circos = "/opt/circos-0.69-2/bin/circos"
params.loci = "/data/copy_number/GermlineHetPon.37.vcf.gz"
params.gcProfile = "/data/copy_number/GC_profile.1000bp.37.cnp"
Expand Down Expand Up @@ -33,7 +33,6 @@ logMessage += (params.normal && params.normalBam) ? """\
""" : ""
logMessage += """\
somaticVcf : ${params.somaticVcf}
germlineVcf : ${params.germlineVcf}
outdir : ${params.outdir}
cores : ${params.cores}
memory : ${params.memory}
Expand All @@ -59,32 +58,34 @@ process runAmber {
time '1h'

input:
val tumor
val normal
path tumorBam
path normalBam

output:
path "${tumor}.amber.baf.tsv.gz", emit: amber_baf_tsv
path "${tumor}.amber.baf.pcf", emit: amber_baf_pcf
path "${tumor}.amber.qc", emit: amber_qc
path "${tumor}.amber.contamination.vcf.gz", emit: amber_contamination_vcf
path "${params.tumor}.amber.baf.tsv.gz", emit: amber_baf_tsv
path "${params.tumor}.amber.baf.pcf", emit: amber_baf_pcf
path "${params.tumor}.amber.qc", emit: amber_qc
path "${params.tumor}.amber.contamination.vcf.gz", emit: amber_contamination_vcf

script:
def reference_args = normal ? """-reference ${normal} \\\n -reference_bam ${normalBam}""" : ""
def reference_args = params.normal ? """
-reference ${params.normal} \\
-reference_bam ${normalBam} \\
""" : ""

"""
if [ -f "${params.outdir}/amber/${tumor}.amber.baf.tsv.gz" ] && \\
[ -f "${params.outdir}/amber/${tumor}.amber.baf.pcf" ] && \\
[ -f "${params.outdir}/amber/${tumor}.amber.qc" ] && \\
[ -f "${params.outdir}/amber/${tumor}.amber.contamination.vcf.gz" ]; then
if [ -f "${params.outdir}/amber/${params.tumor}.amber.baf.tsv.gz" ] && \\
[ -f "${params.outdir}/amber/${params.tumor}.amber.baf.pcf" ] && \\
[ -f "${params.outdir}/amber/${params.tumor}.amber.qc" ] && \\
[ -f "${params.outdir}/amber/${params.tumor}.amber.contamination.vcf.gz" ]; then
echo "Output files already exist. Skipping amber execution."
ln -fs ${params.outdir}/amber/${tumor}.amber.baf.tsv.gz ${tumor}.amber.baf.tsv.gz
ln -fs ${params.outdir}/amber/${tumor}.amber.baf.pcf ${tumor}.amber.baf.pcf
ln -fs ${params.outdir}/amber/${tumor}.amber.qc ${tumor}.amber.qc
ln -fs ${params.outdir}/amber/${tumor}.amber.contamination.vcf.gz ${tumor}.amber.contamination.vcf.gz
ln -fs ${params.outdir}/amber/${params.tumor}.amber.baf.tsv.gz ${params.tumor}.amber.baf.tsv.gz
ln -fs ${params.outdir}/amber/${params.tumor}.amber.baf.pcf ${params.tumor}.amber.baf.pcf
ln -fs ${params.outdir}/amber/${params.tumor}.amber.qc ${params.tumor}.amber.qc
ln -fs ${params.outdir}/amber/${params.tumor}.amber.contamination.vcf.gz ${params.tumor}.amber.contamination.vcf.gz
else
amber \\
-tumor ${tumor} \\
-tumor ${params.tumor} \\
-tumor_bam ${tumorBam} \\
${reference_args} \\
-output_dir \$PWD \\
Expand All @@ -104,31 +105,33 @@ process runCobalt {
time '1h'

input:
val tumor
val normal
path tumorBam
path normalBam

output:
path "${tumor}.cobalt.ratio.tsv.gz", emit: cobalt_tumor_ratio_tsv
path "${tumor}.cobalt.ratio.pcf", emit: cobalt_tumor_ratio_pcf
path "${normal}.cobalt.ratio.pcf", emit: cobalt_normal_ratio_pcf, optional: true
path "${params.tumor}.cobalt.ratio.tsv.gz", emit: cobalt_tumor_ratio_tsv
path "${params.tumor}.cobalt.ratio.pcf", emit: cobalt_tumor_ratio_pcf
path "${params.normal}.cobalt.ratio.pcf", emit: cobalt_normal_ratio_pcf, optional: true

script:
def reference_args = normal ? "\t\t\t-reference ${normal} \\\n -reference_bam ${normalBam}" : "\t\t\t-tumor_only_diploid_bed ${params.diploidRegions}"
def reference_args = params.normal ? """
-reference ${params.normal} \\
-reference_bam ${normalBam}""" : """
-tumor_only_diploid_bed ${params.diploidRegions}"""

"""
if [ -f "${params.outdir}/cobalt/${tumor}.cobalt.ratio.tsv.gz" ] && \
[ -f "${params.outdir}/cobalt/${tumor}.cobalt.ratio.pcf" ]; then
if [ -f "${params.outdir}/cobalt/${params.tumor}.cobalt.ratio.tsv.gz" ] && \
[ -f "${params.outdir}/cobalt/${params.tumor}.cobalt.ratio.pcf" ]; then
echo "Output files already exist. Skipping cobalt execution."
ln -s ${params.outdir}/cobalt/${tumor}.cobalt.ratio.tsv.gz ${tumor}.cobalt.ratio.tsv.gz
ln -s ${params.outdir}/cobalt/${tumor}.cobalt.ratio.pcf ${tumor}.cobalt.ratio.pcf
ln -s ${params.outdir}/cobalt/${params.tumor}.cobalt.ratio.tsv.gz ${params.tumor}.cobalt.ratio.tsv.gz
ln -s ${params.outdir}/cobalt/${params.tumor}.cobalt.ratio.pcf ${params.tumor}.cobalt.ratio.pcf
if [ -f "${params.outdir}/cobalt/${normal}.cobalt.ratio.pcf" ]; then
ln -s ${params.outdir}/cobalt/${normal}.cobalt.ratio.pcf ${normal}.cobalt.ratio.pcf
if [ -f "${params.outdir}/cobalt/${params.normal}.cobalt.ratio.pcf" ]; then
ln -s ${params.outdir}/cobalt/${params.normal}.cobalt.ratio.pcf ${params.normal}.cobalt.ratio.pcf
fi
else
cobalt \\
-tumor ${tumor} \\
-tumor ${params.tumor} \\
-tumor_bam ${tumorBam} \\
${reference_args} \\
-output_dir \$PWD \\
Expand All @@ -146,37 +149,64 @@ process binCobalt {
time '1h'

input:
val tumor
val normal
val binProbes
val binLogR
path cobalt_tumor_ratio_tsv
path cobalt_tumor_ratio_pcf
path cobalt_normal_ratio_pcf

output:
path "${tumor}.cobalt.ratio.tsv.gz", emit: cobalt_tumor_ratio_tsv
path "${tumor}.cobalt.ratio.pcf", emit: cobalt_tumor_ratio_pcf
path "${normal}.cobalt.ratio.pcf", emit: cobalt_normal_ratio_pcf
path "${params.tumor}.cobalt.ratio.tsv.gz", emit: cobalt_tumor_ratio_tsv
path "${params.tumor}.cobalt.ratio.pcf", emit: cobalt_tumor_ratio_pcf
path "${params.normal}.cobalt.ratio.pcf", emit: cobalt_normal_ratio_pcf

script:
"""
# Bin Cobalt Tumor Probes
bin_cobalt.py \\
--in_pcf ${cobalt_tumor_ratio_pcf} \\
--bin_probes ${binProbes} \\
--bin_log_r ${binLogR}
--bin_probes ${params.binProbes} \\
--bin_log_r ${params.binLogR}
# Bin Cobalt Normal probes
if [ -f "${cobalt_normal_ratio_pcf}" ]; then
bin_cobalt.py \\
--in_pcf ${cobalt_normal_ratio_pcf} \\
--bin_probes ${binProbes} \\
--bin_log_r ${binLogR}
--bin_probes ${params.binProbes} \\
--bin_log_r ${params.binLogR}
fi
""".stripIndent()
}

// See https://github.com/hartwigmedical/hmftools/blob/master/sage/README.md#usage
process runSage {
tag "SAGE on ${params.tumor}" + (params.normal ? " vs ${params.normal}" : "")
publishDir "${params.outdir}/sage", mode: 'copy'
cpus params.cores
memory params.memory
time '4h'

input:
path tumorBam
path normalBam

output:
path "${params.tumor}_vs_${params.normal}.vcf.gz", emit: sage_vcf

script:
"""
sage \\
-tumor ${params.tumor} \\
-tumor_bam ${tumorBam} \\
-reference ${params.normal} \\
-reference_bam ${normalBam} \\
-ref_genome ${params.refGenome} \\
-ref_genome_version ${params.genomeVersion} \\
-output_vcf \$PWD/${params.tumor}_vs_${params.normal}.vcf.gz \\
-threads ${params.cores} \\
-ensembl_data_dir ${params.ensemblDataDir}
""".stripIndent()
}


// See https://github.com/hartwigmedical/hmftools/blob/master/purple/README.md#arguments
process runPurple {
tag "PURPLE on ${params.tumor}" + (params.normal ? " vs ${params.normal}" : "")
Expand All @@ -186,10 +216,6 @@ process runPurple {
time '1h'

input:
val tumor
val normal
path somatic_vcf
path germline_vcf
path amber_baf_tsv
path amber_baf_pcf
path amber_qc
Expand All @@ -198,32 +224,31 @@ process runPurple {
path cobalt_tumor_ratio_pcf
path cobalt_normal_ratio_pcf
path cobalt_path
path sage_vcf

output:
path "${tumor}.purple.purity.tsv", emit: purple_purity_tsv
path "${tumor}.purple.qc", emit: purple_qc
path "${tumor}.purple.purity.range.tsv", emit: purple_purity_range_tsv
path "${tumor}.purple.cnv.somatic.tsv", emit: purple_cnv_somatic_tsv
path "${tumor}.purple.cnv.gene.tsv", emit: purple_cnv_gene_tsv
path "${tumor}.purple.segment.tsv", emit: purple_segment_tsv
path "${tumor}.purple.somatic.clonality.tsv", emit: purple_somatic_clonality_tsv
path "plot/${tumor}.segment.png", emit: purple_segment_png
path "plot/${tumor}.copynumber.png", emit: purple_copynumber_png
path "plot/${tumor}.circos.png", emit: purple_circos_png
path "plot/${tumor}.map.png", emit: purple_map_png
path "plot/${tumor}.input.png", emit: purple_input_png
path "plot/${tumor}.purity.range.png", emit: purple_purity_range_png
path "${params.tumor}.purple.purity.tsv", emit: purple_purity_tsv
path "${params.tumor}.purple.qc", emit: purple_qc
path "${params.tumor}.purple.purity.range.tsv", emit: purple_purity_range_tsv
path "${params.tumor}.purple.cnv.somatic.tsv", emit: purple_cnv_somatic_tsv
path "${params.tumor}.purple.cnv.gene.tsv", emit: purple_cnv_gene_tsv
path "${params.tumor}.purple.segment.tsv", emit: purple_segment_tsv
path "${params.tumor}.purple.somatic.clonality.tsv", emit: purple_somatic_clonality_tsv
path "plot/${params.tumor}.segment.png", emit: purple_segment_png
path "plot/${params.tumor}.copynumber.png", emit: purple_copynumber_png
path "plot/${params.tumor}.circos.png", emit: purple_circos_png
path "plot/${params.tumor}.map.png", emit: purple_map_png
path "plot/${params.tumor}.input.png", emit: purple_input_png
path "plot/${params.tumor}.purity.range.png", emit: purple_purity_range_png

script:
def reference_args = normal ? "-reference ${normal}" : ""
def somatic_vcf_args = somatic_vcf ? "-somatic_vcf ${somatic_vcf}" : ""
def germline_vcf_args = germline_vcf ? "-germline_vcf ${germline_vcf}" : ""
def reference_args = params.normal ? """-reference ${params.normal}""" : ""
def somatic_vcf_args = params.normal && sage_vcf ? """-somatic_vcf ${sage_vcf}""" : ""

"""
purple \\
-tumor ${tumor} \\
-tumor ${params.tumor} \\
${reference_args} \\
${somatic_vcf_args} \\
${germline_vcf_args} \\
-amber ${params.outdir}/amber \\
-cobalt ${cobalt_path} \\
-output_dir \$PWD \\
Expand All @@ -240,35 +265,26 @@ process runPurple {
}

workflow {
// Arguments
tumor = Channel.value(params.tumor)
normal = Channel.value(params.normal)
// Input Bams
tumorBam = Channel.fromPath(params.tumorBam)
normalBam = Channel.fromPath(params.normalBam)
binProbes = Channel.value(params.binProbes)
binLogR = Channel.value(params.binLogR)
somaticVcf = Channel.value(params.somaticVcf)
germlineVcf = Channel.value(params.germlineVcf)

// Run Amber and Cobalt
amberOutput = runAmber(tumor, normal, tumorBam, normalBam)
cobaltOutput = runCobalt(tumor, normal, tumorBam, normalBam)
// Run Amber, Cobalt and Sage
amberOutput = runAmber(tumorBam, normalBam)
cobaltOutput = runCobalt(tumorBam, normalBam)
sageOutput = runSage(tumorBam, normalBam)

// Bin Cobalt if expected
postCobaltOutput = (binProbes != 0 || binLogR != 0)
? binCobalt(tumor, normal, binProbes, binLogR, cobaltOutput.cobalt_tumor_ratio_tsv, cobaltOutput.cobalt_tumor_ratio_pcf, cobaltOutput.cobalt_normal_ratio_pcf)
postCobaltOutput = (params.binProbes != 0 || params.binLogR != 0)
? binCobalt(cobaltOutput.cobalt_tumor_ratio_tsv, cobaltOutput.cobalt_tumor_ratio_pcf, cobaltOutput.cobalt_normal_ratio_pcf)
: cobaltOutput

cobaltOutdir = (binProbes != 0 || binLogR != 0)
cobaltOutdir = (params.binProbes != 0 || params.binLogR != 0)
? "${params.outdir}/cobalt/binned_${params.binProbes}_probes_${params.binLogR}_LogR"
: "${params.outdir}/cobalt"

// Run Purple
runPurple(
tumor,
normal,
somaticVcf,
germlineVcf,
amberOutput.amber_baf_tsv,
amberOutput.amber_baf_pcf,
amberOutput.amber_qc,
Expand All @@ -277,6 +293,7 @@ workflow {
postCobaltOutput.cobalt_tumor_ratio_pcf,
postCobaltOutput.cobalt_normal_ratio_pcf,
cobaltOutdir,
sageOutput.sage_vcf,
)
}

Expand Down
2 changes: 0 additions & 2 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,6 @@ profiles {
}

hpc_slurm {
singularity.enabled = true
singularity.autoMounts = true
process {
executor = 'slurm'
queue = 'componc_cpu'
Expand Down
10 changes: 4 additions & 6 deletions run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,14 @@ TUMOR=IID_H211025_T01_01_WG01
NORMAL=IID_H211025_N01_01_WG01
TUMOR_BAM=`isabl get-bams ${TUMOR}`
NORMAL_BAM=`isabl get-bams ${NORMAL}`
SOMATIC_VCF=/data1/papaemme/isabl/data/analyses/18/97/541897/merged/IID_H211025_T01_01_WG01_vs_IID_H211025_N01_01_WG01.snvs.pass.flagged.vcf.gz
GERMLINE_VCF=/data1/papaemme/isabl/data/analyses/17/51/541751/merged/IID_H211025_N01_01_WG01.snvs.vcf.gz
SOMATIC_VCF=/data1/papaemme/isabl/home/liosisk/benchmarking/callers/sage/.sage.vcf.gz

NF_PURPLE=/data1/papaemme/isabl/home/svc_papaemme_bot/dev/nf-purple/main.nf
OUTDIR=/data1/papaemme/isabl/home/svc_papaemme_bot/tmp/purple_matched
REFGENOME=/data1/papaemme/isabl/ref/homo_sapiens/GRCh37d5/gr37.fasta
GENOMEVERSION=V37
GENOMEVERSION=37

REFDIR=/data1/papaemme/isabl/ref/homo_sapiens/37/hmftools
REFDIR=/data1/papaemme/isabl/ref/homo_sapiens/37/hmftools/v6_0/ref/37
LOCI=${REFDIR}/copy_number/GermlineHetPon.37.vcf.gz
GCPROFILE=${REFDIR}/copy_number/GC_profile.1000bp.37.cnp
DIPLOIDREGIONS=${REFDIR}/copy_number/DiploidRegions.37.bed.gz
Expand All @@ -29,7 +28,6 @@ nextflow run \
--normal ${NORMAL} \
--normalBam ${NORMAL_BAM} \
--somaticVcf $SOMATIC_VCF \
--germlineVcf $GERMLINE_VCF \
--outdir ${OUTDIR} \
--loci ${LOCI} \
--gcProfile ${GCPROFILE} \
Expand All @@ -38,7 +36,7 @@ nextflow run \
--genomeVersion ${GENOMEVERSION} \
--refGenome ${REFGENOME} \
--circos ${CIRCOS} \
--cores 8 \
--cores 16 \
--memory '64G' \
--binProbes 100 \
--binLogR 0.5 \
Expand Down
27 changes: 21 additions & 6 deletions run_sage.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,36 @@ CORES=16

mkdir -p ${OUTDIR}

# singularity run \
# --bind /data1:/data1 \
# --bind /scratch:/scratch \
# --bind /usersoftware:/usersoftware \
# /data1/papaemme/isabl/home/liosisk/images/sage.sif \
# java -Xms4G -Xmx64G -cp /sage_v3.0_beta.jar com.hartwig.hmftools.sage.SageApplication \
# -threads ${CORES} \
# -tumor ${TUMOR} \
# -tumor_bam ${TUMOR_BAM} \
# -reference ${NORMAL} \
# -reference_bam ${NORMAL_BAM} \
# -ref_genome_version ${GENOMEVERSION} \
# -ref_genome ${REFGENOME} \
# -hotspots ${REFDIR}/KnownHotspots.somatic.37.vcf.gz \
# -panel_bed ${REFDIR}/ActionableCodingPanel.somatic.37.bed.gz \
# -high_confidence_bed ${REFDIR}/GIAB-High-Conf/37/NA12878_GIAB_highconf_IllFB-IllGATKHC-CG-Ion-Solid_ALLCHROM_v3.2.2_highconf.bed \
# -ensembl_data_dir ${REFDIR}/Ensembl-Data-Cache \
# -out "${REFDIR}/${TNAME}.sage.vcf.gz"


singularity run \
--bind /data1:/data1 \
--bind /scratch:/scratch \
--bind /usersoftware:/usersoftware \
/data1/papaemme/isabl/home/liosisk/images/sage.sif \
java -Xms4G -Xmx64G -cp /sage_v3.0_beta.jar com.hartwig.hmftools.sage.SageApplication \
-threads ${CORES} \
-tumor ${TUMOR} \
-tumor_bam ${TUMOR_BAM} \
-reference ${NORMAL} \
-reference_bam ${NORMAL_BAM} \
-ref_genome_version ${GENOMEVERSION} \
-ref_genome ${REFGENOME} \
-hotspots ${REFDIR}/KnownHotspots.somatic.37.vcf.gz \
-panel_bed ${REFDIR}/ActionableCodingPanel.somatic.37.bed.gz \
-high_confidence_bed ${REFDIR}/GIAB-High-Conf/37/NA12878_GIAB_highconf_IllFB-IllGATKHC-CG-Ion-Solid_ALLCHROM_v3.2.2_highconf.bed \
-ensembl_data_dir ${REFDIR}/Ensembl-Data-Cache \
-out "${REFDIR}/${TNAME}.sage.vcf.gz"
-output_vcf "${OUTDIR}/${TUMOR}.sage.vcf.gz"

0 comments on commit 8e7de21

Please sign in to comment.