Skip to content

Commit

Permalink
Features/combined calling mode (#9)
Browse files Browse the repository at this point in the history
* Add basic implementation

* Add normalization and merge script

* Update merging strategy

* Improve mode to combine MUTECT2 and mutserve

* Finalize fusion mode

* Format code

* Fix testcases and format modules

* Set fusion as default mode

* Recreate tabix index

* Update mutserve
  • Loading branch information
seppinho authored Feb 27, 2024
1 parent fe62869 commit 7012b79
Show file tree
Hide file tree
Showing 26 changed files with 475 additions and 137 deletions.
10 changes: 10 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,14 @@ RUN wget https://github.com/genepi/haplogrep3/releases/download/v3.2.1/haplogrep
rm haplogrep3-3.2.1-linux.zip
ENV PATH="/opt/haplogrep:${PATH}"

WORKDIR "/opt"
RUN wget https://github.com/jbangdev/jbang/releases/download/v0.91.0/jbang-0.91.0.zip && \
unzip -q jbang-*.zip && \
mv jbang-0.91.0 jbang && \
rm jbang*.zip

ENV PATH="/opt/jbang/bin:${PATH}"
WORKDIR "/opt"
COPY ./bin/VariantMerger.java ./
RUN jbang export portable -O=VariantMerger.jar VariantMerger.java

86 changes: 86 additions & 0 deletions bin/VariantMerger.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
//usr/bin/env jbang "$0" "$@" ; exit $?
//REPOS jcenter,genepi-maven=https://genepi.i-med.ac.at/maven
//DEPS info.picocli:picocli:4.6.1
//DEPS genepi:genepi-io:1.1.1

import java.io.File;
import java.util.concurrent.Callable;
import genepi.io.table.reader.CsvTableReader;
import genepi.io.table.writer.CsvTableWriter;
import picocli.CommandLine;
import picocli.CommandLine.Option;
import picocli.CommandLine.Parameters;

public class VariantMerger implements Callable<Integer> {

@Parameters(description = "Combined variants file")
private String file;

@Option(names = "--output", description = "Output files", required = true)
private String output;

@Option(names = "--indel-tag", description = "Detect indels by this tag", required = false)
private String tag = "INDEL";

public Integer call() throws Exception {

assert (file != null);
assert (output != null);

CsvTableWriter writer = new CsvTableWriter(new File(output).getAbsolutePath(), '\t', false);
CsvTableReader reader = new CsvTableReader(file, '\t');

int lastPos = 0;
String[] lastRow = null;
int diff = 0;
writer.setColumns(reader.getColumns());

while (reader.next()) {

int pos = reader.getInteger("Pos");
int refLength = reader.getString("Ref").length();
int variantlength = reader.getString("Variant").length();

// init new position
if (lastPos == 0 || pos == 3107 ) {
diff = refLength - variantlength;
lastPos = pos;
lastRow = reader.getRow();
continue;
}

if (comparePositions(lastPos, pos, diff)) {
writer.setRow(lastRow);
writer.next();
lastPos = 0;
} else {
// since no hit, write previous row.
writer.setRow(lastRow);
writer.next();
diff = refLength - variantlength;
lastPos = pos;
lastRow = reader.getRow();
}

}
writer.setRow(lastRow);
writer.next();
reader.close();
writer.close();
return 0;
}

public static void main(String... args) {
int exitCode = new CommandLine(new VariantMerger()).execute(args);
System.exit(exitCode);
}

public boolean comparePositions(int lastPos, int pos, int diff) {
for (int i = 0; i <= Math.abs(diff); i++) {
if ((lastPos + i) == pos) {
return true;
}
}
return false;
}
}
Binary file modified files/mutserve.zip
Binary file not shown.
8 changes: 6 additions & 2 deletions modules/local/annotate.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,13 @@ process ANNOTATE {
path annotation

output:
path ("${variants_ch.baseName}_ann.txt"), emit: variants_ann_ch
path ("${variants_ch.baseName}.annotated.txt"), emit: variants_ann_ch

"""
java -jar /opt/mutserve/mutserve.jar annotate --input ${variants_ch} --output ${variants_ch.baseName}_ann.txt --annotation ${annotation}
java -jar /opt/mutserve/mutserve.jar \
annotate \
--input ${variants_ch} \
--output ${variants_ch.baseName}.annotated.txt \
--annotation ${annotation}
"""
}
12 changes: 0 additions & 12 deletions modules/local/contamination_detection.nf

This file was deleted.

38 changes: 38 additions & 0 deletions modules/local/filter_variants.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
process FILTER_VARIANTS {

//publishDir "${params.output}", mode: 'copy'

input:
tuple path(vcf_file), val(method)

output:
path("${vcf_file.baseName}.${method}.filtered.txt"), emit: combined_methods_ch

script:
def vcf_name = "${vcf_file}".replaceAll('.vcf.gz', '')

"""
echo -e "ID\tFilter\tPos\tRef\tVariant\tVariantLevel\tCoverage\tType" \
> ${vcf_file.baseName}.${method}.txt
if [[ ${method} == "mutserve_fusion" ]]
then
bcftools query \
-f '${vcf_name}.bam\t%FILTER\t%POS\t%REF\t%ALT\t[%AF\t%DP\t%GT]\n' \
${vcf_file} >> ${vcf_file.baseName}.${method}.txt
awk -F'\t' 'NR == 1 || (length(\$4) == 1 && length(\$5) == 1)' \
${vcf_file.baseName}.${method}.txt > ${vcf_file.baseName}.${method}.filtered.txt
elif [[ ${method} == "mutect2_fusion" ]]
then
bcftools query \
-f '${vcf_name}.bam\t%FILTER\t%POS\t%REF\t%ALT\t[%AF\t%DP]\tINDEL\n' \
${vcf_file} >> ${vcf_file.baseName}.${method}.txt
awk -F'\t' 'NR == 1 || ((length(\$4) > 1 || length(\$5) > 1) && length(\$4) != length(\$5))' \
${vcf_file.baseName}.${method}.txt > ${vcf_file.baseName}.${method}.filtered.txt
fi
"""
}

14 changes: 0 additions & 14 deletions modules/local/haplogroup_detection.nf

This file was deleted.

26 changes: 26 additions & 0 deletions modules/local/haplogroups_contamination.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
process HAPLOGROUPS_CONTAMINATION {

publishDir "${params.output_auxiliary}", mode: 'copy'

input:
path(merged_vcf_ch)

output:
path ("haplogroups.txt"), emit: haplogroups_ch
path ("haplocheck.html"), emit: contamination_report_ch
path ("haplocheck.txt"), emit: contamination_txt_ch

"""
java -jar /opt/haplogrep/haplogrep3.jar \
classify \
--tree [email protected] \
--in ${merged_vcf_ch} \
--out haplogroups.txt \
--extend-report
java -jar /opt/haplocheck/haplocheck.jar \
--out haplocheck.txt \
--raw \
${merged_vcf_ch}
"""
}
3 changes: 2 additions & 1 deletion modules/local/index.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ process INDEX {

"""
samtools faidx $reference
samtools dict $reference -o ${reference.baseName}.dict
samtools dict $reference \
-o ${reference.baseName}.dict
"""
}
28 changes: 17 additions & 11 deletions modules/local/input_validation.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,23 @@ process INPUT_VALIDATION {
path("contig.txt"), emit: contig_ch

"""
csvtk concat -t ${statistics} -T -o sample_statistics.txt
csvtk concat -t ${mapping} -T -o sample_mappings.txt
csvtk concat \
-t ${statistics} \
-T -o sample_statistics.txt
csvtk concat \
-t ${mapping} \
-T -o sample_mappings.txt
java -jar /opt/mutserve/mutserve.jar stats \
--input sample_statistics.txt \
--detection-limit ${params.detection_limit} \
--reference ${params.reference} \
--baseQ ${params.baseQ}\
--mapQ ${params.mapQ} \
--alignQ ${params.alignQ} \
--output-excluded-samples excluded_samples.txt \
--output-contig contig.txt \
--tool ${params.mode}
--input sample_statistics.txt \
--detection-limit ${params.detection_limit} \
--reference ${params.reference} \
--baseQ ${params.baseQ}\
--mapQ ${params.mapQ} \
--alignQ ${params.alignQ} \
--output-excluded-samples excluded_samples.txt \
--output-contig contig.txt \
--tool ${params.mode}
"""
}
51 changes: 51 additions & 0 deletions modules/local/merge/strategy_mutect2.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
process STRATEGY_MUTECT2 {

//publishDir "${params.output}", mode: 'copy'

input:
path bam_file
path reference
path excluded_samples
path fasta_index_files
val detected_contig

output:
path("${bam_file.baseName}.mutect2.filtered.txt"), emit: results

"""
samtools index ${bam_file}
gatk Mutect2 \
-R ${reference} \
-L ${detected_contig} \
--min-base-quality-score ${params.baseQ} \
-callable-depth 6 --max-reads-per-alignment-start 0 \
-I ${bam_file} \
-O raw.vcf.gz
gatk FilterMutectCalls \
-R ${reference} \
--min-reads-per-strand 2 \
-V raw.vcf.gz \
-O ${bam_file.baseName}.vcf.gz
bcftools norm -m-any -f ${reference} ${bam_file.baseName}.vcf.gz -o ${bam_file.baseName}.filtered.vcf.gz -Oz
tabix ${bam_file.baseName}.filtered.vcf.gz
gatk LeftAlignAndTrimVariants \
-R ${reference} \
-V ${bam_file.baseName}.vcf.gz \
-O ${bam_file.baseName}.filtered2.vcf.gz \
--max-indel-length 208
rm raw.vcf.gz
echo -e "ID\tFilter\tPos\tRef\tVariant\tVariantLevel\tCoverage\tType" > ${bam_file.baseName}.mutect2.txt
bcftools query -f '${bam_file}\t%FILTER\t%POS\t%REF\t%ALT\t[%AF\t%DP]\t0\n' ${bam_file.baseName}.filtered2.vcf.gz >> ${bam_file.baseName}.mutect2.txt
awk -F'\t' 'NR == 1 || length(\$4) > 1 || length(\$5) > 1' ${bam_file.baseName}.mutect2.txt > ${bam_file.baseName}.mutect2.filtered.txt
"""
}
29 changes: 29 additions & 0 deletions modules/local/merging_variants.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
process MERGING_VARIANTS {

input:
path variants_txt
val mode

output:
path("variants.txt"), emit: txt_summarized_ch

"""
csvtk concat \
-t ${variants_txt} \
-T -o variants.concat.txt
csvtk sort \
-t variants.concat.txt \
-k ID:N -k Pos:n -k Type:r \
-T -o variants.sorted.txt
if [[ ${mode} == "fusion" ]]
then
java -jar /opt/VariantMerger.jar \
variants.sorted.txt \
--output variants.txt
else
mv variants.sorted.txt variants.txt
fi
"""
}
Loading

0 comments on commit 7012b79

Please sign in to comment.