Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Features/combined calling mode #9

Merged
merged 10 commits into from
Feb 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,14 @@ RUN wget https://github.com/genepi/haplogrep3/releases/download/v3.2.1/haplogrep
rm haplogrep3-3.2.1-linux.zip
ENV PATH="/opt/haplogrep:${PATH}"

WORKDIR "/opt"
RUN wget https://github.com/jbangdev/jbang/releases/download/v0.91.0/jbang-0.91.0.zip && \
unzip -q jbang-*.zip && \
mv jbang-0.91.0 jbang && \
rm jbang*.zip

ENV PATH="/opt/jbang/bin:${PATH}"
WORKDIR "/opt"
COPY ./bin/VariantMerger.java ./
RUN jbang export portable -O=VariantMerger.jar VariantMerger.java

86 changes: 86 additions & 0 deletions bin/VariantMerger.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
//usr/bin/env jbang "$0" "$@" ; exit $?
//REPOS jcenter,genepi-maven=https://genepi.i-med.ac.at/maven
//DEPS info.picocli:picocli:4.6.1
//DEPS genepi:genepi-io:1.1.1

import java.io.File;
import java.util.concurrent.Callable;
import genepi.io.table.reader.CsvTableReader;
import genepi.io.table.writer.CsvTableWriter;
import picocli.CommandLine;
import picocli.CommandLine.Option;
import picocli.CommandLine.Parameters;

public class VariantMerger implements Callable<Integer> {

@Parameters(description = "Combined variants file")
private String file;

@Option(names = "--output", description = "Output files", required = true)
private String output;

@Option(names = "--indel-tag", description = "Detect indels by this tag", required = false)
private String tag = "INDEL";

public Integer call() throws Exception {

assert (file != null);
assert (output != null);

CsvTableWriter writer = new CsvTableWriter(new File(output).getAbsolutePath(), '\t', false);
CsvTableReader reader = new CsvTableReader(file, '\t');

int lastPos = 0;
String[] lastRow = null;
int diff = 0;
writer.setColumns(reader.getColumns());

while (reader.next()) {

int pos = reader.getInteger("Pos");
int refLength = reader.getString("Ref").length();
int variantlength = reader.getString("Variant").length();

// init new position
if (lastPos == 0 || pos == 3107 ) {
diff = refLength - variantlength;
lastPos = pos;
lastRow = reader.getRow();
continue;
}

if (comparePositions(lastPos, pos, diff)) {
writer.setRow(lastRow);
writer.next();
lastPos = 0;
} else {
// since no hit, write previous row.
writer.setRow(lastRow);
writer.next();
diff = refLength - variantlength;
lastPos = pos;
lastRow = reader.getRow();
}

}
writer.setRow(lastRow);
writer.next();
reader.close();
writer.close();
return 0;
}

public static void main(String... args) {
int exitCode = new CommandLine(new VariantMerger()).execute(args);
System.exit(exitCode);
}

public boolean comparePositions(int lastPos, int pos, int diff) {
for (int i = 0; i <= Math.abs(diff); i++) {
if ((lastPos + i) == pos) {
return true;
}
}
return false;
}
}
Binary file modified files/mutserve.zip
Binary file not shown.
8 changes: 6 additions & 2 deletions modules/local/annotate.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,13 @@ process ANNOTATE {
path annotation

output:
path ("${variants_ch.baseName}_ann.txt"), emit: variants_ann_ch
path ("${variants_ch.baseName}.annotated.txt"), emit: variants_ann_ch

"""
java -jar /opt/mutserve/mutserve.jar annotate --input ${variants_ch} --output ${variants_ch.baseName}_ann.txt --annotation ${annotation}
java -jar /opt/mutserve/mutserve.jar \
annotate \
--input ${variants_ch} \
--output ${variants_ch.baseName}.annotated.txt \
--annotation ${annotation}
"""
}
12 changes: 0 additions & 12 deletions modules/local/contamination_detection.nf

This file was deleted.

38 changes: 38 additions & 0 deletions modules/local/filter_variants.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
process FILTER_VARIANTS {

//publishDir "${params.output}", mode: 'copy'

input:
tuple path(vcf_file), val(method)

output:
path("${vcf_file.baseName}.${method}.filtered.txt"), emit: combined_methods_ch

script:
def vcf_name = "${vcf_file}".replaceAll('.vcf.gz', '')

"""
echo -e "ID\tFilter\tPos\tRef\tVariant\tVariantLevel\tCoverage\tType" \
> ${vcf_file.baseName}.${method}.txt

if [[ ${method} == "mutserve_fusion" ]]
then
bcftools query \
-f '${vcf_name}.bam\t%FILTER\t%POS\t%REF\t%ALT\t[%AF\t%DP\t%GT]\n' \
${vcf_file} >> ${vcf_file.baseName}.${method}.txt

awk -F'\t' 'NR == 1 || (length(\$4) == 1 && length(\$5) == 1)' \
${vcf_file.baseName}.${method}.txt > ${vcf_file.baseName}.${method}.filtered.txt

elif [[ ${method} == "mutect2_fusion" ]]
then
bcftools query \
-f '${vcf_name}.bam\t%FILTER\t%POS\t%REF\t%ALT\t[%AF\t%DP]\tINDEL\n' \
${vcf_file} >> ${vcf_file.baseName}.${method}.txt

awk -F'\t' 'NR == 1 || ((length(\$4) > 1 || length(\$5) > 1) && length(\$4) != length(\$5))' \
${vcf_file.baseName}.${method}.txt > ${vcf_file.baseName}.${method}.filtered.txt
fi
"""
}

14 changes: 0 additions & 14 deletions modules/local/haplogroup_detection.nf

This file was deleted.

26 changes: 26 additions & 0 deletions modules/local/haplogroups_contamination.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
process HAPLOGROUPS_CONTAMINATION {

publishDir "${params.output_auxiliary}", mode: 'copy'

input:
path(merged_vcf_ch)

output:
path ("haplogroups.txt"), emit: haplogroups_ch
path ("haplocheck.html"), emit: contamination_report_ch
path ("haplocheck.txt"), emit: contamination_txt_ch

"""
java -jar /opt/haplogrep/haplogrep3.jar \
classify \
--tree [email protected] \
--in ${merged_vcf_ch} \
--out haplogroups.txt \
--extend-report

java -jar /opt/haplocheck/haplocheck.jar \
--out haplocheck.txt \
--raw \
${merged_vcf_ch}
"""
}
3 changes: 2 additions & 1 deletion modules/local/index.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ process INDEX {

"""
samtools faidx $reference
samtools dict $reference -o ${reference.baseName}.dict
samtools dict $reference \
-o ${reference.baseName}.dict
"""
}
28 changes: 17 additions & 11 deletions modules/local/input_validation.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,23 @@ process INPUT_VALIDATION {
path("contig.txt"), emit: contig_ch

"""
csvtk concat -t ${statistics} -T -o sample_statistics.txt
csvtk concat -t ${mapping} -T -o sample_mappings.txt
csvtk concat \
-t ${statistics} \
-T -o sample_statistics.txt

csvtk concat \
-t ${mapping} \
-T -o sample_mappings.txt

java -jar /opt/mutserve/mutserve.jar stats \
--input sample_statistics.txt \
--detection-limit ${params.detection_limit} \
--reference ${params.reference} \
--baseQ ${params.baseQ}\
--mapQ ${params.mapQ} \
--alignQ ${params.alignQ} \
--output-excluded-samples excluded_samples.txt \
--output-contig contig.txt \
--tool ${params.mode}
--input sample_statistics.txt \
--detection-limit ${params.detection_limit} \
--reference ${params.reference} \
--baseQ ${params.baseQ}\
--mapQ ${params.mapQ} \
--alignQ ${params.alignQ} \
--output-excluded-samples excluded_samples.txt \
--output-contig contig.txt \
--tool ${params.mode}
"""
}
51 changes: 51 additions & 0 deletions modules/local/merge/strategy_mutect2.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
process STRATEGY_MUTECT2 {

//publishDir "${params.output}", mode: 'copy'

input:
path bam_file
path reference
path excluded_samples
path fasta_index_files
val detected_contig

output:
path("${bam_file.baseName}.mutect2.filtered.txt"), emit: results

"""
samtools index ${bam_file}

gatk Mutect2 \
-R ${reference} \
-L ${detected_contig} \
--min-base-quality-score ${params.baseQ} \
-callable-depth 6 --max-reads-per-alignment-start 0 \
-I ${bam_file} \
-O raw.vcf.gz

gatk FilterMutectCalls \
-R ${reference} \
--min-reads-per-strand 2 \
-V raw.vcf.gz \
-O ${bam_file.baseName}.vcf.gz

bcftools norm -m-any -f ${reference} ${bam_file.baseName}.vcf.gz -o ${bam_file.baseName}.filtered.vcf.gz -Oz

tabix ${bam_file.baseName}.filtered.vcf.gz
gatk LeftAlignAndTrimVariants \
-R ${reference} \
-V ${bam_file.baseName}.vcf.gz \
-O ${bam_file.baseName}.filtered2.vcf.gz \
--max-indel-length 208

rm raw.vcf.gz

echo -e "ID\tFilter\tPos\tRef\tVariant\tVariantLevel\tCoverage\tType" > ${bam_file.baseName}.mutect2.txt

bcftools query -f '${bam_file}\t%FILTER\t%POS\t%REF\t%ALT\t[%AF\t%DP]\t0\n' ${bam_file.baseName}.filtered2.vcf.gz >> ${bam_file.baseName}.mutect2.txt

awk -F'\t' 'NR == 1 || length(\$4) > 1 || length(\$5) > 1' ${bam_file.baseName}.mutect2.txt > ${bam_file.baseName}.mutect2.filtered.txt


"""
}
29 changes: 29 additions & 0 deletions modules/local/merging_variants.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
process MERGING_VARIANTS {

input:
path variants_txt
val mode

output:
path("variants.txt"), emit: txt_summarized_ch

"""
csvtk concat \
-t ${variants_txt} \
-T -o variants.concat.txt

csvtk sort \
-t variants.concat.txt \
-k ID:N -k Pos:n -k Type:r \
-T -o variants.sorted.txt

if [[ ${mode} == "fusion" ]]
then
java -jar /opt/VariantMerger.jar \
variants.sorted.txt \
--output variants.txt
else
mv variants.sorted.txt variants.txt
fi
"""
}
Loading
Loading