-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add basic implementation * Add normalization and merge script * Update merging strategy * Improve mode to combine MUTECT2 and mutserve * Finalize fusion mode * Format code * Fix testcases and format modules * Set fusion as default mode * Recreate tabix index * Update mutserve
- Loading branch information
Showing
26 changed files
with
475 additions
and
137 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
//usr/bin/env jbang "$0" "$@" ; exit $? | ||
//REPOS jcenter,genepi-maven=https://genepi.i-med.ac.at/maven | ||
//DEPS info.picocli:picocli:4.6.1 | ||
//DEPS genepi:genepi-io:1.1.1 | ||
|
||
import java.io.File; | ||
import java.util.concurrent.Callable; | ||
import genepi.io.table.reader.CsvTableReader; | ||
import genepi.io.table.writer.CsvTableWriter; | ||
import picocli.CommandLine; | ||
import picocli.CommandLine.Option; | ||
import picocli.CommandLine.Parameters; | ||
|
||
public class VariantMerger implements Callable<Integer> { | ||
|
||
@Parameters(description = "Combined variants file") | ||
private String file; | ||
|
||
@Option(names = "--output", description = "Output files", required = true) | ||
private String output; | ||
|
||
@Option(names = "--indel-tag", description = "Detect indels by this tag", required = false) | ||
private String tag = "INDEL"; | ||
|
||
public Integer call() throws Exception { | ||
|
||
assert (file != null); | ||
assert (output != null); | ||
|
||
CsvTableWriter writer = new CsvTableWriter(new File(output).getAbsolutePath(), '\t', false); | ||
CsvTableReader reader = new CsvTableReader(file, '\t'); | ||
|
||
int lastPos = 0; | ||
String[] lastRow = null; | ||
int diff = 0; | ||
writer.setColumns(reader.getColumns()); | ||
|
||
while (reader.next()) { | ||
|
||
int pos = reader.getInteger("Pos"); | ||
int refLength = reader.getString("Ref").length(); | ||
int variantlength = reader.getString("Variant").length(); | ||
|
||
// init new position | ||
if (lastPos == 0 || pos == 3107 ) { | ||
diff = refLength - variantlength; | ||
lastPos = pos; | ||
lastRow = reader.getRow(); | ||
continue; | ||
} | ||
|
||
if (comparePositions(lastPos, pos, diff)) { | ||
writer.setRow(lastRow); | ||
writer.next(); | ||
lastPos = 0; | ||
} else { | ||
// since no hit, write previous row. | ||
writer.setRow(lastRow); | ||
writer.next(); | ||
diff = refLength - variantlength; | ||
lastPos = pos; | ||
lastRow = reader.getRow(); | ||
} | ||
|
||
} | ||
writer.setRow(lastRow); | ||
writer.next(); | ||
reader.close(); | ||
writer.close(); | ||
return 0; | ||
} | ||
|
||
public static void main(String... args) { | ||
int exitCode = new CommandLine(new VariantMerger()).execute(args); | ||
System.exit(exitCode); | ||
} | ||
|
||
public boolean comparePositions(int lastPos, int pos, int diff) { | ||
for (int i = 0; i <= Math.abs(diff); i++) { | ||
if ((lastPos + i) == pos) { | ||
return true; | ||
} | ||
} | ||
return false; | ||
} | ||
} |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
// Converts one VCF file into a tab-separated variants table and keeps only
// the rows that belong to the given calling method:
//   - "mutserve_fusion": rows where REF and ALT are both one character long;
//     the Type column is filled with the genotype (%GT).
//   - "mutect2_fusion": rows where REF or ALT is longer than one character
//     and both lengths differ; the Type column is the literal "INDEL".
// The header line (NR == 1) is always kept.
// NOTE(review): for any other method value no *.filtered.txt file is written,
// so the declared output would be missing - confirm callers only pass the
// two values handled below.
process FILTER_VARIANTS { | ||
|
||
//publishDir "${params.output}", mode: 'copy' | ||
|
||
// VCF file together with the name of the method that produced it
input: | ||
tuple path(vcf_file), val(method) | ||
|
||
// filtered table, named after the VCF base name and the method
output: | ||
path("${vcf_file.baseName}.${method}.filtered.txt"), emit: combined_methods_ch | ||
|
||
script: | ||
// Sample name used to build the ID column ("<vcf_name>.bam").
// NOTE(review): replaceAll takes a regular expression, so the dots in
// '.vcf.gz' match any character - confirm this is acceptable.
def vcf_name = "${vcf_file}".replaceAll('.vcf.gz', '') | ||
|
||
""" | ||
echo -e "ID\tFilter\tPos\tRef\tVariant\tVariantLevel\tCoverage\tType" \ | ||
> ${vcf_file.baseName}.${method}.txt | ||
if [[ ${method} == "mutserve_fusion" ]] | ||
then | ||
bcftools query \ | ||
-f '${vcf_name}.bam\t%FILTER\t%POS\t%REF\t%ALT\t[%AF\t%DP\t%GT]\n' \ | ||
${vcf_file} >> ${vcf_file.baseName}.${method}.txt | ||
awk -F'\t' 'NR == 1 || (length(\$4) == 1 && length(\$5) == 1)' \ | ||
${vcf_file.baseName}.${method}.txt > ${vcf_file.baseName}.${method}.filtered.txt | ||
elif [[ ${method} == "mutect2_fusion" ]] | ||
then | ||
bcftools query \ | ||
-f '${vcf_name}.bam\t%FILTER\t%POS\t%REF\t%ALT\t[%AF\t%DP]\tINDEL\n' \ | ||
${vcf_file} >> ${vcf_file.baseName}.${method}.txt | ||
awk -F'\t' 'NR == 1 || ((length(\$4) > 1 || length(\$5) > 1) && length(\$4) != length(\$5))' \ | ||
${vcf_file.baseName}.${method}.txt > ${vcf_file.baseName}.${method}.filtered.txt | ||
fi | ||
""" | ||
} | ||
|
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
// Runs haplogrep3 "classify" and haplocheck on the merged VCF file and
// publishes the resulting files to params.output_auxiliary.
// NOTE(review): the value of --tree below ("[email protected]") looks like an
// e-mail obfuscation artifact of the page this text was taken from, not a
// valid tree identifier - restore the real "<tree>@<version>" value from the
// repository before using this process.
// NOTE(review): haplocheck.html is declared as output but not named in any
// command; presumably haplocheck writes it next to --out - confirm.
process HAPLOGROUPS_CONTAMINATION { | ||
|
||
publishDir "${params.output_auxiliary}", mode: 'copy' | ||
|
||
// merged VCF file that is passed to both tools
input: | ||
path(merged_vcf_ch) | ||
|
||
// one channel per result file
output: | ||
path ("haplogroups.txt"), emit: haplogroups_ch | ||
path ("haplocheck.html"), emit: contamination_report_ch | ||
path ("haplocheck.txt"), emit: contamination_txt_ch | ||
|
||
""" | ||
java -jar /opt/haplogrep/haplogrep3.jar \ | ||
classify \ | ||
--tree [email protected] \ | ||
--in ${merged_vcf_ch} \ | ||
--out haplogroups.txt \ | ||
--extend-report | ||
java -jar /opt/haplocheck/haplocheck.jar \ | ||
--out haplocheck.txt \ | ||
--raw \ | ||
${merged_vcf_ch} | ||
""" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
// Calls variants on one BAM file with GATK Mutect2 and writes a
// tab-separated table that keeps only rows where REF or ALT is longer than
// one character (plus the header line).
// Steps of the script, in order: samtools index, Mutect2, FilterMutectCalls,
// bcftools norm + tabix, LeftAlignAndTrimVariants, bcftools query, awk.
// NOTE(review): bcftools norm writes <base>.filtered.vcf.gz, but
// LeftAlignAndTrimVariants reads <base>.vcf.gz and the table is built from
// <base>.filtered2.vcf.gz, so the normalized file is not used by any later
// step - confirm whether that is intended.
process STRATEGY_MUTECT2 { | ||
|
||
//publishDir "${params.output}", mode: 'copy' | ||
|
||
// excluded_samples and fasta_index_files are not referenced in the script
// below; presumably they are only staged into the work directory - confirm.
input: | ||
path bam_file | ||
path reference | ||
path excluded_samples | ||
path fasta_index_files | ||
val detected_contig | ||
|
||
// table of the rows kept by the awk filter; the Type column is always 0
output: | ||
path("${bam_file.baseName}.mutect2.filtered.txt"), emit: results | ||
|
||
""" | ||
samtools index ${bam_file} | ||
gatk Mutect2 \ | ||
-R ${reference} \ | ||
-L ${detected_contig} \ | ||
--min-base-quality-score ${params.baseQ} \ | ||
-callable-depth 6 --max-reads-per-alignment-start 0 \ | ||
-I ${bam_file} \ | ||
-O raw.vcf.gz | ||
gatk FilterMutectCalls \ | ||
-R ${reference} \ | ||
--min-reads-per-strand 2 \ | ||
-V raw.vcf.gz \ | ||
-O ${bam_file.baseName}.vcf.gz | ||
bcftools norm -m-any -f ${reference} ${bam_file.baseName}.vcf.gz -o ${bam_file.baseName}.filtered.vcf.gz -Oz | ||
tabix ${bam_file.baseName}.filtered.vcf.gz | ||
gatk LeftAlignAndTrimVariants \ | ||
-R ${reference} \ | ||
-V ${bam_file.baseName}.vcf.gz \ | ||
-O ${bam_file.baseName}.filtered2.vcf.gz \ | ||
--max-indel-length 208 | ||
rm raw.vcf.gz | ||
echo -e "ID\tFilter\tPos\tRef\tVariant\tVariantLevel\tCoverage\tType" > ${bam_file.baseName}.mutect2.txt | ||
bcftools query -f '${bam_file}\t%FILTER\t%POS\t%REF\t%ALT\t[%AF\t%DP]\t0\n' ${bam_file.baseName}.filtered2.vcf.gz >> ${bam_file.baseName}.mutect2.txt | ||
awk -F'\t' 'NR == 1 || length(\$4) > 1 || length(\$5) > 1' ${bam_file.baseName}.mutect2.txt > ${bam_file.baseName}.mutect2.filtered.txt | ||
""" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
// Concatenates the per-sample variant tables into one file, sorts it by the
// keys ID, Pos and Type (see the csvtk sort documentation for the :N, :n and
// :r modifiers) and produces variants.txt.
// In "fusion" mode the sorted table is passed through VariantMerger.jar;
// in every other mode the sorted table is used as is.
process MERGING_VARIANTS { | ||
|
||
// variants_txt: tab-separated variant tables to combine
// mode: calling mode; only the value "fusion" is treated specially
input: | ||
path variants_txt | ||
val mode | ||
|
||
// the combined (and, in fusion mode, merged) table
output: | ||
path("variants.txt"), emit: txt_summarized_ch | ||
|
||
""" | ||
csvtk concat \ | ||
-t ${variants_txt} \ | ||
-T -o variants.concat.txt | ||
csvtk sort \ | ||
-t variants.concat.txt \ | ||
-k ID:N -k Pos:n -k Type:r \ | ||
-T -o variants.sorted.txt | ||
if [[ ${mode} == "fusion" ]] | ||
then | ||
java -jar /opt/VariantMerger.jar \ | ||
variants.sorted.txt \ | ||
--output variants.txt | ||
else | ||
mv variants.sorted.txt variants.txt | ||
fi | ||
""" | ||
} |
Oops, something went wrong.