Skip to content

Commit

Permalink
Add normalization and merge script
Browse files Browse the repository at this point in the history
  • Loading branch information
seppinho committed Feb 23, 2024
1 parent 5337904 commit 7528fa7
Show file tree
Hide file tree
Showing 5 changed files with 110 additions and 5 deletions.
10 changes: 10 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,14 @@ RUN wget https://github.com/genepi/haplogrep3/releases/download/v3.2.1/haplogrep
rm haplogrep3-3.2.1-linux.zip
ENV PATH="/opt/haplogrep:${PATH}"

# Install JBang 0.91.0 into /opt/jbang and put its launcher on the PATH.
WORKDIR "/opt"
RUN wget https://github.com/jbangdev/jbang/releases/download/v0.91.0/jbang-0.91.0.zip && \
    unzip -q jbang-0.91.0.zip && \
    mv jbang-0.91.0 jbang && \
    rm jbang-0.91.0.zip
ENV PATH="/opt/jbang/bin:${PATH}"

# Still in /opt: copy the JBang script and export it as /opt/VariantMerger.jar.
COPY ./bin/VariantMerger.java ./
RUN jbang export portable -O=VariantMerger.jar VariantMerger.java

87 changes: 87 additions & 0 deletions bin/VariantMerger.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
//usr/bin/env jbang "$0" "$@" ; exit $?
//REPOS jcenter,genepi-maven=https://genepi.i-med.ac.at/maven
//DEPS info.picocli:picocli:4.6.1
//DEPS genepi:genepi-io:1.1.1

import java.io.File;
import java.util.concurrent.Callable;
import genepi.io.table.reader.CsvTableReader;
import genepi.io.table.writer.CsvTableWriter;
import picocli.CommandLine;
import picocli.CommandLine.Option;
import picocli.CommandLine.Parameters;

/**
 * Command-line tool that copies a tab-separated variants table to a new file
 * while dropping rows that fall inside the span of the preceding row.
 *
 * <p>Each row is read with its {@code Pos}, {@code Ref} and {@code Variant}
 * columns. A row is kept as "pending" until the next row is seen. If the next
 * row's position lies within {@code [pendingPos, pendingPos + diff]}, where
 * {@code diff = length(Ref) - length(Variant)} of the pending row, the next
 * row is discarded; otherwise it becomes the new pending row. Every pending
 * row is written exactly once.
 *
 * <p>NOTE(review): positions are compared without looking at any sample/ID
 * column, so the input is presumably expected to be sorted so that this is
 * safe -- confirm against the caller.
 */
public class VariantMerger implements Callable<Integer> {

    @Parameters(description = "Combined variants file")
    private String file;

    @Option(names = "--output", description = "Output files", required = true)
    private String output;

    // NOTE(review): accepted on the command line but not consulted by call().
    @Option(names = "--indel-tag", description = "Detect indels by this tag", required = false)
    private String tag = "INDEL";

    /**
     * Reads the input table, filters it as described on the class, and writes
     * the surviving rows to the output file.
     *
     * @return {@code 0} on success (used as the process exit code)
     * @throws Exception if reading or writing fails
     */
    @Override
    public Integer call() throws Exception {

        assert (file != null);
        assert (output != null);

        CsvTableWriter writer = new CsvTableWriter(new File(output).getAbsolutePath(), '\t', false);
        try {
            CsvTableReader reader = new CsvTableReader(file, '\t');
            try {
                writer.setColumns(reader.getColumns());

                // The row waiting to be written; null means "nothing pending".
                // Using null instead of a position sentinel keeps state and data apart.
                String[] pendingRow = null;
                int pendingPos = 0;
                int pendingDiff = 0;

                while (reader.next()) {

                    int pos = reader.getInteger("Pos");
                    int refLength = reader.getString("Ref").length();
                    int variantLength = reader.getString("Variant").length();

                    if (pendingRow != null) {
                        // The pending row is emitted whether or not the current row hits it.
                        writeRow(writer, pendingRow);

                        if (comparePositions(pendingPos, pos, pendingDiff)) {
                            // Current row is covered by the pending one: drop it and
                            // start over with the next row. Clearing pendingRow here
                            // prevents the row from being written a second time later.
                            pendingRow = null;
                            continue;
                        }
                    }

                    // Start (or restart) tracking with the current row.
                    pendingRow = reader.getRow();
                    pendingPos = pos;
                    pendingDiff = refLength - variantLength;
                }

                // Flush the trailing row, if any (none for empty input or after a final hit).
                if (pendingRow != null) {
                    writeRow(writer, pendingRow);
                }
            } finally {
                reader.close();
            }
        } finally {
            // Always release the output file, even if processing failed midway.
            writer.close();
        }
        return 0;
    }

    /** Writes a single row to the output table. */
    private static void writeRow(CsvTableWriter writer, String[] row) {
        writer.setRow(row);
        writer.next();
    }

    public static void main(String... args) {
        int exitCode = new CommandLine(new VariantMerger()).execute(args);
        System.exit(exitCode);
    }

    /**
     * Tells whether {@code pos} lies in the closed range
     * {@code [lastPos, lastPos + diff]}.
     *
     * @param lastPos start of the range
     * @param pos     position to test
     * @param diff    length of the range beyond {@code lastPos}; a negative
     *                value yields an empty range
     * @return {@code true} if {@code pos} is inside the range
     */
    public boolean comparePositions(int lastPos, int pos, int diff) {
        // Widen to long so the subtraction cannot overflow.
        long offset = (long) pos - lastPos;
        return offset >= 0 && offset <= diff;
    }
}
6 changes: 4 additions & 2 deletions modules/local/mutect2.nf
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,13 @@ process MUTECT2 {
rm raw.vcf.gz
bcftools norm -m-any -f ${reference} ${bam_file.baseName}.vcf.gz -o ${bam_file.baseName}.norm.vcf.gz -Oz
echo -e "ID\tFilter\tPos\tRef\tVariant\tVariantLevel\tCoverage\tType" > ${bam_file.baseName}.mutect2.txt
bcftools query -f '${bam_file}\t%FILTER\t%POS\t%REF\t%ALT\t[%AF\t%DP]\t0\n' ${bam_file.baseName}.vcf.gz >> ${bam_file.baseName}.mutect2.txt
bcftools query -f '${bam_file}\t%FILTER\t%POS\t%REF\t%ALT\t[%AF\t%DP]\tINDEL\n' ${bam_file.baseName}.norm.vcf.gz >> ${bam_file.baseName}.mutect2.txt
awk -F'\t' 'NR == 1 || length(\$4) > 1 || length(\$5) > 1' ${bam_file.baseName}.mutect2.txt > ${bam_file.baseName}.mutect2.filtered.txt
awk -F'\t' 'NR == 1 || ((length(\$4) > 1 || length(\$5) > 1) && length(\$4) != length(\$5))' ${bam_file.baseName}.mutect2.txt > ${bam_file.baseName}.mutect2.filtered.txt
"""
}
2 changes: 1 addition & 1 deletion modules/local/mutserve_single.nf
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ process MUTSERVE_SINGLE {
${bam_file}
echo -e "ID\tFilter\tPos\tRef\tVariant\tVariantLevel\tCoverage\tType" > ${bam_file.baseName}.mutserve.txt
bcftools query -f '${bam_file}\t%FILTER\t%POS\t%REF\t%ALT\t[%AF\t%DP\t%GT]\n' ${bam_file.baseName}.vcf.gz >> ${bam_file.baseName}.mutserve.txt
bcftools query -f '${bam_file}\t%FILTER\t%POS\t%REF\t%ALT\t[%AF\t%DP\t]SNV\n' ${bam_file.baseName}.vcf.gz >> ${bam_file.baseName}.mutserve.txt
awk -F'\t' 'NR == 1 || (length(\$4) == 1 && length(\$5) == 1)' ${bam_file.baseName}.mutserve.txt > ${bam_file.baseName}.mutserve.filtered.txt
"""
Expand Down
10 changes: 8 additions & 2 deletions modules/local/summarize_variants.nf
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,15 @@ process SUMMARIZE_VARIANTS {
path variants_txt

output:
path("variants.txt"), emit: txt_summarized_ch
path("variants.fixed.txt"), emit: txt_summarized_ch

"""
csvtk concat -t ${variants_txt} -T -o variants.txt
csvtk concat -t ${variants_txt} -T -o variants.concat.txt
csvtk sort -t variants.concat.txt -k ID:N -k Pos:n -k Type -T -o variants.sorted.txt
java -jar /opt/VariantMerger.jar \
variants.sorted.txt \
--output variants.fixed.txt
"""
}

0 comments on commit 7528fa7

Please sign in to comment.