diff --git a/files/bin/Minimac4 b/files/bin/Minimac4
deleted file mode 100755
index a9630714..00000000
Binary files a/files/bin/Minimac4 and /dev/null differ
diff --git a/files/bin/minimac4 b/files/bin/minimac4
new file mode 100755
index 00000000..0ce2bec2
Binary files /dev/null and b/files/bin/minimac4 differ
diff --git a/files/imputationserver-beagle.yaml b/files/imputationserver-beagle.yaml
index 07302987..1774f954 100644
--- a/files/imputationserver-beagle.yaml
+++ b/files/imputationserver-beagle.yaml
@@ -1,7 +1,7 @@
id: imputationserver-beagle
name: Genotype Imputation supporting Beagle (Minimac4)
description: This is the new Michigan Imputation Server Pipeline using Minimac4. Documentation can be found here.
If your input data is GRCh37/hg19 please ensure chromosomes are encoded without prefix (e.g. 20).
If your input data is GRCh38hg38 please ensure chromosomes are encoded with prefix 'chr' (e.g. chr20).
-version: 1.7.5
+version: 1.8.0-beta4
website: https://imputationserver.readthedocs.io
category:
diff --git a/files/imputationserver-hla.yaml b/files/imputationserver-hla.yaml
index 6aff3034..737dda15 100644
--- a/files/imputationserver-hla.yaml
+++ b/files/imputationserver-hla.yaml
@@ -1,7 +1,7 @@
id: imputationserver-hla
name: Genotype Imputation HLA (Minimac4)
description: This is the new Michigan Imputation Server Pipeline using Minimac4. Documentation can be found here.
If your input data is GRCh37/hg19 please ensure chromosomes are encoded without prefix (e.g. 20).
If your input data is GRCh38hg38 please ensure chromosomes are encoded with prefix 'chr' (e.g. chr20).
-version: 1.7.5
+version: 1.8.0-beta4
website: https://imputationserver.readthedocs.io
category:
diff --git a/files/imputationserver-pgs.yaml b/files/imputationserver-pgs.yaml
index eb9b66a9..3a5436ea 100644
--- a/files/imputationserver-pgs.yaml
+++ b/files/imputationserver-pgs.yaml
@@ -1,7 +1,7 @@
id: imputationserver-pgs
name: Genotype Imputation (PGS Calc Integration)
description: This is the new Michigan Imputation Server Pipeline using Minimac4. Documentation can be found here.
If your input data is GRCh37/hg19 please ensure chromosomes are encoded without prefix (e.g. 20).
If your input data is GRCh38hg38 please ensure chromosomes are encoded with prefix 'chr' (e.g. chr20).
-version: 1.7.5
+version: 1.8.0-beta4
website: https://imputationserver.readthedocs.io
category:
diff --git a/files/minimac4.yaml b/files/minimac4.yaml
index 617c57fc..ff365b63 100644
--- a/files/minimac4.yaml
+++ b/files/minimac4.yaml
@@ -1,7 +1,7 @@
id: imputationserver
name: Genotype Imputation (Minimac4)
-description: This is the new Michigan Imputation Server Pipeline using Minimac4. Documentation can be found here.
If your input data is GRCh37/hg19 please ensure chromosomes are encoded without prefix (e.g. 20).
If your input data is GRCh38hg38 please ensure chromosomes are encoded with prefix 'chr' (e.g. chr20).
-version: 1.7.5
+description: This is the new Michigan Imputation Server Pipeline using Minimac4. Documentation can be found here.
If your input data is GRCh37/hg19 please ensure chromosomes are encoded without prefix (e.g. 20).
If your input data is GRCh38/hg38 please ensure chromosomes are encoded with prefix 'chr' (e.g. chr20).
+version: 1.8.0-beta4
website: https://imputationserver.readthedocs.io
category:
diff --git a/pom.xml b/pom.xml
index b70589b6..b8746e20 100644
--- a/pom.xml
+++ b/pom.xml
@@ -5,11 +5,8 @@
genepi
imputationserver
-
- 1.7.5
-
+ 1.8.0-beta4
jar
-
University of Michigan Imputation Server
http://maven.apache.org
diff --git a/src/main/java/genepi/imputationserver/steps/imputation/ImputationMapper.java b/src/main/java/genepi/imputationserver/steps/imputation/ImputationMapper.java
index 35c26be6..806de4cb 100644
--- a/src/main/java/genepi/imputationserver/steps/imputation/ImputationMapper.java
+++ b/src/main/java/genepi/imputationserver/steps/imputation/ImputationMapper.java
@@ -117,7 +117,6 @@ protected void setup(Context context) throws IOException, InterruptedException {
String referenceName = parameters.get(ImputationJob.REF_PANEL);
imputationParameters.setPhasing(phasingEngine);
imputationParameters.setReferencePanelName(referenceName);
- imputationParameters.setMinR2(minR2);
imputationParameters.setPhasingRequired(phasingRequired);
// get cached files
@@ -153,11 +152,11 @@ protected void setup(Context context) throws IOException, InterruptedException {
mapBeagleFilename = cache.getFile(mapBeagle);
}
- String minimacCommand = cache.getFile("Minimac4");
+ String minimacCommand = cache.getFile("minimac4");
String eagleCommand = cache.getFile("eagle");
String beagleCommand = cache.getFile("beagle.jar");
String tabixCommand = cache.getFile("tabix");
-
+
// create temp directory
DefaultPreferenceStore store = new DefaultPreferenceStore(context.getConfiguration());
folder = store.getString("minimac.tmp");
@@ -182,9 +181,9 @@ protected void setup(Context context) throws IOException, InterruptedException {
String formatFile = cache.getFile(name + ".format");
if (formatFile != null) {
// create symbolic link to format file. they have to be in the same folder
- Files.createSymbolicLink(Paths.get(FileUtil.path(folder,name)), Paths.get(localFilename));
- Files.createSymbolicLink(Paths.get(FileUtil.path(folder,name+".format")), Paths.get(formatFile));
- scores[i] = FileUtil.path(folder,name);
+ Files.createSymbolicLink(Paths.get(FileUtil.path(folder, name)), Paths.get(localFilename));
+ Files.createSymbolicLink(Paths.get(FileUtil.path(folder, name + ".format")), Paths.get(formatFile));
+ scores[i] = FileUtil.path(folder, name);
}
}
System.out.println("Loaded " + scores.length + " score files from distributed cache");
@@ -212,6 +211,7 @@ protected void setup(Context context) throws IOException, InterruptedException {
int phasingWindow = Integer.parseInt(store.getString("phasing.window"));
int window = Integer.parseInt(store.getString("minimac.window"));
+ int decay = Integer.parseInt(store.getString("minimac.decay"));
String minimacParams = store.getString("minimac.command");
String eagleParams = store.getString("eagle.command");
@@ -226,6 +226,8 @@ protected void setup(Context context) throws IOException, InterruptedException {
pipeline.setPhasingWindow(phasingWindow);
pipeline.setBuild(build);
pipeline.setMinimacWindow(window);
+ pipeline.setMinR2(minR2);
+ pipeline.setDecay(decay);
}
@@ -289,16 +291,8 @@ public void map(LongWritable key, Text value, Context context) throws IOExceptio
statistics.setImportTime((end - start) / 1000);
} else {
- if (imputationParameters.getMinR2() > 0) {
- // filter by r2
- String filteredInfoFilename = outputChunk.getInfoFilename() + "_filtered";
- filterInfoFileByR2(outputChunk.getInfoFilename(), filteredInfoFilename,
- imputationParameters.getMinR2());
- HdfsUtil.put(filteredInfoFilename, HdfsUtil.path(output, chunk + ".info"));
-
- } else {
- HdfsUtil.put(outputChunk.getInfoFilename(), HdfsUtil.path(output, chunk + ".info"));
- }
+
+ HdfsUtil.put(outputChunk.getInfoFilename(), HdfsUtil.path(output, chunk + ".info"));
long start = System.currentTimeMillis();
diff --git a/src/main/java/genepi/imputationserver/steps/imputation/ImputationPipeline.java b/src/main/java/genepi/imputationserver/steps/imputation/ImputationPipeline.java
index 14f7da24..3402b0a7 100644
--- a/src/main/java/genepi/imputationserver/steps/imputation/ImputationPipeline.java
+++ b/src/main/java/genepi/imputationserver/steps/imputation/ImputationPipeline.java
@@ -24,9 +24,10 @@
public class ImputationPipeline {
- public static final String PIPELINE_VERSION = "michigan-imputationserver-1.7.5";
- public static final String IMPUTATION_VERSION = "minimac4-1.0.2";
+ public static final String PIPELINE_VERSION = "michigan-imputationserver-1.8.0-beta4";
+
+ public static final String IMPUTATION_VERSION = "minimac-v4.1.6";
public static final String BEAGLE_VERSION = "beagle.18May20.d20.jar";
@@ -48,8 +49,12 @@ public class ImputationPipeline {
private int minimacWindow;
+ private int minimacDecay;
+
private int phasingWindow;
+ private double minR2;
+
private String refFilename;
private String mapMinimac;
@@ -288,6 +293,16 @@ public boolean phaseWithBeagle(VcfChunk input, VcfChunkOutput output, String ref
public boolean imputeVCF(VcfChunkOutput output)
throws InterruptedException, IOException, CompilationFailedException {
+ // create tabix index
+ Command tabix = new Command(tabixCommand);
+ tabix.setSilent(false);
+ tabix.setParams(output.getPhasedVcfFilename());
+ System.out.println("Command: " + tabix.getExecutedCommand());
+ if (tabix.execute() != 0) {
+ System.out.println("Error during index creation: " + tabix.getStdOut());
+ return false;
+ }
+
String chr = "";
if (build.equals("hg38")) {
chr = "chr" + output.getChromosome();
@@ -306,6 +321,8 @@ public boolean imputeVCF(VcfChunkOutput output)
binding.put("chr", chr);
binding.put("unphased", false);
binding.put("mapMinimac", mapMinimac);
+ binding.put("minR2", minR2);
+ binding.put("decay", minimacDecay);
String[] params = createParams(minimacParams, binding);
@@ -345,11 +362,11 @@ private boolean runPgsCalc(VcfChunkOutput output) {
task.setVcfFilename(output.getImputedVcfFilename());
task.setChunk(scoreChunk);
task.setRiskScoreFilenames(scores);
-
- //TODO: enable fix-strand-flips
- //task.setFixStrandFlips(true);
- //task.setRemoveAmbiguous(true);
-
+
+ // TODO: enable fix-strand-flips
+ // task.setFixStrandFlips(true);
+ // task.setRemoveAmbiguous(true);
+
for (String file : scores) {
String autoFormat = file + ".format";
if (new File(autoFormat).exists()) {
@@ -474,4 +491,19 @@ public void setMapBeagleFilename(String mapBeagleFilename) {
this.mapBeagleFilename = mapBeagleFilename;
}
+ /**
+ * Sets the minimum r2 threshold; imputeVCF binds it as "minR2" for the minimac command template.
+ */
+ public void setMinR2(double minR2) {
+ this.minR2 = minR2;
+ }
+
+ /**
+ * Sets the decay value; imputeVCF binds it as "decay" for the minimac command template.
+ * NOTE(review): stored as int, so fractional decay rates cannot be configured - confirm this is intended.
+ */
+ public void setDecay(int decay) {
+ this.minimacDecay = decay;
+ }
+
}
diff --git a/src/main/java/genepi/imputationserver/util/DefaultPreferenceStore.java b/src/main/java/genepi/imputationserver/util/DefaultPreferenceStore.java
index 01852193..5d086e91 100644
--- a/src/main/java/genepi/imputationserver/util/DefaultPreferenceStore.java
+++ b/src/main/java/genepi/imputationserver/util/DefaultPreferenceStore.java
@@ -71,11 +71,14 @@ public static Properties defaults() {
defaults.setProperty("chunksize", "20000000");
defaults.setProperty("phasing.window", "5000000");
defaults.setProperty("minimac.window", "500000");
+ defaults.setProperty("minimac.decay", "0");
defaults.setProperty("minimac.sendmail", "no");
defaults.setProperty("server.url", "https://imputationserver.sph.umich.edu");
defaults.setProperty("minimac.tmp", "/tmp");
+ // Minimac4 argument template; the ${...} placeholders match the binding keys set in
+ // ImputationPipeline.imputeVCF. --min-r2 is emitted only for a positive threshold (0 = no filter).
defaults.setProperty("minimac.command",
- "--refHaps ${ref} --haps ${vcf} --start ${start} --end ${end} --window ${window} --prefix ${prefix} --chr ${chr} --cpus 1 --noPhoneHome --format GT,DS,GP --allTypedSites --meta --minRatio 0.00001 ${chr =='MT' ? '--myChromosome ' + chr : ''} ${unphased ? '--unphasedOutput' : ''} ${mapMinimac != null ? '--referenceEstimates --map ' + mapMinimac : ''}");
+ "--region ${chr}:${start}-${end} --overlap ${window} --output ${prefix}.dose.vcf.gz --output-format vcf.gz --format GT,DS,GP,HDS --min-ratio 0.00001 --decay ${decay} --all-typed-sites --sites ${prefix}.info --empirical-output ${prefix}.empiricalDose.vcf.gz ${minR2 > 0 ? '--min-r2 ' + minR2 : ''} ${mapMinimac != null ? '--map ' + mapMinimac : ''} ${ref} ${vcf}");
defaults.setProperty("eagle.command",
"--vcfRef ${ref} --vcfTarget ${vcf} --geneticMapFile ${map} --outPrefix ${prefix} --bpStart ${start} --bpEnd ${end} --allowRefAltSwap --vcfOutFormat z --keepMissingPloidyX");
defaults.setProperty("beagle.command",
diff --git a/src/main/java/genepi/imputationserver/util/FileMerger.java b/src/main/java/genepi/imputationserver/util/FileMerger.java
index d78413af..7acdafe3 100644
--- a/src/main/java/genepi/imputationserver/util/FileMerger.java
+++ b/src/main/java/genepi/imputationserver/util/FileMerger.java
@@ -24,35 +24,18 @@ public static void splitIntoHeaderAndData(String input, OutputStream outHeader,
while (reader.next()) {
String line = reader.get();
+
if (!line.startsWith("#")) {
- if (parameters.getMinR2() > 0) {
- // rsq set. parse line and check rsq
- String info = parseInfo(line);
- if (info != null) {
- boolean keep = keepVcfLineByInfo(info, R2_FLAG, parameters.getMinR2());
- if (keep) {
- outData.write(line.getBytes());
- outData.write("\n".getBytes());
- }
- } else {
- // no valid vcf line. keep line
- outData.write(line.getBytes());
- outData.write("\n".getBytes());
- }
- } else {
- // no rsq set. keep all lines without parsing
- outData.write(line.getBytes());
- outData.write("\n".getBytes());
- }
+ outData.write(line.getBytes());
+ outData.write("\n".getBytes());
} else {
// write filter command before ID List starting with #CHROM
if (line.startsWith("#CHROM")) {
- outHeader.write(("##pipeline=" + ImputationPipeline.PIPELINE_VERSION + "\n").getBytes());
- outHeader.write(("##imputation=" + ImputationPipeline.IMPUTATION_VERSION + "\n").getBytes());
- outHeader.write(("##phasing=" + parameters.getPhasingMethod() + "\n").getBytes());
- outHeader.write(("##panel=" + parameters.getReferencePanelName() + "\n").getBytes());
- outHeader.write(("##r2Filter=" + parameters.getMinR2() + "\n").getBytes());
+ outHeader.write(("##mis_pipeline=" + ImputationPipeline.PIPELINE_VERSION + "\n").getBytes());
+ outHeader.write(("##mis_imputation=" + ImputationPipeline.IMPUTATION_VERSION + "\n").getBytes());
+ outHeader.write(("##mis_phasing=" + parameters.getPhasingMethod() + "\n").getBytes());
+ outHeader.write(("##mis_panel=" + parameters.getReferencePanelName() + "\n").getBytes());
}
// write all headers except minimac4 command
@@ -85,9 +68,9 @@ public static void splitPhasedIntoHeaderAndData(String input, OutputStream outHe
// write filter command before ID List starting with #CHROM
if (line.startsWith("#CHROM")) {
- outHeader.write(("##pipeline=" + ImputationPipeline.PIPELINE_VERSION + "\n").getBytes());
- outHeader.write(("##phasing=" + parameters.getPhasingMethod() + "\n").getBytes());
- outHeader.write(("##panel=" + parameters.getReferencePanelName() + "\n").getBytes());
+ outHeader.write(("##mis_pipeline=" + ImputationPipeline.PIPELINE_VERSION + "\n").getBytes());
+ outHeader.write(("##mis_phasing=" + parameters.getPhasingMethod() + "\n").getBytes());
+ outHeader.write(("##mis_panel=" + parameters.getReferencePanelName() + "\n").getBytes());
}
// write all headers except eagle command
@@ -129,24 +112,30 @@ public static void mergeAndGzInfo(List hdfs, String local) throws IOExce
LineReader reader = new LineReader(in);
- boolean header = true;
+ boolean lineBreak = false;
while (reader.next()) {
String line = reader.get();
- if (header) {
+ if (line.startsWith("#")) {
+
if (firstFile) {
+
+ if (lineBreak) {
+ out.write('\n');
+ }
out.write(line.toString().getBytes());
- firstFile = false;
+ lineBreak = true;
}
- header = false;
} else {
out.write('\n');
out.write(line.toString().getBytes());
}
}
+ firstFile = false;
+
in.close();
}
diff --git a/src/main/java/genepi/imputationserver/util/ImputationParameters.java b/src/main/java/genepi/imputationserver/util/ImputationParameters.java
index 24116c5a..671efb6b 100644
--- a/src/main/java/genepi/imputationserver/util/ImputationParameters.java
+++ b/src/main/java/genepi/imputationserver/util/ImputationParameters.java
@@ -6,8 +6,6 @@ public class ImputationParameters {
private String referencePanelName;
- private double minR2;
-
private String phasing;
private boolean phasingRequired;
@@ -20,14 +18,6 @@ public void setReferencePanelName(String referencePanelName) {
this.referencePanelName = referencePanelName;
}
- public double getMinR2() {
- return minR2;
- }
-
- public void setMinR2(double minR2) {
- this.minR2 = minR2;
- }
-
public String getPhasing() {
return phasing;
}
diff --git a/src/test/java/genepi/imputationserver/steps/ImputationTest.java b/src/test/java/genepi/imputationserver/steps/ImputationTest.java
index 30972b90..5bb7f60e 100644
--- a/src/test/java/genepi/imputationserver/steps/ImputationTest.java
+++ b/src/test/java/genepi/imputationserver/steps/ImputationTest.java
@@ -315,7 +315,7 @@ public void testPipelineWithEagle() throws IOException, ZipException {
assertEquals(true, file.isPhased());
assertEquals(TOTAL_REFPANEL_CHR20_B37, file.getNoSnps());
- int snpInInfo = getLineCount("test-data/tmp/chr20.info.gz") - 1;
+ int snpInInfo = getLineCount("test-data/tmp/chr20.info.gz");
assertEquals(snpInInfo, file.getNoSnps());
FileUtil.deleteDirectory("test-data/tmp");
@@ -358,10 +358,10 @@ public void testValidatePanelWithEagle() throws IOException, ZipException {
VCFFileReader reader = new VCFFileReader(new File("test-data/tmp/chr20.dose.vcf.gz"), false);
VCFHeader header = reader.getFileHeader();
- assertEquals("hapmap2", header.getOtherHeaderLine("panel").getValue());
- assertEquals(ImputationPipeline.EAGLE_VERSION, header.getOtherHeaderLine("phasing").getValue());
- assertEquals(ImputationPipeline.IMPUTATION_VERSION, header.getOtherHeaderLine("imputation").getValue());
- assertEquals(ImputationPipeline.PIPELINE_VERSION, header.getOtherHeaderLine("pipeline").getValue());
+ assertEquals("hapmap2", header.getOtherHeaderLine("mis_panel").getValue());
+ assertEquals(ImputationPipeline.EAGLE_VERSION, header.getOtherHeaderLine("mis_phasing").getValue());
+ assertEquals(ImputationPipeline.IMPUTATION_VERSION, header.getOtherHeaderLine("mis_imputation").getValue());
+ assertEquals(ImputationPipeline.PIPELINE_VERSION, header.getOtherHeaderLine("mis_pipeline").getValue());
FileUtil.deleteDirectory("test-data/tmp");
@@ -404,10 +404,10 @@ public void testValidatePanelWithBeagle() throws IOException, ZipException {
VCFFileReader reader = new VCFFileReader(new File("test-data/tmp/chr20.dose.vcf.gz"), false);
VCFHeader header = reader.getFileHeader();
- assertEquals("hapmap2", header.getOtherHeaderLine("panel").getValue());
- assertEquals(ImputationPipeline.BEAGLE_VERSION, header.getOtherHeaderLine("phasing").getValue());
- assertEquals(ImputationPipeline.IMPUTATION_VERSION, header.getOtherHeaderLine("imputation").getValue());
- assertEquals(ImputationPipeline.PIPELINE_VERSION, header.getOtherHeaderLine("pipeline").getValue());
+ assertEquals("hapmap2", header.getOtherHeaderLine("mis_panel").getValue());
+ assertEquals(ImputationPipeline.BEAGLE_VERSION, header.getOtherHeaderLine("mis_phasing").getValue());
+ assertEquals(ImputationPipeline.IMPUTATION_VERSION, header.getOtherHeaderLine("mis_imputation").getValue());
+ assertEquals(ImputationPipeline.PIPELINE_VERSION, header.getOtherHeaderLine("mis_pipeline").getValue());
FileUtil.deleteDirectory("test-data/tmp");
@@ -451,9 +451,9 @@ public void testValidatePanelPhasingOnly() throws IOException, ZipException {
VCFFileReader reader = new VCFFileReader(new File("test-data/tmp/chr20.phased.vcf.gz"), false);
VCFHeader header = reader.getFileHeader();
- assertEquals("hapmap2", header.getOtherHeaderLine("panel").getValue());
- assertEquals(ImputationPipeline.EAGLE_VERSION, header.getOtherHeaderLine("phasing").getValue());
- assertEquals(ImputationPipeline.PIPELINE_VERSION, header.getOtherHeaderLine("pipeline").getValue());
+ assertEquals("hapmap2", header.getOtherHeaderLine("mis_panel").getValue());
+ assertEquals(ImputationPipeline.EAGLE_VERSION, header.getOtherHeaderLine("mis_phasing").getValue());
+ assertEquals(ImputationPipeline.PIPELINE_VERSION, header.getOtherHeaderLine("mis_pipeline").getValue());
FileUtil.deleteDirectory("test-data/tmp");
@@ -497,9 +497,9 @@ public void testValidatePanelPhasedInput() throws IOException, ZipException {
VCFFileReader reader = new VCFFileReader(new File("test-data/tmp/chr20.dose.vcf.gz"), false);
VCFHeader header = reader.getFileHeader();
- assertEquals("hapmap2", header.getOtherHeaderLine("panel").getValue());
- assertEquals("n/a", header.getOtherHeaderLine("phasing").getValue());
- assertEquals(ImputationPipeline.PIPELINE_VERSION, header.getOtherHeaderLine("pipeline").getValue());
+ assertEquals("hapmap2", header.getOtherHeaderLine("mis_panel").getValue());
+ assertEquals("n/a", header.getOtherHeaderLine("mis_phasing").getValue());
+ assertEquals(ImputationPipeline.PIPELINE_VERSION, header.getOtherHeaderLine("mis_pipeline").getValue());
// FileUtil.deleteDirectory("test-data/tmp");
@@ -611,7 +611,7 @@ public void testPipelineWithEagleAndScores() throws IOException, ZipException {
assertEquals(true, file.isPhased());
assertEquals(TOTAL_REFPANEL_CHR20_B37, file.getNoSnps());
- int snpInInfo = getLineCount("test-data/tmp/chr20.info.gz") - 1;
+ int snpInInfo = getLineCount("test-data/tmp/chr20.info.gz");
assertEquals(snpInInfo, file.getNoSnps());
String[] args = { "test-data/tmp/chr20.dose.vcf.gz", "--ref", "PGS000018,PGS000027", "--out",
@@ -704,7 +704,7 @@ public void testPipelineWithEagleAndScoresAndFormat() throws IOException, ZipExc
assertEquals(true, file.isPhased());
assertEquals(TOTAL_REFPANEL_CHR20_B37, file.getNoSnps());
- int snpInInfo = getLineCount("test-data/tmp/chr20.info.gz") - 1;
+ int snpInInfo = getLineCount("test-data/tmp/chr20.info.gz");
assertEquals(snpInInfo, file.getNoSnps());
String[] args = { "test-data/tmp/chr20.dose.vcf.gz", "--ref", score1, "--out", "test-data/tmp/expected.txt" };
@@ -962,7 +962,7 @@ public void testPipelineWithEagleAndR2Filter() throws IOException, ZipException
// TODO: update SNPS_WITH_R2_BELOW_05
assertTrue(TOTAL_REFPANEL_CHR20_B37 > file.getNoSnps());
- int snpInInfo = getLineCount("test-data/tmp/chr20.info.gz") - 1;
+ int snpInInfo = getLineCount("test-data/tmp/chr20.info.gz");
assertEquals(snpInInfo, file.getNoSnps());
FileUtil.deleteDirectory("test-data/tmp");
@@ -973,7 +973,12 @@ private int getLineCount(String filename) throws IOException {
LineReader reader = new LineReader(filename);
int lines = 0;
while (reader.next()) {
- lines++;
+
+ String line = reader.get();
+ // count data lines only; lines starting with '#' (header/meta) are skipped
+ if (!line.startsWith("#")) {
+ lines++;
+ }
}
return lines;
}
@@ -1001,12 +1006,12 @@ private boolean checkSortPositionInfo(String filename) throws IOException {
String line = reader.get();
- if (!line.startsWith("SNP")) {
- String snp = line.split("\t")[0];
- if (Integer.valueOf(snp.split(":")[1]) <= pos) {
+ if (!line.startsWith("#")) {
+ String snp = line.split("\\s+")[1];
+ if (Integer.valueOf(snp) <= pos) {
return false;
}
- pos = Integer.valueOf(snp.split(":")[1]);
+ pos = Integer.valueOf(snp);
}
}
@@ -1081,7 +1086,7 @@ public void testCompareInfoAndDosageSize() throws IOException, ZipException {
- // subtract header
+ // getLineCount() already skips '#' header lines, so no header subtraction is needed
int infoCount = getLineCount("test-data/tmp/chr20.info.gz");
- assertEquals(infoCount - 1, file.getNoSnps());
+ assertEquals(infoCount, file.getNoSnps());
FileUtil.deleteDirectory("test-data/tmp");
}
diff --git a/test-data/configs/beagle/panels.txt b/test-data/configs/beagle/panels.txt
index 9099b68c..e0c6f302 100644
--- a/test-data/configs/beagle/panels.txt
+++ b/test-data/configs/beagle/panels.txt
@@ -1,7 +1,7 @@
panels:
- id: hapmap2
- hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.m3vcf.gz
+ hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.msav
legend: ref-panels/hapmap_r22.chr$chr.CEU.hg19_impute.legend.gz
refBeagle: ref-panels/hapmap_r22.chr$chr.CEU.hg19.recode.bref3
mapBeagle: ref-panels/plink.chr$chr.GRCh37.map
diff --git a/test-data/configs/beagle/ref-panels/hapmap_r22.chr20.CEU.hg19.msav b/test-data/configs/beagle/ref-panels/hapmap_r22.chr20.CEU.hg19.msav
new file mode 100644
index 00000000..88f63812
Binary files /dev/null and b/test-data/configs/beagle/ref-panels/hapmap_r22.chr20.CEU.hg19.msav differ
diff --git a/test-data/configs/hapmap-3chr/panels.txt b/test-data/configs/hapmap-3chr/panels.txt
index 6d7461e6..4eea42e8 100644
--- a/test-data/configs/hapmap-3chr/panels.txt
+++ b/test-data/configs/hapmap-3chr/panels.txt
@@ -1,7 +1,7 @@
panels:
- id: hapmap2
- hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.m3vcf.gz
+ hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.msav
legend: ref-panels/hapmap_r22.chr$chr.CEU.hg19_impute.legend.gz
mapEagle: ref-panels/genetic_map_hg19_chr1.txt
refEagle: ref-panels/hapmap_r22.eagle/hapmap_r22.chr$chr.CEU.hg19.recode.bcf
@@ -13,7 +13,7 @@ panels:
mixed: Mixed
- id: hapmap2-qcfilter-strandflips
- hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.m3vcf.gz
+ hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.msav
legend: ref-panels/hapmap_r22.chr$chr.CEU.hg19_impute.legend.gz
mapEagle: ref-panels/genetic_map_hg19_chr1.txt
refEagle: ref-panels/hapmap_r22.eagle/hapmap_r22.chr$chr.CEU.hg19.recode.bcf
@@ -27,7 +27,7 @@ panels:
strandFlips: -1
- id: hapmap2-qcfilter-ref-overlap
- hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.m3vcf.gz
+ hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.msav
legend: ref-panels/hapmap_r22.chr$chr.CEU.hg19_impute.legend.gz
mapEagle: ref-panels/genetic_map_hg19_chr1.txt
refEagle: ref-panels/hapmap_r22.eagle/hapmap_r22.chr$chr.CEU.hg19.recode.bcf
@@ -43,7 +43,7 @@ panels:
minSnps: 1000
- id: hapmap2-qcfilter-min-snps
- hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.m3vcf.gz
+ hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.msav
legend: ref-panels/hapmap_r22.chr$chr.CEU.hg19_impute.legend.gz
mapEagle: ref-panels/genetic_map_hg19_chr1.txt
refEagle: ref-panels/hapmap_r22.eagle/hapmap_r22.chr$chr.CEU.hg19.recode.bcf
@@ -58,7 +58,7 @@ panels:
minSnps: 1000
- id: hapmap2-qcfilter-low-callrate
- hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.m3vcf.gz
+ hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.msav
legend: ref-panels/hapmap_r22.chr$chr.CEU.hg19_impute.legend.gz
mapEagle: ref-panels/genetic_map_hg19_chr1.txt
refEagle: ref-panels/hapmap_r22.eagle/hapmap_r22.chr$chr.CEU.hg19.recode.bcf
diff --git a/test-data/configs/hapmap-3chr/ref-panels/hapmap_r22.chr1.CEU.hg19.msav b/test-data/configs/hapmap-3chr/ref-panels/hapmap_r22.chr1.CEU.hg19.msav
new file mode 100644
index 00000000..fcf93795
Binary files /dev/null and b/test-data/configs/hapmap-3chr/ref-panels/hapmap_r22.chr1.CEU.hg19.msav differ
diff --git a/test-data/configs/hapmap-3chr/ref-panels/hapmap_r22.chr2.CEU.hg19.msav b/test-data/configs/hapmap-3chr/ref-panels/hapmap_r22.chr2.CEU.hg19.msav
new file mode 100644
index 00000000..01774555
Binary files /dev/null and b/test-data/configs/hapmap-3chr/ref-panels/hapmap_r22.chr2.CEU.hg19.msav differ
diff --git a/test-data/configs/hapmap-3chr/ref-panels/hapmap_r22.chr3.CEU.hg19.msav b/test-data/configs/hapmap-3chr/ref-panels/hapmap_r22.chr3.CEU.hg19.msav
new file mode 100644
index 00000000..a9d58b4b
Binary files /dev/null and b/test-data/configs/hapmap-3chr/ref-panels/hapmap_r22.chr3.CEU.hg19.msav differ
diff --git a/test-data/configs/hapmap-chr1/panels.txt b/test-data/configs/hapmap-chr1/panels.txt
index c8c8f642..a6552542 100644
--- a/test-data/configs/hapmap-chr1/panels.txt
+++ b/test-data/configs/hapmap-chr1/panels.txt
@@ -1,7 +1,7 @@
panels:
- id: hapmap2
- hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.m3vcf.gz
+ hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.msav
legend: ref-panels/hapmap_r22.chr$chr.CEU.hg19_impute.legend.gz
mapEagle: ref-panels/genetic_map_hg19_chr1.txt
refEagle: ref-panels/hapmap_r22.chr$chr.CEU.hg19.recode.bcf
@@ -13,7 +13,7 @@ panels:
mixed: Mixed
- id: hrc-fake
- hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.m3vcf.gz
+ hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.msav
legend: ref-panels/hapmap_r22.chr$chr.CEU.hg19_impute.legend.gz
mapEagle: ref-panels/genetic_map_hg19_chr1.txt
refEagle: ref-panels/hapmap_r22.chr$chr.CEU.hg19.recode.bcf
@@ -25,7 +25,7 @@ panels:
mixed: Mixed
- id: phase3-fake
- hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.m3vcf.gz
+ hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.msav
legend: ref-panels/hapmap_r22.chr$chr.CEU.hg19_impute.legend.gz
mapEagle: ref-panels/genetic_map_hg19_chr1.txt
refEagle: ref-panels/hapmap_r22.chr$chr.CEU.hg19.recode.bcf
@@ -45,7 +45,7 @@ panels:
mixed: Mixed
- id: TOPMedfreeze6-fake
- hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.m3vcf.gz
+ hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.msav
legend: ref-panels/hapmap_r22.chr$chr.CEU.hg19_impute.legend.gz
mapEagle: ref-panels/genetic_map_hg19_chr1.txt
refEagle: ref-panels/hapmap_r22.chr$chr.CEU.hg19.recode.bcf
@@ -66,7 +66,7 @@ panels:
- id: hapmap2-region-simple
- hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.m3vcf.gz
+ hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.msav
legend: ref-panels/hapmap_r22.chr$chr.CEU.hg19_impute.legend.gz
mapEagle: ref-panels/genetic_map_hg19_chr1.txt
refEagle: ref-panels/hapmap_r22.eagle/hapmap_r22.chr$chr.CEU.hg19.recode.bcf
@@ -79,7 +79,7 @@ panels:
range: 1:565111-752566
- id: hapmap2-region-complex
- hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.m3vcf.gz
+ hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.msav
legend: ref-panels/hapmap_r22.chr$chr.CEU.hg19_impute.legend.gz
mapEagle: ref-panels/genetic_map_hg19_chr1.txt
refEagle: ref-panels/hapmap_r22.eagle/hapmap_r22.chr$chr.CEU.hg19.recode.bcf
diff --git a/test-data/configs/hapmap-chr1/ref-panels/hapmap_r22.chr1.CEU.hg19.msav b/test-data/configs/hapmap-chr1/ref-panels/hapmap_r22.chr1.CEU.hg19.msav
new file mode 100644
index 00000000..62a915f4
Binary files /dev/null and b/test-data/configs/hapmap-chr1/ref-panels/hapmap_r22.chr1.CEU.hg19.msav differ
diff --git a/test-data/configs/hapmap-chr20-hg38/panels.txt b/test-data/configs/hapmap-chr20-hg38/panels.txt
index 1bf29b2d..7dc3cae8 100644
--- a/test-data/configs/hapmap-chr20-hg38/panels.txt
+++ b/test-data/configs/hapmap-chr20-hg38/panels.txt
@@ -1,7 +1,7 @@
panels:
- id: hapmap2
- hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg38.m3vcf.gz
+ hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg38.msav
legend: ref-panels/hapmap_r22.chr$chr.CEU.hg38_impute.legend.gz
mapEagle: ref-panels/genetic_map_hg38_withX.txt.gz
refEagle: ref-panels/hapmap_r22.chr$chr.CEU.hg38.bcf
diff --git a/test-data/configs/hapmap-chr20-hg38/ref-panels/hapmap_r22.chr20.CEU.hg38.msav b/test-data/configs/hapmap-chr20-hg38/ref-panels/hapmap_r22.chr20.CEU.hg38.msav
new file mode 100644
index 00000000..12ca8d65
Binary files /dev/null and b/test-data/configs/hapmap-chr20-hg38/ref-panels/hapmap_r22.chr20.CEU.hg38.msav differ
diff --git a/test-data/configs/hapmap-chr20/panels.txt b/test-data/configs/hapmap-chr20/panels.txt
index 5679b82b..710d07a5 100644
--- a/test-data/configs/hapmap-chr20/panels.txt
+++ b/test-data/configs/hapmap-chr20/panels.txt
@@ -1,7 +1,7 @@
panels:
- id: hapmap2
- hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.m3vcf.gz
+ hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.msav
legend: ref-panels/hapmap_r22.chr$chr.CEU.hg19_impute.legend.gz
mapEagle: ref-panels/genetic_map_hg19_withX.txt.gz
refEagle: ref-panels/hapmap_r22.chr$chr.CEU.hg19.recode.bcf
diff --git a/test-data/configs/hapmap-chr20/ref-panels/hapmap_r22.chr20.CEU.hg19.msav b/test-data/configs/hapmap-chr20/ref-panels/hapmap_r22.chr20.CEU.hg19.msav
new file mode 100644
index 00000000..8267f99d
Binary files /dev/null and b/test-data/configs/hapmap-chr20/ref-panels/hapmap_r22.chr20.CEU.hg19.msav differ
diff --git a/test-data/configs/hapmap-chrX-hg38/panels.txt b/test-data/configs/hapmap-chrX-hg38/panels.txt
index 216b690f..f16cfc05 100644
--- a/test-data/configs/hapmap-chrX-hg38/panels.txt
+++ b/test-data/configs/hapmap-chrX-hg38/panels.txt
@@ -1,7 +1,7 @@
panels:
- id: hapmap2
- hdfs: ref-panels/$chr.1000g.Phase1.v3.With.Parameter.Estimates.hg38.m3vcf.gz
+ hdfs: ref-panels/$chr.1000g.Phase1.v3.With.Parameter.Estimates.hg38.msav
legend: ref-panels/1000g_chrX_impute.hg38.legend.gz
mapEagle: ref-panels/genetic_map_hg38_withX.txt.gz
refEagle: ref-panels/ALL.$chr.phase1_v3.snps_indels_svs.genotypes.all.noSingleton.recode.hg38.bcf
diff --git a/test-data/configs/hapmap-chrX-hg38/ref-panels/X.PAR1.1000g.Phase1.v3.With.Parameter.Estimates.hg38.msav b/test-data/configs/hapmap-chrX-hg38/ref-panels/X.PAR1.1000g.Phase1.v3.With.Parameter.Estimates.hg38.msav
new file mode 100644
index 00000000..37c4a3f3
Binary files /dev/null and b/test-data/configs/hapmap-chrX-hg38/ref-panels/X.PAR1.1000g.Phase1.v3.With.Parameter.Estimates.hg38.msav differ
diff --git a/test-data/configs/hapmap-chrX-hg38/ref-panels/X.PAR2.1000g.Phase1.v3.With.Parameter.Estimates.hg38.msav b/test-data/configs/hapmap-chrX-hg38/ref-panels/X.PAR2.1000g.Phase1.v3.With.Parameter.Estimates.hg38.msav
new file mode 100644
index 00000000..46cc3120
Binary files /dev/null and b/test-data/configs/hapmap-chrX-hg38/ref-panels/X.PAR2.1000g.Phase1.v3.With.Parameter.Estimates.hg38.msav differ
diff --git a/test-data/configs/hapmap-chrX-hg38/ref-panels/X.nonPAR.1000g.Phase1.v3.With.Parameter.Estimates.hg38.msav b/test-data/configs/hapmap-chrX-hg38/ref-panels/X.nonPAR.1000g.Phase1.v3.With.Parameter.Estimates.hg38.msav
new file mode 100644
index 00000000..3989f9e0
Binary files /dev/null and b/test-data/configs/hapmap-chrX-hg38/ref-panels/X.nonPAR.1000g.Phase1.v3.With.Parameter.Estimates.hg38.msav differ
diff --git a/test-data/configs/hapmap-chrX/panels.txt b/test-data/configs/hapmap-chrX/panels.txt
index 8769c7b2..cc3ea964 100644
--- a/test-data/configs/hapmap-chrX/panels.txt
+++ b/test-data/configs/hapmap-chrX/panels.txt
@@ -1,7 +1,7 @@
panels:
- id: phase1
- hdfs: ref-panels/$chr.1000g.Phase1.v3.With.Parameter.Estimates.m3vcf.gz
+ hdfs: ref-panels/$chr.1000g.Phase1.v3.With.Parameter.Estimates.msav
legend: ref-panels/1000g_chr$chr_impute.legend.gz
mapEagle: ref-panels/genetic_map_hg19_withX.txt.gz
refEagle: ref-panels/ALL.chr$chr.phase1_v3.snps_indels_svs.genotypes.all.noSingleton.recode.bcf
diff --git a/test-data/configs/hapmap-chrX/ref-panels/X.PAR1.1000g.Phase1.v3.With.Parameter.Estimates.msav b/test-data/configs/hapmap-chrX/ref-panels/X.PAR1.1000g.Phase1.v3.With.Parameter.Estimates.msav
new file mode 100644
index 00000000..272cad05
Binary files /dev/null and b/test-data/configs/hapmap-chrX/ref-panels/X.PAR1.1000g.Phase1.v3.With.Parameter.Estimates.msav differ
diff --git a/test-data/configs/hapmap-chrX/ref-panels/X.PAR2.1000g.Phase1.v3.With.Parameter.Estimates.msav b/test-data/configs/hapmap-chrX/ref-panels/X.PAR2.1000g.Phase1.v3.With.Parameter.Estimates.msav
new file mode 100644
index 00000000..ecd532ec
Binary files /dev/null and b/test-data/configs/hapmap-chrX/ref-panels/X.PAR2.1000g.Phase1.v3.With.Parameter.Estimates.msav differ
diff --git a/test-data/configs/hapmap-chrX/ref-panels/X.nonPAR.1000g.Phase1.v3.With.Parameter.Estimates.msav b/test-data/configs/hapmap-chrX/ref-panels/X.nonPAR.1000g.Phase1.v3.With.Parameter.Estimates.msav
new file mode 100644
index 00000000..71f9a0c9
Binary files /dev/null and b/test-data/configs/hapmap-chrX/ref-panels/X.nonPAR.1000g.Phase1.v3.With.Parameter.Estimates.msav differ
diff --git a/test-data/configs/phylotree-chrMT/panels.txt b/test-data/configs/phylotree-chrMT/panels.txt
index 0677f49e..86aceed1 100644
--- a/test-data/configs/phylotree-chrMT/panels.txt
+++ b/test-data/configs/phylotree-chrMT/panels.txt
@@ -1,6 +1,6 @@
panels:
- id: phylotree
- hdfs: ref-panels/chrMT.phylotree17.m3vcf.gz
+ hdfs: ref-panels/chrMT.phylotree17.msav
legend: ref-panels/chrMT.phylotree17.legend.gz
mapEagle: ref-panels/genetic_map_hg19_withX.txt.gz
samples:
diff --git a/test-data/configs/phylotree-chrMT/ref-panels/chrMT.phylotree17.msav b/test-data/configs/phylotree-chrMT/ref-panels/chrMT.phylotree17.msav
new file mode 100644
index 00000000..3e9db6f4
Binary files /dev/null and b/test-data/configs/phylotree-chrMT/ref-panels/chrMT.phylotree17.msav differ
diff --git a/test-data/data/chr20-phased-hg38/chr20.R50.merged.1.330k.recode.small.hg38.vcf.gz b/test-data/data/chr20-phased-hg38/chr20.R50.merged.1.330k.recode.small.hg38.vcf.gz
index 48d22d5c..c946eff9 100644
Binary files a/test-data/data/chr20-phased-hg38/chr20.R50.merged.1.330k.recode.small.hg38.vcf.gz and b/test-data/data/chr20-phased-hg38/chr20.R50.merged.1.330k.recode.small.hg38.vcf.gz differ
diff --git a/test-data/data/chr20-phased/chr20.R50.merged.1.330k.recode.small.vcf.gz b/test-data/data/chr20-phased/chr20.R50.merged.1.330k.recode.small.vcf.gz
index 832668f3..61d81f4a 100644
Binary files a/test-data/data/chr20-phased/chr20.R50.merged.1.330k.recode.small.vcf.gz and b/test-data/data/chr20-phased/chr20.R50.merged.1.330k.recode.small.vcf.gz differ
diff --git a/test-data/data/chr20-phased/chr20.R50.merged.1.330k.recode.small.vcf.gz.tbi b/test-data/data/chr20-phased/chr20.R50.merged.1.330k.recode.small.vcf.gz.tbi
index 95fb4fd0..1d0eaf81 100644
Binary files a/test-data/data/chr20-phased/chr20.R50.merged.1.330k.recode.small.vcf.gz.tbi and b/test-data/data/chr20-phased/chr20.R50.merged.1.330k.recode.small.vcf.gz.tbi differ
diff --git a/test-data/data/chr20-unphased-hg38/chr20.R50.merged.1.330k.recode.unphased.small.hg38.vcf.gz b/test-data/data/chr20-unphased-hg38/chr20.R50.merged.1.330k.recode.unphased.small.hg38.vcf.gz
index 75ff749b..58bd6476 100644
Binary files a/test-data/data/chr20-unphased-hg38/chr20.R50.merged.1.330k.recode.unphased.small.hg38.vcf.gz and b/test-data/data/chr20-unphased-hg38/chr20.R50.merged.1.330k.recode.unphased.small.hg38.vcf.gz differ
diff --git a/test-data/data/chr20-unphased/chr20.R50.merged.1.330k.recode.unphased.small.vcf.gz b/test-data/data/chr20-unphased/chr20.R50.merged.1.330k.recode.unphased.small.vcf.gz
index 6d728f34..21368b85 100644
Binary files a/test-data/data/chr20-unphased/chr20.R50.merged.1.330k.recode.unphased.small.vcf.gz and b/test-data/data/chr20-unphased/chr20.R50.merged.1.330k.recode.unphased.small.vcf.gz differ
diff --git a/test-data/data/chr20-unphased/chr20.R50.merged.1.330k.recode.unphased.small.vcf.gz.tbi b/test-data/data/chr20-unphased/chr20.R50.merged.1.330k.recode.unphased.small.vcf.gz.tbi
index 24eeeeea..6f0a5cc5 100644
Binary files a/test-data/data/chr20-unphased/chr20.R50.merged.1.330k.recode.unphased.small.vcf.gz.tbi and b/test-data/data/chr20-unphased/chr20.R50.merged.1.330k.recode.unphased.small.vcf.gz.tbi differ