diff --git a/files/bin/Minimac4 b/files/bin/Minimac4 deleted file mode 100755 index a9630714..00000000 Binary files a/files/bin/Minimac4 and /dev/null differ diff --git a/files/bin/minimac4 b/files/bin/minimac4 new file mode 100755 index 00000000..0ce2bec2 Binary files /dev/null and b/files/bin/minimac4 differ diff --git a/files/imputationserver-beagle.yaml b/files/imputationserver-beagle.yaml index 07302987..1774f954 100644 --- a/files/imputationserver-beagle.yaml +++ b/files/imputationserver-beagle.yaml @@ -1,7 +1,7 @@ id: imputationserver-beagle name: Genotype Imputation supporting Beagle (Minimac4) description: This is the new Michigan Imputation Server Pipeline using Minimac4. Documentation can be found here.

If your input data is GRCh37/hg19 please ensure chromosomes are encoded without prefix (e.g. 20).
If your input data is GRCh38hg38 please ensure chromosomes are encoded with prefix 'chr' (e.g. chr20). -version: 1.7.5 +version: 1.8.0-beta4 website: https://imputationserver.readthedocs.io category: diff --git a/files/imputationserver-hla.yaml b/files/imputationserver-hla.yaml index 6aff3034..737dda15 100644 --- a/files/imputationserver-hla.yaml +++ b/files/imputationserver-hla.yaml @@ -1,7 +1,7 @@ id: imputationserver-hla name: Genotype Imputation HLA (Minimac4) description: This is the new Michigan Imputation Server Pipeline using Minimac4. Documentation can be found here.

If your input data is GRCh37/hg19 please ensure chromosomes are encoded without prefix (e.g. 20).
If your input data is GRCh38hg38 please ensure chromosomes are encoded with prefix 'chr' (e.g. chr20). -version: 1.7.5 +version: 1.8.0-beta4 website: https://imputationserver.readthedocs.io category: diff --git a/files/imputationserver-pgs.yaml b/files/imputationserver-pgs.yaml index eb9b66a9..3a5436ea 100644 --- a/files/imputationserver-pgs.yaml +++ b/files/imputationserver-pgs.yaml @@ -1,7 +1,7 @@ id: imputationserver-pgs name: Genotype Imputation (PGS Calc Integration) description: This is the new Michigan Imputation Server Pipeline using Minimac4. Documentation can be found here.

If your input data is GRCh37/hg19 please ensure chromosomes are encoded without prefix (e.g. 20).
If your input data is GRCh38hg38 please ensure chromosomes are encoded with prefix 'chr' (e.g. chr20). -version: 1.7.5 +version: 1.8.0-beta4 website: https://imputationserver.readthedocs.io category: diff --git a/files/minimac4.yaml b/files/minimac4.yaml index 617c57fc..ff365b63 100644 --- a/files/minimac4.yaml +++ b/files/minimac4.yaml @@ -1,7 +1,7 @@ id: imputationserver name: Genotype Imputation (Minimac4) -description: This is the new Michigan Imputation Server Pipeline using Minimac4. Documentation can be found here.

If your input data is GRCh37/hg19 please ensure chromosomes are encoded without prefix (e.g. 20).
If your input data is GRCh38hg38 please ensure chromosomes are encoded with prefix 'chr' (e.g. chr20). -version: 1.7.5 +description: This is the new Michigan Imputation Server Pipeline using Minimac4. Documentation can be found here.

If your input data is GRCh37/hg19 please ensure chromosomes are encoded without prefix (e.g. 20).
If your input data is GRCh38hg38 please ensure chromosomes are encoded with prefix 'chr' (e.g. chr20). +version: 1.8.0-beta4 website: https://imputationserver.readthedocs.io category: diff --git a/pom.xml b/pom.xml index b70589b6..b8746e20 100644 --- a/pom.xml +++ b/pom.xml @@ -5,11 +5,8 @@ genepi imputationserver - - 1.7.5 - + 1.8.0-beta4 jar - University of Michigan Imputation Server http://maven.apache.org diff --git a/src/main/java/genepi/imputationserver/steps/imputation/ImputationMapper.java b/src/main/java/genepi/imputationserver/steps/imputation/ImputationMapper.java index 35c26be6..806de4cb 100644 --- a/src/main/java/genepi/imputationserver/steps/imputation/ImputationMapper.java +++ b/src/main/java/genepi/imputationserver/steps/imputation/ImputationMapper.java @@ -117,7 +117,6 @@ protected void setup(Context context) throws IOException, InterruptedException { String referenceName = parameters.get(ImputationJob.REF_PANEL); imputationParameters.setPhasing(phasingEngine); imputationParameters.setReferencePanelName(referenceName); - imputationParameters.setMinR2(minR2); imputationParameters.setPhasingRequired(phasingRequired); // get cached files @@ -153,11 +152,11 @@ protected void setup(Context context) throws IOException, InterruptedException { mapBeagleFilename = cache.getFile(mapBeagle); } - String minimacCommand = cache.getFile("Minimac4"); + String minimacCommand = cache.getFile("minimac4"); String eagleCommand = cache.getFile("eagle"); String beagleCommand = cache.getFile("beagle.jar"); String tabixCommand = cache.getFile("tabix"); - + // create temp directory DefaultPreferenceStore store = new DefaultPreferenceStore(context.getConfiguration()); folder = store.getString("minimac.tmp"); @@ -182,9 +181,9 @@ protected void setup(Context context) throws IOException, InterruptedException { String formatFile = cache.getFile(name + ".format"); if (formatFile != null) { // create symbolic link to format file. they have to be in the same folder - Files.createSymbolicLink(Paths.get(FileUtil.path(folder,name)), Paths.get(localFilename)); - Files.createSymbolicLink(Paths.get(FileUtil.path(folder,name+".format")), Paths.get(formatFile)); - scores[i] = FileUtil.path(folder,name); + Files.createSymbolicLink(Paths.get(FileUtil.path(folder, name)), Paths.get(localFilename)); + Files.createSymbolicLink(Paths.get(FileUtil.path(folder, name + ".format")), Paths.get(formatFile)); + scores[i] = FileUtil.path(folder, name); } } System.out.println("Loaded " + scores.length + " score files from distributed cache"); @@ -212,6 +211,7 @@ protected void setup(Context context) throws IOException, InterruptedException { int phasingWindow = Integer.parseInt(store.getString("phasing.window")); int window = Integer.parseInt(store.getString("minimac.window")); + int decay = Integer.parseInt(store.getString("minimac.decay")); String minimacParams = store.getString("minimac.command"); String eagleParams = store.getString("eagle.command"); @@ -226,6 +226,8 @@ protected void setup(Context context) throws IOException, InterruptedException { pipeline.setPhasingWindow(phasingWindow); pipeline.setBuild(build); pipeline.setMinimacWindow(window); + pipeline.setMinR2(minR2); + pipeline.setDecay(decay); } @@ -289,16 +291,8 @@ public void map(LongWritable key, Text value, Context context) throws IOExceptio statistics.setImportTime((end - start) / 1000); } else { - if (imputationParameters.getMinR2() > 0) { - // filter by r2 - String filteredInfoFilename = outputChunk.getInfoFilename() + "_filtered"; - filterInfoFileByR2(outputChunk.getInfoFilename(), filteredInfoFilename, - imputationParameters.getMinR2()); - HdfsUtil.put(filteredInfoFilename, HdfsUtil.path(output, chunk + ".info")); - - } else { - HdfsUtil.put(outputChunk.getInfoFilename(), HdfsUtil.path(output, chunk + ".info")); - } + + HdfsUtil.put(outputChunk.getInfoFilename(), HdfsUtil.path(output, chunk + ".info")); long start = System.currentTimeMillis(); diff --git a/src/main/java/genepi/imputationserver/steps/imputation/ImputationPipeline.java b/src/main/java/genepi/imputationserver/steps/imputation/ImputationPipeline.java index 14f7da24..3402b0a7 100644 --- a/src/main/java/genepi/imputationserver/steps/imputation/ImputationPipeline.java +++ b/src/main/java/genepi/imputationserver/steps/imputation/ImputationPipeline.java @@ -24,9 +24,10 @@ public class ImputationPipeline { - public static final String PIPELINE_VERSION = "michigan-imputationserver-1.7.5"; - public static final String IMPUTATION_VERSION = "minimac4-1.0.2"; + public static final String PIPELINE_VERSION = "michigan-imputationserver-1.8.0-beta4"; + + public static final String IMPUTATION_VERSION = "minimac-v4.1.6"; public static final String BEAGLE_VERSION = "beagle.18May20.d20.jar"; @@ -48,8 +49,12 @@ public class ImputationPipeline { private int minimacWindow; + private int minimacDecay; + private int phasingWindow; + private double minR2; + private String refFilename; private String mapMinimac; @@ -288,6 +293,16 @@ public boolean phaseWithBeagle(VcfChunk input, VcfChunkOutput output, String ref public boolean imputeVCF(VcfChunkOutput output) throws InterruptedException, IOException, CompilationFailedException { + // create tabix index + Command tabix = new Command(tabixCommand); + tabix.setSilent(false); + tabix.setParams(output.getPhasedVcfFilename()); + System.out.println("Command: " + tabix.getExecutedCommand()); + if (tabix.execute() != 0) { + System.out.println("Error during index creation: " + tabix.getStdOut()); + return false; + } + String chr = ""; if (build.equals("hg38")) { chr = "chr" + output.getChromosome(); @@ -306,6 +321,8 @@ public boolean imputeVCF(VcfChunkOutput output) binding.put("chr", chr); binding.put("unphased", false); binding.put("mapMinimac", mapMinimac); + binding.put("minR2", minR2); + binding.put("decay", minimacDecay); String[] params = createParams(minimacParams, binding); @@ -345,11 +362,11 @@ private boolean runPgsCalc(VcfChunkOutput output) { task.setVcfFilename(output.getImputedVcfFilename()); task.setChunk(scoreChunk); task.setRiskScoreFilenames(scores); - - //TODO: enable fix-strand-flips - //task.setFixStrandFlips(true); - //task.setRemoveAmbiguous(true); - + + // TODO: enable fix-strand-flips + // task.setFixStrandFlips(true); + // task.setRemoveAmbiguous(true); + for (String file : scores) { String autoFormat = file + ".format"; if (new File(autoFormat).exists()) { @@ -474,4 +491,13 @@ public void setMapBeagleFilename(String mapBeagleFilename) { this.mapBeagleFilename = mapBeagleFilename; } + public void setMinR2(double minR2) { + this.minR2 = minR2; + } + + public void setDecay(int decay) { + this.minimacDecay = decay; + + } + } diff --git a/src/main/java/genepi/imputationserver/util/DefaultPreferenceStore.java b/src/main/java/genepi/imputationserver/util/DefaultPreferenceStore.java index 01852193..5d086e91 100644 --- a/src/main/java/genepi/imputationserver/util/DefaultPreferenceStore.java +++ b/src/main/java/genepi/imputationserver/util/DefaultPreferenceStore.java @@ -71,11 +71,12 @@ public static Properties defaults() { defaults.setProperty("chunksize", "20000000"); defaults.setProperty("phasing.window", "5000000"); defaults.setProperty("minimac.window", "500000"); + defaults.setProperty("minimac.decay", "0"); defaults.setProperty("minimac.sendmail", "no"); defaults.setProperty("server.url", "https://imputationserver.sph.umich.edu"); defaults.setProperty("minimac.tmp", "/tmp"); defaults.setProperty("minimac.command", - "--refHaps ${ref} --haps ${vcf} --start ${start} --end ${end} --window ${window} --prefix ${prefix} --chr ${chr} --cpus 1 --noPhoneHome --format GT,DS,GP --allTypedSites --meta --minRatio 0.00001 ${chr =='MT' ? '--myChromosome ' + chr : ''} ${unphased ? '--unphasedOutput' : ''} ${mapMinimac != null ? '--referenceEstimates --map ' + mapMinimac : ''}"); + "--region ${chr}:${start}-${end} --overlap ${window} --output ${prefix}.dose.vcf.gz --output-format vcf.gz --format GT,DS,GP,HDS --min-ratio 0.00001 --decay ${decay} --all-typed-sites --sites ${prefix}.info --empirical-output ${prefix}.empiricalDose.vcf.gz ${minR2 != 0 ? '--min-r2 ' + minR2 : ''} ${mapMinimac != null ? '--map ' + mapMinimac : ''} ${ref} ${vcf}"); defaults.setProperty("eagle.command", "--vcfRef ${ref} --vcfTarget ${vcf} --geneticMapFile ${map} --outPrefix ${prefix} --bpStart ${start} --bpEnd ${end} --allowRefAltSwap --vcfOutFormat z --keepMissingPloidyX"); defaults.setProperty("beagle.command", diff --git a/src/main/java/genepi/imputationserver/util/FileMerger.java b/src/main/java/genepi/imputationserver/util/FileMerger.java index d78413af..7acdafe3 100644 --- a/src/main/java/genepi/imputationserver/util/FileMerger.java +++ b/src/main/java/genepi/imputationserver/util/FileMerger.java @@ -24,35 +24,18 @@ public static void splitIntoHeaderAndData(String input, OutputStream outHeader, while (reader.next()) { String line = reader.get(); + if (!line.startsWith("#")) { - if (parameters.getMinR2() > 0) { - // rsq set. parse line and check rsq - String info = parseInfo(line); - if (info != null) { - boolean keep = keepVcfLineByInfo(info, R2_FLAG, parameters.getMinR2()); - if (keep) { - outData.write(line.getBytes()); - outData.write("\n".getBytes()); - } - } else { - // no valid vcf line. keep line - outData.write(line.getBytes()); - outData.write("\n".getBytes()); - } - } else { - // no rsq set. keep all lines without parsing - outData.write(line.getBytes()); - outData.write("\n".getBytes()); - } + outData.write(line.getBytes()); + outData.write("\n".getBytes()); } else { // write filter command before ID List starting with #CHROM if (line.startsWith("#CHROM")) { - outHeader.write(("##pipeline=" + ImputationPipeline.PIPELINE_VERSION + "\n").getBytes()); - outHeader.write(("##imputation=" + ImputationPipeline.IMPUTATION_VERSION + "\n").getBytes()); - outHeader.write(("##phasing=" + parameters.getPhasingMethod() + "\n").getBytes()); - outHeader.write(("##panel=" + parameters.getReferencePanelName() + "\n").getBytes()); - outHeader.write(("##r2Filter=" + parameters.getMinR2() + "\n").getBytes()); + outHeader.write(("##mis_pipeline=" + ImputationPipeline.PIPELINE_VERSION + "\n").getBytes()); + outHeader.write(("##mis_imputation=" + ImputationPipeline.IMPUTATION_VERSION + "\n").getBytes()); + outHeader.write(("##mis_phasing=" + parameters.getPhasingMethod() + "\n").getBytes()); + outHeader.write(("##mis_panel=" + parameters.getReferencePanelName() + "\n").getBytes()); } // write all headers except minimac4 command @@ -85,9 +68,9 @@ public static void splitPhasedIntoHeaderAndData(String input, OutputStream outHe // write filter command before ID List starting with #CHROM if (line.startsWith("#CHROM")) { - outHeader.write(("##pipeline=" + ImputationPipeline.PIPELINE_VERSION + "\n").getBytes()); - outHeader.write(("##phasing=" + parameters.getPhasingMethod() + "\n").getBytes()); - outHeader.write(("##panel=" + parameters.getReferencePanelName() + "\n").getBytes()); + outHeader.write(("##mis_pipeline=" + ImputationPipeline.PIPELINE_VERSION + "\n").getBytes()); + outHeader.write(("##mis_phasing=" + parameters.getPhasingMethod() + "\n").getBytes()); + outHeader.write(("##mis_panel=" + parameters.getReferencePanelName() + "\n").getBytes()); } // write all headers except eagle command @@ -129,24 +112,30 @@ public static void mergeAndGzInfo(List hdfs, String local) throws IOExce LineReader reader = new LineReader(in); - boolean header = true; + boolean lineBreak = false; while (reader.next()) { String line = reader.get(); - if (header) { + if (line.startsWith("#")) { + if (firstFile) { + + if (lineBreak) { + out.write('\n'); + } out.write(line.toString().getBytes()); - firstFile = false; + lineBreak = true; } - header = false; } else { out.write('\n'); out.write(line.toString().getBytes()); } } + firstFile = false; + in.close(); } diff --git a/src/main/java/genepi/imputationserver/util/ImputationParameters.java b/src/main/java/genepi/imputationserver/util/ImputationParameters.java index 24116c5a..671efb6b 100644 --- a/src/main/java/genepi/imputationserver/util/ImputationParameters.java +++ b/src/main/java/genepi/imputationserver/util/ImputationParameters.java @@ -6,8 +6,6 @@ public class ImputationParameters { private String referencePanelName; - private double minR2; - private String phasing; private boolean phasingRequired; @@ -20,14 +18,6 @@ public void setReferencePanelName(String referencePanelName) { this.referencePanelName = referencePanelName; } - public double getMinR2() { - return minR2; - } - - public void setMinR2(double minR2) { - this.minR2 = minR2; - } - public String getPhasing() { return phasing; } diff --git a/src/test/java/genepi/imputationserver/steps/ImputationTest.java b/src/test/java/genepi/imputationserver/steps/ImputationTest.java index 30972b90..5bb7f60e 100644 --- a/src/test/java/genepi/imputationserver/steps/ImputationTest.java +++ b/src/test/java/genepi/imputationserver/steps/ImputationTest.java @@ -315,7 +315,7 @@ public void testPipelineWithEagle() throws IOException, ZipException { assertEquals(true, file.isPhased()); assertEquals(TOTAL_REFPANEL_CHR20_B37, file.getNoSnps()); - int snpInInfo = getLineCount("test-data/tmp/chr20.info.gz") - 1; + int snpInInfo = getLineCount("test-data/tmp/chr20.info.gz"); assertEquals(snpInInfo, file.getNoSnps()); FileUtil.deleteDirectory("test-data/tmp"); @@ -358,10 +358,10 @@ public void testValidatePanelWithEagle() throws IOException, ZipException { VCFFileReader reader = new VCFFileReader(new File("test-data/tmp/chr20.dose.vcf.gz"), false); VCFHeader header = reader.getFileHeader(); - assertEquals("hapmap2", header.getOtherHeaderLine("panel").getValue()); - assertEquals(ImputationPipeline.EAGLE_VERSION, header.getOtherHeaderLine("phasing").getValue()); - assertEquals(ImputationPipeline.IMPUTATION_VERSION, header.getOtherHeaderLine("imputation").getValue()); - assertEquals(ImputationPipeline.PIPELINE_VERSION, header.getOtherHeaderLine("pipeline").getValue()); + assertEquals("hapmap2", header.getOtherHeaderLine("mis_panel").getValue()); + assertEquals(ImputationPipeline.EAGLE_VERSION, header.getOtherHeaderLine("mis_phasing").getValue()); + assertEquals(ImputationPipeline.IMPUTATION_VERSION, header.getOtherHeaderLine("mis_imputation").getValue()); + assertEquals(ImputationPipeline.PIPELINE_VERSION, header.getOtherHeaderLine("mis_pipeline").getValue()); FileUtil.deleteDirectory("test-data/tmp"); @@ -404,10 +404,10 @@ public void testValidatePanelWithBeagle() throws IOException, ZipException { VCFFileReader reader = new VCFFileReader(new File("test-data/tmp/chr20.dose.vcf.gz"), false); VCFHeader header = reader.getFileHeader(); - assertEquals("hapmap2", header.getOtherHeaderLine("panel").getValue()); - assertEquals(ImputationPipeline.BEAGLE_VERSION, header.getOtherHeaderLine("phasing").getValue()); - assertEquals(ImputationPipeline.IMPUTATION_VERSION, header.getOtherHeaderLine("imputation").getValue()); - assertEquals(ImputationPipeline.PIPELINE_VERSION, header.getOtherHeaderLine("pipeline").getValue()); + assertEquals("hapmap2", header.getOtherHeaderLine("mis_panel").getValue()); + assertEquals(ImputationPipeline.BEAGLE_VERSION, header.getOtherHeaderLine("mis_phasing").getValue()); + assertEquals(ImputationPipeline.IMPUTATION_VERSION, header.getOtherHeaderLine("mis_imputation").getValue()); + assertEquals(ImputationPipeline.PIPELINE_VERSION, header.getOtherHeaderLine("mis_pipeline").getValue()); FileUtil.deleteDirectory("test-data/tmp"); @@ -451,9 +451,9 @@ public void testValidatePanelPhasingOnly() throws IOException, ZipException { VCFFileReader reader = new VCFFileReader(new File("test-data/tmp/chr20.phased.vcf.gz"), false); VCFHeader header = reader.getFileHeader(); - assertEquals("hapmap2", header.getOtherHeaderLine("panel").getValue()); - assertEquals(ImputationPipeline.EAGLE_VERSION, header.getOtherHeaderLine("phasing").getValue()); - assertEquals(ImputationPipeline.PIPELINE_VERSION, header.getOtherHeaderLine("pipeline").getValue()); + assertEquals("hapmap2", header.getOtherHeaderLine("mis_panel").getValue()); + assertEquals(ImputationPipeline.EAGLE_VERSION, header.getOtherHeaderLine("mis_phasing").getValue()); + assertEquals(ImputationPipeline.PIPELINE_VERSION, header.getOtherHeaderLine("mis_pipeline").getValue()); FileUtil.deleteDirectory("test-data/tmp"); @@ -497,9 +497,9 @@ public void testValidatePanelPhasedInput() throws IOException, ZipException { VCFFileReader reader = new VCFFileReader(new File("test-data/tmp/chr20.dose.vcf.gz"), false); VCFHeader header = reader.getFileHeader(); - assertEquals("hapmap2", header.getOtherHeaderLine("panel").getValue()); - assertEquals("n/a", header.getOtherHeaderLine("phasing").getValue()); - assertEquals(ImputationPipeline.PIPELINE_VERSION, header.getOtherHeaderLine("pipeline").getValue()); + assertEquals("hapmap2", header.getOtherHeaderLine("mis_panel").getValue()); + assertEquals("n/a", header.getOtherHeaderLine("mis_phasing").getValue()); + assertEquals(ImputationPipeline.PIPELINE_VERSION, header.getOtherHeaderLine("mis_pipeline").getValue()); // FileUtil.deleteDirectory("test-data/tmp"); @@ -611,7 +611,7 @@ public void testPipelineWithEagleAndScores() throws IOException, ZipException { assertEquals(true, file.isPhased()); assertEquals(TOTAL_REFPANEL_CHR20_B37, file.getNoSnps()); - int snpInInfo = getLineCount("test-data/tmp/chr20.info.gz") - 1; + int snpInInfo = getLineCount("test-data/tmp/chr20.info.gz"); assertEquals(snpInInfo, file.getNoSnps()); String[] args = { "test-data/tmp/chr20.dose.vcf.gz", "--ref", "PGS000018,PGS000027", "--out", @@ -704,7 +704,7 @@ public void testPipelineWithEagleAndScoresAndFormat() throws IOException, ZipExc assertEquals(true, file.isPhased()); assertEquals(TOTAL_REFPANEL_CHR20_B37, file.getNoSnps()); - int snpInInfo = getLineCount("test-data/tmp/chr20.info.gz") - 1; + int snpInInfo = getLineCount("test-data/tmp/chr20.info.gz"); assertEquals(snpInInfo, file.getNoSnps()); String[] args = { "test-data/tmp/chr20.dose.vcf.gz", "--ref", score1, "--out", "test-data/tmp/expected.txt" }; @@ -962,7 +962,7 @@ public void testPipelineWithEagleAndR2Filter() throws IOException, ZipException // TODO: update SNPS_WITH_R2_BELOW_05 assertTrue(TOTAL_REFPANEL_CHR20_B37 > file.getNoSnps()); - int snpInInfo = getLineCount("test-data/tmp/chr20.info.gz") - 1; + int snpInInfo = getLineCount("test-data/tmp/chr20.info.gz"); assertEquals(snpInInfo, file.getNoSnps()); FileUtil.deleteDirectory("test-data/tmp"); @@ -973,7 +973,12 @@ private int getLineCount(String filename) throws IOException { LineReader reader = new LineReader(filename); int lines = 0; while (reader.next()) { - lines++; + + String line = reader.get(); + { + if (!line.startsWith("#")) + lines++; + } } return lines; } @@ -1001,12 +1006,12 @@ private boolean checkSortPositionInfo(String filename) throws IOException { String line = reader.get(); - if (!line.startsWith("SNP")) { - String snp = line.split("\t")[0]; - if (Integer.valueOf(snp.split(":")[1]) <= pos) { + if (!line.startsWith("#")) { + String snp = line.split("\\s+")[1]; + if (Integer.valueOf(snp) <= pos) { return false; } - pos = Integer.valueOf(snp.split(":")[1]); + pos = Integer.valueOf(snp); } } @@ -1081,7 +1086,7 @@ public void testCompareInfoAndDosageSize() throws IOException, ZipException { // subtract header int infoCount = getLineCount("test-data/tmp/chr20.info.gz"); - assertEquals(infoCount - 1, file.getNoSnps()); + assertEquals(infoCount, file.getNoSnps()); FileUtil.deleteDirectory("test-data/tmp"); } diff --git a/test-data/configs/beagle/panels.txt b/test-data/configs/beagle/panels.txt index 9099b68c..e0c6f302 100644 --- a/test-data/configs/beagle/panels.txt +++ b/test-data/configs/beagle/panels.txt @@ -1,7 +1,7 @@ panels: - id: hapmap2 - hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.m3vcf.gz + hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.msav legend: ref-panels/hapmap_r22.chr$chr.CEU.hg19_impute.legend.gz refBeagle: ref-panels/hapmap_r22.chr$chr.CEU.hg19.recode.bref3 mapBeagle: ref-panels/plink.chr$chr.GRCh37.map diff --git a/test-data/configs/beagle/ref-panels/hapmap_r22.chr20.CEU.hg19.msav b/test-data/configs/beagle/ref-panels/hapmap_r22.chr20.CEU.hg19.msav new file mode 100644 index 00000000..88f63812 Binary files /dev/null and b/test-data/configs/beagle/ref-panels/hapmap_r22.chr20.CEU.hg19.msav differ diff --git a/test-data/configs/hapmap-3chr/panels.txt b/test-data/configs/hapmap-3chr/panels.txt index 6d7461e6..4eea42e8 100644 --- a/test-data/configs/hapmap-3chr/panels.txt +++ b/test-data/configs/hapmap-3chr/panels.txt @@ -1,7 +1,7 @@ panels: - id: hapmap2 - hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.m3vcf.gz + hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.msav legend: ref-panels/hapmap_r22.chr$chr.CEU.hg19_impute.legend.gz mapEagle: ref-panels/genetic_map_hg19_chr1.txt refEagle: ref-panels/hapmap_r22.eagle/hapmap_r22.chr$chr.CEU.hg19.recode.bcf @@ -13,7 +13,7 @@ panels: mixed: Mixed - id: hapmap2-qcfilter-strandflips - hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.m3vcf.gz + hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.msav legend: ref-panels/hapmap_r22.chr$chr.CEU.hg19_impute.legend.gz mapEagle: ref-panels/genetic_map_hg19_chr1.txt refEagle: ref-panels/hapmap_r22.eagle/hapmap_r22.chr$chr.CEU.hg19.recode.bcf @@ -27,7 +27,7 @@ panels: strandFlips: -1 - id: hapmap2-qcfilter-ref-overlap - hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.m3vcf.gz + hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.msav legend: ref-panels/hapmap_r22.chr$chr.CEU.hg19_impute.legend.gz mapEagle: ref-panels/genetic_map_hg19_chr1.txt refEagle: ref-panels/hapmap_r22.eagle/hapmap_r22.chr$chr.CEU.hg19.recode.bcf @@ -43,7 +43,7 @@ panels: minSnps: 1000 - id: hapmap2-qcfilter-min-snps - hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.m3vcf.gz + hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.msav legend: ref-panels/hapmap_r22.chr$chr.CEU.hg19_impute.legend.gz mapEagle: ref-panels/genetic_map_hg19_chr1.txt refEagle: ref-panels/hapmap_r22.eagle/hapmap_r22.chr$chr.CEU.hg19.recode.bcf @@ -58,7 +58,7 @@ panels: minSnps: 1000 - id: hapmap2-qcfilter-low-callrate - hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.m3vcf.gz + hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.msav legend: ref-panels/hapmap_r22.chr$chr.CEU.hg19_impute.legend.gz mapEagle: ref-panels/genetic_map_hg19_chr1.txt refEagle: ref-panels/hapmap_r22.eagle/hapmap_r22.chr$chr.CEU.hg19.recode.bcf diff --git a/test-data/configs/hapmap-3chr/ref-panels/hapmap_r22.chr1.CEU.hg19.msav b/test-data/configs/hapmap-3chr/ref-panels/hapmap_r22.chr1.CEU.hg19.msav new file mode 100644 index 00000000..fcf93795 Binary files /dev/null and b/test-data/configs/hapmap-3chr/ref-panels/hapmap_r22.chr1.CEU.hg19.msav differ diff --git a/test-data/configs/hapmap-3chr/ref-panels/hapmap_r22.chr2.CEU.hg19.msav b/test-data/configs/hapmap-3chr/ref-panels/hapmap_r22.chr2.CEU.hg19.msav new file mode 100644 index 00000000..01774555 Binary files /dev/null and b/test-data/configs/hapmap-3chr/ref-panels/hapmap_r22.chr2.CEU.hg19.msav differ diff --git a/test-data/configs/hapmap-3chr/ref-panels/hapmap_r22.chr3.CEU.hg19.msav b/test-data/configs/hapmap-3chr/ref-panels/hapmap_r22.chr3.CEU.hg19.msav new file mode 100644 index 00000000..a9d58b4b Binary files /dev/null and b/test-data/configs/hapmap-3chr/ref-panels/hapmap_r22.chr3.CEU.hg19.msav differ diff --git a/test-data/configs/hapmap-chr1/panels.txt b/test-data/configs/hapmap-chr1/panels.txt index c8c8f642..a6552542 100644 --- a/test-data/configs/hapmap-chr1/panels.txt +++ b/test-data/configs/hapmap-chr1/panels.txt @@ -1,7 +1,7 @@ panels: - id: hapmap2 - hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.m3vcf.gz + hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.msav legend: ref-panels/hapmap_r22.chr$chr.CEU.hg19_impute.legend.gz mapEagle: ref-panels/genetic_map_hg19_chr1.txt refEagle: ref-panels/hapmap_r22.chr$chr.CEU.hg19.recode.bcf @@ -13,7 +13,7 @@ panels: mixed: Mixed - id: hrc-fake - hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.m3vcf.gz + hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.msav legend: ref-panels/hapmap_r22.chr$chr.CEU.hg19_impute.legend.gz mapEagle: ref-panels/genetic_map_hg19_chr1.txt refEagle: ref-panels/hapmap_r22.chr$chr.CEU.hg19.recode.bcf @@ -25,7 +25,7 @@ panels: mixed: Mixed - id: phase3-fake - hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.m3vcf.gz + hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.msav legend: ref-panels/hapmap_r22.chr$chr.CEU.hg19_impute.legend.gz mapEagle: ref-panels/genetic_map_hg19_chr1.txt refEagle: ref-panels/hapmap_r22.chr$chr.CEU.hg19.recode.bcf @@ -45,7 +45,7 @@ panels: mixed: Mixed - id: TOPMedfreeze6-fake - hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.m3vcf.gz + hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.msav legend: ref-panels/hapmap_r22.chr$chr.CEU.hg19_impute.legend.gz mapEagle: ref-panels/genetic_map_hg19_chr1.txt refEagle: ref-panels/hapmap_r22.chr$chr.CEU.hg19.recode.bcf @@ -66,7 +66,7 @@ panels: - id: hapmap2-region-simple - hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.m3vcf.gz + hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.msav legend: ref-panels/hapmap_r22.chr$chr.CEU.hg19_impute.legend.gz mapEagle: ref-panels/genetic_map_hg19_chr1.txt refEagle: ref-panels/hapmap_r22.eagle/hapmap_r22.chr$chr.CEU.hg19.recode.bcf @@ -79,7 +79,7 @@ panels: range: 1:565111-752566 - id: hapmap2-region-complex - hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.m3vcf.gz + hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.msav legend: ref-panels/hapmap_r22.chr$chr.CEU.hg19_impute.legend.gz mapEagle: ref-panels/genetic_map_hg19_chr1.txt refEagle: ref-panels/hapmap_r22.eagle/hapmap_r22.chr$chr.CEU.hg19.recode.bcf diff --git a/test-data/configs/hapmap-chr1/ref-panels/hapmap_r22.chr1.CEU.hg19.msav b/test-data/configs/hapmap-chr1/ref-panels/hapmap_r22.chr1.CEU.hg19.msav new file mode 100644 index 00000000..62a915f4 Binary files /dev/null and b/test-data/configs/hapmap-chr1/ref-panels/hapmap_r22.chr1.CEU.hg19.msav differ diff --git a/test-data/configs/hapmap-chr20-hg38/panels.txt b/test-data/configs/hapmap-chr20-hg38/panels.txt index 1bf29b2d..7dc3cae8 100644 --- a/test-data/configs/hapmap-chr20-hg38/panels.txt +++ b/test-data/configs/hapmap-chr20-hg38/panels.txt @@ -1,7 +1,7 @@ panels: - id: hapmap2 - hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg38.m3vcf.gz + hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg38.msav legend: ref-panels/hapmap_r22.chr$chr.CEU.hg38_impute.legend.gz mapEagle: ref-panels/genetic_map_hg38_withX.txt.gz refEagle: ref-panels/hapmap_r22.chr$chr.CEU.hg38.bcf diff --git a/test-data/configs/hapmap-chr20-hg38/ref-panels/hapmap_r22.chr20.CEU.hg38.msav b/test-data/configs/hapmap-chr20-hg38/ref-panels/hapmap_r22.chr20.CEU.hg38.msav new file mode 100644 index 00000000..12ca8d65 Binary files /dev/null and b/test-data/configs/hapmap-chr20-hg38/ref-panels/hapmap_r22.chr20.CEU.hg38.msav differ diff --git a/test-data/configs/hapmap-chr20/panels.txt b/test-data/configs/hapmap-chr20/panels.txt index 5679b82b..710d07a5 100644 --- a/test-data/configs/hapmap-chr20/panels.txt +++ b/test-data/configs/hapmap-chr20/panels.txt @@ -1,7 +1,7 @@ panels: - id: hapmap2 - hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.m3vcf.gz + hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.msav legend: ref-panels/hapmap_r22.chr$chr.CEU.hg19_impute.legend.gz mapEagle: ref-panels/genetic_map_hg19_withX.txt.gz refEagle: ref-panels/hapmap_r22.chr$chr.CEU.hg19.recode.bcf diff --git a/test-data/configs/hapmap-chr20/ref-panels/hapmap_r22.chr20.CEU.hg19.msav b/test-data/configs/hapmap-chr20/ref-panels/hapmap_r22.chr20.CEU.hg19.msav new file mode 100644 index 00000000..8267f99d Binary files /dev/null and b/test-data/configs/hapmap-chr20/ref-panels/hapmap_r22.chr20.CEU.hg19.msav differ diff --git a/test-data/configs/hapmap-chrX-hg38/panels.txt b/test-data/configs/hapmap-chrX-hg38/panels.txt index 216b690f..f16cfc05 100644 --- a/test-data/configs/hapmap-chrX-hg38/panels.txt +++ b/test-data/configs/hapmap-chrX-hg38/panels.txt @@ -1,7 +1,7 @@ panels: - id: hapmap2 - hdfs: ref-panels/$chr.1000g.Phase1.v3.With.Parameter.Estimates.hg38.m3vcf.gz + hdfs: ref-panels/$chr.1000g.Phase1.v3.With.Parameter.Estimates.hg38.msav legend: ref-panels/1000g_chrX_impute.hg38.legend.gz mapEagle: ref-panels/genetic_map_hg38_withX.txt.gz refEagle: ref-panels/ALL.$chr.phase1_v3.snps_indels_svs.genotypes.all.noSingleton.recode.hg38.bcf diff --git a/test-data/configs/hapmap-chrX-hg38/ref-panels/X.PAR1.1000g.Phase1.v3.With.Parameter.Estimates.hg38.msav b/test-data/configs/hapmap-chrX-hg38/ref-panels/X.PAR1.1000g.Phase1.v3.With.Parameter.Estimates.hg38.msav new file mode 100644 index 00000000..37c4a3f3 Binary files /dev/null and b/test-data/configs/hapmap-chrX-hg38/ref-panels/X.PAR1.1000g.Phase1.v3.With.Parameter.Estimates.hg38.msav differ diff --git a/test-data/configs/hapmap-chrX-hg38/ref-panels/X.PAR2.1000g.Phase1.v3.With.Parameter.Estimates.hg38.msav b/test-data/configs/hapmap-chrX-hg38/ref-panels/X.PAR2.1000g.Phase1.v3.With.Parameter.Estimates.hg38.msav new file mode 100644 index 00000000..46cc3120 Binary files /dev/null and b/test-data/configs/hapmap-chrX-hg38/ref-panels/X.PAR2.1000g.Phase1.v3.With.Parameter.Estimates.hg38.msav differ diff --git a/test-data/configs/hapmap-chrX-hg38/ref-panels/X.nonPAR.1000g.Phase1.v3.With.Parameter.Estimates.hg38.msav b/test-data/configs/hapmap-chrX-hg38/ref-panels/X.nonPAR.1000g.Phase1.v3.With.Parameter.Estimates.hg38.msav new file mode 100644 index 00000000..3989f9e0 Binary files /dev/null and b/test-data/configs/hapmap-chrX-hg38/ref-panels/X.nonPAR.1000g.Phase1.v3.With.Parameter.Estimates.hg38.msav differ diff --git a/test-data/configs/hapmap-chrX/panels.txt b/test-data/configs/hapmap-chrX/panels.txt index 8769c7b2..cc3ea964 100644 --- a/test-data/configs/hapmap-chrX/panels.txt +++ b/test-data/configs/hapmap-chrX/panels.txt @@ -1,7 +1,7 @@ panels: - id: phase1 - hdfs: ref-panels/$chr.1000g.Phase1.v3.With.Parameter.Estimates.m3vcf.gz + hdfs: ref-panels/$chr.1000g.Phase1.v3.With.Parameter.Estimates.msav legend: ref-panels/1000g_chr$chr_impute.legend.gz mapEagle: ref-panels/genetic_map_hg19_withX.txt.gz refEagle: ref-panels/ALL.chr$chr.phase1_v3.snps_indels_svs.genotypes.all.noSingleton.recode.bcf diff --git a/test-data/configs/hapmap-chrX/ref-panels/X.PAR1.1000g.Phase1.v3.With.Parameter.Estimates.msav b/test-data/configs/hapmap-chrX/ref-panels/X.PAR1.1000g.Phase1.v3.With.Parameter.Estimates.msav new file mode 100644 index 00000000..272cad05 Binary files /dev/null and b/test-data/configs/hapmap-chrX/ref-panels/X.PAR1.1000g.Phase1.v3.With.Parameter.Estimates.msav differ diff --git a/test-data/configs/hapmap-chrX/ref-panels/X.PAR2.1000g.Phase1.v3.With.Parameter.Estimates.msav b/test-data/configs/hapmap-chrX/ref-panels/X.PAR2.1000g.Phase1.v3.With.Parameter.Estimates.msav new file mode 100644 index 00000000..ecd532ec Binary files /dev/null and b/test-data/configs/hapmap-chrX/ref-panels/X.PAR2.1000g.Phase1.v3.With.Parameter.Estimates.msav differ diff --git a/test-data/configs/hapmap-chrX/ref-panels/X.nonPAR.1000g.Phase1.v3.With.Parameter.Estimates.msav b/test-data/configs/hapmap-chrX/ref-panels/X.nonPAR.1000g.Phase1.v3.With.Parameter.Estimates.msav new file mode 100644 index 00000000..71f9a0c9 Binary files /dev/null and b/test-data/configs/hapmap-chrX/ref-panels/X.nonPAR.1000g.Phase1.v3.With.Parameter.Estimates.msav differ diff --git a/test-data/configs/phylotree-chrMT/panels.txt b/test-data/configs/phylotree-chrMT/panels.txt index 0677f49e..86aceed1 100644 --- a/test-data/configs/phylotree-chrMT/panels.txt +++ b/test-data/configs/phylotree-chrMT/panels.txt @@ -1,6 +1,6 @@ panels: - id: phylotree - hdfs: ref-panels/chrMT.phylotree17.m3vcf.gz + hdfs: ref-panels/chrMT.phylotree17.msav legend: ref-panels/chrMT.phylotree17.legend.gz mapEagle: ref-panels/genetic_map_hg19_withX.txt.gz samples: diff --git a/test-data/configs/phylotree-chrMT/ref-panels/chrMT.phylotree17.msav b/test-data/configs/phylotree-chrMT/ref-panels/chrMT.phylotree17.msav new file mode 100644 index 00000000..3e9db6f4 Binary files /dev/null and b/test-data/configs/phylotree-chrMT/ref-panels/chrMT.phylotree17.msav differ diff --git a/test-data/data/chr20-phased-hg38/chr20.R50.merged.1.330k.recode.small.hg38.vcf.gz b/test-data/data/chr20-phased-hg38/chr20.R50.merged.1.330k.recode.small.hg38.vcf.gz index 48d22d5c..c946eff9 100644 Binary files a/test-data/data/chr20-phased-hg38/chr20.R50.merged.1.330k.recode.small.hg38.vcf.gz and b/test-data/data/chr20-phased-hg38/chr20.R50.merged.1.330k.recode.small.hg38.vcf.gz differ diff --git a/test-data/data/chr20-phased/chr20.R50.merged.1.330k.recode.small.vcf.gz b/test-data/data/chr20-phased/chr20.R50.merged.1.330k.recode.small.vcf.gz index 832668f3..61d81f4a 100644 Binary files a/test-data/data/chr20-phased/chr20.R50.merged.1.330k.recode.small.vcf.gz and b/test-data/data/chr20-phased/chr20.R50.merged.1.330k.recode.small.vcf.gz differ diff --git a/test-data/data/chr20-phased/chr20.R50.merged.1.330k.recode.small.vcf.gz.tbi b/test-data/data/chr20-phased/chr20.R50.merged.1.330k.recode.small.vcf.gz.tbi index 95fb4fd0..1d0eaf81 100644 Binary files a/test-data/data/chr20-phased/chr20.R50.merged.1.330k.recode.small.vcf.gz.tbi and b/test-data/data/chr20-phased/chr20.R50.merged.1.330k.recode.small.vcf.gz.tbi differ diff --git a/test-data/data/chr20-unphased-hg38/chr20.R50.merged.1.330k.recode.unphased.small.hg38.vcf.gz b/test-data/data/chr20-unphased-hg38/chr20.R50.merged.1.330k.recode.unphased.small.hg38.vcf.gz index 75ff749b..58bd6476 100644 Binary files a/test-data/data/chr20-unphased-hg38/chr20.R50.merged.1.330k.recode.unphased.small.hg38.vcf.gz and b/test-data/data/chr20-unphased-hg38/chr20.R50.merged.1.330k.recode.unphased.small.hg38.vcf.gz differ diff --git a/test-data/data/chr20-unphased/chr20.R50.merged.1.330k.recode.unphased.small.vcf.gz b/test-data/data/chr20-unphased/chr20.R50.merged.1.330k.recode.unphased.small.vcf.gz index 6d728f34..21368b85 100644 Binary files a/test-data/data/chr20-unphased/chr20.R50.merged.1.330k.recode.unphased.small.vcf.gz and b/test-data/data/chr20-unphased/chr20.R50.merged.1.330k.recode.unphased.small.vcf.gz differ diff --git a/test-data/data/chr20-unphased/chr20.R50.merged.1.330k.recode.unphased.small.vcf.gz.tbi b/test-data/data/chr20-unphased/chr20.R50.merged.1.330k.recode.unphased.small.vcf.gz.tbi index 24eeeeea..6f0a5cc5 100644 Binary files a/test-data/data/chr20-unphased/chr20.R50.merged.1.330k.recode.unphased.small.vcf.gz.tbi and b/test-data/data/chr20-unphased/chr20.R50.merged.1.330k.recode.unphased.small.vcf.gz.tbi differ