Skip to content

Commit

Permalink
Merge pull request #1369 from broadinstitute/gg_GL-53_ContaminationQc…
Browse files Browse the repository at this point in the history
…Metric

CreateVerifyIDIntensityContaminationMetricsFile
  • Loading branch information
gbggrant authored Aug 2, 2019
2 parents 8d55ae2 + 8b3de6b commit 69cf11c
Show file tree
Hide file tree
Showing 4 changed files with 199 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
/*
* The MIT License
*
* Copyright (c) 2019 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/

package picard.arrays;

import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.util.IOUtil;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import picard.PicardException;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.StandardOptionDefinitions;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * A simple program to create a standard picard metrics file
 * from the output of VerifyIDIntensity.
 *
 * <p>The input is expected to consist of a header line ({@code ID %Mix LLK LLK0}),
 * a line of dashes, and then zero or more data lines, one per entry. Each data line
 * becomes one {@link VerifyIDIntensityContaminationMetrics} row in the file
 * {@code <OUTPUT>.verifyidintensity_metrics}. Any line that does not match the expected
 * layout (including a missing header or dashes line) causes a {@link PicardException}.
 */
@CommandLineProgramProperties(
        summary = CreateVerifyIDIntensityContaminationMetricsFile.USAGE_DETAILS,
        oneLineSummary = "Program to generate a picard metrics file from the output of the VerifyIDIntensity tool.",
        programGroup = picard.cmdline.programgroups.GenotypingArraysProgramGroup.class
)

@DocumentedFeature
public class CreateVerifyIDIntensityContaminationMetricsFile extends CommandLineProgram {
    static final String USAGE_DETAILS =
            "CreateVerifyIDIntensityContaminationMetricsFile takes an output file as generated by the VerifyIDIntensity tool and creates a picard metrics file. " +
                    "VerifyIDIntensity <a href='https://genome.sph.umich.edu/wiki/VerifyIDintensity'>VerifyIDintensity</a> is a tool for " +
                    "detecting and estimating sample contamination of Illumina genotyping array data." +
                    "<h4>Usage example:</h4>" +
                    "<pre>" +
                    "java -jar picard.jar CreateVerifyIDIntensityContaminationMetricsFile \\<br />" +
                    " INPUT=VerifyIDIntensityOutput.txt \\<br />" +
                    " OUTPUT=outputBaseName" +
                    "</pre>";

    /** Extension appended (after a '.') to OUTPUT to form the name of the metrics file. */
    public static final String FILE_EXTENSION = "verifyidintensity_metrics";

    // The patterns are immutable and thread-safe, so compile them once per class rather than once per run.
    /** First line of the input: the column headers. */
    private static final Pattern HEADER_PATTERN = Pattern.compile("^ID\\s+%Mix\\s+LLK\\s+LLK0\\s*$");
    /** Second line of the input: a separator made up solely of dashes. */
    private static final Pattern DASHES_PATTERN = Pattern.compile("^[-]+$");
    /** Every remaining line: groups are (1) ID, (2) %Mix, (3) LLK, (4) LLK0. */
    private static final Pattern DATA_PATTERN = Pattern.compile("^(\\d+)\\s+(\\d*\\.\\d+)\\s+(\\d+)\\s+(\\d+)\\s*$");

    @Argument(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "The output of VerifyIDIntensity(typically captured stdout).")
    public File INPUT;

    @Argument(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "Basename for the metrics file that will be written." +
            " Resulting file will be <OUTPUT>." + FILE_EXTENSION)
    public File OUTPUT;

    /**
     * Parses INPUT and writes the resulting metrics to {@code <OUTPUT>.<FILE_EXTENSION>}.
     *
     * @return 0 on success
     * @throws PicardException if the input cannot be read, ends before the header/dashes lines,
     *                         or contains a line that does not match the expected format
     */
    @Override
    protected int doWork() {
        IOUtil.assertFileIsReadable(INPUT);
        final File metricsFile = new File(OUTPUT + "." + FILE_EXTENSION);
        IOUtil.assertFileIsWritable(metricsFile);

        final MetricsFile<VerifyIDIntensityContaminationMetrics, ?> verifyIDIntensityContaminationMetricsFile = getMetricsFile();

        try (BufferedReader br = new BufferedReader(new FileReader(INPUT))) {
            // The first two lines are mandatory; lineMatch rejects a null (end-of-file) line.
            lineMatch(br.readLine(), HEADER_PATTERN);
            lineMatch(br.readLine(), DASHES_PATTERN);

            String line;
            while ((line = br.readLine()) != null) {
                verifyIDIntensityContaminationMetricsFile.addMetric(parseMetrics(line));
            }
            verifyIDIntensityContaminationMetricsFile.write(metricsFile);
        } catch (IOException e) {
            throw new PicardException("Error parsing VerifyIDIntensity Output", e);
        }
        return 0;
    }

    /**
     * Converts a single data line into a metrics row.
     *
     * @param line a data line from the input (never the header or dashes line)
     * @return the populated metrics object
     * @throws PicardException if the line does not match the data format or holds a number too large to store
     */
    private VerifyIDIntensityContaminationMetrics parseMetrics(final String line) {
        final Matcher m = lineMatch(line, DATA_PATTERN);
        final VerifyIDIntensityContaminationMetrics metrics = new VerifyIDIntensityContaminationMetrics();
        try {
            metrics.ID = Integer.parseInt(m.group(1));
            metrics.PCT_MIX = Double.parseDouble(m.group(2));
            metrics.LLK = Long.parseLong(m.group(3));
            metrics.LLK0 = Long.parseLong(m.group(4));
        } catch (NumberFormatException e) {
            // The regex only guarantees digits; a run of digits can still overflow int/long.
            throw new PicardException("Unparseable numeric value in line: " + line + " in " + INPUT.getAbsolutePath(), e);
        }
        return metrics;
    }

    /**
     * Matches a whole line against a pattern, failing loudly if it does not conform.
     *
     * @param line           the line to check; {@code null} means the input ended prematurely
     * @param patternToMatch the pattern the entire line must match
     * @return the successful matcher, from which capture groups can be read
     * @throws PicardException if {@code line} is null or does not match
     */
    private Matcher lineMatch(final String line, final Pattern patternToMatch) {
        if (line == null) {
            // BufferedReader.readLine() returns null at end of stream; without this guard
            // Pattern.matcher(null) would surface as a bare NullPointerException.
            throw new PicardException("Unexpected end of file (expected a line matching '" + patternToMatch.pattern()
                    + "') in " + INPUT.getAbsolutePath());
        }
        final Matcher m = patternToMatch.matcher(line);
        if (!m.matches()) {
            throw new PicardException("Unrecognized line: " + line + " in " + INPUT.getAbsolutePath());
        }
        return m;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package picard.arrays;

import htsjdk.samtools.metrics.MetricBase;

/**
 * One row of contamination metrics, corresponding to one data line of
 * VerifyIDIntensity output (columns {@code ID}, {@code %Mix}, {@code LLK}, {@code LLK0}).
 * Instances are populated by {@link CreateVerifyIDIntensityContaminationMetricsFile}.
 */
public class VerifyIDIntensityContaminationMetrics extends MetricBase {
/** The ID of this entry, taken from the {@code ID} column. */
public int ID;

/** The percent mixture (contamination) of the sample for ID, taken from the {@code %Mix} column. */
public double PCT_MIX;

/** The log likelihood, taken from the {@code LLK} column (stored as a whole number). */
public long LLK;

/**
 * The log likelihood 0, taken from the {@code LLK0} column (stored as a whole number).
 * NOTE(review): presumably the log likelihood under the no-contamination model — confirm
 * against the VerifyIDintensity documentation.
 */
public long LLK0;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
package picard.arrays;

import htsjdk.samtools.metrics.MetricsFile;
import org.testng.Assert;
import org.testng.annotations.Test;
import picard.PicardException;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;

/**
 * Tests for {@link CreateVerifyIDIntensityContaminationMetricsFile}: one run over a
 * well-formed VerifyIDIntensity output file and one over a file in an unrelated format.
 */
public class CreateVerifyIDIntensityContaminationMetricsFileTest {

    private static final File TEST_DATA_DIR = new File("testdata/picard/arrays");
    private static final File TEST_INPUT_FILE = new File(TEST_DATA_DIR, "VerifyIDIntensity.txt");
    private static final File TEST_BAD_INPUT_FILE = new File(TEST_DATA_DIR, "input.vcf");

    /** A well-formed input yields exit code 0 and one metrics row per data line, with values preserved. */
    @Test
    public void testCreateVerifyIDIntensityContaminationMetricsFile() throws IOException {
        final File outputBase = createOutputBase("testCreateVerifyIDIntensityContaminationMetricsFile");
        final File output = metricsFileFor(outputBase);

        final CreateVerifyIDIntensityContaminationMetricsFile tool = newTool(TEST_INPUT_FILE, outputBase);
        Assert.assertEquals(tool.instanceMain(new String[0]), 0);

        final MetricsFile<VerifyIDIntensityContaminationMetrics, Comparable<?>> metrics = new MetricsFile<>();
        // Close the reader here rather than relying on MetricsFile.read to do so.
        try (FileReader reader = new FileReader(output)) {
            metrics.read(reader);
        }

        Assert.assertEquals(metrics.getMetrics().size(), 2);
        Assert.assertEquals(metrics.getMetrics().get(0).ID, 0);
        Assert.assertEquals(metrics.getMetrics().get(0).PCT_MIX, 0.214766);
        Assert.assertEquals(metrics.getMetrics().get(0).LLK, 157575);
        Assert.assertEquals(metrics.getMetrics().get(0).LLK0, 177169);

        Assert.assertEquals(metrics.getMetrics().get(1).ID, 1);
        Assert.assertEquals(metrics.getMetrics().get(1).PCT_MIX, 0.214767);
        Assert.assertEquals(metrics.getMetrics().get(1).LLK, 157576);
        Assert.assertEquals(metrics.getMetrics().get(1).LLK0, 177170);
    }

    /** An input that is not VerifyIDIntensity output must be rejected with a PicardException. */
    @Test(expectedExceptions = PicardException.class)
    public void testFailCreateVerifyIDIntensityContaminationMetricsFile() throws IOException {
        final File outputBase = createOutputBase("testFailCreateVerifyIDIntensityContaminationMetricsFile");
        metricsFileFor(outputBase);

        final CreateVerifyIDIntensityContaminationMetricsFile tool = newTool(TEST_BAD_INPUT_FILE, outputBase);
        Assert.assertEquals(tool.instanceMain(new String[0]), 1);
    }

    /** Creates the temp file used as the OUTPUT basename and schedules it for deletion at JVM exit. */
    private static File createOutputBase(final String prefix) throws IOException {
        final File outputBase = File.createTempFile(prefix, "");
        // createTempFile really creates the file on disk, so it must be cleaned up as well.
        outputBase.deleteOnExit();
        return outputBase;
    }

    /** Returns the metrics file the tool writes for the given basename, scheduled for deletion at JVM exit. */
    private static File metricsFileFor(final File outputBase) {
        final File output = new File(outputBase.getAbsolutePath() + "." + CreateVerifyIDIntensityContaminationMetricsFile.FILE_EXTENSION);
        output.deleteOnExit();
        return output;
    }

    /** Builds a tool instance with INPUT and OUTPUT set directly on its public fields. */
    private static CreateVerifyIDIntensityContaminationMetricsFile newTool(final File input, final File outputBase) {
        final CreateVerifyIDIntensityContaminationMetricsFile tool = new CreateVerifyIDIntensityContaminationMetricsFile();
        tool.INPUT = input;
        tool.OUTPUT = outputBase;
        return tool;
    }
}
4 changes: 4 additions & 0 deletions testdata/picard/arrays/VerifyIDIntensity.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
ID %Mix LLK LLK0
-----------------------------------------------------------------
0 0.214766 157575 177169
1 0.214767 157576 177170

0 comments on commit 69cf11c

Please sign in to comment.