Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
Michael Stromberg committed Apr 10, 2018
2 parents 0738f88 + 493cdd7 commit 122cf57
Show file tree
Hide file tree
Showing 12 changed files with 167 additions and 77 deletions.
6 changes: 3 additions & 3 deletions CacheUtils/CacheUtils.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
</Content>
</ItemGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Configuration" Version="2.0.0" />
<PackageReference Include="Microsoft.Extensions.Configuration.Binder" Version="2.0.0" />
<PackageReference Include="Microsoft.Extensions.Configuration.Json" Version="2.0.0" />
<PackageReference Include="Microsoft.Extensions.Configuration" Version="2.0.1" />
<PackageReference Include="Microsoft.Extensions.Configuration.Binder" Version="2.0.1" />
<PackageReference Include="Microsoft.Extensions.Configuration.Json" Version="2.0.1" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\CommandLine\CommandLine.csproj" />
Expand Down
2 changes: 1 addition & 1 deletion Jasix/Jasix.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<DebugType>Full</DebugType>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Newtonsoft.Json" Version="11.0.1" />
<PackageReference Include="Newtonsoft.Json" Version="11.0.2" />
<ProjectReference Include="..\CommandLine\CommandLine.csproj" />
<ProjectReference Include="..\Compression\Compression.csproj" />
<ProjectReference Include="..\ErrorHandling\ErrorHandling.csproj" />
Expand Down
35 changes: 21 additions & 14 deletions Nirvana/Nirvana.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
using Phantom.Workers;
using VariantAnnotation;
using VariantAnnotation.Interface;
using VariantAnnotation.Interface.AnnotatedPositions;
using VariantAnnotation.Interface.GeneAnnotation;
using VariantAnnotation.Interface.IO;
using VariantAnnotation.Interface.Plugins;
using VariantAnnotation.Interface.Positions;
using VariantAnnotation.Interface.Providers;
Expand Down Expand Up @@ -56,7 +58,7 @@ private ExitCodes ProgramExecution()
var plugins = PluginUtilities.LoadPlugins(_pluginDirectory);
var annotator = ProviderUtilities.GetAnnotator(transcriptAnnotationProvider, sequenceProvider, saProvider, conservationProvider, geneAnnotationProvider, plugins);
var recomposer = _disableRecomposition ? new NullRecomposer() : Recomposer.Create(sequenceProvider, _inputCachePrefix);
var logger = _outputFileName == "" ? (ILogger)new NullLogger() : new ConsoleLogger();
var logger = _outputFileName == "-" ? (ILogger) new NullLogger() : new ConsoleLogger();
var metrics = new PerformanceMetrics(logger);

var dataSourceVersions = GetDataSourceVersions(plugins, transcriptAnnotationProvider, saProvider,
Expand All @@ -72,7 +74,7 @@ private ExitCodes ProgramExecution()
using (var gvcfWriter = _gvcf ? new LiteVcfWriter(ReadWriteUtilities.GetGvcfOutputWriter(_outputFileName), vcfReader.GetHeaderLines(), _annotatorVersionTag, vepDataVersion, dataSourceVersions) : null)
using (var jasixIndexCreator = new OnTheFlyIndexCreator(FileUtilities.GetCreateStream(jasixFileName)))
{
var bgzipTextWriter = outputWriter as BgzipTextWriter;
if (!(outputWriter is BgzipTextWriter bgzipTextWriter)) throw new NullReferenceException("Unable to create the bgzip text writer.");

try
{
Expand All @@ -94,19 +96,10 @@ private ExitCodes ProgramExecution()

var annotatedPosition = annotator.Annotate(position);

var jsonOutput = annotatedPosition.GetJsonString();
if (jsonOutput != null)
{
if (bgzipTextWriter != null)
jasixIndexCreator.Add(annotatedPosition.Position, bgzipTextWriter.Position);
}
jsonWriter.WriteJsonEntry(jsonOutput);
string json = annotatedPosition.GetJsonString();

if (annotatedPosition.AnnotatedVariants?.Length > 0) vcfWriter?.Write(_conversion.Convert(annotatedPosition));

gvcfWriter?.Write(annotatedPosition.AnnotatedVariants?.Length > 0
? _conversion.Convert(annotatedPosition)
: string.Join("\t", position.VcfFields));
if (json != null) WriteOutput(annotatedPosition, bgzipTextWriter.Position, jasixIndexCreator, jsonWriter, vcfWriter, gvcfWriter, json);
else gvcfWriter?.Write(string.Join("\t", position.VcfFields));

metrics.Increment();
}
Expand All @@ -125,6 +118,20 @@ private ExitCodes ProgramExecution()
return ExitCodes.Success;
}

/// <summary>
/// Registers the annotated position with the Jasix index, writes its JSON entry and,
/// when applicable, writes the converted VCF line to the VCF and gVCF writers.
/// </summary>
/// <param name="annotatedPosition">The annotated position being written.</param>
/// <param name="textWriterPosition">The writer offset stored in the index for this position.</param>
/// <param name="jasixIndexCreator">The index that receives the (position, offset) pair.</param>
/// <param name="jsonWriter">The writer that receives <paramref name="jsonOutput"/>.</param>
/// <param name="vcfWriter">The VCF writer; may be null.</param>
/// <param name="gvcfWriter">The gVCF writer; may be null.</param>
/// <param name="jsonOutput">The JSON entry for the annotated position.</param>
private void WriteOutput(IAnnotatedPosition annotatedPosition, long textWriterPosition,
    OnTheFlyIndexCreator jasixIndexCreator, IJsonWriter jsonWriter, LiteVcfWriter vcfWriter,
    LiteVcfWriter gvcfWriter, string jsonOutput)
{
    // the index entry is added before the JSON entry is written
    jasixIndexCreator.Add(annotatedPosition.Position, textWriterPosition);
    jsonWriter.WriteJsonEntry(jsonOutput);

    // nothing left to do when neither VCF-style writer exists
    bool hasVcfStyleWriter = vcfWriter != null || gvcfWriter != null;
    if (!hasVcfStyleWriter) return;

    // recomposed positions are only written to the JSON output
    if (annotatedPosition.Position.IsRecomposed) return;

    // convert once and reuse the same line for both writers
    string convertedLine = _conversion.Convert(annotatedPosition);
    if (vcfWriter != null) vcfWriter.Write(convertedLine);
    if (gvcfWriter != null) gvcfWriter.Write(convertedLine);
}

private static List<IDataSourceVersion> GetDataSourceVersions(IEnumerable<IPlugin> plugins,
params IProvider[] providers)
{
Expand Down
4 changes: 2 additions & 2 deletions Phantom/DataStructures/PositionSet.cs
Original file line number Diff line number Diff line change
Expand Up @@ -122,13 +122,13 @@ internal int[][] GetSampleTagIndexes(string[] tagsToExtract)

/// <summary>
/// Returns the entry of <paramref name="sampleInfo"/> at <paramref name="phaseSetTagIndex"/>,
/// or "." when no such entry is available.
/// </summary>
/// <param name="phaseSetTagIndex">Index of the phase set entry; -1 yields ".".</param>
/// <param name="sampleInfo">The entries of one sample column.</param>
/// <returns>The phase set entry, or "." when it cannot be read.</returns>
internal static string ExtractSamplePhaseSet(int phaseSetTagIndex, string[] sampleInfo)
{
    const string missing = ".";

    // index of -1, or a sample column with too few entries to reach the index
    bool entryUnavailable = phaseSetTagIndex == -1 || phaseSetTagIndex >= sampleInfo.Length;
    if (entryUnavailable) return missing;

    // a sample column made up of a single "." entry
    bool sampleIsMissing = sampleInfo.Length == 1 && sampleInfo[0] == ".";
    return sampleIsMissing ? missing : sampleInfo[phaseSetTagIndex];
}

/// <summary>
/// Returns the entry of <paramref name="sampleInfo"/> at <paramref name="gqTagIndex"/>,
/// or "." when the index is -1 or lies beyond the end of the sample's entries.
/// </summary>
/// <param name="gqTagIndex">Index of the GQ entry; -1 yields ".".</param>
/// <param name="sampleInfo">The entries of one sample column.</param>
/// <returns>The GQ entry, or "." when it cannot be read.</returns>
internal static string ExtractSampleGq(int gqTagIndex, string[] sampleInfo) =>
    gqTagIndex == -1 || sampleInfo.Length <= gqTagIndex ? "." : sampleInfo[gqTagIndex];

private static Dictionary<(string Genotypes, int Start), List<int>> GetGenotypeToSampleIndex(PositionSet positionSet)
{
Expand Down
2 changes: 1 addition & 1 deletion Phantom/Phantom.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
<DefineConstants>TRACE;RELEASE;NETCOREAPP2_0</DefineConstants>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Newtonsoft.Json" Version="11.0.1-beta1" />
<PackageReference Include="Newtonsoft.Json" Version="11.0.2" />
<ProjectReference Include="..\CommandLine\CommandLine.csproj" />
<ProjectReference Include="..\Compression\Compression.csproj" />
<ProjectReference Include="..\ErrorHandling\ErrorHandling.csproj" />
Expand Down
80 changes: 69 additions & 11 deletions Phantom/Workers/VariantGenerator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ private static VariantInfo GetVariantInfo(PositionSet positionSet, AlleleIndexBl
string[] psValues = new string[numSamples];
for (int i = 0; i < numSamples; i++)
// PS tags are the same in the decomposed variants
psValues[i] = positionSet.PsInfo[i][startIndex];
psValues[i] = positionSet.PsInfo[i][startIndex];

return new VariantInfo(qual, filter, gqValues, psValues);
}
Expand Down Expand Up @@ -171,7 +171,7 @@ public sealed class VariantInfo
public readonly string[] SamplePhaseSets;
public readonly Dictionary<string, List<SampleAllele>> AltAlleleToSample = new Dictionary<string, List<SampleAllele>>();

public VariantInfo(string qual, string filter, string[] sampleGqs, string[]samplePhaseSets)
public VariantInfo(string qual, string filter, string[] sampleGqs, string[] samplePhaseSets)
{
Qual = qual;
Filter = filter;
Expand All @@ -194,7 +194,6 @@ internal sealed class RecomposedAlleleSet
private readonly string _chrName;
private const string VariantId = ".";
private const string InfoTag = "RECOMPOSED";
private const string FormatTag = "GT:GQ:PS";


public RecomposedAlleleSet(string chrName, int numSamples)
Expand Down Expand Up @@ -257,7 +256,7 @@ private void SetGenotypeWithAlleleIndex(List<int> sampleGenotype, byte sampleAll
sampleGenotype[sampleAlleleAlleleIndex] = currentGenotypeIndex;
}

private string[] GetVcfFields(VariantSite varSite, string altAlleleColumn, string qual, string filter, List<int>[] sampleGenoTypes, string[] sampleGqs, string[] samplePhasesets, string variantId = VariantId, string info = InfoTag, string format = FormatTag)
private string[] GetVcfFields(VariantSite varSite, string altAlleleColumn, string qual, string filter, List<int>[] sampleGenoTypes, string[] sampleGqs, string[] samplePhasesets, string variantId = VariantId, string info = InfoTag)
{
var vcfFields = new List<string>
{
Expand All @@ -268,20 +267,79 @@ private string[] GetVcfFields(VariantSite varSite, string altAlleleColumn, strin
altAlleleColumn,
qual,
filter,
info,
format
info
};

for (var index = 0; index < sampleGenoTypes.Length; index++)
AddFormatAndSampleColumns(sampleGenoTypes, sampleGqs, samplePhasesets, ref vcfFields);
return vcfFields.ToArray();
}

/// <summary>
/// Appends the FORMAT column followed by one column per sample to <paramref name="vcfFields"/>.
/// GT is always part of the FORMAT column; GQ and PS are added only when at least one sample
/// that has a genotype carries a value other than "." for that tag.
/// </summary>
/// <param name="sampleGenoTypes">Genotype indexes per sample; an empty list produces a "." sample column.</param>
/// <param name="sampleGqs">GQ value per sample, "." when missing.</param>
/// <param name="samplePhasesets">PS value per sample, "." when missing.</param>
/// <param name="vcfFields">The list of VCF columns that receives the new columns.</param>
private static void AddFormatAndSampleColumns(List<int>[] sampleGenoTypes, string[] sampleGqs, string[] samplePhasesets, ref List<string> vcfFields)
{
    int numSamples = sampleGenoTypes.Length;
    var sampleGenotypeStrings = new string[numSamples];
    var hasGq = false;
    var hasPs = false;

    // Every sample has to be visited here. Leaving the loop as soon as both tags have been
    // seen would leave the genotype strings of the remaining samples unset, and those
    // samples would then be written with an empty GT value.
    for (var index = 0; index < numSamples; index++)
    {
        sampleGenotypeStrings[index] = GetGenotype(sampleGenoTypes[index]);
        if (sampleGenotypeStrings[index] == ".") continue;
        if (sampleGqs[index] != ".") hasGq = true;
        if (samplePhasesets[index] != ".") hasPs = true;
    }

    var formatTags = "GT";
    int numFields = 1;

    if (hasGq)
    {
        formatTags += ":GQ";
        numFields++;
    }
    if (hasPs)
    {
        formatTags += ":PS";
        numFields++;
    }

    vcfFields.Add(formatTags);

    for (var index = 0; index < numSamples; index++)
    {
        string sampleGenotypeStr = sampleGenotypeStrings[index];

        // a sample without a genotype is written as a single "."
        if (sampleGenotypeStr == ".")
        {
            vcfFields.Add(".");
            continue;
        }

        // the values follow the order of the FORMAT column: GT[:GQ][:PS]
        var nonMissingFields = new string[numFields];
        nonMissingFields[0] = sampleGenotypeStr;
        var fieldIndex = 1;
        if (hasGq)
        {
            nonMissingFields[fieldIndex] = sampleGqs[index];
            fieldIndex++;
        }
        if (hasPs)
        {
            nonMissingFields[fieldIndex] = samplePhasesets[index];
        }

        // trailing "." values are dropped before the column is joined
        string sampleColumnStr = string.Join(":", TrimTrailingMissValues(nonMissingFields));
        vcfFields.Add(sampleColumnStr);
    }
}

/// <summary>
/// Returns a copy of <paramref name="values"/> without its trailing "." entries.
/// The first entry is never removed, even when it is ".".
/// </summary>
/// <param name="values">The values to trim.</param>
/// <returns>A new array holding the leading entries that remain.</returns>
private static string[] TrimTrailingMissValues(string[] values)
{
    int keepCount = values.Length;

    // shrink from the end, but always keep the first value
    while (keepCount > 1 && values[keepCount - 1] == ".") keepCount--;

    var trimmed = new string[keepCount];
    Array.Copy(values, trimmed, keepCount);
    return trimmed;
}

/// <summary>
/// Joins the genotype indexes of a sample with "|"; a sample without any
/// genotype index is represented by ".".
/// </summary>
/// <param name="sampleGenotype">The genotype indexes of one sample.</param>
/// <returns>The "|"-separated genotype string, or "." for an empty list.</returns>
private static string GetGenotype(List<int> sampleGenotype)
{
    if (sampleGenotype.Count == 0) return ".";
    return string.Join("|", sampleGenotype);
}
Expand Down
2 changes: 1 addition & 1 deletion UnitTests/Phantom/Workers/PositionProcessorTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ public void GenerateOutput_Return_OriginalAndRecomposed_VcfFieldList()
var expectedOutput = new string[4][];
expectedOutput[0] = position1.VcfFields;
expectedOutput[1] = new[]
{"chr1", "2", ".", "AGCTG", "AGGTG,TGGTC", ".", "PASS", "RECOMPOSED", "GT:GQ:PS", "1|2:.:."};
{"chr1", "2", ".", "AGCTG", "AGGTG,TGGTC", ".", "PASS", "RECOMPOSED", "GT", "1|2"};
expectedOutput[2] = position2.VcfFields;
expectedOutput[3] = position3.VcfFields;

Expand Down
Loading

0 comments on commit 122cf57

Please sign in to comment.