Bugfix/custom alt column 4938 (#571)
* making alt column optional

* added unit test for no ALT field

* fixing test name

* fixing the position for padded intervals
rajatshuvro authored and GitHub Enterprise committed Nov 10, 2020
1 parent 7c5ce81 commit 984eb04
Showing 4 changed files with 71 additions and 17 deletions.
11 changes: 6 additions & 5 deletions SAUtils/Custom/VariantAnnotationsParser.cs
@@ -106,7 +106,7 @@ internal void ParseHeaderLines()
DataSourceDescription = value;
break;
default:
var e = new UserErrorException("Unexpected header tag observed");
var e = new UserErrorException("Unexpected header tag observed:"+value);
e.Data[ExitCodeUtilities.Line] = line;
throw e;
}
@@ -261,15 +261,16 @@ internal CustomItem ExtractItems(string line)

if (IsInterval(splits))
{
-   var jsonStringValues = new List<string> { splits[1], splits[_endColumnIndex] };


if (!int.TryParse(splits[_endColumnIndex], out var end))
throw new UserErrorException($"END is not an integer.\nInput line: {line}.");

-   jsonStringValues.AddRange(annotationValues);
//for symbolic alleles, position needs to increment to account for the padding base
-   if (IsSymbolicAllele(splits[_altColumnIndex]))
+   if (_altColumnIndex >=0 && IsSymbolicAllele(splits[_altColumnIndex]))
position++;

+   var jsonStringValues = new List<string> { position.ToString(), splits[_endColumnIndex] };
+   jsonStringValues.AddRange(annotationValues);
_intervals.Add(new CustomInterval(chrom, position, end, jsonStringValues.Select(x => new[] { x }).ToList(), IntervalJsonSchema, line));
return null;
}
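Taken together, the parser-side fix does two things: the symbolic-allele check is only applied when an ALT column actually exists (a negative _altColumnIndex means the header has none), and the interval start written out is the position after the padding-base adjustment rather than the raw POS field. Below is a minimal, self-contained sketch of that logic; Interval and IntervalParsingSketch are hypothetical stand-ins for the real Nirvana classes, and FormatException stands in for UserErrorException.

using System;
using System.Collections.Generic;

// Hypothetical stand-ins for the real Nirvana types, used only to illustrate the fix.
public sealed class Interval
{
    public string Chrom;
    public int Start;
    public int End;
    public List<string> JsonValues;
}

public static class IntervalParsingSketch
{
    // True for symbolic alleles such as <DEL> or <DUP>, which carry a padding base.
    private static bool IsSymbolicAllele(string altAllele) =>
        altAllele.StartsWith("<") && altAllele.EndsWith(">");

    // altColumnIndex is negative when the header has no ALT column.
    public static Interval ParseInterval(string line, int altColumnIndex, int endColumnIndex)
    {
        string[] splits = line.Split('\t');
        string chrom    = splits[0];
        int position    = int.Parse(splits[1]);

        if (!int.TryParse(splits[endColumnIndex], out int end))
            throw new FormatException($"END is not an integer.\nInput line: {line}.");

        // Only consult the ALT field when the column is actually present;
        // for symbolic alleles the start skips the padding base.
        if (altColumnIndex >= 0 && IsSymbolicAllele(splits[altColumnIndex]))
            position++;

        // The adjusted position, not the raw POS string, goes into the JSON values.
        var jsonValues = new List<string> { position.ToString(), splits[endColumnIndex] };
        return new Interval { Chrom = chrom, Start = position, End = end, JsonValues = jsonValues };
    }
}

Under those assumptions, a line from a file whose header is #CHROM POS REF END notes parses as an interval with its start untouched, while a <DEL> record in a file that does have an ALT column is shifted by one base.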
27 changes: 18 additions & 9 deletions SAUtils/Custom/VariantMain.cs
@@ -60,14 +60,16 @@ private static ExitCodes ProgramExecution()
var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));

List<CustomInterval> intervals;
SaJsonSchema intervalJsonSchema;
string jsonTag;
DataSourceVersion version;
string outputPrefix = GetOutputPrefix(_inputFile);
string nsaFileName = Path.Combine(_outputDirectory, outputPrefix + SaCommon.SaFileSuffix);
string nsaIndexFileName = nsaFileName + SaCommon.IndexSufix;
string nsaSchemaFileName = nsaFileName + SaCommon.JsonSchemaSuffix;
ReportFor reportFor;

+   var nsaItemCount = 0;

using (var parser = VariantAnnotationsParser.Create(GZipUtilities.GetAppropriateStreamReader(_inputFile), referenceProvider))
using (var nsaStream = FileUtilities.GetCreateStream(nsaFileName))
@@ -76,11 +78,18 @@ private static ExitCodes ProgramExecution()
using (var saJsonSchemaStream = FileUtilities.GetCreateStream(nsaSchemaFileName))
using (var schemaWriter = new StreamWriter(saJsonSchemaStream))
{
-   (jsonTag, _, intervalJsonSchema, intervals) = CaUtilities.WriteSmallVariants(parser, nsaWriter, schemaWriter);
+   (jsonTag, nsaItemCount, intervalJsonSchema, intervals) = CaUtilities.WriteSmallVariants(parser, nsaWriter, schemaWriter);
reportFor = parser.ReportFor;
if (intervals == null) return ExitCodes.Success;
}

+   if (nsaItemCount == 0)
+   {
+       File.Delete(nsaFileName);
+       File.Delete(nsaIndexFileName);
+       File.Delete(nsaSchemaFileName);
+   }

using (var nsiStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outputPrefix + SaCommon.IntervalFileSuffix)))
using (var nsiWriter = CaUtilities.GetNsiWriter(nsiStream, version, referenceProvider.Assembly, jsonTag, reportFor))
using (var siJsonSchemaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outputPrefix + SaCommon.IntervalFileSuffix + SaCommon.JsonSchemaSuffix)))
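Two changes land here: CaUtilities.WriteSmallVariants now also returns how many small-variant (NSA) items it wrote, and when that count is zero the freshly created .nsa file, its index, and its JSON schema are deleted again. Judging by the new no-ALT unit test further down, the practical effect is that an interval-only custom file no longer leaves behind empty small-variant outputs.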
4 changes: 2 additions & 2 deletions SAUtils/Properties/launchSettings.json
@@ -57,8 +57,8 @@
},
"RR_custom": {
"commandName": "Project",
"commandLineArgs": " CustomVar --ref /Users/rroy1/Development/References/7/Homo_sapiens.GRCh37.Nirvana.dat --out /Users/rroy1/Development/SupplementaryDatabase/59/GRCh37/custom --in Polaris_SV.OLYM.v4.tsv",
"workingDirectory": "/Users/rroy1/Development/ExternalDataSources/Custom/GRCh37/"
"commandLineArgs": " CustomVar --ref /Users/rroy1/Development/References/7/Homo_sapiens.GRCh38.Nirvana.dat --out /Users/rroy1/Development/SupplementaryDatabase/60/GRCh38/ --in MyDataSource3.tsv",
"workingDirectory": "/Users/rroy1/Development/ExternalDataSources/Custom/GRCh38/"
},
"RR_globalAllele": {
"commandName": "Project",
@@ -157,6 +157,50 @@ public void GetItems()
Assert.Equal("\"refAllele\":\"C\",\"altAllele\":\"A\",\"allAc\":53,\"allAn\":8928,\"allAf\":0.001421", items[1].GetJsonString());
}
}

[Fact]
public void GetIntervals_noALT()
{
const string text = "#title=IcslAlleleFrequencies\n" +
"#assembly=GRCh38\n" +
"#matchVariantsBy=allele\n" +
"#CHROM\tPOS\tREF\tEND\tnotes\n" +
"#categories\t.\t.\t.\t.\n" +
"#descriptions\t.\t.\t.\t.\n" +
"#type\t.\t.\t.\tstring\n" +
"chr16\t20000000\tT\t70000000\tLots of false positives in this region";
using (var custParser = VariantAnnotationsParser.Create(GetReadStream(text), SequenceProvider))
{
var items = custParser.GetItems().ToArray();
Assert.Empty(items);
var intervals = custParser.GetCustomIntervals();
Assert.Single(intervals);
Assert.Equal("\"start\":20000000,\"end\":70000000,\"notes\":\"Lots of false positives in this region\"", intervals[0].GetJsonString());
}
}

[Fact]
public void GetIntervals_start()
{
const string text = "#title=IcslAlleleFrequencies\n" +
"#assembly=GRCh38\n" +
"#matchVariantsBy=allele\n" +
"#CHROM\tPOS\tREF\tALT\tEND\tnotes\n" +
"#categories\t.\t.\t.\t.\t.\n" +
"#descriptions\t.\t.\t.\t.\t.\n" +
"#type\t.\t.\t.\t.\tstring\n" +
"chr21\t10510818\tT\t.\t10699435\tinterval 1\n"+
"chr21\t10510818\tT\t<DEL>\t10699435\tinterval 2";
using (var custParser = VariantAnnotationsParser.Create(GetReadStream(text), SequenceProvider))
{
var items = custParser.GetItems().ToArray();
Assert.Empty(items);
var intervals = custParser.GetCustomIntervals();
Assert.Equal(2,intervals.Count);
Assert.Equal("\"start\":10510818,\"end\":10699435,\"notes\":\"interval 1\"", intervals[0].GetJsonString());
Assert.Equal("\"start\":10510819,\"end\":10699435,\"notes\":\"interval 2\"", intervals[1].GetJsonString());
}
}

[Fact]
public void GetItems_OnlyAlleleFrequencyTreatedAsDouble_OtherNumbersPrintAsIs()
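Two tests are added in the hunk above. GetIntervals_noALT feeds the parser a header with no ALT column at all: the row is still recognized as an interval, no small-variant items are produced, and the start is emitted unchanged as 20000000 because the symbolic-allele check is skipped when no ALT column index is found. GetIntervals_start keeps the ALT column and covers both branches: the '.' ALT leaves the start at 10510818, while the symbolic <DEL> allele picks up the one-base padding shift, 10510818 + 1 = 10510819.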
@@ -287,7 +331,7 @@ public void GetIntervals()

var intervals = custParser.GetCustomIntervals();
Assert.Single(intervals);
Assert.Equal("\"start\":46993,\"end\":50879,\"allAc\":50,\"allAn\":250,\"allAf\":0.001,\"pathogenicity\":\"benign\"", intervals[0].GetJsonString());
Assert.Equal("\"start\":46994,\"end\":50879,\"allAc\":50,\"allAn\":250,\"allAf\":0.001,\"pathogenicity\":\"benign\"", intervals[0].GetJsonString());
}
}
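The expected start in the pre-existing GetIntervals test moves from 46993 to 46994, consistent with the padded-interval fix: the fixture presumably uses a symbolic allele, so its reported start now skips the padding base as well.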
