From d28f8034bf9ec08f32e3300f4fd55f95eaf793db Mon Sep 17 00:00:00 2001 From: "Stromberg, Michael" Date: Thu, 29 Mar 2018 13:49:30 -0700 Subject: [PATCH 1/3] Fixed the variant frequency calculation for reference sites when using allele counts. (#161) --- UnitTests/Vcf/Samples/SampleFieldExtractorTests.cs | 2 ++ Vcf/Sample/VariantFrequency.cs | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/UnitTests/Vcf/Samples/SampleFieldExtractorTests.cs b/UnitTests/Vcf/Samples/SampleFieldExtractorTests.cs index b949c6ad..05afe8ca 100644 --- a/UnitTests/Vcf/Samples/SampleFieldExtractorTests.cs +++ b/UnitTests/Vcf/Samples/SampleFieldExtractorTests.cs @@ -264,6 +264,8 @@ public void VariantFrequency_Nominal(string altAllele, string formatCol, string [InlineData("C", "T,A", "GT:GQ:GQX:DP:DPF:AD:VF", "1/1:208:47:70:3:0,70:0.75")] // multiple alleles (VF) [InlineData("C", "T,A", "GT:NR:NV", "1/1:10:7")] // multiple alleles (NR/NV) [InlineData("CG", "T", "GT:AU:CU:GU:TU", "1/1:10,11:20,21:30,31:40,41")] // multiple ref bases (AC) + [InlineData("C", ".", "DP:AU:CU:GU:TU", "19:0,0:14,14:0,0:5,6")] // ref minor (AC) + [InlineData("C", ".", "DP:AU:CU:GU:TU", "75:0,0:72,77:0,0:0,2")] // ref minor (AC) public void VariantFrequency_ReturnNull(string refAllele, string altAllele, string formatCol, string sampleCol) { var vcfLine = $"chr1\t5592503\t.\t{refAllele}\t{altAllele}\t900.00\tPASS\t.\t{formatCol}\t{sampleCol}"; diff --git a/Vcf/Sample/VariantFrequency.cs b/Vcf/Sample/VariantFrequency.cs index e55fc976..d5da079b 100644 --- a/Vcf/Sample/VariantFrequency.cs +++ b/Vcf/Sample/VariantFrequency.cs @@ -31,16 +31,17 @@ public static double[] GetVariantFrequencies(IntermediateSampleFields sampleFiel private static double[] GetVariantFrequenciesUsingVf(IntermediateSampleFields sampleFields) { if (sampleFields.AltAlleles.Length > 1 || sampleFields.VF == null) return null; - return new double[] { sampleFields.VF.Value }; + return new[] { sampleFields.VF.Value }; } private static double[] GetVariantFrequenciesUsingAlleleCounts(IntermediateSampleFields sampleFields) { bool isRefSingleBase = sampleFields.VcfRefAllele.Length == 1; bool areAllAltsSingleBase = sampleFields.AltAlleles.All(altAllele => altAllele.Length == 1); + bool isReference = sampleFields.AltAlleles.Length == 1 && sampleFields.AltAlleles[0] == "."; // for this to work we need a single-base reference allele and all raw allele counts must be available - if (sampleFields.TotalAlleleCount == null || !isRefSingleBase || !areAllAltsSingleBase) return null; + if (sampleFields.TotalAlleleCount == null || isReference || !isRefSingleBase || !areAllAltsSingleBase) return null; int numAltAlleles = sampleFields.AltAlleles.Length; double[] variantFreqs = new double[numAltAlleles]; From fdf39042fd43d3494bd06b81a2bb0ccc8654c122 Mon Sep 17 00:00:00 2001 From: "Stromberg, Michael" Date: Thu, 29 Mar 2018 17:24:43 -0700 Subject: [PATCH 2/3] Updated the TopMed schema. (#162) --- SAUtils/InputFileParsers/TOPMed/TopMedItem.cs | 47 +++++------- .../InputFileParsers/TOPMed/TopMedReader.cs | 73 +++++-------------- 2 files changed, 40 insertions(+), 80 deletions(-) diff --git a/SAUtils/InputFileParsers/TOPMed/TopMedItem.cs b/SAUtils/InputFileParsers/TOPMed/TopMedItem.cs index 395afc60..dd5f2fd2 100644 --- a/SAUtils/InputFileParsers/TOPMed/TopMedItem.cs +++ b/SAUtils/InputFileParsers/TOPMed/TopMedItem.cs @@ -5,25 +5,24 @@ namespace SAUtils.InputFileParsers.TOPMed { - public sealed class TopMedItem: SupplementaryDataItem + public sealed class TopMedItem : SupplementaryDataItem { - private readonly int? _numSamples; private readonly int? _alleleNum; private readonly int? _alleleCount; private readonly int? _homCount; - private readonly bool _hasFailedFilters; + private readonly bool _failedFilter; - public TopMedItem(IChromosome chrom, int position, string refAllele, string altAllele, int? numSamples, int? alleleNum, int? alleleCount, int? homCount, bool hasFailedFilters) + public TopMedItem(IChromosome chrom, int position, string refAllele, string altAllele, int? alleleNum, + int? alleleCount, int? homCount, bool failedFilter) { - Chromosome = chrom; - Start = position; - ReferenceAllele = refAllele; - AlternateAllele = altAllele; - _numSamples = numSamples; - _alleleNum = alleleNum; - _alleleCount = alleleCount; - _homCount = homCount; - _hasFailedFilters = hasFailedFilters; + Chromosome = chrom; + Start = position; + ReferenceAllele = refAllele; + AlternateAllele = altAllele; + _alleleNum = alleleNum; + _alleleCount = alleleCount; + _homCount = homCount; + _failedFilter = failedFilter; } public override bool Equals(object other) @@ -50,24 +49,18 @@ public override int GetHashCode() public string GetJsonString() { - var sb = new StringBuilder(); + var sb = new StringBuilder(); var jsonObject = new JsonObject(sb); - - if (_hasFailedFilters) jsonObject.AddBoolValue("hasFailedFilters", true); - jsonObject.AddIntValue("numSamples", _numSamples); - jsonObject.AddStringValue("alleleFreq", ComputingUtilities.ComputeFrequency(_alleleNum, _alleleCount), false); - jsonObject.AddIntValue("alleleNumber", _alleleNum); - jsonObject.AddIntValue("alleleCount", _alleleCount); - jsonObject.AddIntValue("homCount", _homCount); - return sb.ToString(); - } + jsonObject.AddStringValue("allAf", ComputingUtilities.ComputeFrequency(_alleleNum, _alleleCount), false); + jsonObject.AddIntValue("allAn", _alleleNum); + jsonObject.AddIntValue("allAc", _alleleCount); + jsonObject.AddIntValue("allHc", _homCount); + if (_failedFilter) jsonObject.AddBoolValue("failedFilter", true); - public override SupplementaryIntervalItem GetSupplementaryInterval() - { - return null; + return sb.ToString(); } - + public override SupplementaryIntervalItem GetSupplementaryInterval() => null; } } \ No newline at end of file diff --git a/SAUtils/InputFileParsers/TOPMed/TopMedReader.cs b/SAUtils/InputFileParsers/TOPMed/TopMedReader.cs index 31efbd7d..033b88b6 100644 --- a/SAUtils/InputFileParsers/TOPMed/TopMedReader.cs +++ b/SAUtils/InputFileParsers/TOPMed/TopMedReader.cs @@ -13,34 +13,23 @@ public sealed class TopMedReader : IDisposable private int? _alleleNum; private int? _alleleCount; - private bool _hasFailedFilters; - private int? _numSamples; + private bool _failedFilter; private int? _homCount; - private int? _hetCount; - private double? _alleleFreq; public TopMedReader(StreamReader streamReader, IDictionary refChromDict) { - _reader = streamReader; + _reader = streamReader; _refChromDict = refChromDict; } private void Clear() { - _alleleNum = null; - _alleleCount = null; - _numSamples = null; - _homCount = null; - _hetCount = null; - _alleleFreq = null; - _hasFailedFilters = false; + _alleleNum = null; + _alleleCount = null; + _homCount = null; + _failedFilter = false; } - /// - /// Parses a source file and return an enumeration object containing - /// all the data objects that have been extracted. - /// - /// public IEnumerable GetGnomadItems() { using (_reader) @@ -48,10 +37,8 @@ public IEnumerable GetGnomadItems() string line; while ((line = _reader.ReadLine()) != null) { - // Skip empty lines. - if (string.IsNullOrWhiteSpace(line)) continue; - // Skip comments. - if (line.StartsWith("#")) continue; + if (string.IsNullOrWhiteSpace(line) || line.StartsWith("#")) continue; + var topMedItem = ExtractItems(line); if (topMedItem == null) continue; yield return topMedItem; @@ -62,7 +49,7 @@ public IEnumerable GetGnomadItems() private TopMedItem ExtractItems(string vcfLine) { if (vcfLine == null) return null; - var splitLine = vcfLine.Split('\t');// we don't care about the many fields after info field + var splitLine = vcfLine.Split('\t'); if (splitLine.Length < 8) return null; @@ -71,11 +58,11 @@ private TopMedItem ExtractItems(string vcfLine) var chromosome = splitLine[VcfCommon.ChromIndex]; if (!_refChromDict.ContainsKey(chromosome)) return null; - //chr1 10169 TOPMed_freeze_5?chr1:10,169 T C 255 SVM VRT=1;NS=62784;AN=125568;AC=20;AF=0.000159276;Het=20;Hom=0 NA:FRQ 125568:0.000159276 + // chr1 10169 TOPMed_freeze_5?chr1:10,169 T C 255 SVM VRT=1;NS=62784;AN=125568;AC=20;AF=0.000159276;Het=20;Hom=0 NA:FRQ 125568:0.000159276 var chrom = _refChromDict[chromosome]; var position = int.Parse(splitLine[VcfCommon.PosIndex]);//we have to get it from RSPOS in info var refAllele = splitLine[VcfCommon.RefIndex]; - var altAllele = splitLine[VcfCommon.AltIndex]; + var altAllele = splitLine[VcfCommon.AltIndex]; var filters = splitLine[VcfCommon.FilterIndex]; var infoFields = splitLine[VcfCommon.InfoIndex]; @@ -85,14 +72,14 @@ private TopMedItem ExtractItems(string vcfLine) throw new InvalidDataException("het site found!!"); } - _hasFailedFilters = !(filters.Equals("PASS") || filters.Equals(".")); + _failedFilter = !(filters.Equals("PASS") || filters.Equals(".")); ParseInfoField(infoFields); if (_alleleNum == 0) return null; - return new TopMedItem(chrom, position, refAllele, altAllele, _numSamples, - _alleleNum, _alleleCount, _homCount, _hasFailedFilters); + return new TopMedItem(chrom, position, refAllele, altAllele, _alleleNum, _alleleCount, _homCount, + _failedFilter); } private void ParseInfoField(string infoFields) @@ -103,9 +90,10 @@ private void ParseInfoField(string infoFields) foreach (var infoItem in infoItems) { var infoKeyValue = infoItem.Split('='); - if (infoKeyValue.Length == 2)//sanity check + + if (infoKeyValue.Length == 2) { - var key = infoKeyValue[0]; + var key = infoKeyValue[0]; var value = infoKeyValue[1]; SetInfoField(key, value); @@ -113,20 +101,11 @@ private void ParseInfoField(string infoFields) } } - /// - /// Get a key value pair and using the key, set appropriate values - /// - /// - /// - private void SetInfoField(string vcfId, string value) + private void SetInfoField(string vcfId, string value) { - //VRT=1;NS=62784;AN=125568;AC=20;AF=0.000159276;Het=20;Hom=0 - + // VRT=1;NS=62784;AN=125568;AC=20;AF=0.000159276;Het=20;Hom=0 switch (vcfId) { - case "NS": - _numSamples = Convert.ToInt32(value); - break; case "AN": _alleleNum = Convert.ToInt32(value); break; @@ -136,21 +115,9 @@ private void SetInfoField(string vcfId, string value) case "Hom": _homCount = Convert.ToInt32(value); break; - case "Het": - _hetCount = Convert.ToInt32(value); - break; - case "AF": - _alleleFreq = Convert.ToDouble(value); - break; } - } - - public void Dispose() - { - _reader?.Dispose(); - } + public void Dispose() => _reader?.Dispose(); } - } \ No newline at end of file From 74f0b00e2ebc401902b3a54e97acffee7a8114ea Mon Sep 17 00:00:00 2001 From: "Stromberg, Michael" Date: Tue, 3 Apr 2018 11:06:16 -0700 Subject: [PATCH 3/3] Fixed a bug that caused Nirvana to crash if a potential gene fusion endpoint landed in a gap transcript region * Fixed a bug that caused Nirvana to crash if a potential gene fusion endpoint landed in a gap transcript region. * Bumping the version number to 2.0.7 --- Jasix/OnTheFlyIndexCreator.cs | 5 +- Nirvana/Properties/launchSettings.json | 39 +++---- .../AnnotatedPositions/HgvsUtilitiesTests.cs | 107 +++++++++++++++++- .../AnnotatedPositions/HgvsUtilities.cs | 18 ++- VariantAnnotation/CommonAssemblyInfo.props | 6 +- 5 files changed, 145 insertions(+), 30 deletions(-) diff --git a/Jasix/OnTheFlyIndexCreator.cs b/Jasix/OnTheFlyIndexCreator.cs index 89d4925d..380deb0a 100644 --- a/Jasix/OnTheFlyIndexCreator.cs +++ b/Jasix/OnTheFlyIndexCreator.cs @@ -29,14 +29,15 @@ public void SetHeader(string header) public void Add(IPosition position, long fileLocation) { - var chromName = position.VcfFields[VcfCommon.ChromIndex];//we want to preserve the chrom name from input + var chromName = position.VcfFields[VcfCommon.ChromIndex]; var start = position.Start; var end = position.InfoData.End; if (chromName == _lastChromName && start < _lastPosition) { - throw new UserErrorException($"the Json file is not sorted at {position.Chromosome.UcscName}: {start}"); + throw new UserErrorException($"The Json file is not sorted at {position.Chromosome.UcscName}: {start}"); } + _lastPosition = start; _lastChromName = chromName; diff --git a/Nirvana/Properties/launchSettings.json b/Nirvana/Properties/launchSettings.json index 5d6a9450..d6317c36 100644 --- a/Nirvana/Properties/launchSettings.json +++ b/Nirvana/Properties/launchSettings.json @@ -25,45 +25,40 @@ "commandLineArgs": "-c Cache\\26\\GRCh37\\Ensembl -r References\\5\\Homo_sapiens.GRCh37.Nirvana.dat -i Data\\Mother\\Mother.vcf.gz -o mother", "workingDirectory": "E:\\Data\\Nirvana" }, - "MS_ClinVar": { + "MS Test": { "commandName": "Project", - "commandLineArgs": " -c Cache\\25\\GRCh37\\Ensembl90 -r References\\5\\Homo_sapiens.GRCh37.Nirvana.dat -i Data\\ClinVar\\ClinVar20150901_ShankarBugNIR1202-ClinVar_dbSNP-unknown-WG-hg19.vcf.gz -o clinvar", + "commandLineArgs": "-c Cache\\26\\GRCh37\\Both -r References\\5\\Homo_sapiens.GRCh37.Nirvana.dat -i test.vcf -o test", "workingDirectory": "E:\\Data\\Nirvana" }, - "MS_Mother": { + "RR clinvar": { "commandName": "Project", - "commandLineArgs": "-c Cache\\25\\GRCh37\\Ensembl90 -r References\\5\\Homo_sapiens.GRCh37.Nirvana.dat -i Data\\Mother\\Mother.vcf.gz -o mother --sd SupplementaryDatabase\\41\\GRCh37", - "workingDirectory": "E:\\Data\\Nirvana" + "commandLineArgs": " --cache C:\\Development\\Cache\\26\\GRCh37\\Ensembl --sd C:\\Development\\SupplementaryDatabase\\43\\GRCh37 --ref C:\\Development\\References\\5\\Homo_sapiens.GRCh37.Nirvana.dat --in ClinVar20150901_ShankarBugNIR1202-ClinVar_dbSNP-unknown-WG-hg19.vcf.gz --out clinvar --disable-recomposition", + "workingDirectory": "c:\\Development\\TestDatasets" }, - "SK Nirvana": { + "RR dq": { "commandName": "Project", - "commandLineArgs": "-i E:\\Nirvana_resources\\test_runs\\test_Phantom\\MS_data\\NA12878_AH72T3CCXX-l2_S1.genome.vcf.gz -c E:\\Nirvana_resources\\Nirvana\\Development\\Cache\\25\\GRCh38\\Both90 -r E:\\Nirvana_resources\\Nirvana\\Development\\References\\5\\Homo_sapiens.GRCh38.Nirvana.dat -o NIR_NA12878 ", - "workingDirectory": "E:\\Nirvana_resources\\test_runs\\test_Phantom\\" + "commandLineArgs": " --cache C:\\Development\\Cache\\26\\GRCh38\\Both --ref C:\\Development\\References\\5\\Homo_sapiens.GRCh38.Nirvana.dat --disable-recomposition --in DQ-Strelka-Germline-chr22-hg38.vcf.gz --out dq22", + "workingDirectory": "c:\\Development\\TestDatasets" }, - "SK Pedigree": { + "RR platypus": { "commandName": "Project", - "commandLineArgs": "-i E:\\Nirvana_resources\\test_runs\\test_Phantom\\test_data\\Pedigree.vcf.gz -c E:\\Nirvana_resources\\Nirvana\\Development\\Cache\\26\\GRCh38\\Ensembl -r E:\\Nirvana_resources\\Nirvana\\Development\\References\\5\\Homo_sapiens.GRCh38.Nirvana.dat --vcf -o Pedigree_NIR_Phan_out", - "workingDirectory": "E:\\Nirvana_resources\\test_runs\\test_Phantom\\" + "commandLineArgs": " --cache C:\\Development\\Cache\\26\\GRCh37\\Ensembl --sd C:\\Development\\SupplementaryDatabase\\43\\GRCh37 --ref C:\\Development\\References\\5\\Homo_sapiens.GRCh37.Nirvana.dat --in Platypus-Platypus-unknown-short-hg19.vcf.gz --out platypus --disable-recomposition", + "workingDirectory": "c:\\Development\\TestDatasets" }, "SK bugfix": { "commandName": "Project", "commandLineArgs": "-i E:\\Nirvana_resources\\test_runs\\test_Phantom\\test_data\\Unsorted_Chromosomes.vcf -c E:\\Nirvana_resources\\Nirvana\\Development\\Cache\\26\\GRCh38\\Ensembl -r E:\\Nirvana_resources\\Nirvana\\Development\\References\\5\\Homo_sapiens.GRCh38.Nirvana.dat --vcf -o Pedigree_bugfix", "workingDirectory": "E:\\Nirvana_resources\\test_runs\\test_Phantom" }, - "RR_clinvar": { - "commandName": "Project", - "commandLineArgs": " --cache C:\\Development\\Cache\\26\\GRCh37\\Ensembl --sd C:\\Development\\SupplementaryDatabase\\43\\GRCh37 --ref C:\\Development\\References\\5\\Homo_sapiens.GRCh37.Nirvana.dat --in ClinVar20150901_ShankarBugNIR1202-ClinVar_dbSNP-unknown-WG-hg19.vcf.gz --out clinvar --disable-recomposition", - "workingDirectory": "c:\\Development\\TestDatasets" - }, - "RR_dq": { + "SK Nirvana": { "commandName": "Project", - "commandLineArgs": " --cache C:\\Development\\Cache\\26\\GRCh38\\Both --ref C:\\Development\\References\\5\\Homo_sapiens.GRCh38.Nirvana.dat --disable-recomposition --in DQ-Strelka-Germline-chr22-hg38.vcf.gz --out dq22", - "workingDirectory": "c:\\Development\\TestDatasets" + "commandLineArgs": "-i E:\\Nirvana_resources\\test_runs\\test_Phantom\\MS_data\\NA12878_AH72T3CCXX-l2_S1.genome.vcf.gz -c E:\\Nirvana_resources\\Nirvana\\Development\\Cache\\25\\GRCh38\\Both90 -r E:\\Nirvana_resources\\Nirvana\\Development\\References\\5\\Homo_sapiens.GRCh38.Nirvana.dat -o NIR_NA12878 ", + "workingDirectory": "E:\\Nirvana_resources\\test_runs\\test_Phantom\\" }, - "RR_platypus": { + "SK Pedigree": { "commandName": "Project", - "commandLineArgs": " --cache C:\\Development\\Cache\\26\\GRCh37\\Ensembl --sd C:\\Development\\SupplementaryDatabase\\43\\GRCh37 --ref C:\\Development\\References\\5\\Homo_sapiens.GRCh37.Nirvana.dat --in Platypus-Platypus-unknown-short-hg19.vcf.gz --out platypus --disable-recomposition", - "workingDirectory": "c:\\Development\\TestDatasets" + "commandLineArgs": "-i E:\\Nirvana_resources\\test_runs\\test_Phantom\\test_data\\Pedigree.vcf.gz -c E:\\Nirvana_resources\\Nirvana\\Development\\Cache\\26\\GRCh38\\Ensembl -r E:\\Nirvana_resources\\Nirvana\\Development\\References\\5\\Homo_sapiens.GRCh38.Nirvana.dat --vcf -o Pedigree_NIR_Phan_out", + "workingDirectory": "E:\\Nirvana_resources\\test_runs\\test_Phantom\\" } } } \ No newline at end of file diff --git a/UnitTests/VariantAnnotation/AnnotatedPositions/HgvsUtilitiesTests.cs b/UnitTests/VariantAnnotation/AnnotatedPositions/HgvsUtilitiesTests.cs index ec0da2bf..59ef771b 100644 --- a/UnitTests/VariantAnnotation/AnnotatedPositions/HgvsUtilitiesTests.cs +++ b/UnitTests/VariantAnnotation/AnnotatedPositions/HgvsUtilitiesTests.cs @@ -265,6 +265,106 @@ public void GetCdnaPositionOffset_Intron_LeqR_Reverse() Assert.Equal("*909+909", po.Value); } + private static ITranscript GetForwardGapTranscript() + { + var regions = new ITranscriptRegion[] + { + new TranscriptRegion(TranscriptRegionType.Exon, 1, 1001, 1100, 1, 100), + new TranscriptRegion(TranscriptRegionType.Gap, 1, 1101, 1103, 100, 101), + new TranscriptRegion(TranscriptRegionType.Exon, 1, 1104, 1203, 101, 200), + new TranscriptRegion(TranscriptRegionType.Intron, 1, 1204, 1303, 200, 201), + new TranscriptRegion(TranscriptRegionType.Exon, 2, 1304, 1403, 201, 300) + }; + + var translation = new Mock(); + translation.SetupGet(x => x.CodingRegion).Returns(new CodingRegion(1051, 1353, 51, 250, 200)); + + var transcript = new Mock(); + transcript.SetupGet(x => x.Start).Returns(1001); + transcript.SetupGet(x => x.End).Returns(1403); + transcript.SetupGet(x => x.Gene.OnReverseStrand).Returns(false); + transcript.SetupGet(x => x.TranscriptRegions).Returns(regions); + transcript.SetupGet(x => x.Translation).Returns(translation.Object); + + return transcript.Object; + } + + [Fact] + public void GetCdnaPositionOffset_Gap_LeftSide_Forward() + { + var transcript = GetForwardGapTranscript(); + var po = HgvsUtilities.GetCdnaPositionOffset(transcript, 1101, 1); + + Assert.NotNull(po); + Assert.False(po.HasStopCodonNotation); + Assert.Equal(0, po.Offset); + Assert.Equal(100, po.Position); + Assert.Equal("50", po.Value); + } + + [Fact] + public void GetCdnaPositionOffset_Gap_RightSide_Forward() + { + var transcript = GetForwardGapTranscript(); + var po = HgvsUtilities.GetCdnaPositionOffset(transcript, 1102, 1); + + Assert.NotNull(po); + Assert.False(po.HasStopCodonNotation); + Assert.Equal(0, po.Offset); + Assert.Equal(101, po.Position); + Assert.Equal("51", po.Value); + } + + private static ITranscript GetReverseGapTranscript() + { + var regions = new ITranscriptRegion[] + { + new TranscriptRegion(TranscriptRegionType.Exon, 1, 1001, 1100, 201, 300), + new TranscriptRegion(TranscriptRegionType.Gap, 1, 1101, 1103, 200, 201), + new TranscriptRegion(TranscriptRegionType.Exon, 1, 1104, 1203, 101, 200), + new TranscriptRegion(TranscriptRegionType.Intron, 1, 1204, 1303, 100, 101), + new TranscriptRegion(TranscriptRegionType.Exon, 1, 1304, 1403, 1, 100) + }; + + var translation = new Mock(); + translation.SetupGet(x => x.CodingRegion).Returns(new CodingRegion(1051, 1353, 51, 250, 200)); + + var transcript = new Mock(); + transcript.SetupGet(x => x.Start).Returns(1001); + transcript.SetupGet(x => x.End).Returns(1403); + transcript.SetupGet(x => x.Gene.OnReverseStrand).Returns(true); + transcript.SetupGet(x => x.TranscriptRegions).Returns(regions); + transcript.SetupGet(x => x.Translation).Returns(translation.Object); + + return transcript.Object; + } + + [Fact] + public void GetCdnaPositionOffset_Gap_LeftSide_Reverse() + { + var transcript = GetReverseGapTranscript(); + var po = HgvsUtilities.GetCdnaPositionOffset(transcript, 1102, 1); + + Assert.NotNull(po); + Assert.False(po.HasStopCodonNotation); + Assert.Equal(0, po.Offset); + Assert.Equal(201, po.Position); + Assert.Equal("151", po.Value); + } + + [Fact] + public void GetCdnaPositionOffset_Gap_RightSide_Reverse() + { + var transcript = GetReverseGapTranscript(); + var po = HgvsUtilities.GetCdnaPositionOffset(transcript, 1103, 1); + + Assert.NotNull(po); + Assert.False(po.HasStopCodonNotation); + Assert.Equal(0, po.Offset); + Assert.Equal(200, po.Position); + Assert.Equal("150", po.Value); + } + [Fact] public void GetCdnaPositionOffset_Intron_RltL_Forward() { @@ -349,7 +449,12 @@ public void GetCdnaPositionOffset_Gap_Forward_ReturnNull() transcript.SetupGet(x => x.Translation).Returns(translation.Object); var po = HgvsUtilities.GetCdnaPositionOffset(transcript.Object, 135001, 0); - Assert.Null(po); + + Assert.NotNull(po); + Assert.True(po.HasStopCodonNotation); + Assert.Equal(0, po.Offset); + Assert.Equal(1760, po.Position); + Assert.Equal("*910", po.Value); } private static ISequence GetGenomicRefSequence() diff --git a/VariantAnnotation/AnnotatedPositions/HgvsUtilities.cs b/VariantAnnotation/AnnotatedPositions/HgvsUtilities.cs index 8c460d97..77cb7fe3 100644 --- a/VariantAnnotation/AnnotatedPositions/HgvsUtilities.cs +++ b/VariantAnnotation/AnnotatedPositions/HgvsUtilities.cs @@ -162,9 +162,14 @@ private static (int Position, int Offset) GetPositionAndOffset(int position, ITr return (region.CdnaStart + (onReverseStrand ? region.End - position : position - region.Start), 0); } - if (region.Type != TranscriptRegionType.Intron) return (-1, -1); + return region.Type == TranscriptRegionType.Gap + ? GetGapPositionAndOffset(position, region, onReverseStrand) + : GetIntronPositionAndOffset(position, region, onReverseStrand); + } - // intron + private static (int Position, int Offset) GetIntronPositionAndOffset(int position, ITranscriptRegion region, + bool onReverseStrand) + { int leftDist = position - region.Start + 1; int rightDist = region.End - position + 1; @@ -187,6 +192,15 @@ private static (int Position, int Offset) GetPositionAndOffset(int position, ITr return (cdnaPosition, offset); } + private static (int Position, int Offset) GetGapPositionAndOffset(int position, ITranscriptRegion region, bool onReverseStrand) + { + int leftDist = position - region.Start + 1; + int rightDist = region.End - position + 1; + + if (leftDist < rightDist && !onReverseStrand || rightDist < leftDist && onReverseStrand) return (region.CdnaStart, 0); + return (region.CdnaEnd, 0); + } + private static (string CdnaCoord, bool HasStopCodonNotation, bool HasNoPosition) GetCdnaCoord(int position, int offset, int codingRegionStart, int codingRegionEnd) { diff --git a/VariantAnnotation/CommonAssemblyInfo.props b/VariantAnnotation/CommonAssemblyInfo.props index 190e60ca..a006a30a 100644 --- a/VariantAnnotation/CommonAssemblyInfo.props +++ b/VariantAnnotation/CommonAssemblyInfo.props @@ -2,9 +2,9 @@ Illumina © 2018 Illumina, Inc. - 2.0.6.0 - 2.0.6.0 - 2.0.6 + 2.0.7.0 + 2.0.7.0 + 2.0.7 Stromberg, Roy, Lajugie, Jiang, Li, and Kang \ No newline at end of file