diff --git a/VariantAnnotation/AnnotatedPositions/Transcript/CdnaSequence.cs b/VariantAnnotation/AnnotatedPositions/Transcript/CdnaSequence.cs index ab2613bf..b83aaf40 100644 --- a/VariantAnnotation/AnnotatedPositions/Transcript/CdnaSequence.cs +++ b/VariantAnnotation/AnnotatedPositions/Transcript/CdnaSequence.cs @@ -90,7 +90,7 @@ private void ApplyRnaEdits(StringBuilder sb) } } - public int Length => _sequence?.Length ?? _codingRegion.Length; + public int Length => _sequence?.Length ?? _codingRegion?.Length ?? 0; public Band[] CytogeneticBands => null; public string Substring(int offset, int length) diff --git a/VariantAnnotation/Caches/DataStructures/Transcript.cs b/VariantAnnotation/Caches/DataStructures/Transcript.cs index 7ded7171..a92f3e2e 100644 --- a/VariantAnnotation/Caches/DataStructures/Transcript.cs +++ b/VariantAnnotation/Caches/DataStructures/Transcript.cs @@ -295,7 +295,7 @@ public static ITranscript Read(BufferedBinaryReader reader, // rnaEdits, translation); } - + if (id.WithVersion == "NM_175741.1" && sequenceProvider.Assembly != GenomeAssembly.GRCh38) { rnaEdits = new IRnaEdit[] @@ -304,9 +304,23 @@ public static ITranscript Read(BufferedBinaryReader reader, new RnaEdit(380,380,"C") }; - TranscriptValidator.Validate(sequenceProvider, chromosome, "NM_175741.1", gene.OnReverseStrand, - transcriptRegions, - rnaEdits, translation); + // TranscriptValidator.Validate(sequenceProvider, chromosome, "NM_175741.1", gene.OnReverseStrand, + // transcriptRegions, + // rnaEdits, translation); + + } + + if (id.WithVersion == "NR_003085.2" && sequenceProvider.Assembly != GenomeAssembly.GRCh38) + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(1703,1703,"G"), + new RnaEdit(2832,2831,"AAAAAAAAAAAAAAA"), + }; + + // TranscriptValidator.Validate(sequenceProvider, chromosome, "NR_003085.2", gene.OnReverseStrand, + // transcriptRegions, + // rnaEdits, translation); } diff --git a/VariantAnnotation/Caches/Utilities/TranscriptValidator.cs b/VariantAnnotation/Caches/Utilities/TranscriptValidator.cs index 1e429a94..53da35d0 100644 --- a/VariantAnnotation/Caches/Utilities/TranscriptValidator.cs +++ b/VariantAnnotation/Caches/Utilities/TranscriptValidator.cs @@ -42,6 +42,8 @@ public static class TranscriptValidator {"NM_000545.5", "CGTGGCCCTGTGGCAGCCGAGCCATGGTTTCTAAACTGAGCCAGCTGCAGACGGAGCTCCTGGCGGCCCTGCTCGAGTCAGGGCTGAGCAAAGAGGCACTGATCCAGGCACTGGGTGAGCCGGGGCCCTACCTCCTGGCTGGAGAAGGCCCCCTGGACAAGGGGGAGTCCTGCGGCGGCGGTCGAGGGGAGCTGGCTGAGCTGCCCAATGGGCTGGGGGAGACTCGGGGCTCCGAGGACGAGACGGACGACGATGGGGAAGACTTCACGCCACCCATCCTCAAAGAGCTGGAGAACCTCAGCCCTGAGGAGGCGGCCCACCAGAAAGCCGTGGTGGAGACCCTTCTGCAGGAGGACCCGTGGCGTGTGGCGAAGATGGTCAAGTCCTACCTGCAGCAGCACAACATCCCACAGCGGGAGGTGGTCGATACCACTGGCCTCAACCAGTCCCACCTGTCCCAACACCTCAACAAGGGCACTCCCATGAAGACGCAGAAGCGGGCCGCCCTGTACACCTGGTACGTCCGCAAGCAGCGAGAGGTGGCGCAGCAGTTCACCCATGCAGGGCAGGGAGGGCTGATTGAAGAGCCCACAGGTGATGAGCTACCAACCAAGAAGGGGCGGAGGAACCGTTTCAAGTGGGGCCCAGCATCCCAGCAGATCCTGTTCCAGGCCTATGAGAGGCAGAAGAACCCTAGCAAGGAGGAGCGAGAGACGCTAGTGGAGGAGTGCAATAGGGCGGAATGCATCCAGAGAGGGGTGTCCCCATCACAGGCACAGGGGCTGGGCTCCAACCTCGTCACGGAGGTGCGTGTCTACAACTGGTTTGCCAACCGGCGCAAAGAAGAAGCCTTCCGGCACAAGCTGGCCATGGACACGTACAGCGGGCCCCCCCCAGGGCCAGGCCCGGGACCTGCGCTGCCCGCTCACAGCTCCCCTGGCCTGCCTCCACCTGCCCTCTCCCCCAGTAAGGTCCACGGTGTGCGCTATGGACAGCCTGCGACCAGTGAGACTGCAGAAGTACCCTCAAGCAGCGGCGGTCCCTTAGTGACAGTGTCTACACCCCTCCACCAAGTGTCCCCCACGGGCCTGGAGCCCAGCCACAGCCTGCTGAGTACAGAAGCCAAGCTGGTCTCAGCAGCTGGGGGCCCCCTCCCCCCTGTCAGCACCCTGACAGCACTGCACAGCTTGGAGCAGACATCCCCAGGCCTCAACCAGCAGCCCCAGAACCTCATCATGGCCTCACTTCCTGGGGTCATGACCATCGGGCCTGGTGAGCCTGCCTCCCTGGGTCCTACGTTCACCAACACAGGTGCCTCCACCCTGGTCATCGGCCTGGCCTCCACGCAGGCACAGAGTGTGCCGGTCATCAACAGCATGGGCAGCAGCCTGACCACCCTGCAGCCCGTCCAGTTCTCCCAGCCGCTGCACCCCTCCTACCAGCAGCCGCTCATGCCACCTGTGCAGAGCCATGTGACCCAGAGCCCCTTCATGGCCACCATGGCTCAGCTGCAGAGCCCCCACGCCCTCTACAGCCACAAGCCCGAGGTGGCCCAGTACACCCACACGGGCCTGCTCCCGCAGACTATGCTCATCACCGACACCACCAACCTGAGCGCCCTGGCCAGCCTCACGCCCACCAAGCAGGTCTTCACCTCAGACACTGAGGCCTCCAGTGAGTCCGGGCTTCACACGCCGGCATCTCAGGCCACCACCCTCCACGTCCCCAGCCAGGACCCTGCCGGCATCCAGCACCTGCAGCCGGCCCACCGGCTCAGCGCCAGCCCCACAGTGTCCTCCAGCAGCCTGGTGCTGTACCAGAGCTCAGACTCCAGCAATGGCCAGAGCCACCTGCTGCCATCCAACCACAGCGTCATCGAGACCTTCATCTCCACCCAGATGGCCTCTTCCTCCCAGTAACCACGGCACCTGGGCCCTGGGGCCTGTACTGCCTGCTTGGGGGGTGATGAGGGCAGCAGCCAGCCCTGCCTGGAGGACCTGAGCCTGCCGAGCAACCGTGGCCCTTCCTGGACAGCTGTGCCTCGCTCCCCACTCTGCTCTGATGCATCAGAAAGGGAGGGCTCTGAGGCGCCCCAACCCGTGGAGGCTGCTCGGGGTGCACAGGAGGGGGTCGTGGAGAGCTAGGAGCAAAGCCTGTTCATGGCAGATGTAGGAGGGACTGTCGCTGCTTCGTGGGATACAGTCTTCTTACTTGGAACTGAAGGGGGCGGCCTATGACTTGGGCACCCCCAGCCTGGGCCTATGGAGAGCCCTGGGACCGCTACACCACTCTGGCAGCCACACTTCTCAGGACACAGGCCTGTGTAGCTGTGACCTGCTGAGCTCTGAGAGGCCCTGGATCAGCGTGGCCTTGTTCTGTCACCAATGTACCCACCGGGCCACTCCTTCCTGCCCCAACTCCTTCCAGCTAGTGACCCACATGCCATTTGTACTGACCCCATCACCTACTCACACAGGCATTTCCTGGGTGGCTACTCTGTGCCAGAGCCTGGGGCTCTAACGCCTGAGCCCAGGGAGGCCGAAGCTAACAGGGAAGGCAGGCAGGGCTCTCCTGGCTTCCCATCCCCAGCGATTCCCTCTCCCAGGCCCCATGACCTCCAGCTTTCCTGTATTTGTTCCCAAGAGCATCATGCCTCTGAGGCCAGCCTGGCCTCCTGCCTCTACTGGGAAGGCTACTTCGGGGCTGGGAAGTCGTCCTTACTCCTGTGGGAGCCTCGCAACCCGTGCCAAGTCCAGGTCCTGGTGGGGCAGCTCCTCTGTCTCGAGCGCCCTGCAGACCCTGCCCTTGTTTGGGGCAGGAGTAGCTGAGCTCACAAGGCAGCAAGGCCCGAGCAGCTGAGCAGGGCCGGGGAACTGGCCAAGCTGAGGTGCCCAGGAGAAGAAAGAGGTGACCCCAGGGCACAGGAGCTACCTGTGTGGACAGGACTAACACTCAGAAGCCTGGGGGCCTGGCTGGCTGAGGGCAGTTCGCAGCCACCCTGAGGAGTCTGAGGTCCTGAGCACTGCCAGGAGGGACAAAGGAGCCTGTGAACCCAGGACAAGCATGGTCCCACATCCCTGGGCCTGCTGCTGAGAACCTGGCCTTCAGTGTACCGCGTCTACCCTGGGATTCAGGAAAAGGCCTGGGGTGACCCGGCACCCCCTGCAGCTTGTAGCCAGCCGGGGCGAGTGGCACGTTTATTTAACTTTTAGTAAAGTCAAGGAGAAATGCGGTGGAAAAA"}, {"NM_001220765.1", "GAATTCCGGCGTCGCGGACGCATCCCAGTCTGGGCGGGACGCTCGGCCGCGGCGAGGCGGGCAAGCCTGGCAGGGCAGAGGGAGCCCCGGCTCCGAGGTTGCTCTTCGCCCCCGAGGATCAGTCTTGGCCCCAAAGCGCGACGCACAAATCCACATAACCTGAGGACCATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGGGAAGGAAAGCCCCCCTGTAAGCGATACTCCAGATGAGGGCGATGAGCCCATGCCGATCCCCGAGGACCTCTCCACCACCTCGGGAGGACAGCAAAGCTCCAAGAGTGACAGAGTCGTGGCCAGTAATGTTAAAGTAGAGACTCAGAGTGATGAAGAGAATGGGCGTGCCTGTGAAATGAATGGGGAAGAATGTGCGGAGGATTTACGAATGCTTGATGCCTCGGGAGAGAAAATGAATGGCTCCCACAGGGACCAAGGCAGCTCGGCTTTGTCGGGAGTTGGAGGCATTCGACTTCCTAACGGAAAACTAAAGTGTGATATCTGTGGGATCATTTGCATCGGGCCCAATGTGCTCATGGTTCACAAAAGAAGCCACACTGGAGAACGGCCCTTCCAGTGCAATCAGTGCGGGGCCTCATTCACCCAGAAGGGCAACCTGCTCCGGCACATCAAGCTGCATTCCGGGGAGAAGCCCTTCAAATGCCACCTCTGCAACTACGCCTGCCGCCGGAGGGACGCCCTCACTGGCCACCTGAGGACGCACTCCGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGTAAGAGCTCTATGCCTCAGAAATTTCTTGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACTGCCGCCTTCTCGCTCCCGCCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTTGTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATTTTTATTTTTAGAGGCAGGGCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAGAATCCCCTTCTCCAAACGATTAGTCTAAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCACGGGGTTCGCGCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCAGAGAGCACAGCCCCTGCTGTGTGGGTCTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGACACAGCAGGGCCAACAACCTGTGCCCAGGCCAGCTTCGAGCTACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGAGAAAATACTATTTCAAGTCATATTCTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAGACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTTTGGGATCGTCCTGGATTCACTGGCTTTGCGGAGGCTGCTCAGATGGCCTGAGCCTCCCGAGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTTAAACACCAGTCCCGAAATTTGGATCTTCTTTCTTTTTGAATCTCTCAAACGGCAACATTCCTCAGAAACCAAAGCTTTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGCCTCTTTTCCTGAAGAAATCCAATCCTAGCCCTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGTTTCTTTACCTACCCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAATTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTGACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAAATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCTGCACCAATGCACTGAGTGAAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAATATCTCTAAGGTGAGAGCCTTCTTAGAGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGAGTTATTTTTTCAAGTGGAATTCAAACAAAGCTCAAACCAGAACTGTAAATAGTGATTGCAGGAATTCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAATGTCTCTTTGCACACCTTTTGTTGTGGTTTTATATTGTAACACCATTTTTCTTTGAAACTATTGTATTTAAAGTAAGGTTTCATATTATGTCAGCAAGTAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAGTTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTTTGCATGACTTCACACTTTTTTTGCGTAGTTTCTTCTGTTGTATGATGGCGTGAGTGTGTGTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAATCGCTGTGTCTTGTTCCGCCCCCTCCCTGACACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATCTCAGCTCCTTGCCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATTCAAGAGTCACTAAAACTCATCACATTATCATTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATTTCCAATTTAGAATTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGATCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCACAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTGGGTGGAGATGGAGGGCAACAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGGGCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAATTATCCTCAACATAGCCTTTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCGAGGAGTGGGGTAAATTTAAAAGTCAAGTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCAAAAATTTCTGATCCCATTTCTGATGGATGTGTCACACCTTTTCTGTCAAAATAAAATGTCTTGGAGGTTATGACTCCTTGGTGAAAAAAAAAAAAAAAAAA"}, {"NM_000535.5", "AGCCAATGGGAGTTCAGGAGGCGGAGCGCCTGTGGGAGCCCTGGAGGGAACTTTCCCAGTCCCCGAGGCGGATCGGGTGTTGCATCCATGGAGCGAGCTGAGAGCTCGAGTACAGAACCTGCTAAGGCCATCAAACCTATTGATCGGAAGTCAGTCCATCAGATTTGCTCTGGGCAGGTGGTACTGAGTCTAAGCACTGCGGTAAAGGAGTTAGTAGAAAACAGTCTGGATGCTGGTGCCACTAATATTGATCTAAAGCTTAAGGACTATGGAGTGGATCTTATTGAAGTTTCAGACAATGGATGTGGGGTAGAAGAAGAAAACTTCGAAGGCTTAACTCTGAAACATCACACATCTAAGATTCAAGAGTTTGCCGACCTAACTCAGGTTGAAACTTTTGGCTTTCGGGGGGAAGCTCTGAGCTCACTTTGTGCACTGAGCGATGTCACCATTTCTACCTGCCACGCATCGGCGAAGGTTGGAACTCGACTGATGTTTGATCACAATGGGAAAATTATCCAGAAAACCCCCTACCCCCGCCCCAGAGGGACCACAGTCAGCGTGCAGCAGTTATTTTCCACACTACCTGTGCGCCATAAGGAATTTCAAAGGAATATTAAGAAGGAGTATGCCAAAATGGTCCAGGTCTTACATGCATACTGTATCATTTCAGCAGGCATCCGTGTAAGTTGCACCAATCAGCTTGGACAAGGAAAACGACAGCCTGTGGTATGCACAGGTGGAAGCCCCAGCATAAAGGAAAATATCGGCTCTGTGTTTGGGCAGAAGCAGTTGCAAAGCCTCATTCCTTTTGTTCAGCTGCCCCCTAGTGACTCCGTGTGTGAAGAGTACGGTTTGAGCTGTTCCGATGCTCTGCATAATCTTTTTTACATCTCAGGTTTCATTTCACAATGCACGCATGGAGTTGGAAGGAGTTCAACAGACAGACAGTTTTTCTTTATCAACCGGCGGCCTTGTGACCCAGCAAAGGTCTGCAGACTCGTGAATGAGGTCTACCACATGTATAATCGACACCAGTATCCATTTGTTGTTCTTAACATTTCTGTTGATTCAGAATGCGTTGATATCAATGTTACTCCAGATAAAAGGCAAATTTTGCTACAAGAGGAAAAGCTTTTGTTGGCAGTTTTAAAGACCTCTTTGATAGGAATGTTTGATAGTGATGTCAACAAGCTAAATGTCAGTCAGCAGCCACTGCTGGATGTTGAAGGTAACTTAATAAAAATGCATGCAGCGGATTTGGAAAAGCCCATGGTAGAAAAGCAGGATCAATCCCCTTCATTAAGGACTGGAGAAGAAAAAAAAGACGTGTCCATTTCCAGACTGCGAGAGGCCTTTTCTCTTCGTCACACAACAGAGAACAAGCCTCACAGCCCAAAGACTCCAGAACCAAGAAGGAGCCCTCTAGGACAGAAAAGGGGTATGCTGTCTTCTAGCACTTCAGGTGCCATCTCTGACAAAGGCGTCCTGAGACCTCAGAAAGAGGCAGTGAGTTCCAGTCACGGACCCAGTGACCCTACGGACAGAGCGGAGGTGGAGAAGGACTCGGGGCACGGCAGCACTTCCGTGGATTCTGAGGGGTTCAGCATCCCAGACACGGGCAGTCACTGCAGCAGCGAGTATGCGGCCAGCTCCCCAGGGGACAGGGGCTCGCAGGAACATGTGGACTCTCAGGAGAAAGCGCCTGAAACTGACGACTCTTTTTCAGATGTGGACTGCCATTCAAACCAGGAAGATACCGGATGTAAATTTCGAGTTTTGCCTCAGCCAACTAATCTCGCAACCCCAAACACAAAGCGTTTTAAAAAAGAAGAAATTCTTTCCAGTTCTGACATTTGTCAAAAGTTAGTAAATACTCAGGACATGTCAGCCTCTCAGGTTGATGTAGCTGTGAAAATTAATAAGAAAGTTGTGCCCCTGGACTTTTCTATGAGTTCTTTAGCTAAACGAATAAAGCAGTTACATCATGAAGCACAGCAAAGTGAAGGGGAACAGAATTACAGGAAGTTTAGGGCAAAGATTTGTCCTGGAGAAAATCAAGCAGCCGAAGATGAACTAAGAAAAGAGATAAGTAAAACGATGTTTGCAGAAATGGAAATCATTGGTCAGTTTAACCTGGGATTTATAATAACCAAACTGAATGAGGATATCTTCATAGTGGACCAGCATGCCACGGACGAGAAGTATAACTTCGAGATGCTGCAGCAGCACACCGTGCTCCAGGGGCAGAGGCTCATAGCACCTCAGACTCTCAACTTAACTGCTGTTAATGAAGCTGTTCTGATAGAAAATCTGGAAATATTTAGAAAGAATGGCTTTGATTTTGTTATCGATGAAAATGCTCCAGTCACTGAAAGGGCTAAACTGATTTCCTTGCCAACTAGTAAAAACTGGACCTTCGGACCCCAGGACGTCGATGAACTGATCTTCATGCTGAGCGACAGCCCTGGGGTCATGTGCCGGCCTTCCCGAGTCAAGCAGATGTTTGCCTCCAGAGCCTGCCGGAAGTCGGTGATGATTGGGACTGCTCTTAACACAAGCGAGATGAAGAAACTGATCACCCACATGGGGGAGATGGACCACCCCTGGAACTGTCCCCATGGAAGGCCAACCATGAGACACATCGCCAACCTGGGTGTCATTTCTCAGAACTGACCGTAGTCACTGTATGGAATAATTGGTTTTATCGCAGATTTTTATGTTTTGAAAGACAGAGTCTTCACTAACCTTTTTTGTTTTAAAATGAACCTGCTACTTAAAAAAAATACACATCACACCCATTTAAAAGTGATCTTGAGAACCTTTTCAAACCAGAAAAAAAAAAAAAAAA"}, + {"NR_073517.1", "GTGGCGGCGGCGGAGGCGGGGATCCCGCGGCTGCGGCGACGGTGGCCGCGGTGGAGCCACGGGGCGGGCTTGGCTTGGTGTGACGGCGGCTGCGGCGGCGGTGGCGGCCGCGACCAGGTCGGCGTCCTCAGCTGGCCGAGCATGGTGGCAGCCTGCACCCTTGGCTCCCTTGTCTGGTGCAGCCAGCAGAGCCGCCAGCCTTGGGCGCCCATGGCCCTCCGTGTGAGGGCGTGAGCGGCCTGCCCCAGCCTCACCTGCTGATGGAGGACTCAATGGCCCAGTGACCTGACACCACACCACCAACTCCCTCCCACCAGCTGACGAATGGTGGACCCAGTGACGAGTGGCCCTTGTAAGGGTCATGGAATAATTTGAAGCGAGGCATGAGCGGCCCCTGTGGTCGCCTGTGACTGCTGGAGATAGAGGTCCCAGCACCCCAAGCCAACCCAGCGGACCCTCCCAGCCCTGCTTCAACCAATGGGGCCAGTGGGGCTCCAAGCAGCCACCTAACCATCCAGACCCCACCCCACTCACGCGGCCATGGCGGGCCCTGAGGGCTTCCAGTACCGCGCTCTGTACCCGTTCCGCCGGGAGCGGCCGGAGGACCTGGAGCTGCTGCCCGGCGACGTGCTGGTAGTGAGCCGGGCGGCCTTGCAGGCGCTGGGCGTGGCCGAGGGTGGCGAGCGCTGCCCACAGAGCGTGGGCTGGATGCCCGGCCTCAACGAGCGCACACGGCAGCGAGGTGACTTCCCTGGCACCTATGTGGAGTTCCTGGGGCCCGTGGCCCTGGCCCGGCCCGGCCCTCGCCCACGGGGCCCCCGCCCACTGCCCGCCAGGCCCCGTGATGGGGCCCCTGAGCCAGGCCTCACACTCCCCGACTTGCCCGAGCAGTTCTCCCCACCTGATGTGGCTCCCCCTCTTCTGGTGAAGCTTGTGGAGGCCATTGAAAGGACAGGGCTGGACAGCGAATCTCACTACCGCCCGGAGCTGCCCGCACCGCGTACAGACTGGTCCCTGAGCGACGTGGATCAGTGGGACACGGCAGCCCTGGCTGACGGCATTAAGAGCTTCCTGCTGGCACTGCCCGCGCCGCTCGTGACCCCCGAGGCCTCGGCCGAGGCGCGCCGGGCCCTGCGGGAGGCCGCGGGGCCCGTGGGGCCGGCGCTGGAGCCACCGACGCTGCCGCTGCACCGCGCGCTCACGCTGCGCTTCCTGCTCCAGCACCTGGGCCGCGTGGCCCGCCGCGCCCCGGCCCTGGGTCCCGCGGTCCGGGCCCTGGGCGCCACCTTTGGGCCGCTGCTGCTGCGCGCGCCGCCGCCGCCGTCCTCGCCGCCGCCAGGGGGCGCTCCCGACGGGAGTGAGCCCAGCCCTGACTTCCCGGCGCTGCTGGTGGAGAAGCTGCTTCAGGAACACTTGGAAGAGCAGGAGGTTGCGCCCCCAGCGCTGCCGCCTAAACCCCCCAAGGCAAAGCCGGCCCCCACAGTCCTGGCCAATGGAGGGAGCCCACCCTCCCTGCAGGATGCTGAGTGGTACTGGGGGGACATTTCAAGGGAGGAGGTGAACGAGAAACTCCGGGACACTCCCGATGGCACCTTCCTAGTCCGAGATGCTTCTAGCAAGATCCAGGGCGAGTACACGCTGACCCTCAGGAAAGGCGGGAACAATAAGCTGATCAAGGTCTTCCACCGAGATGGGCACTATGGCTTCTCAGAGCCACTCACCTTCTGCTCCGTTGTGGACCTCATCAATCACTACCGCCACGAGTCTCTGGCCCAGTACAATGCCAAGCTGGACACACGGCTCCTCTACCCTGTGTCCAAATACCAGCAGGACCAGATTGTCAAGGAGGACAGCGTGGAGGCAGTGGGCGCCCAGCTTAAGGTCTATCACCAGCAGTACCAGGACAAGAGCCGCGAGTATGACCAGCTTTATGAAGAGTACACACGGACCTCCCAGGGCCTTTTGGGGAGTCCCAGGAGGTGCTGAGCTGCGCCCCCTCCTCCAGGAGCTGCAGATGAAGCGTACTGCAATTGAGGCCTTCAATGAGACTATCAAGATCTTTGAAGAGCAGGGCCAGACTCAAGAGAAATGCAGCAAGGAATACCTGGAGCGCTTCCGGCGTGAGGGCAACGAGAAAGAGATGCAAAGGATCCTGCTGAACTCCGAGCGGCTCAAGTCCCGCATTGCCGAGATCCATGAGAGCCGCACGAAGCTGGAGCAGCAGCTGCGGGCCCAGGCCTCGGACAACAGAGAGATCGACAAGCGCATGAACAGCCTCAAGCCGGACCTCATGCAGCTGCGCAAGATCCGAGACCAGTACCTCGTGTGGCTCACCCAGAAAGGCGCCCGGCAGAAGAAAATCAACGAGTGGCTGGGGATTAAAAATGAGACTGAGGACCAGTACGCACTCATGGAGGACGAGGACGATCTCCCGCACCACGAGGAACGCACTTGGTACGTGGGCAAGATCAACCGCACGCAGGCAGAGGAGATGCTGAGTGGCAAGCGGGATGGCACCTTCCTCATCCGCGAGAGCAGCCAGCGGGGCTGCTACGCCTGCTCCGTGGTAGTGGACGGCGACACCAAGCACTGCGTCATCTACCGCACGGCCACCGGCTTCGGCTTCGCGGAGCCCTACAACCTGTACGGGTCGCTGAAGGAGCTGGTGCTGCACTACCAGCACGCCTCGCTGGTGCAGCACAACGACGCGCTCACCGTCACCCTGGCGCACCCAGTGCGCGCCCCGGGCCCCGGCCCGCCGCCTGCCGCCCGCTGAGCACCGAGGACCCGCCCCAAGCAGAGCCGCCCCTGGGCCCGTCTGCGCCGGAGGCTGCGGCGGCGGGAGCCACGGACCAGACCAGCCACATCCAGGGGTCCTCATTTCTCCGGCTCTGGCTCTTGTTTGGGGTTCTCTCACCCTCTTTCTCTTTCCTTCCCTCCCCCATTCTCCAGATCTCCCTCTGTCTCCTTTTCTCTGTCTTTCTTGGCCCCTGTCTCTCTCCATGTTGGGGGTCCTAACTCCCCCACCCCATATCTACGTGTCCTCCGGGCATTGCCCTCTCCATGGCTCTGGTCACCCTGACCCTCTGCCCTGCCCACCGCAGGTCCCCCGGGGTCCCGGAAGCCCCTTCTGGCTGCACCTGCCATGTTTACAGAGGGCCCCTGGGCTGCGCGGCCCCAGCCTGGGCACCCTGATTTTTAAGCCATAGACCTGGGGTCAGGGCAGGAAGGAACTTCACTCTGCTGCTTCCGAGAACCTCGGCCGTGACATTCGGGGCCGGGCGGGACCCGCCCCACAGACTCCAACTTCCCCTCCAAACCCCGAAGTGAAACCCGCCACCGGGTTACCCCCACAAGGGGGCCGCTGCGAGAAGTTCACCCACCCCCGAAAAAATAATTAAACTCGCAGGCCAGGCACGGTGGCTCATGCCTGTAATCCCAGCACTTTGGGAGGCCAAGACGGGCGGATCTTTTGAGGTCGGGAGTTGGAGGCCAGCCTGGCCAAAATGGCAAAACCCCGCATCTACTAAAATACAAAAATTAGCCGGGCGTGGTGGCGGCCGCCTGTAATCCCAGCTACTTGGGAGGCTGAGGCGTGAGAATCTCTTGAACCCAGGAGATGGAGGTTGCAGTGAGCAGAGATCGTGCCACTGCACTCCAGCCTGGGTAACAGAGGGAGACTCCTCCGTCTCAAAAAAATAAATAAATAAACTTGTGAGCTGGCCCCAACCCCTCCTAGGAATCACAGCTCCCCGTACTGGTGCCGCCGCAGTGGCCAAGTTGCGACACTGCCCACGGCCCCTCCCTCTGATGCAGATTCAGGGCTTCTCTTCGATCATGTTGGGTTTTGATTCTGTTTTTCCTTGACTGCAAAACCCTCTTTCCTCTCCTCTTTTGGGACAAGAGCCCTGGTTTTCTACGCTGCCCTTGGCCACCACACTGCCTGCCCCACGAGCTGGGAGGCAGGTTTTGTACGGTACGTTGTTATTGATATGATATAAAACATCAAACGTCGAAAAAAAAAAAAAAA"}, + {"NR_003085.2", "AGCCAATGGGAGTTCAGGAGGCGGAGCGCCTGTGGGAGCCCTGGAGGGAACTTTCCCAGTCCCCGAGGCGGATCGGGTGTTGCATCCATGGAGCGAGCTGAGAGCTCGAGAACCTGCTAAGGCCATCAAACCTATTGATCGGAAGTCAGTCCATCAGATTTGCTCTGGGCAGGTGGTACTGAGTCTAAGCACTGCGGTAAAGGAGTTAGTAGAAAACAGTCTGGATGCTGGTGCCACTAATATTGATCTAAAGCTTAAGGACTATGGAGTGGATCTTATTGAAGTTTCAGACAATGGATGTGGGGTAGAAGAAGAAAACTTCGAAGGCTTAACTCTGAAACATCACACATCTAAGATTCAAGAGTTTGCCGACCTAACTCAGGTTGAAACTTTTGGCTTTCGGGGGGAAGCTCTGAGCTCACTTTGTGCACTGAGCGATGTCACCATTTCTACCTGCCACGCATCGGCGAAGGTTGGAACTCGACTGATGTTTGATCACAATGGGAAAATTATCCAGAAAACCCCCTACCCCCGCCCCAGAGGGACCACAGTCAGCGTGCAGCAGTTATTTTCCACACTACCTGTGCGCCATAAGGAATTTCAAAGGAATATTAAGAAGGAGTATGCCAAAATGGTCCAGGTCTTACATGCATACTGTATCATTTCAGCAGGCATCCGTGTAAGTTGCACCAATCAGCTTGGACAAGGAAAACGACAGCCTGTGGTATGCACAGGTGGAAGCCCCAGCATAAAGGAAAATATCGGCTCTGTGTTTGGGCAGAAGCAGTTGCAAAGCCTCATTCCTTTTGTTCAGCTGCCCCCTAGTGACTCCGTGTGTGAAGAGTACGGTTTGAGCTGTTCCGATGCTCTGCATAATCTTTTTTACATCTCAGGTTTCATTTCACAATGCACGCATGGAGTTGGAAGGAGTTCAACAGACAGACAGTTTTTCTTTATCAACCGGCGGCCTTGTGACCCAGCAAAGGTCTGCAGACTCGTGAATGAGGTCTACCACATGTATAATCGACACCAGTATCCATTTGTTGTTCTTAACATTTCTGTTGATTCAGAATGCGTTGATATCAATGTTACTCCAGATAAAAGGCAAATTTTGCTACAAGAGGAAAAGCTTTTGTTGGCAGTTTTAAAGACCTCTTTGATAGGAATGTTTGATAGTGATGTCAACAAGCTAAATGTCAGTCAGCAGCCACTGCTGGATGTTGAAGGTAACTTAATAAAAATGCATGCAGCGGATTTGGAAAAGCCCATGGTAGAAAAGCAGGATCAATCCCCTTCATTAAGGACTGGAGAAGAAAAAAAAGACGTGTCCATTTCCAGACTGCGAGAGGCCTTTTCTCTTCGTCACACAACAGAGAACAAGCCTCACAGCCCAAAGACTCCAGAACCAAGAAGGAGCCCTCTAGGACAGAAAAGGGGTATGCTGTCTTCTAGCACTTCAGGTGCCATCTCTGACAAAGGCGTCCTGAGACCTCAGAAAGAGGCAGTGAGTTCCAGTCACGGACCCAGTGACCCTACGGACAGAGCGGAGGTGGAGAAGGACTCGGGGCACGGCAGCACTTCCGTGGATTCTGAGGGGTTCAGCATCCCAGACACGGGCAGTCACTGCAGCAGCGAGTATGCGGCCAGCTCCCCAGGGGACAGGGGCTCGCAGGAACATGTGGACTCTCAGGAGAAAGCGCCTGAAACTGACGACTCTTTTTCAGATGTGGACTGCCATTCAAACCAGGAAGATACCGGATGTAAATTTCGAGTTTTGCCTCAGCCAACTAATCTCGCAACCCCAAACACAAAGCGTTTTAAAAAAGAAGAAATTCTTTCCAGTTCTGACATTTGTCAAAAGTTAGTAAATACTCAGGACATGTCAGCCTCTCAGGTTGATGTAGCTGTGAAAATTAATAAGAAAGTTGTGCCCCTGGACTTTTCTATGAGTTCTTTAGCTAAACGAATAAAGCAGTTACATCATGAAGCACAGCAAAGTGAAGGGGAACAGAATTACAGGAAGTTTAGGGCAAAGATTTGTCCTGGAGAAAATCAAGCAGCCGAAGATGAACTAAGAAAAGAGATAAGTAAAACGATGTTTGCAGAAATGGAAATCATTGGTCAGTTTAACCTGGGATTTATAATAACCAAACTGAATGAGGATATCTTCATAGTGGACCAGCATGCCACGGACGAGAAGTATAACTTCGAGATGCTGCAGCAGCACACCGTGCTCCAGGGGCAGAGGCTCATAGCACCTCAGACTCTCAACTTAACTGCTGTTAATGAAGCTGTTCTGATAGAAAATCTGGAAATATTTAGAAAGAATGGCTTTGATTTTGTTATCGATGAAAATGCTCCAGTCACTGAAAGGGCTAAACTGATTTCCTTGCCAACTAGTAAAAACTGGACCTTCGGACCCCAGGACGTCGATGAACTGATCTTCATGCTGAGCGACAGCCCTGGGGTCATGTGCCGGCCTTCCCGAGTCAAGCAGATGTTTGCCTCCAGAGCCTGCCGGAAGTCGGTGATGATTGGGACTGCTCTTAACACAAGCGAGATGAAGAAACTGATCACCCACATGGGGGAGATGGACCACCCCTGGAACTGTCCCCATGGAAGGCCAACCATGAGACACATCGCCAACCTGGGTGTCATTTCTCAGAACTGACCGTAGTCACTGTATGGAATAATTGGTTTTATCGCAGATTTTTATGTTTTGAAAGACAGAGTCTTCACTAACCTTTTTTGTTTTAAAATGAACCTGCTACTTAAAAAAAATACACATCACACCCATTTAAAAGTGATCTTGAGAACCTTTTCAAACCAGAAAAAAAAAAAAAAAA"} }; public static Dictionary TranscriptCds = new Dictionary() { @@ -120,7 +122,7 @@ public static bool Validate(ISequenceProvider sequenceProvider, IChromosome chro if (TranscriptCdnas.TryGetValue(transcriptId, out var expectedCdna)) { - var cdnaSequence = new CdnaSequence(sequenceProvider.Sequence, translation.CodingRegion, + var cdnaSequence = new CdnaSequence(sequenceProvider.Sequence, translation?.CodingRegion, transcriptRegions, onReverseStrand, rnaEdits); string cdna = cdnaSequence.GetCdnaSequence(); if (cdna != expectedCdna) throw new InvalidDataException($"{transcriptId} cdna is still not right."); @@ -142,10 +144,7 @@ public static bool Validate(ISequenceProvider sequenceProvider, IChromosome chro if (cds != expectedCds) throw new InvalidDataException($"{transcriptId} CDS is still not right."); } - else - { - throw new InvalidDataException($"{transcriptId} cds not listed"); - } + if (cds != null && TranscriptPeptides.TryGetValue(transcriptId, out var expectedAA)) { var aminoAcids = new AminoAcids(false); @@ -153,11 +152,6 @@ public static bool Validate(ISequenceProvider sequenceProvider, IChromosome chro if (aa != expectedAA) throw new InvalidDataException($"{transcriptId} AA is still not right."); } - else - { - throw new InvalidDataException($"{transcriptId} AA not listed"); - } - return true; } diff --git a/VariantAnnotation/TranscriptAnnotation/FullTranscriptAnnotator.cs b/VariantAnnotation/TranscriptAnnotation/FullTranscriptAnnotator.cs index a8b1f42e..84d195be 100644 --- a/VariantAnnotation/TranscriptAnnotation/FullTranscriptAnnotator.cs +++ b/VariantAnnotation/TranscriptAnnotation/FullTranscriptAnnotator.cs @@ -21,9 +21,6 @@ public static IAnnotatedTranscript GetAnnotatedTranscript(ITranscript transcript var rightShiftedVariant = VariantRotator.Right(leftShiftedVariant, transcript, refSequence, transcript.Gene.OnReverseStrand); - // if(transcript.Id.WithVersion=="NM_001293228.1") - // Console.WriteLine("bug"); - var leftAnnotation = AnnotateTranscript(transcript, leftShiftedVariant, aminoAcids, refSequence); var rightAnnotation = ReferenceEquals(leftShiftedVariant, rightShiftedVariant) @@ -156,10 +153,8 @@ private static ISequence GetCodingSequence(ITranscript transcript, ISequence ref private static ISequence GetCdnaSequence(ITranscript transcript, ISequence refSequence) { - if (transcript.Translation == null) return null; - return transcript.CdnaSequence ?? (transcript.CdnaSequence = new CdnaSequence(refSequence, - transcript.Translation.CodingRegion, transcript.TranscriptRegions, + transcript.Translation?.CodingRegion, transcript.TranscriptRegions, transcript.Gene.OnReverseStrand, transcript.RnaEdits)); }