diff --git a/SAUtils/DataStructures/CosmicItem.cs b/SAUtils/DataStructures/CosmicItem.cs index 2cc60cd3..1cd8ec97 100644 --- a/SAUtils/DataStructures/CosmicItem.cs +++ b/SAUtils/DataStructures/CosmicItem.cs @@ -19,7 +19,7 @@ public sealed class CosmicItem : ISupplementaryDataItem private string Id { get; } private string Gene { get; } private int? SampleCount { get; } - public HashSet Studies { get; } + public HashSet Tumors { get; } public CosmicItem( Chromosome chromosome, @@ -28,48 +28,69 @@ public CosmicItem( string refAllele, string altAllele, string gene, - HashSet studies, int? sampleCount) + HashSet tumors, + int? sampleCount) { Chromosome = chromosome; Position = position; Id = id; - RefAllele = refAllele; - AltAllele = altAllele; + RefAllele = refAllele; + AltAllele = altAllele; Gene = gene; - Studies = studies; + Tumors = tumors; SampleCount = sampleCount; + } + public override int GetHashCode() + { + var hashCode = Id?.GetHashCode() ?? 0; + return hashCode; } - public sealed class CosmicStudy : IEquatable + public sealed class CosmicTumor : IEquatable { #region members public string Id { get; } - public IEnumerable Histologies { get; } - public IEnumerable Sites { get; } + public string Histology { get; } + public string Site { get; } + public string Tier { get; } #endregion - public CosmicStudy(string studyId, IEnumerable histologies, IEnumerable sites) + public CosmicTumor(string tumorId, + string histology, + string site, + string tier) { - Id = studyId; - Sites = sites; - Histologies = histologies; + Id = tumorId; + Site = site; + Histology = histology; + Tier = tier; } - public bool Equals(CosmicStudy other) + public bool Equals(CosmicTumor other) { if (other == null) return false; + return Id.Equals(other.Id) - && Histologies.SequenceEqual(other.Histologies) - && Sites.SequenceEqual(other.Sites); + && StringsEqual(Histology, other.Histology) + && StringsEqual(Site, other.Site) + && StringsEqual(Tier, other.Tier); + } + + private static bool StringsEqual(string s1, string s2) + { + if (s1 == null && s2 != null) return false; + if (s1 != null && s2 == null) return false; + if (s1 == null && s2 == null) return true; + return s1.Equals(s2); } public override int GetHashCode() { var hashCode = Id?.GetHashCode() ?? 0; - //hashCode ^= Histologies.GetHashCode() ^ Sites.GetHashCode(); + //hashCode ^= Histology.GetHashCode() ^ Site.GetHashCode(); return hashCode; } } @@ -86,28 +107,26 @@ public string GetJsonString() jsonObject.AddStringValue("gene", Gene); jsonObject.AddIntValue("sampleCount", SampleCount); - jsonObject.AddStringValue("cancerTypesAndCounts", GetJsonStringFromDict("cancerType",GetCancerTypeCounts()), false); - jsonObject.AddStringValue("cancerSitesAndCounts", GetJsonStringFromDict("cancerSite",GetTissueCounts()), false); + jsonObject.AddStringValue("cancerTypesAndCounts", GetJsonStringFromDict("cancerType", GetCancerTypeCounts()), false); + jsonObject.AddStringValue("cancerSitesAndCounts", GetJsonStringFromDict("cancerSite", GetTissueCounts()), false); + jsonObject.AddStringValue("tiersAndCounts", GetJsonStringFromDict("tier", GetTierCounts()), false); return StringBuilderPool.GetStringAndReturn(sb); } internal Dictionary GetTissueCounts() { - if (Studies == null) return null; + if (Tumors == null) return null; var tissueCounts = new Dictionary(); - foreach (var study in Studies) + foreach (var tumor in Tumors) { - if (study.Sites == null) return null; + if (string.IsNullOrEmpty(tumor.Site)) continue; - foreach (var site in study.Sites) + if (tissueCounts.TryGetValue(tumor.Site, out _)) { - if (tissueCounts.TryGetValue(site, out _)) - { - tissueCounts[site]++; - } - else tissueCounts[site] = 1; + tissueCounts[tumor.Site]++; } + else tissueCounts[tumor.Site] = 1; } return tissueCounts; @@ -115,22 +134,38 @@ internal Dictionary GetTissueCounts() internal Dictionary GetCancerTypeCounts() { - if (Studies == null) return null; - var cancerTypeCounts = new Dictionary(); - foreach (var study in Studies) + if (Tumors == null) return null; + var histologyCounts = new Dictionary(); + foreach (var tumor in Tumors) + { + if (string.IsNullOrEmpty(tumor.Histology)) continue; + + if (histologyCounts.TryGetValue(tumor.Histology, out _)) + { + histologyCounts[tumor.Histology]++; + } + else histologyCounts[tumor.Histology] = 1; + } + + return histologyCounts; + } + + internal Dictionary GetTierCounts() + { + if (Tumors == null) return null; + var tierCounts = new Dictionary(); + foreach (var tumor in Tumors) { - if (study.Histologies == null) return null; - foreach (var histology in study.Histologies) + if (string.IsNullOrEmpty(tumor.Tier)) continue; + + if (tierCounts.TryGetValue(tumor.Tier, out _)) { - if (cancerTypeCounts.TryGetValue(histology, out _)) - { - cancerTypeCounts[histology]++; - } - else cancerTypeCounts[histology] = 1; + tierCounts[tumor.Tier]++; } + else tierCounts[tumor.Tier] = 1; } - return cancerTypeCounts; + return tierCounts; } private static string GetJsonStringFromDict(string dataType, Dictionary dictionary) diff --git a/SAUtils/InputFileParsers/Cosmic/MergedCosmicReader.cs b/SAUtils/InputFileParsers/Cosmic/MergedCosmicReader.cs index cd63eae3..90cf5522 100644 --- a/SAUtils/InputFileParsers/Cosmic/MergedCosmicReader.cs +++ b/SAUtils/InputFileParsers/Cosmic/MergedCosmicReader.cs @@ -19,16 +19,17 @@ public sealed class MergedCosmicReader private string _geneName; private int? _sampleCount; - private int _mutationIdIndex = -1; + private int _cosmicIdIndex = -1; private int _primarySiteIndex = -1; private int _primaryHistologyIndex = -1; - private int _studyIdIndex = -1; + private int _tumorIdIndex = -1; + private int _tierIndex = -1; - private const string StudyIdTag = "ID_STUDY"; + private const string TumorIdTag = "ID_tumour"; private readonly Dictionary _refChromDict; private readonly ISequenceProvider _sequenceProvider; - private readonly Dictionary> _studies; + private readonly Dictionary> _tumors; public MergedCosmicReader(string vcfFile, string tsvFile, ISequenceProvider sequenceProvider) { @@ -36,12 +37,12 @@ public MergedCosmicReader(string vcfFile, string tsvFile, ISequenceProvider sequ _tsvFileReader = GZipUtilities.GetAppropriateStreamReader(tsvFile); _sequenceProvider = sequenceProvider; _refChromDict = _sequenceProvider.RefNameToChromosome; - _studies = new Dictionary>(); + _tumors = new Dictionary>(); } public IEnumerable GetItems() { - // taking up all studies in to the dictionary + // taking up all tumors in to the dictionary using (_tsvFileReader) { string line; @@ -49,7 +50,7 @@ public IEnumerable GetItems() { if (IsHeaderLine(line)) GetColumnIndexes(line); // the first line is supposed to be a the header line - else AddCosmicStudy(line); + else AddCosmicTumor(line); } } @@ -74,69 +75,54 @@ public IEnumerable GetItems() } } - private void AddCosmicStudy(string line) + private void AddCosmicTumor(string line) { var columns = line.OptimizedSplit('\t'); - string mutationId = columns[_mutationIdIndex]; - string studyId = columns[_studyIdIndex]; - var sites = GetSites(columns); - var histologies = GetHistologies(columns); - - if (string.IsNullOrEmpty(mutationId)) return; - - var study = new CosmicItem.CosmicStudy(studyId, histologies, sites); - if (_studies.TryGetValue(mutationId, out var studySet)) - studySet.Add(study); - else _studies[mutationId] = new HashSet { study }; - } - - private List GetHistologies(string[] columns) - { - var histologies = new HashSet(); - var primaryHistology = columns[_primaryHistologyIndex].Replace('_', ' '); - TryAddValue(primaryHistology, histologies); - - return histologies.ToList(); - } - - private List GetSites(string[] columns) - { - var sites = new HashSet(); + string cosmicId = columns[_cosmicIdIndex]; + string tumorId = columns[_tumorIdIndex]; + string site = GetString(columns[_primarySiteIndex]); + string histology = GetString(columns[_primaryHistologyIndex]); + string tier = GetString(columns[_tierIndex]); - var primarySite = columns[_primarySiteIndex].Replace('_', ' '); - TryAddValue(primarySite, sites); + if (string.IsNullOrEmpty(cosmicId)) return; - return sites.ToList(); + var tumor = new CosmicItem.CosmicTumor(tumorId, histology, site, tier); + if (_tumors.TryGetValue(cosmicId, out var tumorSet)) + tumorSet.Add(tumor); + else _tumors[cosmicId] = new HashSet { tumor }; } - private static void TryAddValue(string value, ISet sites) + private string GetString(string value) { - if (!string.IsNullOrEmpty(value) && value != "NS") - sites.Add(value); + if (string.IsNullOrEmpty(value) || value == "NS") + return null; + value = value.Replace('_', ' '); + return value; } - private static bool IsHeaderLine(string line) => line.Contains(StudyIdTag); + private static bool IsHeaderLine(string line) => line.Contains(TumorIdTag); private void GetColumnIndexes(string headerLine) { - //Gene name Accession Number Gene CDS length HGNC ID Sample name ID_sample ID_tumour Primary site Site subtype 1 Site subtype 2 Site subtype 3 Primary histology Histology subtype 1 Histology subtype 2 Histology subtype 3 Genome-wide screen Mutation ID Mutation CDS Mutation AA Mutation Description Mutation zygosity LOH GRCh Mutation genome position Mutation strand SNP FATHMM prediction FATHMM score Mutation somatic status Pubmed_PMID ID_STUDY Sample source Tumour origin Age + //Gene name Accession Number Gene CDS length HGNC ID Sample name ID_sample ID_tumour Primary site Site subtype 1 Site subtype 2 Site subtype 3 Primary histology Histology subtype 1 Histology subtype 2 Histology subtype 3 Genome-wide screen GENOMIC_MUTATION_ID LEGACY_MUTATION_ID MUTATION_ID Mutation CDS Mutation AA Mutation Description Mutation zygosity LOH GRCh Mutation genome position Mutation strand SNP Resistance Mutation FATHMM prediction FATHMM score Mutation somatic status Pubmed_PMID ID_STUDY Sample Type Tumour origin Age Tier HGVSP HGVSC HGVSG - _mutationIdIndex = -1; - _studyIdIndex = -1; + _cosmicIdIndex = -1; + _tumorIdIndex = -1; _primarySiteIndex = -1; _primaryHistologyIndex = -1; + _tierIndex = -1; var columns = headerLine.OptimizedSplit('\t'); for (int i = 0; i < columns.Length; i++) { switch (columns[i]) { - case "Mutation ID": - _mutationIdIndex = i; + case "GENOMIC_MUTATION_ID": + _cosmicIdIndex = i; break; - case StudyIdTag: - _studyIdIndex = i; + case TumorIdTag: + _tumorIdIndex = i; break; case "Primary site": _primarySiteIndex = i; @@ -144,17 +130,22 @@ private void GetColumnIndexes(string headerLine) case "Primary histology": _primaryHistologyIndex = i; break; + case "Tier": + _tierIndex = i; + break; } } - if (_mutationIdIndex == -1) - throw new InvalidDataException("Column for mutation Id could not be detected"); - if (_studyIdIndex == -1) - throw new InvalidDataException("Column for study Id could not be detected"); + if (_cosmicIdIndex == -1) + throw new InvalidDataException("Column for Cosmic Id could not be detected"); + if (_tumorIdIndex == -1) + throw new InvalidDataException("Column for tumor Id could not be detected"); if (_primarySiteIndex == -1) throw new InvalidDataException("Column for primary site could not be detected"); if (_primaryHistologyIndex == -1) throw new InvalidDataException("Column for primary histology could not be detected"); + if (_tierIndex == -1) + throw new InvalidDataException("Column for tier could not be decteded"); } private const int MaxVariantLength= 1000; @@ -185,7 +176,7 @@ internal List ExtractCosmicItems(string vcfLine) var (shiftedPos, shiftedRef, shiftedAlt) = VariantUtils.TrimAndLeftAlign(position, refAllele, altAllele, _sequenceProvider.Sequence); - cosmicItems.Add(_studies.TryGetValue(cosmicId, out var studies) + cosmicItems.Add(_tumors.TryGetValue(cosmicId, out var studies) ? new CosmicItem(chromosome, shiftedPos, cosmicId, shiftedRef, shiftedAlt, _geneName, studies, _sampleCount) : new CosmicItem(chromosome, shiftedPos, cosmicId, shiftedRef, shiftedAlt, _geneName, null, diff --git a/UnitTests/SAUtils/InputFileParsers/CosmicItemTests.cs b/UnitTests/SAUtils/InputFileParsers/CosmicItemTests.cs index f85e5b4c..128901bd 100644 --- a/UnitTests/SAUtils/InputFileParsers/CosmicItemTests.cs +++ b/UnitTests/SAUtils/InputFileParsers/CosmicItemTests.cs @@ -8,78 +8,192 @@ namespace UnitTests.SAUtils.InputFileParsers public sealed class CosmicItemTests { [Fact] - public void GetCancerSiteCount_same_study() + public void Tumors_Equal() { - var cosmicItem = new CosmicItem(ChromosomeUtilities.Chr1, 100, "rs101", "A", "C", "GENE0", new HashSet + var tumor01 = new CosmicItem.CosmicTumor("100", "primary histology 0", "primarySite 0", "tier 0"); + var tumor02 = new CosmicItem.CosmicTumor("100", "primary histology 0", "primarySite 0", "tier 0"); + var tumor03 = new CosmicItem.CosmicTumor("101", "primary histology 0", "primarySite 0", "tier 0"); + + var tumor04 = new CosmicItem.CosmicTumor("100", null, null, null); + var tumor05 = new CosmicItem.CosmicTumor("100", null, null, null); + var tumor06 = new CosmicItem.CosmicTumor("101", null, null, null); + + var tumor07 = new CosmicItem.CosmicTumor("100", "primary histology 0", "primarySite 0", "tier 0"); + var tumor08 = new CosmicItem.CosmicTumor("100", "primary histology 1", "primarySite 0", "tier 0"); + var tumor09 = new CosmicItem.CosmicTumor("100", null, "primarySite 0", "tier 0"); + + var tumor10 = new CosmicItem.CosmicTumor("100", "primary histology 0", "primarySite 0", "tier 0"); + var tumor11 = new CosmicItem.CosmicTumor("100", "primary histology 0", "primarySite 1", "tier 0"); + var tumor12 = new CosmicItem.CosmicTumor("100", "primary histology 0", null, "tier 0"); + + var tumor13 = new CosmicItem.CosmicTumor("100", "primary histology 0", "primarySite 0", "tier 0"); + var tumor14 = new CosmicItem.CosmicTumor("100", "primary histology 0", "primarySite 0", "tier 1"); + var tumor15 = new CosmicItem.CosmicTumor("100", "primary histology 0", "primarySite 0", null ); + + + Assert.True(tumor01.Equals(tumor02)); + Assert.True(tumor04.Equals(tumor05)); + + Assert.False(tumor01.Equals(tumor03)); + Assert.False(tumor01.Equals(tumor04)); + Assert.False(tumor05.Equals(tumor06)); + Assert.False(tumor07.Equals(tumor08)); + Assert.False(tumor07.Equals(tumor09)); + Assert.False(tumor08.Equals(tumor09)); + Assert.False(tumor10.Equals(tumor11)); + Assert.False(tumor10.Equals(tumor12)); + Assert.False(tumor11.Equals(tumor12)); + Assert.False(tumor13.Equals(tumor14)); + Assert.False(tumor13.Equals(tumor15)); + Assert.False(tumor14.Equals(tumor15)); + } + + [Fact] + public void GetTissueCount_same_tumor() + { + var cosmicItem = new CosmicItem(ChromosomeUtilities.Chr1, 100, "rs101", "A", "C", "GENE0", new HashSet { - new CosmicItem.CosmicStudy("100", new []{"primary histology 0", "histology subtype 1"}, new []{"primarySite 0", "site subtype 1"}), - new CosmicItem.CosmicStudy("100", new []{"primary histology 0", "histology subtype 1"}, new []{"primarySite 0", "site subtype 1"}) + new CosmicItem.CosmicTumor("100", "primary histology 0", "primarySite 0", "tier 0"), + new CosmicItem.CosmicTumor("100", "primary histology 0", "primarySite 0", "tier 0") }, 1); var counts = cosmicItem.GetTissueCounts(); - Assert.Equal(2, counts.Count); + Assert.Equal(1, counts.Count); Assert.Equal(1, counts["primarySite 0"]); - Assert.Equal(1, counts["site subtype 1"]); } [Fact] - public void GetTissueCount_different_studies() + public void GetTissueCount_different_tumors_same_sites() { - var cosmicItem = new CosmicItem(ChromosomeUtilities.Chr1, 100, "rs101", "A", "C", "GENE0", new HashSet + var cosmicItem = new CosmicItem(ChromosomeUtilities.Chr1, 100, "rs101", "A", "C", "GENE0", new HashSet { - new CosmicItem.CosmicStudy("100", new []{"primary histology 0", "histology subtype 1"}, new []{"primarySite 0", "site subtype 2"}), - new CosmicItem.CosmicStudy("110", new []{"primary histology 0", "histology subtype 1"}, new []{"primarySite 0", "site subtype 1"}) + new CosmicItem.CosmicTumor("100", "primary histology 0", "primarySite 0", "tier 0"), + new CosmicItem.CosmicTumor("101", "primary histology 0", "primarySite 0", "tier 0") }, 1); var counts = cosmicItem.GetTissueCounts(); - Assert.Equal(3, counts.Count); + Assert.Equal(2, counts.Count); Assert.Equal(2, counts["primarySite 0"]); - Assert.Equal(1, counts["site subtype 1"]); - Assert.Equal(1, counts["site subtype 2"]); } [Fact] - public void GetCancerTypeCount_same_study() + public void GetTissueCount_different_tumors_different_sites() { - var cosmicItem = new CosmicItem(ChromosomeUtilities.Chr1, 100, "rs101", "A", "C", "GENE0", new HashSet + var cosmicItem = new CosmicItem(ChromosomeUtilities.Chr1, 100, "rs101", "A", "C", "GENE0", new HashSet { - new CosmicItem.CosmicStudy("100", new []{"primary histology 0", "histology subtype 1"}, new []{"primarySite 0", "site subtype 1"}), - new CosmicItem.CosmicStudy("100", new []{"primary histology 0", "histology subtype 1"}, new []{"primarySite 0", "site subtype 1"}) + new CosmicItem.CosmicTumor("100", "primary histology 0", "primarySite 0", "tier 0"), + new CosmicItem.CosmicTumor("101", "primary histology 0", "primarySite 1", "tier 0") + }, 1); + + var counts = cosmicItem.GetTissueCounts(); + Assert.Equal(2, counts.Count); + Assert.Equal(1, counts["primarySite 0"]); + Assert.Equal(1, counts["primarySite 1"]); + } + + [Fact] + public void GetCancerTypeCount_same_tumor() + { + var cosmicItem = new CosmicItem(ChromosomeUtilities.Chr1, 100, "rs101", "A", "C", "GENE0", new HashSet + { + new CosmicItem.CosmicTumor("100", "primary histology 0", "primarySite 0", "tier 0"), + new CosmicItem.CosmicTumor("100", "primary histology 0", "primarySite 0", "tier 0") }, 1); var cancerTypeCounts = cosmicItem.GetCancerTypeCounts(); - Assert.Equal(2, cancerTypeCounts.Count); + Assert.Equal(1, cancerTypeCounts.Count); Assert.Equal(1, cancerTypeCounts["primary histology 0"]); - Assert.Equal(1, cancerTypeCounts["histology subtype 1"]); } - [Fact] - public void GetCancerTypeCount_different_studies() + public void GetCancerTypeCount_different_tumors_same_histologies() { - var cosmicItem = new CosmicItem(ChromosomeUtilities.Chr1, 100, "rs101", "A", "C", "GENE0", new HashSet + var cosmicItem = new CosmicItem(ChromosomeUtilities.Chr1, 100, "rs101", "A", "C", "GENE0", new HashSet { - new CosmicItem.CosmicStudy("100", new []{"primary histology 0", "histology subtype 1"}, new []{"primarySite 0", "site subtype 1"}), - new CosmicItem.CosmicStudy("101", new []{"primary histology 0", "histology subtype 2"}, new []{"primarySite 0", "site subtype 1"}) + new CosmicItem.CosmicTumor("100", "primary histology 0", "primarySite 0", "tier 0"), + new CosmicItem.CosmicTumor("101", "primary histology 0", "primarySite 0", "tier 0") }, 1); var cancerTypeCounts = cosmicItem.GetCancerTypeCounts(); - Assert.Equal(3, cancerTypeCounts.Count); + Assert.Equal(2, cancerTypeCounts.Count); Assert.Equal(2, cancerTypeCounts["primary histology 0"]); - Assert.Equal(1, cancerTypeCounts["histology subtype 1"]); - Assert.Equal(1, cancerTypeCounts["histology subtype 2"]); } + + [Fact] + public void GetCancerTypeCount_different_tumors_different_histologies() + { + var cosmicItem = new CosmicItem(ChromosomeUtilities.Chr1, 100, "rs101", "A", "C", "GENE0", new HashSet + { + new CosmicItem.CosmicTumor("100", "primary histology 0", "primarySite 0", "tier 0"), + new CosmicItem.CosmicTumor("101", "primary histology 1", "primarySite 0", "tier 0") + }, 1); + var cancerTypeCounts = cosmicItem.GetCancerTypeCounts(); + Assert.Equal(2, cancerTypeCounts.Count); + Assert.Equal(1, cancerTypeCounts["primary histology 0"]); + Assert.Equal(1, cancerTypeCounts["primary histology 1"]); + } + [Fact] - public void GetJsonString() + public void GetTierCount_same_tumor() + { + var cosmicItem = new CosmicItem(ChromosomeUtilities.Chr1, 100, "rs101", "A", "C", "GENE0", new HashSet + { + new CosmicItem.CosmicTumor("100", "primary histology 0", "primarySite 0", "tier 0"), + new CosmicItem.CosmicTumor("100", "primary histology 0", "primarySite 0", "tier 0") + }, 1); + + var tierCounts = cosmicItem.GetTierCounts(); + Assert.Equal(1, tierCounts.Count); + Assert.Equal(1, tierCounts["tier 0"]); + } + + [Fact] + public void GetTierCount_different_tumors_same_tiers() + { + var cosmicItem = new CosmicItem(ChromosomeUtilities.Chr1, 100, "rs101", "A", "C", "GENE0", new HashSet + { + new CosmicItem.CosmicTumor("100", "primary histology 0", "primarySite 0", "tier 0"), + new CosmicItem.CosmicTumor("101", "primary histology 0", "primarySite 0", "tier 0") + }, 1); + + var tierCounts = cosmicItem.GetTierCounts(); + Assert.Equal(2, tierCounts.Count); + Assert.Equal(2, tierCounts["tier 0"]); + } + + [Fact] + public void GetTierCount_different_tumors_different_tiers() { - var cosmicItem = new CosmicItem(ChromosomeUtilities.Chr1, 100, "rs101", "A", "C", "GENE0", new HashSet + var cosmicItem = new CosmicItem(ChromosomeUtilities.Chr1, 100, "rs101", "A", "C", "GENE0", new HashSet { - new CosmicItem.CosmicStudy("100", new []{"primary histology 0", "histology subtype 1"}, new []{"primarySite 0", "site subtype 1"}), - new CosmicItem.CosmicStudy("101", new []{"primary histology 0", "histology subtype 2"}, new []{"primarySite 0", "site subtype 1"}) + new CosmicItem.CosmicTumor("100", "primary histology 0", "primarySite 0", "tier 0"), + new CosmicItem.CosmicTumor("101", "primary histology 0", "primarySite 0", "tier 1") }, 1); - Assert.Equal("\"id\":\"rs101\",\"refAllele\":\"A\",\"altAllele\":\"C\",\"gene\":\"GENE0\",\"sampleCount\":1,\"cancerTypesAndCounts\":[{\"cancerType\":\"primary histology 0\",\"count\":2},{\"cancerType\":\"histology subtype 1\",\"count\":1},{\"cancerType\":\"histology subtype 2\",\"count\":1}],\"cancerSitesAndCounts\":[{\"cancerSite\":\"primarySite 0\",\"count\":2},{\"cancerSite\":\"site subtype 1\",\"count\":2}]", cosmicItem.GetJsonString()); + var tierCounts = cosmicItem.GetTierCounts(); + Assert.Equal(2, tierCounts.Count); + Assert.Equal(1, tierCounts["tier 0"]); + Assert.Equal(1, tierCounts["tier 1"]); + } + + + [Fact] + public void GetJsonString() + { + var cosmicItem = new CosmicItem(ChromosomeUtilities.Chr1, 100, "rs101", "A", "C", "GENE0", new HashSet + { + new CosmicItem.CosmicTumor("101", "primary histology 0", "primarySite 0", "tier 0"), + new CosmicItem.CosmicTumor("102", "primary histology 0", "primarySite 1", "tier 0"), + new CosmicItem.CosmicTumor("103", "primary histology 0", "primarySite 0", "tier 0"), + new CosmicItem.CosmicTumor("104", "primary histology 1", "primarySite 1", "tier 1"), + new CosmicItem.CosmicTumor("105", "primary histology 1", "primarySite 0", "tier 1"), + new CosmicItem.CosmicTumor("106", "primary histology 1", "primarySite 1", "tier 1"), + new CosmicItem.CosmicTumor("107", "primary histology 1", "primarySite 0", "tier 1"), + new CosmicItem.CosmicTumor("108", "primary histology 2", "primarySite 2", "tier 1") + }, 8); + + Assert.Equal("\"id\":\"rs101\",\"refAllele\":\"A\",\"altAllele\":\"C\",\"gene\":\"GENE0\",\"sampleCount\":8,\"cancerTypesAndCounts\":[{\"cancerType\":\"primary histology 0\",\"count\":3},{\"cancerType\":\"primary histology 1\",\"count\":4},{\"cancerType\":\"primary histology 2\",\"count\":1}],\"cancerSitesAndCounts\":[{\"cancerSite\":\"primarySite 0\",\"count\":4},{\"cancerSite\":\"primarySite 1\",\"count\":3},{\"cancerSite\":\"primarySite 2\",\"count\":1}],\"tiersAndCounts\":[{\"tier\":\"tier 0\",\"count\":4},{\"tier\":\"tier 1\",\"count\":4}]", cosmicItem.GetJsonString()); } } diff --git a/UnitTests/SAUtils/InputFileParsers/MergedCosmicReaderTests.cs b/UnitTests/SAUtils/InputFileParsers/MergedCosmicReaderTests.cs index 40643a7b..20729b17 100644 --- a/UnitTests/SAUtils/InputFileParsers/MergedCosmicReaderTests.cs +++ b/UnitTests/SAUtils/InputFileParsers/MergedCosmicReaderTests.cs @@ -9,24 +9,24 @@ namespace UnitTests.SAUtils.InputFileParsers public sealed class MergedCosmicReaderTests { [Fact] - public void TwoStudyCosmicCoding() + public void TwoTumorCosmicCoding() { var seqProvider = ParserTestUtils.GetSequenceProvider(35416, "A", 'C', ChromosomeUtilities.RefNameToChromosome); var cosmicReader = new MergedCosmicReader(Resources.TopPath("cosm5428243.vcf"), Resources.TopPath("cosm5428243.tsv"), seqProvider); var cosmicItem = cosmicReader.GetItems().ToList()[0]; - var studies = cosmicItem.Studies.ToList(); + var tumors = cosmicItem.Tumors.ToList(); - Assert.Equal("544", studies[0].Id); - Assert.Equal(new[] { "haematopoietic and lymphoid tissue" }, studies[0].Sites); - Assert.Equal(new[] { "haematopoietic neoplasm" }, studies[0].Histologies); - //Assert.Equal(new [] { "haematopoietic neoplasm", "acute myeloid leukaemia" }, study.Histologies); + Assert.Equal("2205513", tumors[0].Id); + Assert.Equal("haematopoietic and lymphoid tissue" , tumors[0].Site); + Assert.Equal("haematopoietic neoplasm", tumors[0].Histology); + //Assert.Equal(new [] { "haematopoietic neoplasm", "acute myeloid leukaemia" }, tumor.Histologies); - Assert.Equal("544", studies[1].Id); - Assert.Equal(new[] { "haematopoietic;lymphoid tissue" }, studies[1].Sites); - Assert.Equal(new[] { "haematopoietic neoplasm" }, studies[1].Histologies); - //Assert.Equal(new[] { "haematopoietic_neoplasm", "acute_myeloid_leukaemia" }, study.Histologies); + Assert.Equal("2205513", tumors[1].Id); + Assert.Equal("haematopoietic;lymphoid tissue", tumors[1].Site); + Assert.Equal("haematopoietic neoplasm", tumors[1].Histology); + //Assert.Equal(new[] { "haematopoietic_neoplasm", "acute_myeloid_leukaemia" }, tumor.Histologies); } [Fact]