From 5892eaf1e43c7b8d8630afa49c9bc64b0d1d486b Mon Sep 17 00:00:00 2001
From: "Zhaoyuan (Ryan) Fu"
Date: Tue, 3 Dec 2024 16:12:58 -0500
Subject: [PATCH] Adjacent merging version

Move mergeDataFilterNumericalValues from StudyViewFilter to
StudyViewColumnarServiceUtil and drop the static 'areBinsMerged' flag.
Only adjacent numerical bins (end of one equals start of the next) are
merged; non-adjacent bins are kept as separate ranges and non-numerical
values are passed through unchanged.

---
Review notes (ignored by git am):
 * The merge loop was corrected so that a non-adjacent bin starts a new
   range instead of being dropped, adjacency is tested with compareTo,
   and no empty (null, null, null) range is emitted when a filter has
   no numerical bins. A regression test covers the non-adjacent case.
 * Type arguments on the context lines that still show a raw 'List'
   (customSampleIdentifiers, clinicalEventFilters, mutationDataFilters)
   could not be recovered; restore them from the target tree before
   applying. Context-line whitespace is likewise reconstructed.
 * The post-image blob ids on the 'index' lines predate these changes.

 .../impl/StudyViewColumnarServiceImpl.java    |  2 +-
 .../util/StudyViewColumnarServiceUtil.java    | 62 ++++++++++++++++++-
 .../web/parameter/GenomicDataFilter.java      |  7 +++
 .../web/parameter/StudyViewFilter.java        | 45 --------------
 .../StudyViewColumnarServiceUtilTest.java     | 47 +++++++++++++-
 5 files changed, 114 insertions(+), 49 deletions(-)

diff --git a/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java b/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java
index 9092a58560d..f2d0d228266 100644
--- a/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java
+++ b/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java
@@ -287,7 +287,7 @@ public List getMutationTypeCountsByGeneSpecific(StudyViewF
 
 
     private StudyViewFilterContext createContext(StudyViewFilter studyViewFilter) {
-        studyViewFilter.mergeDataFilterNumericalValues();
+        StudyViewColumnarServiceUtil.mergeDataFilterNumericalValues(studyViewFilter);
         List customSampleIdentifiers = customDataFilterUtil.extractCustomDataSamples(studyViewFilter);
         return new StudyViewFilterContext(studyViewFilter, customSampleIdentifiers);
     }
diff --git a/src/main/java/org/cbioportal/service/util/StudyViewColumnarServiceUtil.java b/src/main/java/org/cbioportal/service/util/StudyViewColumnarServiceUtil.java
index 3a4bcc7c34a..ecd1c02f4f7 100644
--- a/src/main/java/org/cbioportal/service/util/StudyViewColumnarServiceUtil.java
+++ b/src/main/java/org/cbioportal/service/util/StudyViewColumnarServiceUtil.java
@@ -5,14 +5,18 @@
 import org.cbioportal.model.ClinicalDataCount;
 import org.cbioportal.model.ClinicalDataCountItem;
 import org.cbioportal.model.GenomicDataCount;
+import org.cbioportal.web.parameter.DataFilterValue;
+import org.cbioportal.web.parameter.GenomicDataFilter;
+import org.cbioportal.web.parameter.StudyViewFilter;
 
+import java.math.BigDecimal;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 import java.util.stream.Collectors;
 
-public class StudyViewColumnarServiceUtil {
+public final class StudyViewColumnarServiceUtil {
 
     private StudyViewColumnarServiceUtil() {}
 
@@ -155,5 +159,59 @@ public static List normalizeDataCounts(List
 
         return new ArrayList<>(normalizedDataCounts);
     }
-
+    /**
+     * Merge adjacent numerical bins in the genomic data filters to reduce the number of scans that run on the database when filtering.
+     * Two consecutive bins are merged when the end of the first is numerically equal to the start of the second;
+     * non-adjacent bins are kept as separate ranges, and non-numerical values are passed through unchanged.
+     * A null start or end is never used as a merge point.
+     *
+     * @param studyViewFilter filter whose genomic data filters are replaced by their merged equivalents;
+     *                        left untouched when it has no genomic data filters
+     */
+    public static void mergeDataFilterNumericalValues(StudyViewFilter studyViewFilter) {
+        if (studyViewFilter.getGenomicDataFilters() == null || studyViewFilter.getGenomicDataFilters().isEmpty()) return;
+
+        List<GenomicDataFilter> mergedGenomicDataFilters = new ArrayList<>();
+
+        for (GenomicDataFilter genomicDataFilter : studyViewFilter.getGenomicDataFilters()) {
+            List<DataFilterValue> mergedValues = new ArrayList<>();
+
+            // bounds themselves may be null, so null cannot double as the "no range yet" marker
+            boolean hasPendingRange = false;
+            BigDecimal mergedStart = null, mergedEnd = null;
+            for (DataFilterValue dataFilterValue : genomicDataFilter.getValues()) {
+                // leave non-numerical values as they are
+                if (dataFilterValue.getValue() != null) {
+                    mergedValues.add(dataFilterValue);
+                    continue;
+                }
+
+                BigDecimal start = dataFilterValue.getStart();
+                BigDecimal end = dataFilterValue.getEnd();
+
+                // extend the pending range when this bin starts where it ends (compareTo ignores scale, unlike equals)
+                if (hasPendingRange && mergedEnd != null && start != null && mergedEnd.compareTo(start) == 0) {
+                    mergedEnd = end;
+                    continue;
+                }
+
+                // not adjacent: emit the pending range and start a new one from this bin so it is not lost
+                if (hasPendingRange) {
+                    mergedValues.add(new DataFilterValue(mergedStart, mergedEnd, null));
+                }
+                mergedStart = start;
+                mergedEnd = end;
+                hasPendingRange = true;
+            }
+
+            // only emit a range when at least one numerical bin was seen
+            if (hasPendingRange) {
+                mergedValues.add(new DataFilterValue(mergedStart, mergedEnd, null));
+            }
+            GenomicDataFilter mergedGenomicDataFilter = new GenomicDataFilter(genomicDataFilter.getHugoGeneSymbol(), genomicDataFilter.getProfileType(), mergedValues);
+            mergedGenomicDataFilters.add(mergedGenomicDataFilter);
+        }
+
+        studyViewFilter.setGenomicDataFilters(mergedGenomicDataFilters);
+    }
 }
\ No newline at end of file
diff --git a/src/main/java/org/cbioportal/web/parameter/GenomicDataFilter.java b/src/main/java/org/cbioportal/web/parameter/GenomicDataFilter.java
index edf95943436..34aca9c0894 100644
--- a/src/main/java/org/cbioportal/web/parameter/GenomicDataFilter.java
+++ b/src/main/java/org/cbioportal/web/parameter/GenomicDataFilter.java
@@ -1,6 +1,7 @@
 package org.cbioportal.web.parameter;
 
 import java.io.Serializable;
+import java.util.List;
 
 public class GenomicDataFilter extends DataFilter implements Serializable {
     private String hugoGeneSymbol;
@@ -13,6 +14,12 @@ public GenomicDataFilter(String hugoGeneSymbol, String profileType) {
         this.profileType = profileType;
     }
 
+    public GenomicDataFilter(String hugoGeneSymbol, String profileType, List<DataFilterValue> values) {
+        this.hugoGeneSymbol = hugoGeneSymbol;
+        this.profileType = profileType;
+        this.setValues(values);
+    }
+
     public String getHugoGeneSymbol() {
         return hugoGeneSymbol;
     }
diff --git a/src/main/java/org/cbioportal/web/parameter/StudyViewFilter.java b/src/main/java/org/cbioportal/web/parameter/StudyViewFilter.java
index 95ae884bddd..738d3632a70 100644
--- a/src/main/java/org/cbioportal/web/parameter/StudyViewFilter.java
+++ b/src/main/java/org/cbioportal/web/parameter/StudyViewFilter.java
@@ -1,8 +1,6 @@
 package org.cbioportal.web.parameter;
 
 import java.io.Serializable;
-import java.math.BigDecimal;
-import java.util.ArrayList;
 import java.util.List;
 import java.util.Objects;
 
@@ -43,7 +41,6 @@ public class StudyViewFilter implements Serializable {
     private AlterationFilter alterationFilter;
     private List clinicalEventFilters;
     private List mutationDataFilters;
-    private static boolean areBinsMerged = false;
 
     @AssertTrue
     private boolean isEitherSampleIdentifiersOrStudyIdsPresent() {
@@ -233,46 +230,4 @@ public void setClinicalEventFilters(List clinicalEventFilters) {
     public void setMutationDataFilters(List mutationDataFilters) {
         this.mutationDataFilters = mutationDataFilters;
     }
-
-    /**
-     * Merge the range of numerical values in DataFilters to reduce the number of scans that runs on the database.
-     * Variable 'areBinsMerged' is static so this method only gets run once.
-     */
-    public void mergeDataFilterNumericalValues() {
-        if (areBinsMerged || this.genomicDataFilters == null || this.genomicDataFilters.isEmpty()) return;
-
-        List<GenomicDataFilter> mergedGenomicDataFilters = new ArrayList<>();
-
-        for (GenomicDataFilter genomicDataFilter : this.genomicDataFilters) {
-            GenomicDataFilter mergedGenomicDataFilter = new GenomicDataFilter(genomicDataFilter.getHugoGeneSymbol(), genomicDataFilter.getProfileType());
-            List<DataFilterValue> mergedValues = new ArrayList<>();
-
-            boolean hasNullStart = false, hasNullEnd = false;
-            BigDecimal mergedStart = null, mergedEnd = null;
-            for (DataFilterValue dataFilterValue : genomicDataFilter.getValues()) {
-                // filter non-numerical values and keep them intact
-                if (dataFilterValue.getValue() != null) {
-                    mergedValues.add(dataFilterValue);
-                }
-                // record if numerical values have null start or end, otherwise record their start-end range
-                else {
-                    if (dataFilterValue.getStart() == null) hasNullStart = true;
-                    else if (mergedStart == null) mergedStart = dataFilterValue.getStart();
-                    else if (dataFilterValue.getStart().compareTo(mergedStart) < 0) mergedStart = dataFilterValue.getStart();
-                    if (dataFilterValue.getEnd() == null) hasNullEnd = true;
-                    else if (mergedEnd == null) mergedEnd = dataFilterValue.getEnd();
-                    else if (dataFilterValue.getEnd().compareTo(mergedEnd) > 0) mergedEnd = dataFilterValue.getEnd();
-                }
-            }
-            if (hasNullStart) mergedStart = null;
-            if (hasNullEnd) mergedEnd = null;
-
-            mergedValues.add(new DataFilterValue(mergedStart, mergedEnd, null));
-            mergedGenomicDataFilter.setValues(mergedValues);
-            mergedGenomicDataFilters.add(mergedGenomicDataFilter);
-        }
-
-        this.genomicDataFilters = mergedGenomicDataFilters;
-        areBinsMerged = true;
-    }
 }
diff --git a/src/test/java/org/cbioportal/service/util/StudyViewColumnarServiceUtilTest.java b/src/test/java/org/cbioportal/service/util/StudyViewColumnarServiceUtilTest.java
index 58308717c01..f5bd1ba283b 100644
--- a/src/test/java/org/cbioportal/service/util/StudyViewColumnarServiceUtilTest.java
+++ b/src/test/java/org/cbioportal/service/util/StudyViewColumnarServiceUtilTest.java
@@ -5,9 +5,14 @@
 import org.cbioportal.model.ClinicalDataCount;
 import org.cbioportal.model.ClinicalDataCountItem;
 import org.cbioportal.model.GenomicDataCount;
+import org.cbioportal.web.parameter.DataFilterValue;
+import org.cbioportal.web.parameter.GenomicDataFilter;
+import org.cbioportal.web.parameter.StudyViewFilter;
 import org.junit.Assert;
 import org.junit.Test;
 
+import java.math.BigDecimal;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
@@ -264,5 +269,45 @@ public void testNormalizeDataCounts() {
 
         assertEquals(9, falseCount.getCount().intValue());
     }
-
+    @Test
+    public void testMergeDataFilterNumericalValues() {
+        StudyViewFilter studyViewFilter = new StudyViewFilter();
+        List<GenomicDataFilter> genomicDataFilters = new ArrayList<>();
+        List<DataFilterValue> values = new ArrayList<>();
+        values.add(new DataFilterValue(BigDecimal.valueOf(-2.5), BigDecimal.valueOf(-2.25), null));
+        values.add(new DataFilterValue(BigDecimal.valueOf(-2.25), BigDecimal.valueOf(-2), null));
+        values.add(new DataFilterValue(BigDecimal.valueOf(-2), BigDecimal.valueOf(-1.75), null));
+        genomicDataFilters.add(new GenomicDataFilter(null, null, values));
+        studyViewFilter.setGenomicDataFilters(genomicDataFilters);
+
+        StudyViewColumnarServiceUtil.mergeDataFilterNumericalValues(studyViewFilter);
+
+        List<GenomicDataFilter> actualGenomicDataFilters = studyViewFilter.getGenomicDataFilters();
+        List<DataFilterValue> actualDataFilterValues = actualGenomicDataFilters.getFirst().getValues();
+        BigDecimal start = actualDataFilterValues.getFirst().getStart();
+        BigDecimal end = actualDataFilterValues.getFirst().getEnd();
+        assertEquals(0, BigDecimal.valueOf(-2.5).compareTo(start));
+        assertEquals(0, BigDecimal.valueOf(-1.75).compareTo(end));
+    }
+
+    @Test
+    public void testMergeDataFilterNumericalValuesKeepsNonAdjacentBins() {
+        List<DataFilterValue> values = new ArrayList<>();
+        values.add(new DataFilterValue(BigDecimal.valueOf(-2.5), BigDecimal.valueOf(-2.25), null));
+        values.add(new DataFilterValue(BigDecimal.valueOf(-2.25), BigDecimal.valueOf(-2), null));
+        values.add(new DataFilterValue(BigDecimal.valueOf(1), BigDecimal.valueOf(1.5), null));
+        List<GenomicDataFilter> genomicDataFilters = new ArrayList<>();
+        genomicDataFilters.add(new GenomicDataFilter(null, null, values));
+        StudyViewFilter studyViewFilter = new StudyViewFilter();
+        studyViewFilter.setGenomicDataFilters(genomicDataFilters);
+
+        StudyViewColumnarServiceUtil.mergeDataFilterNumericalValues(studyViewFilter);
+
+        List<DataFilterValue> actualDataFilterValues = studyViewFilter.getGenomicDataFilters().getFirst().getValues();
+        assertEquals(2, actualDataFilterValues.size());
+        assertEquals(0, BigDecimal.valueOf(-2.5).compareTo(actualDataFilterValues.get(0).getStart()));
+        assertEquals(0, BigDecimal.valueOf(-2).compareTo(actualDataFilterValues.get(0).getEnd()));
+        assertEquals(0, BigDecimal.valueOf(1).compareTo(actualDataFilterValues.get(1).getStart()));
+        assertEquals(0, BigDecimal.valueOf(1.5).compareTo(actualDataFilterValues.get(1).getEnd()));
+    }
 }
\ No newline at end of file