Skip to content

Commit

Permalink
Address comments & move to helper
Browse files Browse the repository at this point in the history
  • Loading branch information
fuzhaoyuan authored and alisman committed Dec 6, 2024
1 parent 9f7fb69 commit 31a3b0d
Show file tree
Hide file tree
Showing 5 changed files with 155 additions and 169 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,15 @@
import org.cbioportal.web.parameter.ClinicalDataFilter;
import org.cbioportal.web.parameter.CategorizedGenericAssayDataCountFilter;
import org.cbioportal.web.parameter.CustomSampleIdentifier;
import org.cbioportal.web.parameter.DataFilter;
import org.cbioportal.web.parameter.DataFilterValue;
import org.cbioportal.web.parameter.GenericAssayDataFilter;
import org.cbioportal.web.parameter.GenomicDataFilter;
import org.cbioportal.web.parameter.StudyViewFilter;
import org.springframework.lang.NonNull;
import org.springframework.lang.Nullable;

import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.EnumMap;
import java.util.List;
Expand All @@ -28,6 +33,14 @@ public static StudyViewFilterHelper build(@Nullable StudyViewFilter studyViewFil
if (Objects.isNull(customDataSamples)) {
customDataSamples = new ArrayList<>();
}
if (studyViewFilter.getGenomicDataFilters() != null && !studyViewFilter.getGenomicDataFilters().isEmpty()) {
List<GenomicDataFilter> mergedGenomicDataFilters = mergeDataFilters(studyViewFilter.getGenomicDataFilters());
studyViewFilter.setGenomicDataFilters(mergedGenomicDataFilters);
}
if (studyViewFilter.getGenericAssayDataFilters() != null && !studyViewFilter.getGenericAssayDataFilters().isEmpty()) {
List<GenericAssayDataFilter> mergedGenericAssayDataFilters = mergeDataFilters(studyViewFilter.getGenericAssayDataFilters());
studyViewFilter.setGenericAssayDataFilters(mergedGenericAssayDataFilters);
}
return new StudyViewFilterHelper(studyViewFilter, genericAssayProfilesMap, customDataSamples);
}

Expand Down Expand Up @@ -93,4 +106,46 @@ public boolean isCategoricalClinicalDataFilter(ClinicalDataFilter clinicalDataFi
return filterValue.getValue() != null;
}

/**
 * Merges adjacent numerical bins in each filter's values to reduce the number of scans that run on the
 * database when filtering.
 *
 * <p>Values carrying a non-null {@code value} (non-numerical entries) are kept as they are, in the order
 * they are encountered. Consecutive range values where the running range's end equals the next bin's
 * start are collapsed into one range. A merged range is emitted whenever a gap is found, and the last
 * pending range is emitted after all values have been visited.
 *
 * <p>Each filter is modified in place through {@code setValues}; the returned list holds the same
 * filter instances. A filter whose value list is {@code null} is passed through untouched.
 *
 * @param filters data filters whose values should be merged
 * @param <T> concrete filter type
 * @return a new list containing the given filters, each with merged values
 */
public static <T extends DataFilter> List<T> mergeDataFilters(List<T> filters) {
    List<T> mergedDataFilters = new ArrayList<>();

    for (T filter : filters) {
        List<DataFilterValue> values = filter.getValues();
        if (values == null) {
            // nothing to merge; keep the filter as it was handed in
            mergedDataFilters.add(filter);
            continue;
        }

        List<DataFilterValue> mergedValues = new ArrayList<>();

        // Explicit flag instead of "start and end are both null": an unbounded (null, null] bin is a
        // legitimate range and must not be mistaken for "no range seen yet".
        boolean hasPendingRange = false;
        BigDecimal mergedStart = null;
        BigDecimal mergedEnd = null;
        for (DataFilterValue dataFilterValue : values) {
            // leave non-numerical values as they are
            if (dataFilterValue.getValue() != null) {
                mergedValues.add(dataFilterValue);
                continue;
            }

            BigDecimal start = dataFilterValue.getStart();
            BigDecimal end = dataFilterValue.getEnd();

            if (!hasPendingRange) {
                mergedStart = start;
                mergedEnd = end;
                hasPendingRange = true;
            } else if (mergedEnd != null && start != null && mergedEnd.compareTo(start) == 0) {
                // compareTo rather than equals: BigDecimal.equals also compares scale, so 2.0 and 2
                // would not be treated as the same boundary. A null boundary is open-ended and is
                // never adjacent to anything.
                mergedEnd = end;
            } else {
                mergedValues.add(new DataFilterValue(mergedStart, mergedEnd, null));
                mergedStart = start;
                mergedEnd = end;
            }
        }

        // Only emit a trailing range when at least one numerical bin was seen; otherwise a filter made
        // up solely of non-numerical values would gain a spurious unbounded (null, null] range.
        if (hasPendingRange) {
            mergedValues.add(new DataFilterValue(mergedStart, mergedEnd, null));
        }
        filter.setValues(mergedValues);
        mergedDataFilters.add(filter);
    }

    return mergedDataFilters;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,6 @@ public List<GenomicDataCountItem> getMutationTypeCountsByGeneSpecific(StudyViewF


/**
 * Builds the {@link StudyViewFilterContext} for the given filter.
 *
 * <p>The numerical bins of the filter's data filters are merged in place first, then the custom data
 * samples are extracted from the filter and both are wrapped in a new context.
 *
 * @param studyViewFilter the filter to prepare; it is mutated by the merge step
 * @return a new context holding the filter and its custom sample identifiers
 */
private StudyViewFilterContext createContext(StudyViewFilter studyViewFilter) {
// merge before extracting so that everything downstream sees the reduced set of ranges
StudyViewColumnarServiceUtil.mergeDataFilterNumericalValues(studyViewFilter);
List<CustomSampleIdentifier> customSampleIdentifiers = customDataFilterUtil.extractCustomDataSamples(studyViewFilter);
return new StudyViewFilterContext(studyViewFilter, customSampleIdentifiers);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,14 @@
import org.cbioportal.model.ClinicalDataCount;
import org.cbioportal.model.ClinicalDataCountItem;
import org.cbioportal.model.GenomicDataCount;
import org.cbioportal.web.parameter.ClinicalDataFilter;
import org.cbioportal.web.parameter.DataFilter;
import org.cbioportal.web.parameter.DataFilterValue;
import org.cbioportal.web.parameter.GenericAssayDataFilter;
import org.cbioportal.web.parameter.GenomicDataFilter;
import org.cbioportal.web.parameter.StudyViewFilter;

import java.math.BigDecimal;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public final class StudyViewColumnarServiceUtil {
public class StudyViewColumnarServiceUtil {

private StudyViewColumnarServiceUtil() {}

Expand Down Expand Up @@ -162,62 +155,5 @@ public static List<ClinicalDataCount> normalizeDataCounts(List<ClinicalDataCount
return new ArrayList<>(normalizedDataCounts);
}

/**
 * Merges the numerical bins of the genomic, clinical and generic assay data filters of the given
 * study view filter, replacing each filter list on the study view filter with its merged version.
 * Filter lists that are {@code null} or empty are left alone.
 *
 * @param studyViewFilter the filter to update in place
 */
public static void mergeDataFilterNumericalValues(StudyViewFilter studyViewFilter) {
    List<GenomicDataFilter> genomicFilters = studyViewFilter.getGenomicDataFilters();
    if (genomicFilters != null && !genomicFilters.isEmpty()) {
        studyViewFilter.setGenomicDataFilters(mergeDataFilters(genomicFilters));
    }

    List<ClinicalDataFilter> clinicalFilters = studyViewFilter.getClinicalDataFilters();
    if (clinicalFilters != null && !clinicalFilters.isEmpty()) {
        studyViewFilter.setClinicalDataFilters(mergeDataFilters(clinicalFilters));
    }

    List<GenericAssayDataFilter> genericAssayFilters = studyViewFilter.getGenericAssayDataFilters();
    if (genericAssayFilters != null && !genericAssayFilters.isEmpty()) {
        studyViewFilter.setGenericAssayDataFilters(mergeDataFilters(genericAssayFilters));
    }
}

/**
 * Merges adjacent numerical bins in each filter's values to reduce the number of scans that run on the
 * database when filtering.
 *
 * <p>Values carrying a non-null {@code value} (non-numerical entries) are kept as they are, in the order
 * they are encountered. Consecutive range values where the running range's end equals the next bin's
 * start are collapsed into one range. A merged range is emitted whenever a gap is found, and the last
 * pending range is emitted after all values have been visited.
 *
 * <p>Each filter is modified in place through {@code setValues}; the returned list holds the same
 * filter instances. A filter whose value list is {@code null} is passed through untouched.
 *
 * @param filters data filters whose values should be merged
 * @param <T> concrete filter type
 * @return a new list containing the given filters, each with merged values
 */
private static <T extends DataFilter> List<T> mergeDataFilters(List<T> filters) {
    List<T> mergedDataFilters = new ArrayList<>();

    for (T filter : filters) {
        List<DataFilterValue> values = filter.getValues();
        if (values == null) {
            // nothing to merge; keep the filter as it was handed in
            mergedDataFilters.add(filter);
            continue;
        }

        List<DataFilterValue> mergedValues = new ArrayList<>();

        // Explicit flag instead of "start and end are both null": an unbounded (null, null] bin is a
        // legitimate range and must not be mistaken for "no range seen yet".
        boolean hasPendingRange = false;
        BigDecimal mergedStart = null;
        BigDecimal mergedEnd = null;
        for (DataFilterValue dataFilterValue : values) {
            // leave non-numerical values as they are
            if (dataFilterValue.getValue() != null) {
                mergedValues.add(dataFilterValue);
                continue;
            }

            BigDecimal start = dataFilterValue.getStart();
            BigDecimal end = dataFilterValue.getEnd();

            if (!hasPendingRange) {
                mergedStart = start;
                mergedEnd = end;
                hasPendingRange = true;
            } else if (mergedEnd != null && start != null && mergedEnd.compareTo(start) == 0) {
                // compareTo rather than equals: BigDecimal.equals also compares scale, so 2.0 and 2
                // would not be treated as the same boundary. A null boundary is open-ended and is
                // never adjacent to anything.
                mergedEnd = end;
            } else {
                mergedValues.add(new DataFilterValue(mergedStart, mergedEnd, null));
                mergedStart = start;
                mergedEnd = end;
            }
        }

        // Only emit a trailing range when at least one numerical bin was seen; otherwise a filter made
        // up solely of non-numerical values would gain a spurious unbounded (null, null] range.
        if (hasPendingRange) {
            mergedValues.add(new DataFilterValue(mergedStart, mergedEnd, null));
        }
        filter.setValues(mergedValues);
        mergedDataFilters.add(filter);
    }

    return mergedDataFilters;
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
package org.cbioportal.persistence.helper;

import org.cbioportal.web.parameter.DataFilterValue;
import org.cbioportal.web.parameter.GenomicDataFilter;
import org.junit.Test;

import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.List;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;

/**
 * Unit tests for {@code StudyViewFilterHelper.mergeDataFilters}, covering continuous bins,
 * bins separated by a gap, open-ended (null) boundaries, and a mix of numerical and non-numerical values.
 * Boundaries are compared with {@code compareTo} so the assertions do not depend on BigDecimal scale.
 */
public class StudyViewFilterHelperTest {

// (-5, -1], (-1, 3], (3, 7] -> (-5, 7]
// Every bin starts where the previous one ends, so all three collapse into a single range.
@Test
public void testMergeDataFilterNumericalContinuousValues() {
List<GenomicDataFilter> genomicDataFilters = new ArrayList<>();
List<DataFilterValue> values = new ArrayList<>();
values.add(new DataFilterValue(BigDecimal.valueOf(-5), BigDecimal.valueOf(-1), null));
values.add(new DataFilterValue(BigDecimal.valueOf(-1), BigDecimal.valueOf(3), null));
values.add(new DataFilterValue(BigDecimal.valueOf(3), BigDecimal.valueOf(7), null));
genomicDataFilters.add(new GenomicDataFilter(null, null, values));

List<GenomicDataFilter> mergedGenomicDataFilters = StudyViewFilterHelper.mergeDataFilters(genomicDataFilters);
List<DataFilterValue> mergedDataFilterValues = mergedGenomicDataFilters.getFirst().getValues();
BigDecimal start = mergedDataFilterValues.getFirst().getStart();
BigDecimal end = mergedDataFilterValues.getFirst().getEnd();
assertEquals(0, BigDecimal.valueOf(-5).compareTo(start));
assertEquals(0, BigDecimal.valueOf(7).compareTo(end));
}

// (-2.5, -2.25], (-2.25, -2], (-1.75, -1.5], (-1.5, -1.25] -> (-2.5, -2], (-1.75, -1.25]
// The gap between -2 and -1.75 splits the result into two merged ranges.
@Test
public void testMergeDataFilterNumericalDiscontinuousValues() {
List<GenomicDataFilter> genomicDataFilters = new ArrayList<>();
List<DataFilterValue> values = new ArrayList<>();
values.add(new DataFilterValue(BigDecimal.valueOf(-2.5), BigDecimal.valueOf(-2.25), null));
values.add(new DataFilterValue(BigDecimal.valueOf(-2.25), BigDecimal.valueOf(-2), null));
values.add(new DataFilterValue(BigDecimal.valueOf(-1.75), BigDecimal.valueOf(-1.5), null));
values.add(new DataFilterValue(BigDecimal.valueOf(-1.5), BigDecimal.valueOf(-1.25), null));
genomicDataFilters.add(new GenomicDataFilter(null, null, values));

List<GenomicDataFilter> mergedGenomicDataFilters = StudyViewFilterHelper.mergeDataFilters(genomicDataFilters);
List<DataFilterValue> mergedDataFilterValues = mergedGenomicDataFilters.getFirst().getValues();
BigDecimal firstStart = mergedDataFilterValues.getFirst().getStart();
BigDecimal firstEnd = mergedDataFilterValues.getFirst().getEnd();
assertEquals(0, BigDecimal.valueOf(-2.5).compareTo(firstStart));
assertEquals(0, BigDecimal.valueOf(-2).compareTo(firstEnd));

BigDecimal secondStart = mergedDataFilterValues.get(1).getStart();
BigDecimal secondEnd = mergedDataFilterValues.get(1).getEnd();
assertEquals(0, BigDecimal.valueOf(-1.75).compareTo(secondStart));
assertEquals(0, BigDecimal.valueOf(-1.25).compareTo(secondEnd));
}

// (null, -2.25], (-2.25, -2], (-2, null] -> (null, null]
// A null start or end stands for an open boundary; merging keeps both open ends.
@Test
public void testMergeDataFilterNumericalInfiniteValues() {
List<GenomicDataFilter> genomicDataFilters = new ArrayList<>();
List<DataFilterValue> values = new ArrayList<>();
values.add(new DataFilterValue(null, BigDecimal.valueOf(-2.25), null));
values.add(new DataFilterValue(BigDecimal.valueOf(-2.25), BigDecimal.valueOf(-2), null));
values.add(new DataFilterValue(BigDecimal.valueOf(-2), null, null));
genomicDataFilters.add(new GenomicDataFilter(null, null, values));

List<GenomicDataFilter> mergedGenomicDataFilters = StudyViewFilterHelper.mergeDataFilters(genomicDataFilters);
List<DataFilterValue> mergedDataFilterValues = mergedGenomicDataFilters.getFirst().getValues();
BigDecimal start = mergedDataFilterValues.getFirst().getStart();
BigDecimal end = mergedDataFilterValues.getFirst().getEnd();
assertNull(start);
assertNull(end);
}

// (-2.5, -2.25], (-2.25, -2], "NA" -> "NA", (-2.5, -2]
// The non-numerical value ends up first because it is appended as soon as it is encountered,
// while the merged range is only appended once all values have been visited.
@Test
public void testMergeDataFilterNumericalNonNumericalValues() {
List<GenomicDataFilter> genomicDataFilters = new ArrayList<>();
List<DataFilterValue> values = new ArrayList<>();
values.add(new DataFilterValue(BigDecimal.valueOf(-2.5), BigDecimal.valueOf(-2.25), null));
values.add(new DataFilterValue(BigDecimal.valueOf(-2.25), BigDecimal.valueOf(-2), null));
values.add(new DataFilterValue(null, null, "NA"));
genomicDataFilters.add(new GenomicDataFilter(null, null, values));

List<GenomicDataFilter> mergedGenomicDataFilters = StudyViewFilterHelper.mergeDataFilters(genomicDataFilters);
List<DataFilterValue> mergedDataFilterValues = mergedGenomicDataFilters.getFirst().getValues();
String value = mergedDataFilterValues.getFirst().getValue();
BigDecimal start = mergedDataFilterValues.get(1).getStart();
BigDecimal end = mergedDataFilterValues.get(1).getEnd();
assertEquals("NA", value);
assertEquals(0, BigDecimal.valueOf(-2.5).compareTo(start));
assertEquals(0, BigDecimal.valueOf(-2).compareTo(end));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,9 @@
import org.cbioportal.model.ClinicalDataCount;
import org.cbioportal.model.ClinicalDataCountItem;
import org.cbioportal.model.GenomicDataCount;
import org.cbioportal.web.parameter.DataFilterValue;
import org.cbioportal.web.parameter.GenomicDataFilter;
import org.cbioportal.web.parameter.StudyViewFilter;
import org.junit.Assert;
import org.junit.Test;

import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
Expand Down Expand Up @@ -268,100 +263,6 @@ public void testNormalizeDataCounts() {
.orElse(null);
assertEquals(9, falseCount.getCount().intValue());
}

// (-2.5, -2.25], (-2.25, -2], (-2, -1.75] -> (-2.5, -1.75]
// Every bin starts where the previous one ends, so all three collapse into a single range.
// The merge is applied in place to the StudyViewFilter, so the result is read back from it.
@Test
public void testMergeDataFilterNumericalContinuousValues() {
StudyViewFilter studyViewFilter = new StudyViewFilter();
List<GenomicDataFilter> genomicDataFilters = new ArrayList<>();
List<DataFilterValue> values = new ArrayList<>();
values.add(new DataFilterValue(BigDecimal.valueOf(-2.5), BigDecimal.valueOf(-2.25), null));
values.add(new DataFilterValue(BigDecimal.valueOf(-2.25), BigDecimal.valueOf(-2), null));
values.add(new DataFilterValue(BigDecimal.valueOf(-2), BigDecimal.valueOf(-1.75), null));
genomicDataFilters.add(new GenomicDataFilter(null, null, values));
studyViewFilter.setGenomicDataFilters(genomicDataFilters);

StudyViewColumnarServiceUtil.mergeDataFilterNumericalValues(studyViewFilter);

List<GenomicDataFilter> actualGenomicDataFilters = studyViewFilter.getGenomicDataFilters();
List<DataFilterValue> actualDataFilterValues = actualGenomicDataFilters.getFirst().getValues();
BigDecimal start = actualDataFilterValues.getFirst().getStart();
BigDecimal end = actualDataFilterValues.getFirst().getEnd();
// compareTo instead of equals so the check does not depend on BigDecimal scale
assertEquals(0, BigDecimal.valueOf(-2.5).compareTo(start));
assertEquals(0, BigDecimal.valueOf(-1.75).compareTo(end));
}

// (-2.5, -2.25], (-2.25, -2], (-1.75, -1.5], (-1.5, -1.25] -> (-2.5, -2], (-1.75, -1.25]
// The gap between -2 and -1.75 splits the result into two merged ranges.
@Test
public void testMergeDataFilterNumericalDiscontinuousValues() {
StudyViewFilter studyViewFilter = new StudyViewFilter();
List<GenomicDataFilter> genomicDataFilters = new ArrayList<>();
List<DataFilterValue> values = new ArrayList<>();
values.add(new DataFilterValue(BigDecimal.valueOf(-2.5), BigDecimal.valueOf(-2.25), null));
values.add(new DataFilterValue(BigDecimal.valueOf(-2.25), BigDecimal.valueOf(-2), null));
values.add(new DataFilterValue(BigDecimal.valueOf(-1.75), BigDecimal.valueOf(-1.5), null));
values.add(new DataFilterValue(BigDecimal.valueOf(-1.5), BigDecimal.valueOf(-1.25), null));
genomicDataFilters.add(new GenomicDataFilter(null, null, values));
studyViewFilter.setGenomicDataFilters(genomicDataFilters);

StudyViewColumnarServiceUtil.mergeDataFilterNumericalValues(studyViewFilter);

List<GenomicDataFilter> actualGenomicDataFilters = studyViewFilter.getGenomicDataFilters();
List<DataFilterValue> actualDataFilterValues = actualGenomicDataFilters.getFirst().getValues();
// first merged range: (-2.5, -2]
BigDecimal firstStart = actualDataFilterValues.getFirst().getStart();
BigDecimal firstEnd = actualDataFilterValues.getFirst().getEnd();
assertEquals(0, BigDecimal.valueOf(-2.5).compareTo(firstStart));
assertEquals(0, BigDecimal.valueOf(-2).compareTo(firstEnd));

// second merged range: (-1.75, -1.25]
BigDecimal secondStart = actualDataFilterValues.get(1).getStart();
BigDecimal secondEnd = actualDataFilterValues.get(1).getEnd();
assertEquals(0, BigDecimal.valueOf(-1.75).compareTo(secondStart));
assertEquals(0, BigDecimal.valueOf(-1.25).compareTo(secondEnd));
}

// (null, -2.25], (-2.25, -2], (-2, null] -> (null, null]
// A null start or end stands for an open boundary; merging keeps both open ends.
@Test
public void testMergeDataFilterNumericalInfiniteValues() {
StudyViewFilter studyViewFilter = new StudyViewFilter();
List<GenomicDataFilter> genomicDataFilters = new ArrayList<>();
List<DataFilterValue> values = new ArrayList<>();
values.add(new DataFilterValue(null, BigDecimal.valueOf(-2.25), null));
values.add(new DataFilterValue(BigDecimal.valueOf(-2.25), BigDecimal.valueOf(-2), null));
values.add(new DataFilterValue(BigDecimal.valueOf(-2), null, null));
genomicDataFilters.add(new GenomicDataFilter(null, null, values));
studyViewFilter.setGenomicDataFilters(genomicDataFilters);

StudyViewColumnarServiceUtil.mergeDataFilterNumericalValues(studyViewFilter);

List<GenomicDataFilter> actualGenomicDataFilters = studyViewFilter.getGenomicDataFilters();
List<DataFilterValue> actualDataFilterValues = actualGenomicDataFilters.getFirst().getValues();
BigDecimal start = actualDataFilterValues.getFirst().getStart();
BigDecimal end = actualDataFilterValues.getFirst().getEnd();
assertNull(start);
assertNull(end);
}

// (-2.5, -2.25], (-2.25, -2], "NA" -> "NA", (-2.5, -2]
// The non-numerical value ends up first because it is appended as soon as it is encountered,
// while the merged range is only appended once all values have been visited.
@Test
public void testMergeDataFilterNumericalNonNumericalValues() {
StudyViewFilter studyViewFilter = new StudyViewFilter();
List<GenomicDataFilter> genomicDataFilters = new ArrayList<>();
List<DataFilterValue> values = new ArrayList<>();
values.add(new DataFilterValue(BigDecimal.valueOf(-2.5), BigDecimal.valueOf(-2.25), null));
values.add(new DataFilterValue(BigDecimal.valueOf(-2.25), BigDecimal.valueOf(-2), null));
values.add(new DataFilterValue(null, null, "NA"));
genomicDataFilters.add(new GenomicDataFilter(null, null, values));
studyViewFilter.setGenomicDataFilters(genomicDataFilters);

StudyViewColumnarServiceUtil.mergeDataFilterNumericalValues(studyViewFilter);

List<GenomicDataFilter> actualGenomicDataFilters = studyViewFilter.getGenomicDataFilters();
List<DataFilterValue> actualDataFilterValues = actualGenomicDataFilters.getFirst().getValues();
String value = actualDataFilterValues.getFirst().getValue();
BigDecimal start = actualDataFilterValues.get(1).getStart();
BigDecimal end = actualDataFilterValues.get(1).getEnd();
assertEquals("NA", value);
assertEquals(0, BigDecimal.valueOf(-2.5).compareTo(start));
assertEquals(0, BigDecimal.valueOf(-2).compareTo(end));
}


}

0 comments on commit 31a3b0d

Please sign in to comment.