diff --git a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml index d353819a301..2633361188f 100644 --- a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml +++ b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml @@ -272,6 +272,74 @@ + + + + + + AND + + + + + + + AND + ${attribute_value} ILIKE #{dataFilterValue.value} + + + + AND match(${attribute_value}, '^>?=?[-+]?[0-9]*[.,]?[0-9]+$') + + + AND match(${attribute_value}, '^<?=?[-+]?[0-9]*[.,]?[0-9]+$') + + + AND match(${attribute_value}, '^[-+]?[0-9]*[.,]?[0-9]+$') + + + + + + AND abs( + minus( + + + , + cast(#{dataFilterValue.start} as float) + ) + ) < exp(-11) + + + + AND + + + > cast(#{dataFilterValue.start} as float) + + + AND + + + <= cast(#{dataFilterValue.end} as float) + + + + + + + + - - - - - + + + + + @@ -362,57 +430,15 @@ - SELECT ${unique_id} - FROM ${table_name} - WHERE attribute_name = #{clinicalDataFilter.attributeId} AND - type='${type}' - - - - AND - - - - - - AND match(attribute_value, '^>?=?[-+]?[0-9]*[.,]?[0-9]+$') - - - AND match(attribute_value, '^<?=?[-+]?[0-9]*[.,]?[0-9]+$') - - - AND match(attribute_value, '^[-+]?[0-9]*[.,]?[0-9]+$') - - - - - AND abs( - minus( - - - , - cast(#{dataFilterValue.start} as float) - ) - ) < exp(-11) - - - - AND - - - > cast(#{dataFilterValue.start} as float) - - - AND - - - <= cast(#{dataFilterValue.end} as float) - - - - - - + SELECT ${unique_id} + FROM ${table_name} + WHERE attribute_name = #{clinicalDataFilter.attributeId} AND + type='${type}' + AND + + + + ) @@ -452,11 +478,7 @@ = 'NA' - ( - - - - ) ILIKE #{dataFilterValue.value} + attribute_value ILIKE #{dataFilterValue.value} @@ -494,47 +516,10 @@ SELECT DISTINCT sample_unique_id FROM () AS genomic_numerical_query WHERE - - - - AND match(alteration_value, '^>?=?[-+]?[0-9]*[.,]?[0-9]+$') - - - AND match(alteration_value, '^<?=?[-+]?[0-9]*[.,]?[0-9]+$') - - - AND match(alteration_value, '^[-+]?[0-9]*[.,]?[0-9]+$') - - - - - AND abs( - minus( - - - , - cast(#{dataFilterValue.start} as float) - ) - ) < exp(-11) - - - - AND - - - > cast(#{dataFilterValue.start} as float) - - - AND - - - <= cast(#{dataFilterValue.end} as float) - - - - - - + + + + @@ -596,53 +581,11 @@ != 'NA' - - - - AND - - - - - - AND match(value, '^>?=?[-+]?[0-9]*[.,]?[0-9]+$') - - - AND match(value, '^<?=?[-+]?[0-9]*[.,]?[0-9]+$') - - - AND match(value, '^[-+]?[0-9]*[.,]?[0-9]+$') - - - - - AND abs( - minus( - - - , - cast(#{dataFilterValue.start} as float) - ) - ) < exp(-11) - - - - AND - - - > cast(#{dataFilterValue.start} as float) - - - AND - - - <= cast(#{dataFilterValue.end} as float) - - - - - - + AND + + + + @@ -662,11 +605,7 @@ = 'NA' - ( - - - - ) ILIKE #{dataFilterValue.value} + value ILIKE #{dataFilterValue.value} diff --git a/src/test/java/org/cbioportal/persistence/mybatisclickhouse/FilteredSamplesTest.java b/src/test/java/org/cbioportal/persistence/mybatisclickhouse/FilteredSamplesTest.java index e313507f578..3795fd25652 100644 --- a/src/test/java/org/cbioportal/persistence/mybatisclickhouse/FilteredSamplesTest.java +++ b/src/test/java/org/cbioportal/persistence/mybatisclickhouse/FilteredSamplesTest.java @@ -16,6 +16,7 @@ import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.junit4.SpringRunner; +import java.math.BigDecimal; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -32,6 +33,7 @@ public class FilteredSamplesTest extends AbstractTestcontainers { private static final String STUDY_TCGA_PUB = "study_tcga_pub"; private static final String STUDY_ACC_TCGA = "acc_tcga"; + private static final String STUDY_GENIE_PUB = "study_genie_pub"; @Autowired private StudyViewMapper studyViewMapper; @@ -57,4 +59,114 @@ public void getFilteredSamples() { var filteredSamples2 = studyViewMapper.getFilteredSamples(StudyViewFilterHelper.build(studyViewFilter, null, Arrays.asList(customSampleIdentifier))); assertEquals(1, filteredSamples2.size()); } + + @Test + public void getSamplesFilteredByClinicalData() { + StudyViewFilter studyViewFilter = new StudyViewFilter(); + studyViewFilter.setStudyIds(Arrays.asList(STUDY_GENIE_PUB, STUDY_ACC_TCGA)); + + // samples of patients with AGE <= 20.0 + studyViewFilter.setClinicalDataFilters( + List.of( + newClinicalDataFilter( + "age", List.of( + newDataFilterValue(null, 20.0, null) + ) + ) + ) + ); + var filteredSamples1 = studyViewMapper.getFilteredSamples(StudyViewFilterHelper.build(studyViewFilter, null, new ArrayList<>())); + assertEquals(4, filteredSamples1.size()); + + // samples of patients with AGE <= 20.0 or (80.0, 82.0] + studyViewFilter.setClinicalDataFilters( + List.of( + newClinicalDataFilter( + "age", List.of( + newDataFilterValue(null, 20.0, null), + newDataFilterValue(80.0, 82.0, null) + ) + ) + ) + ); + var filteredSamples2 = studyViewMapper.getFilteredSamples(StudyViewFilterHelper.build(studyViewFilter, null, new ArrayList<>())); + assertEquals(6, filteredSamples2.size()); + + // samples of patients with UNKNOWN AGE + studyViewFilter.setClinicalDataFilters( + List.of( + newClinicalDataFilter( + "age", List.of( + newDataFilterValue(null, null, "Unknown") + ) + ) + ) + ); + var filteredSamples3 = studyViewMapper.getFilteredSamples(StudyViewFilterHelper.build(studyViewFilter, null, new ArrayList<>())); + assertEquals(1, filteredSamples3.size()); + + // samples of patients with AGE <= 20.0 or (80.0, 82.0] or UNKNOWN + // this is a mixed list of filters of both numerical and non-numerical values + studyViewFilter.setClinicalDataFilters( + List.of( + newClinicalDataFilter( + "age", List.of( + newDataFilterValue(null, 20.0, null), + newDataFilterValue(80.0, 82.0, null), + newDataFilterValue(null, null, "unknown") + ) + ) + ) + ); + var filteredSamples4 = studyViewMapper.getFilteredSamples(StudyViewFilterHelper.build(studyViewFilter, null, new ArrayList<>())); + assertEquals(7, filteredSamples4.size()); + + // NA filter + studyViewFilter.setClinicalDataFilters( + List.of( + newClinicalDataFilter( + "age", List.of( + newDataFilterValue(null, null, "NA") + ) + ) + ) + ); + var filteredSamples5 = studyViewMapper.getFilteredSamples(StudyViewFilterHelper.build(studyViewFilter, null, new ArrayList<>())); + // 4 acc_tcga + 4 study_genie_pub samples with "NA" AGE data or no AGE data + assertEquals(8, filteredSamples5.size()); + + // NA + UNKNOWN + studyViewFilter.setClinicalDataFilters( + List.of( + newClinicalDataFilter( + "age", List.of( + newDataFilterValue(null, null, "NA"), + newDataFilterValue(null, null, "UNKNOWN") + ) + ) + ) + ); + var filteredSamples6 = studyViewMapper.getFilteredSamples(StudyViewFilterHelper.build(studyViewFilter, null, new ArrayList<>())); + // 8 NA + 1 UNKNOWN + assertEquals(9, filteredSamples6.size()); + } + + private DataFilterValue newDataFilterValue(Double start, Double end, String value) { + DataFilterValue dataFilterValue = new DataFilterValue(); + + dataFilterValue.setStart(start == null ? null : new BigDecimal(start)); + dataFilterValue.setEnd(end == null ? null: new BigDecimal(end)); + dataFilterValue.setValue(value); + + return dataFilterValue; + } + + private ClinicalDataFilter newClinicalDataFilter(String attributeId, List values) { + ClinicalDataFilter clinicalDataFilter = new ClinicalDataFilter(); + + clinicalDataFilter.setAttributeId(attributeId); + clinicalDataFilter.setValues(values); + + return clinicalDataFilter; + } } diff --git a/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperClinicalDataCountTest.java b/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperClinicalDataCountTest.java index 0760a81407e..6f393241a8d 100644 --- a/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperClinicalDataCountTest.java +++ b/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperClinicalDataCountTest.java @@ -59,8 +59,8 @@ public void getMutationCounts() { assertEquals(2, findClinicaDataCount(mutationsCounts, "4")); assertEquals(4, findClinicaDataCount(mutationsCounts, "2")); assertEquals(2, findClinicaDataCount(mutationsCounts, "1")); - // 1 empty string + 1 'NAN' + 11 samples with no data - assertEquals(13, findClinicaDataCount(mutationsCounts, "NA")); + // 1 empty string + 1 'NAN' + 12 samples with no data + assertEquals(14, findClinicaDataCount(mutationsCounts, "NA")); } @Test @@ -87,8 +87,8 @@ public void getCenterCounts() { assertEquals(1, findClinicaDataCount(categoricalClinicalDataCounts, "MDA")); assertEquals(1, findClinicaDataCount(categoricalClinicalDataCounts, "OHSU")); assertEquals(1, findClinicaDataCount(categoricalClinicalDataCounts, "UCSF")); - // 1 empty string + 1 'NA' + 11 samples with no data - assertEquals(13, findClinicaDataCount(categoricalClinicalDataCounts, "NA")); + // 1 empty string + 1 'NA' + 12 samples with no data + assertEquals(14, findClinicaDataCount(categoricalClinicalDataCounts, "NA")); } @Test @@ -114,8 +114,8 @@ public void getDeadCounts() { assertEquals(1, findClinicaDataCount(categoricalClinicalDataCounts, "NOT RELEASED")); assertEquals(1, findClinicaDataCount(categoricalClinicalDataCounts, "NOT COLLECTED")); assertEquals(1, findClinicaDataCount(categoricalClinicalDataCounts, "UNKNOWN")); - // 1 empty string + 1 'N/A' + 11 samples with no data - assertEquals(13, findClinicaDataCount(categoricalClinicalDataCounts, "NA")); + // 1 empty string + 1 'N/A' + 12 samples with no data + assertEquals(14, findClinicaDataCount(categoricalClinicalDataCounts, "NA")); } @Test @@ -181,7 +181,7 @@ public void getAgeCountsForMultipleStudies() { } private void assertAgeCounts(List ageCounts) { - assertEquals(14, ageCounts.size()); + assertEquals(15, ageCounts.size()); assertEquals(3, findClinicaDataCount(ageCounts, "<18")); assertEquals(1, findClinicaDataCount(ageCounts, "18")); @@ -197,6 +197,7 @@ private void assertAgeCounts(List ageCounts) { assertEquals(2, findClinicaDataCount(ageCounts, "82")); assertEquals(1, findClinicaDataCount(ageCounts, "89")); assertEquals(2, findClinicaDataCount(ageCounts, ">89")); + assertEquals(1, findClinicaDataCount(ageCounts, "UNKNOWN")); } @Test diff --git a/src/test/resources/clickhouse_data.sql b/src/test/resources/clickhouse_data.sql index 99fd2ccfb6c..a63d60456fb 100644 --- a/src/test/resources/clickhouse_data.sql +++ b/src/test/resources/clickhouse_data.sql @@ -133,6 +133,7 @@ insert into patient (internal_id,stable_id,cancer_study_id) values (320,'GENIE-T insert into patient (internal_id,stable_id,cancer_study_id) values (321,'GENIE-TEST-321',3); insert into patient (internal_id,stable_id,cancer_study_id) values (322,'GENIE-TEST-322',3); insert into patient (internal_id,stable_id,cancer_study_id) values (323,'GENIE-TEST-323',3); +insert into patient (internal_id,stable_id,cancer_study_id) values (324,'GENIE-TEST-324',3); insert into genetic_profile_samples (genetic_profile_id,ordered_sample_list) values(10,'1,2,3,4,5,6,7,8,9,10,11,'); @@ -178,6 +179,7 @@ insert into sample (internal_id,stable_id,sample_type,patient_id) values (320,'G insert into sample (internal_id,stable_id,sample_type,patient_id) values (321,'GENIE-TEST-321-01','primary solid tumor',321); insert into sample (internal_id,stable_id,sample_type,patient_id) values (322,'GENIE-TEST-322-01','primary solid tumor',322); insert into sample (internal_id,stable_id,sample_type,patient_id) values (323,'GENIE-TEST-323-01','primary solid tumor',323); +insert into sample (internal_id,stable_id,sample_type,patient_id) values (324,'GENIE-TEST-324-01','primary solid tumor',324); insert into mutation_event (mutation_event_id,entrez_gene_id,chr,start_position,end_position,reference_allele,tumor_seq_allele,protein_change,mutation_type,ncbi_build,strand,variant_type,db_snp_rs,db_snp_val_status,refseq_mrna_id,codon_change,uniprot_accession,protein_pos_start,protein_pos_end,canonical_transcript,keyword) values (2038,672,'17',41244748,41244748,'g','a','q934*','nonsense_mutation','37','+','snp','rs80357223','unknown','nm_007294','c.(2800-2802)cag>tag','p38398',934,934,1,'BRCA1 truncating'); @@ -457,6 +459,7 @@ insert into clinical_patient (internal_id,attr_id,attr_value) values (319,'age', insert into clinical_patient (internal_id,attr_id,attr_value) values (320,'age','N/A'); insert into clinical_patient (internal_id,attr_id,attr_value) values (321,'age',''); insert into clinical_patient (internal_id,attr_id,attr_value) values (322,'age','NAN'); +insert into clinical_patient (internal_id,attr_id,attr_value) values (324,'age','UNKNOWN'); insert into clinical_sample (internal_id,attr_id,attr_value) values (1,'other_sample_id','5c631ce8-f96a-4c35-a459-556fc4ab21e1'); insert into clinical_sample (internal_id,attr_id,attr_value) values (1,'days_to_collection','276');