diff --git a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml
index d353819a301..2633361188f 100644
--- a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml
+++ b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml
@@ -272,6 +272,74 @@
+
+
+
+
+
+ AND
+
+
+
+
+
+
+ AND
+ ${attribute_value} ILIKE #{dataFilterValue.value}
+
+
+
+ AND match(${attribute_value}, '^>?=?[-+]?[0-9]*[.,]?[0-9]+$')
+
+
+ AND match(${attribute_value}, '^<?=?[-+]?[0-9]*[.,]?[0-9]+$')
+
+
+ AND match(${attribute_value}, '^[-+]?[0-9]*[.,]?[0-9]+$')
+
+
+
+
+
+ AND abs(
+ minus(
+
+
+ ,
+ cast(#{dataFilterValue.start} as float)
+ )
+ ) < exp(-11)
+
+
+
+ AND
+
+
+ > cast(#{dataFilterValue.start} as float)
+
+
+ AND
+
+
+ <= cast(#{dataFilterValue.end} as float)
+
+
+
+
+
+
+
+
-
-
-
-
-
+
+
+
+
+
@@ -362,57 +430,15 @@
- SELECT ${unique_id}
- FROM ${table_name}
- WHERE attribute_name = #{clinicalDataFilter.attributeId} AND
- type='${type}'
-
-
-
- AND
-
-
-
-
-
- AND match(attribute_value, '^>?=?[-+]?[0-9]*[.,]?[0-9]+$')
-
-
- AND match(attribute_value, '^<?=?[-+]?[0-9]*[.,]?[0-9]+$')
-
-
- AND match(attribute_value, '^[-+]?[0-9]*[.,]?[0-9]+$')
-
-
-
-
- AND abs(
- minus(
-
-
- ,
- cast(#{dataFilterValue.start} as float)
- )
- ) < exp(-11)
-
-
-
- AND
-
-
- > cast(#{dataFilterValue.start} as float)
-
-
- AND
-
-
- <= cast(#{dataFilterValue.end} as float)
-
-
-
-
-
-
+ SELECT ${unique_id}
+ FROM ${table_name}
+ WHERE attribute_name = #{clinicalDataFilter.attributeId} AND
+ type='${type}'
+ AND
+
+
+
+
)
@@ -452,11 +478,7 @@
= 'NA'
- (
-
-
-
- ) ILIKE #{dataFilterValue.value}
+ attribute_value ILIKE #{dataFilterValue.value}
@@ -494,47 +516,10 @@
SELECT DISTINCT sample_unique_id
FROM () AS genomic_numerical_query
WHERE
-
-
-
- AND match(alteration_value, '^>?=?[-+]?[0-9]*[.,]?[0-9]+$')
-
-
- AND match(alteration_value, '^<?=?[-+]?[0-9]*[.,]?[0-9]+$')
-
-
- AND match(alteration_value, '^[-+]?[0-9]*[.,]?[0-9]+$')
-
-
-
-
- AND abs(
- minus(
-
-
- ,
- cast(#{dataFilterValue.start} as float)
- )
- ) < exp(-11)
-
-
-
- AND
-
-
- > cast(#{dataFilterValue.start} as float)
-
-
- AND
-
-
- <= cast(#{dataFilterValue.end} as float)
-
-
-
-
-
-
+
+
+
+
@@ -596,53 +581,11 @@
!= 'NA'
-
-
-
- AND
-
-
-
-
-
- AND match(value, '^>?=?[-+]?[0-9]*[.,]?[0-9]+$')
-
-
- AND match(value, '^<?=?[-+]?[0-9]*[.,]?[0-9]+$')
-
-
- AND match(value, '^[-+]?[0-9]*[.,]?[0-9]+$')
-
-
-
-
- AND abs(
- minus(
-
-
- ,
- cast(#{dataFilterValue.start} as float)
- )
- ) < exp(-11)
-
-
-
- AND
-
-
- > cast(#{dataFilterValue.start} as float)
-
-
- AND
-
-
- <= cast(#{dataFilterValue.end} as float)
-
-
-
-
-
-
+ AND
+
+
+
+
@@ -662,11 +605,7 @@
= 'NA'
- (
-
-
-
- ) ILIKE #{dataFilterValue.value}
+ value ILIKE #{dataFilterValue.value}
diff --git a/src/test/java/org/cbioportal/persistence/mybatisclickhouse/FilteredSamplesTest.java b/src/test/java/org/cbioportal/persistence/mybatisclickhouse/FilteredSamplesTest.java
index e313507f578..3795fd25652 100644
--- a/src/test/java/org/cbioportal/persistence/mybatisclickhouse/FilteredSamplesTest.java
+++ b/src/test/java/org/cbioportal/persistence/mybatisclickhouse/FilteredSamplesTest.java
@@ -16,6 +16,7 @@
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringRunner;
+import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@@ -32,6 +33,7 @@ public class FilteredSamplesTest extends AbstractTestcontainers {
private static final String STUDY_TCGA_PUB = "study_tcga_pub";
private static final String STUDY_ACC_TCGA = "acc_tcga";
+ private static final String STUDY_GENIE_PUB = "study_genie_pub";
@Autowired
private StudyViewMapper studyViewMapper;
@@ -57,4 +59,114 @@ public void getFilteredSamples() {
var filteredSamples2 = studyViewMapper.getFilteredSamples(StudyViewFilterHelper.build(studyViewFilter, null, Arrays.asList(customSampleIdentifier)));
assertEquals(1, filteredSamples2.size());
}
+
+ @Test
+ public void getSamplesFilteredByClinicalData() {
+ StudyViewFilter studyViewFilter = new StudyViewFilter();
+ studyViewFilter.setStudyIds(Arrays.asList(STUDY_GENIE_PUB, STUDY_ACC_TCGA));
+ // Each section below replaces the clinical data filters on the same studyViewFilter and re-runs getFilteredSamples.
+ // open-start numeric range (start == null): samples of patients with AGE <= 20.0
+ studyViewFilter.setClinicalDataFilters(
+ List.of(
+ newClinicalDataFilter(
+ "age", List.of(
+ newDataFilterValue(null, 20.0, null)
+ )
+ )
+ )
+ );
+ var filteredSamples1 = studyViewMapper.getFilteredSamples(StudyViewFilterHelper.build(studyViewFilter, null, new ArrayList<>()));
+ assertEquals(4, filteredSamples1.size());
+
+ // two numeric ranges in one filter: samples of patients with AGE <= 20.0 or AGE in (80.0, 82.0]
+ studyViewFilter.setClinicalDataFilters(
+ List.of(
+ newClinicalDataFilter(
+ "age", List.of(
+ newDataFilterValue(null, 20.0, null),
+ newDataFilterValue(80.0, 82.0, null)
+ )
+ )
+ )
+ );
+ var filteredSamples2 = studyViewMapper.getFilteredSamples(StudyViewFilterHelper.build(studyViewFilter, null, new ArrayList<>()));
+ assertEquals(6, filteredSamples2.size());
+
+ // non-numeric value: samples of patients with UNKNOWN AGE ("Unknown" here vs. 'UNKNOWN' in the fixture; presumably relies on the case-insensitive ILIKE match, confirm)
+ studyViewFilter.setClinicalDataFilters(
+ List.of(
+ newClinicalDataFilter(
+ "age", List.of(
+ newDataFilterValue(null, null, "Unknown")
+ )
+ )
+ )
+ );
+ var filteredSamples3 = studyViewMapper.getFilteredSamples(StudyViewFilterHelper.build(studyViewFilter, null, new ArrayList<>()));
+ assertEquals(1, filteredSamples3.size());
+
+ // samples of patients with AGE <= 20.0 or AGE in (80.0, 82.0] or UNKNOWN AGE
+ // this is a mixed list of numerical and non-numerical filter values; expected count is 6 (ranges) + 1 (UNKNOWN)
+ studyViewFilter.setClinicalDataFilters(
+ List.of(
+ newClinicalDataFilter(
+ "age", List.of(
+ newDataFilterValue(null, 20.0, null),
+ newDataFilterValue(80.0, 82.0, null),
+ newDataFilterValue(null, null, "unknown")
+ )
+ )
+ )
+ );
+ var filteredSamples4 = studyViewMapper.getFilteredSamples(StudyViewFilterHelper.build(studyViewFilter, null, new ArrayList<>()));
+ assertEquals(7, filteredSamples4.size());
+
+ // NA filter: the value "NA" on its own
+ studyViewFilter.setClinicalDataFilters(
+ List.of(
+ newClinicalDataFilter(
+ "age", List.of(
+ newDataFilterValue(null, null, "NA")
+ )
+ )
+ )
+ );
+ var filteredSamples5 = studyViewMapper.getFilteredSamples(StudyViewFilterHelper.build(studyViewFilter, null, new ArrayList<>()));
+ // 4 acc_tcga + 4 study_genie_pub samples with "NA" AGE data or no AGE data
+ assertEquals(8, filteredSamples5.size());
+
+ // NA + UNKNOWN: the NA value combined with a non-numeric value in the same filter
+ studyViewFilter.setClinicalDataFilters(
+ List.of(
+ newClinicalDataFilter(
+ "age", List.of(
+ newDataFilterValue(null, null, "NA"),
+ newDataFilterValue(null, null, "UNKNOWN")
+ )
+ )
+ )
+ );
+ var filteredSamples6 = studyViewMapper.getFilteredSamples(StudyViewFilterHelper.build(studyViewFilter, null, new ArrayList<>()));
+ // 8 NA (previous section) + 1 UNKNOWN
+ assertEquals(9, filteredSamples6.size());
+ }
+
+ private DataFilterValue newDataFilterValue(Double start, Double end, String value) {
+ DataFilterValue dataFilterValue = new DataFilterValue();
+ // Test helper: a null start or end is passed through as null (the tests use it for an open-ended range); value is the non-numeric match.
+ dataFilterValue.setStart(start == null ? null : BigDecimal.valueOf(start));
+ dataFilterValue.setEnd(end == null ? null : BigDecimal.valueOf(end));
+ dataFilterValue.setValue(value);
+ // BigDecimal.valueOf uses the double's canonical string form, so a bound like 0.1 stays 0.1 rather than its exact binary expansion.
+ return dataFilterValue;
+ }
+
+ private ClinicalDataFilter newClinicalDataFilter(String attributeId, List<DataFilterValue> values) {
+ ClinicalDataFilter clinicalDataFilter = new ClinicalDataFilter();
+ // Test helper: builds the filter for one clinical attribute; the tests in this class treat the listed values as alternatives.
+ clinicalDataFilter.setAttributeId(attributeId);
+ clinicalDataFilter.setValues(values);
+ // The parameter is typed (not a raw List) so callers cannot pass elements other than DataFilterValue.
+ return clinicalDataFilter;
+ }
}
diff --git a/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperClinicalDataCountTest.java b/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperClinicalDataCountTest.java
index 0760a81407e..6f393241a8d 100644
--- a/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperClinicalDataCountTest.java
+++ b/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperClinicalDataCountTest.java
@@ -59,8 +59,8 @@ public void getMutationCounts() {
assertEquals(2, findClinicaDataCount(mutationsCounts, "4"));
assertEquals(4, findClinicaDataCount(mutationsCounts, "2"));
assertEquals(2, findClinicaDataCount(mutationsCounts, "1"));
- // 1 empty string + 1 'NAN' + 11 samples with no data
- assertEquals(13, findClinicaDataCount(mutationsCounts, "NA"));
+ // 1 empty string + 1 'NAN' + 12 samples with no data
+ assertEquals(14, findClinicaDataCount(mutationsCounts, "NA"));
}
@Test
@@ -87,8 +87,8 @@ public void getCenterCounts() {
assertEquals(1, findClinicaDataCount(categoricalClinicalDataCounts, "MDA"));
assertEquals(1, findClinicaDataCount(categoricalClinicalDataCounts, "OHSU"));
assertEquals(1, findClinicaDataCount(categoricalClinicalDataCounts, "UCSF"));
- // 1 empty string + 1 'NA' + 11 samples with no data
- assertEquals(13, findClinicaDataCount(categoricalClinicalDataCounts, "NA"));
+ // 1 empty string + 1 'NA' + 12 samples with no data
+ assertEquals(14, findClinicaDataCount(categoricalClinicalDataCounts, "NA"));
}
@Test
@@ -114,8 +114,8 @@ public void getDeadCounts() {
assertEquals(1, findClinicaDataCount(categoricalClinicalDataCounts, "NOT RELEASED"));
assertEquals(1, findClinicaDataCount(categoricalClinicalDataCounts, "NOT COLLECTED"));
assertEquals(1, findClinicaDataCount(categoricalClinicalDataCounts, "UNKNOWN"));
- // 1 empty string + 1 'N/A' + 11 samples with no data
- assertEquals(13, findClinicaDataCount(categoricalClinicalDataCounts, "NA"));
+ // 1 empty string + 1 'N/A' + 12 samples with no data
+ assertEquals(14, findClinicaDataCount(categoricalClinicalDataCounts, "NA"));
}
@Test
@@ -181,7 +181,7 @@ public void getAgeCountsForMultipleStudies() {
}
private void assertAgeCounts(List ageCounts) {
- assertEquals(14, ageCounts.size());
+ assertEquals(15, ageCounts.size());
assertEquals(3, findClinicaDataCount(ageCounts, "<18"));
assertEquals(1, findClinicaDataCount(ageCounts, "18"));
@@ -197,6 +197,7 @@ private void assertAgeCounts(List ageCounts) {
assertEquals(2, findClinicaDataCount(ageCounts, "82"));
assertEquals(1, findClinicaDataCount(ageCounts, "89"));
assertEquals(2, findClinicaDataCount(ageCounts, ">89"));
+ assertEquals(1, findClinicaDataCount(ageCounts, "UNKNOWN"));
}
@Test
diff --git a/src/test/resources/clickhouse_data.sql b/src/test/resources/clickhouse_data.sql
index 99fd2ccfb6c..a63d60456fb 100644
--- a/src/test/resources/clickhouse_data.sql
+++ b/src/test/resources/clickhouse_data.sql
@@ -133,6 +133,7 @@ insert into patient (internal_id,stable_id,cancer_study_id) values (320,'GENIE-T
insert into patient (internal_id,stable_id,cancer_study_id) values (321,'GENIE-TEST-321',3);
insert into patient (internal_id,stable_id,cancer_study_id) values (322,'GENIE-TEST-322',3);
insert into patient (internal_id,stable_id,cancer_study_id) values (323,'GENIE-TEST-323',3);
+insert into patient (internal_id,stable_id,cancer_study_id) values (324,'GENIE-TEST-324',3);
insert into genetic_profile_samples (genetic_profile_id,ordered_sample_list) values(10,'1,2,3,4,5,6,7,8,9,10,11,');
@@ -178,6 +179,7 @@ insert into sample (internal_id,stable_id,sample_type,patient_id) values (320,'G
insert into sample (internal_id,stable_id,sample_type,patient_id) values (321,'GENIE-TEST-321-01','primary solid tumor',321);
insert into sample (internal_id,stable_id,sample_type,patient_id) values (322,'GENIE-TEST-322-01','primary solid tumor',322);
insert into sample (internal_id,stable_id,sample_type,patient_id) values (323,'GENIE-TEST-323-01','primary solid tumor',323);
+insert into sample (internal_id,stable_id,sample_type,patient_id) values (324,'GENIE-TEST-324-01','primary solid tumor',324);
insert into mutation_event (mutation_event_id,entrez_gene_id,chr,start_position,end_position,reference_allele,tumor_seq_allele,protein_change,mutation_type,ncbi_build,strand,variant_type,db_snp_rs,db_snp_val_status,refseq_mrna_id,codon_change,uniprot_accession,protein_pos_start,protein_pos_end,canonical_transcript,keyword) values (2038,672,'17',41244748,41244748,'g','a','q934*','nonsense_mutation','37','+','snp','rs80357223','unknown','nm_007294','c.(2800-2802)cag>tag','p38398',934,934,1,'BRCA1 truncating');
@@ -457,6 +459,7 @@ insert into clinical_patient (internal_id,attr_id,attr_value) values (319,'age',
insert into clinical_patient (internal_id,attr_id,attr_value) values (320,'age','N/A');
insert into clinical_patient (internal_id,attr_id,attr_value) values (321,'age','');
insert into clinical_patient (internal_id,attr_id,attr_value) values (322,'age','NAN');
+insert into clinical_patient (internal_id,attr_id,attr_value) values (324,'age','UNKNOWN');
insert into clinical_sample (internal_id,attr_id,attr_value) values (1,'other_sample_id','5c631ce8-f96a-4c35-a459-556fc4ab21e1');
insert into clinical_sample (internal_id,attr_id,attr_value) values (1,'days_to_collection','276');