Skip to content

Commit

Permalink
feat: Make max aggregation bucket size configurable (#44)
Browse files Browse the repository at this point in the history
* Make max aggregation bucket size configurable
* Address comments
* Reflect same changes in ES7 dao
  • Loading branch information
Kerem Sahin authored Nov 11, 2020
1 parent 39a5c92 commit 9485f5c
Show file tree
Hide file tree
Showing 7 changed files with 75 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ public class ESSearchDAO<DOCUMENT extends RecordTemplate> extends BaseSearchDAO<
private BaseSearchConfig<DOCUMENT> _config;
private BaseESAutoCompleteQuery _autoCompleteQueryForLowCardFields;
private BaseESAutoCompleteQuery _autoCompleteQueryForHighCardFields;
private int _maxTermBucketSize = DEFAULT_TERM_BUCKETS_SIZE_100;

// TODO: Currently takes elastic search client, in future, can take other clients such as galene
// TODO: take params and settings needed to create the client
Expand Down Expand Up @@ -255,7 +256,7 @@ private void buildAggregations(@Nonnull SearchSourceBuilder searchSourceBuilder,
@Nullable Filter filter) {
Set<String> facetFields = _config.getFacetFields();
for (String facet : facetFields) {
AggregationBuilder aggBuilder = AggregationBuilders.terms(facet).field(facet).size(DEFAULT_TERM_BUCKETS_SIZE_100);
AggregationBuilder aggBuilder = AggregationBuilders.terms(facet).field(facet).size(_maxTermBucketSize);
Optional.ofNullable(filter).map(Filter::getCriteria).ifPresent(criteria -> {
for (Criterion criterion : criteria) {
if (!facetFields.contains(criterion.getField()) || criterion.getField().equals(facet)) {
Expand Down Expand Up @@ -447,4 +448,16 @@ private Urn getUrnFromSearchHit(@Nonnull SearchHit hit) {
throw new RuntimeException("Invalid urn in search document " + e);
}
}

/**
* Sets max term bucket size in the aggregation results.
*
* <p>The default value might not always be good enough when aggregation happens on a high cardinality field.
* Using a high default instead is also not ideal because of potential query performance degradation.
* Instead, entities which have a rare use case of aggregating over high cardinality fields can use this method
* to configure the aggregation behavior.
*/
public void setMaxTermBucketSize(int maxTermBucketSize) {
_maxTermBucketSize = maxTermBucketSize;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;

Expand Down Expand Up @@ -214,6 +215,25 @@ public void testPreferenceInSearchQuery() {
assertEquals(searchRequest.preference(), preference);
}

@Test
public void testDefaultMaxTermBucketSize() {
String facetFieldName = "value";
Filter filter = QueryUtils.newFilter(Collections.singletonMap(facetFieldName, "dummy"));
SearchRequest searchRequest = _searchDAO.constructSearchQuery("dummy", filter, null, null, 0, 10);
assertEquals(searchRequest.source().aggregations().getAggregatorFactories().iterator().next(),
AggregationBuilders.terms(facetFieldName).field(facetFieldName).size(100));
}

@Test
public void testSetMaxTermBucketSize() {
String facetFieldName = "value";
Filter filter = QueryUtils.newFilter(Collections.singletonMap(facetFieldName, "dummy"));
_searchDAO.setMaxTermBucketSize(5);
SearchRequest searchRequest = _searchDAO.constructSearchQuery("dummy", filter, null, null, 0, 10);
assertEquals(searchRequest.source().aggregations().getAggregatorFactories().iterator().next(),
AggregationBuilders.terms(facetFieldName).field(facetFieldName).size(5));
}

private static SearchHit makeSearchHit(int id) {
SearchHit hit = mock(SearchHit.class);
Map<String, Object> sourceMap = new HashMap<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,14 @@

import com.linkedin.testing.EntityDocument;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import javax.annotation.Nonnull;

public class TestSearchConfig extends BaseSearchConfig<EntityDocument> {
@Override
@Nonnull
public Set<String> getFacetFields() {
return Collections.unmodifiableSet(new HashSet<>());
return Collections.singleton("value");
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ public class ESSearchDAO<DOCUMENT extends RecordTemplate> extends BaseSearchDAO<
private BaseSearchConfig<DOCUMENT> _config;
private BaseESAutoCompleteQuery _autoCompleteQueryForLowCardFields;
private BaseESAutoCompleteQuery _autoCompleteQueryForHighCardFields;
private int _maxTermBucketSize = DEFAULT_TERM_BUCKETS_SIZE_100;

// TODO: Currently takes elastic search client, in future, can take other clients such as galene
// TODO: take params and settings needed to create the client
Expand Down Expand Up @@ -254,7 +255,7 @@ private void buildAggregations(@Nonnull SearchSourceBuilder searchSourceBuilder,
@Nullable Filter filter) {
Set<String> facetFields = _config.getFacetFields();
for (String facet : facetFields) {
AggregationBuilder aggBuilder = AggregationBuilders.terms(facet).field(facet).size(DEFAULT_TERM_BUCKETS_SIZE_100);
AggregationBuilder aggBuilder = AggregationBuilders.terms(facet).field(facet).size(_maxTermBucketSize);
Optional.ofNullable(filter).map(Filter::getCriteria).ifPresent(criteria -> {
for (Criterion criterion : criteria) {
if (!facetFields.contains(criterion.getField()) || criterion.getField().equals(facet)) {
Expand Down Expand Up @@ -446,4 +447,16 @@ private Urn getUrnFromSearchHit(@Nonnull SearchHit hit) {
throw new RuntimeException("Invalid urn in search document " + e);
}
}

/**
* Sets max term bucket size in the aggregation results.
*
* <p>The default value might not always be good enough when aggregation happens on a high cardinality field.
* Using a high default instead is also not ideal because of potential query performance degradation.
* Instead, entities which have a rare use case of aggregating over high cardinality fields can use this method
* to configure the aggregation behavior.
*/
public void setMaxTermBucketSize(int maxTermBucketSize) {
_maxTermBucketSize = maxTermBucketSize;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;

Expand Down Expand Up @@ -212,6 +213,25 @@ public void testPreferenceInSearchQuery() {
assertEquals(searchRequest.preference(), preference);
}

@Test
public void testDefaultMaxTermBucketSize() {
String facetFieldName = "value";
Filter filter = QueryUtils.newFilter(Collections.singletonMap(facetFieldName, "dummy"));
SearchRequest searchRequest = _searchDAO.constructSearchQuery("dummy", filter, null, null, 0, 10);
assertEquals(searchRequest.source().aggregations().getAggregatorFactories().get(0),
AggregationBuilders.terms(facetFieldName).field(facetFieldName).size(100));
}

@Test
public void testSetMaxTermBucketSize() {
String facetFieldName = "value";
Filter filter = QueryUtils.newFilter(Collections.singletonMap(facetFieldName, "dummy"));
_searchDAO.setMaxTermBucketSize(5);
SearchRequest searchRequest = _searchDAO.constructSearchQuery("dummy", filter, null, null, 0, 10);
assertEquals(searchRequest.source().aggregations().getAggregatorFactories().get(0),
AggregationBuilders.terms(facetFieldName).field(facetFieldName).size(5));
}

private static SearchHit makeSearchHit(int id) {
SearchHit hit = mock(SearchHit.class);
Map<String, Object> sourceMap = new HashMap<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,14 @@

import com.linkedin.testing.EntityDocument;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import javax.annotation.Nonnull;

public class TestSearchConfig extends BaseSearchConfig<EntityDocument> {
@Override
@Nonnull
public Set<String> getFacetFields() {
return Collections.unmodifiableSet(new HashSet<>());
return Collections.singleton("value");
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,9 @@ record EntityDocument {
* For unit tests
*/
urn: Urn

/**
* For unit tests
*/
value: optional string
}

0 comments on commit 9485f5c

Please sign in to comment.