Skip to content

Commit

Permalink
support filtering across multiple propertypaths
Browse files Browse the repository at this point in the history
breaking change behind a feature flag (for paths in query params)
- use `.` (period) to delimit path-steps, e.g. `creator.affiliation`
- use `,` (comma) to delimit multiple paths, e.g.
  `creator.affiliation,contributor.affiliation`
  • Loading branch information
aaxelb committed Sep 11, 2023
1 parent b97f248 commit ffa8b32
Show file tree
Hide file tree
Showing 6 changed files with 218 additions and 56 deletions.
1 change: 1 addition & 0 deletions share/models/feature_flag.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ class FeatureFlag(models.Model):
# flag name constants:
ELASTIC_EIGHT_DEFAULT = 'elastic_eight_default'
IGNORE_SHAREV2_INGEST = 'ignore_sharev2_ingest'
PERIODIC_PROPERTYPATH = 'periodic_propertypath'

# name _should_ be one of the constants above, but that is not enforced by `choices`
name = models.TextField(unique=True)
Expand Down
49 changes: 34 additions & 15 deletions share/search/index_strategy/trove_indexcard_flats.py
Original file line number Diff line number Diff line change
Expand Up @@ -640,25 +640,40 @@ def _valuesearch_date_result(self, date_bucket):
)

def _cardsearch_presence_query(self, search_filter) -> dict:
_path_keyword = iri_path_as_keyword(search_filter.property_path, suffuniq=True)
return {'term': {'iri_paths_present_suffuniq': _path_keyword}}
_filters = [
self._cardsearch_path_presence_query(_path)
for _path in search_filter.propertypath_set
]
if len(_filters) == 1:
return _filters[0]
return {'bool': {'must': _filters}}

def _cardsearch_iri_filter(self, search_filter) -> dict:
_field = '.'.join((
'flat_iri_values_suffuniq',
_iri_path_as_flattened_key(search_filter.property_path),
))
return {'terms': {
_field: [
get_sufficiently_unique_iri(_iri)
for _iri in search_filter.value_set
],
def _cardsearch_path_presence_query(self, path: tuple[str, ...]):
return {'term': {
'iri_paths_present_suffuniq': iri_path_as_keyword(path, suffuniq=True),
}}

def _cardsearch_iri_filter(self, search_filter) -> dict:
_filters = [
self._cardsearch_path_iri_query(_path, search_filter.value_set)
for _path in search_filter.propertypath_set
]
if len(_filters) == 1:
return _filters[0]
return {'bool': {'should': _filters}} # at least one match

def _cardsearch_path_iri_query(self, path, value_set):
return {'terms': {_iri_path_as_flattened_field(path): [
get_sufficiently_unique_iri(_iri)
for _iri in value_set
]}}

def _cardsearch_date_filter(self, search_filter) -> dict:
_propertypath_keyword = iri_path_as_keyword(search_filter.property_path, suffuniq=True)
_filter_list = [
{'term': {'nested_date.suffuniq_path_from_focus': _propertypath_keyword}},
{'terms': {'nested_date.suffuniq_path_from_focus': [
iri_path_as_keyword(_path, suffuniq=True)
for _path in search_filter.propertypath_set
]}},
]
if search_filter.operator == SearchFilter.FilterOperator.BEFORE:
_value = min(search_filter.value_set) # rely on string-comparable isoformat
Expand Down Expand Up @@ -738,7 +753,7 @@ def _cardsearch_response(
and cursor.is_first_page()
and not cursor.first_page_uuids
and any(
_filter.property_path != (RDF.type,) # look for a non-default filter
not _filter.is_type_filter() # look for a non-default filter
for _filter in cardsearch_params.cardsearch_filter_set
)
)
Expand Down Expand Up @@ -923,6 +938,10 @@ def _iri_path_as_flattened_key(path: tuple[str, ...]) -> str:
return base64.b16encode(json.dumps(path).encode()).decode()


def _iri_path_as_flattened_field(path: tuple[str, ...]) -> str:
return f'flat_iri_values_suffuniq.{_iri_path_as_flattened_key(path)}'


@dataclasses.dataclass
class _SimpleCursor:
start_index: int
Expand Down
82 changes: 58 additions & 24 deletions share/search/search_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from django.http import QueryDict

from share.models import FeatureFlag
from share.search import exceptions
from trove.util.queryparams import (
QueryparamDict,
Expand Down Expand Up @@ -36,6 +37,7 @@

DEFAULT_PAGE_SIZE = 13
MAX_PAGE_SIZE = 101
PROPERTYPATH_DELIMITER = '.'


###
Expand Down Expand Up @@ -167,7 +169,7 @@ def is_iri_operator(self):
def is_valueless_operator(self):
return self in (self.IS_PRESENT, self.IS_ABSENT)

property_path: tuple[str]
propertypath_set: frozenset[tuple[str, ...]]
value_set: frozenset[str]
operator: FilterOperator
original_param_name: typing.Optional[str] = None
Expand All @@ -184,11 +186,11 @@ def for_queryparam_family(cls, queryparams: QueryparamDict, queryparam_family: s
@classmethod
def from_filter_param(cls, param_name: QueryparamName, param_value: str):
_operator = None
try: # "filter[<serialized_path>][<operator>]"
(_serialized_path, _operator_value) = param_name.bracketed_names
try: # "filter[<serialized_path_set>][<operator>]"
(_serialized_path_set, _operator_value) = param_name.bracketed_names
except ValueError:
try: # "filter[<serialized_path>]" (with default operator)
(_serialized_path,) = param_name.bracketed_names
try: # "filter[<serialized_path_set>]" (with default operator)
(_serialized_path_set,) = param_name.bracketed_names
except ValueError:
raise exceptions.InvalidSearchParam(
f'expected one or two bracketed queryparam-name segments'
Expand All @@ -200,23 +202,37 @@ def from_filter_param(cls, param_name: QueryparamName, param_value: str):
_operator = SearchFilter.FilterOperator.from_shortname(_operator_value)
except ValueError:
raise ValueError(f'unrecognized search-filter operator "{_operator_value}"')
_propertypath = tuple(
osfmap_labeler.iri_for_label(_pathstep, default=_pathstep)
for _pathstep in split_queryparam_value(_serialized_path)
if FeatureFlag.objects.flag_is_up(FeatureFlag.PERIODIC_PROPERTYPATH):
# comma-delimited set of dot-delimited paths
_propertypath_set = frozenset(
tuple(
osfmap_labeler.iri_for_label(_pathstep, default=_pathstep)
for _pathstep in _path.split(PROPERTYPATH_DELIMITER)
)
for _path in split_queryparam_value(_serialized_path_set)
)
else: # single comma-delimited path
_propertypath = tuple(
osfmap_labeler.iri_for_label(_pathstep, default=_pathstep)
for _pathstep in split_queryparam_value(_serialized_path_set)
)
_propertypath_set = frozenset([_propertypath])
_is_date_filter = all(
is_date_property(_path[-1])
for _path in _propertypath_set
)
_is_date_property = is_date_property(_propertypath[-1])
if _operator is None: # default operator
_operator = (
SearchFilter.FilterOperator.AT_DATE
if _is_date_property
if _is_date_filter
else SearchFilter.FilterOperator.ANY_OF
)
if _operator.is_date_operator() and not _is_date_property:
if _operator.is_date_operator() and not _is_date_filter:
raise ValueError(f'cannot use date operator {_operator.value} on non-date property')
_value_list = []
if not _operator.is_valueless_operator():
for _value in split_queryparam_value(param_value):
if _is_date_property:
if _is_date_filter:
_value_list.append(_value) # TODO: vali-date
else:
try:
Expand All @@ -226,13 +242,29 @@ def from_filter_param(cls, param_name: QueryparamName, param_value: str):
else:
_value_list.append(_iri)
return cls(
property_path=_propertypath,
propertypath_set=_propertypath_set,
value_set=frozenset(_value_list),
operator=_operator,
original_param_name=str(param_name),
original_param_value=param_value,
)

def is_sameas_filter(self) -> bool:
'''detect the special filter for matching exact identifier iris
'''
return (
self.propertypath_set == {(OWL.sameAs,)}
and self.operator == SearchFilter.FilterOperator.ANY_OF
)

def is_type_filter(self) -> bool:
'''detect the special filter for matching resource type
'''
return (
self.propertypath_set == {(RDF.type,)}
and self.operator == SearchFilter.FilterOperator.ANY_OF
)


@dataclasses.dataclass(frozen=True)
class SortParam:
Expand Down Expand Up @@ -355,19 +387,24 @@ class ValuesearchParams(CardsearchParams):
@staticmethod
def from_queryparams(queryparams: QueryparamDict) -> 'ValuesearchParams':
_valuesearch_text = _get_text_queryparam(queryparams, 'valueSearchText')
_raw_property_path = _get_single_value(queryparams, QueryparamName('valueSearchPropertyPath'))
if not _raw_property_path:
_raw_propertypath = _get_single_value(queryparams, QueryparamName('valueSearchPropertyPath'))
if not _raw_propertypath:
raise ValueError('TODO: 400 valueSearchPropertyPath required')
_split_propertypath = (
_raw_propertypath.split(PROPERTYPATH_DELIMITER)
if FeatureFlag.objects.flag_is_up(FeatureFlag.PERIODIC_PROPERTYPATH)
else split_queryparam_value(_raw_propertypath)
)
return ValuesearchParams(
**CardsearchParams.parse_cardsearch_queryparams(queryparams),
valuesearch_property_path=tuple(
osfmap_labeler.iri_for_label(_pathstep, default=_pathstep)
for _pathstep in split_queryparam_value(_raw_property_path)
for _pathstep in _split_propertypath
),
valuesearch_text=_valuesearch_text,
valuesearch_textsegment_set=Textsegment.split_str(_valuesearch_text),
valuesearch_filter_set=SearchFilter.for_queryparam_family(queryparams, 'valueSearchFilter'),
original_valuesearch_property_path=_raw_property_path,
original_valuesearch_property_path=_raw_propertypath,
)

def to_querydict(self):
Expand All @@ -381,12 +418,12 @@ def to_querydict(self):

def valuesearch_iris(self):
for _filter in self.valuesearch_filter_set:
if _filter.property_path == (OWL.sameAs,) and _filter.operator == SearchFilter.FilterOperator.ANY_OF:
if _filter.is_sameas_filter():
yield from _filter.value_set

def valuesearch_type_iris(self):
for _filter in self.valuesearch_filter_set:
if _filter.property_path == (RDF.type,) and _filter.operator == SearchFilter.FilterOperator.ANY_OF:
if _filter.is_type_filter():
yield from _filter.value_set


Expand Down Expand Up @@ -428,11 +465,8 @@ def _get_related_property_paths(filter_set) -> tuple[tuple[str]]:
# TODO: replace with some dynamism, maybe a 'significant_terms' aggregation
_type_iris = set()
for _filter in filter_set:
if _filter.property_path == (RDF.type,):
if _filter.operator == SearchFilter.FilterOperator.ANY_OF:
_type_iris.update(_filter.value_set)
if _filter.operator == SearchFilter.FilterOperator.NONE_OF:
_type_iris.difference_update(_filter.value_set)
if _filter.is_type_filter():
_type_iris.update(_filter.value_set)
return suggested_property_paths(_type_iris)


Expand Down
12 changes: 12 additions & 0 deletions tests/_testutil.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from unittest import mock


def patch_feature_flag(*flag_names, up=True):
from share.models.feature_flag import FeatureFlag
_old_isup = FeatureFlag.objects.flag_is_up

def _patched_isup(flag_name):
if flag_name in flag_names:
return up
return _old_isup(flag_name)
return mock.patch.object(FeatureFlag.objects, 'flag_is_up', new=_patched_isup)
Loading

0 comments on commit ffa8b32

Please sign in to comment.