Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
aaxelb committed Aug 15, 2024
1 parent 3b2365e commit 74efb4a
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 130 deletions.
30 changes: 13 additions & 17 deletions share/search/index_strategy/trove_indexcard_flats.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,18 +344,14 @@ def pls_handle_cardsearch(self, cardsearch_params: CardsearchParams) -> Cardsear

def pls_handle_valuesearch(self, valuesearch_params: ValuesearchParams) -> ValuesearchResponse:
_cursor = _SimpleCursor.from_page_param(valuesearch_params.page)
_is_date_search = all(
is_date_property(_path[-1])
for _path in valuesearch_params.valuesearch_propertypath_set
)
_is_date_search = is_date_property(valuesearch_params.valuesearch_propertypath[-1])
_search_kwargs = dict(
query=self._cardsearch_query(
valuesearch_params.cardsearch_filter_set,
valuesearch_params.cardsearch_textsegment_set,
additional_filters=[{'terms': {'iri_paths_present': [
iri_path_as_keyword(_path)
for _path in valuesearch_params.valuesearch_propertypath_set
]}}],
additional_filters=[{'term': {'iri_paths_present': iri_path_as_keyword(
valuesearch_params.valuesearch_propertypath,
)}}],
),
size=0, # ignore cardsearch hits; just want the aggs
aggs=(
Expand Down Expand Up @@ -454,10 +450,10 @@ def _cardsearch_aggs(self, cardsearch_params):

def _valuesearch_iri_aggs(self, valuesearch_params: ValuesearchParams, cursor: '_SimpleCursor'):
_nested_iri_bool = {
'filter': [{'terms': {'nested_iri.suffuniq_path_from_focus': [
iri_path_as_keyword(_path, suffuniq=True)
for _path in valuesearch_params.valuesearch_propertypath_set
]}}],
'filter': [{'term': {'nested_iri.suffuniq_path_from_focus': iri_path_as_keyword(
valuesearch_params.valuesearch_propertypath,
suffuniq=True,
)}}],
'must': [],
'must_not': [],
'should': [],
Expand Down Expand Up @@ -519,11 +515,11 @@ def _valuesearch_date_aggs(self, valuesearch_params: ValuesearchParams):
'nested': {'path': 'nested_date'},
'aggs': {
'value_at_propertypath': {
'filter': {'terms': {
'nested_date.suffuniq_path_from_focus': [
iri_path_as_keyword(_path, suffuniq=True)
for _path in valuesearch_params.valuesearch_propertypath_set
],
'filter': {'term': {
'nested_date.suffuniq_path_from_focus': iri_path_as_keyword(
valuesearch_params.valuesearch_propertypath,
suffuniq=True,
),
}},
'aggs': {
'count_by_year': {
Expand Down
127 changes: 18 additions & 109 deletions share/search/index_strategy/trovesearch_flattery.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
)
from trove.util.iris import get_sufficiently_unique_iri, is_worthwhile_iri
from trove.vocab.osfmap import is_date_property
from trove.vocab.namespaces import TROVE, FOAF, RDF, RDFS, DCTERMS, OWL, SKOS, OSFMAP
from trove.vocab.namespaces import TROVE, FOAF, RDFS, DCTERMS, OWL, SKOS, OSFMAP


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -99,8 +99,7 @@ def works_with_params(cls, params: CardsearchParams):
return (
not isinstance(params, ValuesearchParams)
or ( # constraints on valuesearch:
len(params.valuesearch_propertypath_set) == 1
and not params.valuesearch_textsegment_set
not params.valuesearch_textsegment_set
and all(
_filter.is_sameas_filter()
for _filter in params.valuesearch_filter_set
Expand Down Expand Up @@ -523,8 +522,8 @@ def _cardsearch_text_boolparts(self) -> Iterator[tuple[str, dict]]:
yield 'must', self._exact_text_query(_textsegment)
else:
yield 'must', self._fuzzy_text_must_query(_textsegment)
# if self.relevance_matters:
# yield 'should', self._fuzzy_text_should_query(_textsegment)
if self.relevance_matters:
yield 'should', self._fuzzy_text_should_query(_textsegment)

def _text_field_name(self, propertypath: Propertypath):
return (
Expand Down Expand Up @@ -569,10 +568,8 @@ class _ValuesearchQueryBuilder(_CardsearchQueryBuilder):
def build(self):
if self._is_date_valuesearch():
_aggs = self._valuesearch_date_aggs()
elif self._can_use_nonnested_aggs():
_aggs = self._valuesearch_nonnested_iri_aggs()
else:
_aggs = self._valuesearch_nested_iri_aggs()
_aggs = self._valuesearch_iri_aggs()
return dict(
query=self._cardsearch_query(),
size=0, # ignore cardsearch hits; just want the aggs
Expand All @@ -589,30 +586,10 @@ def relevance_matters(self) -> bool:
return False # valuesearch always ordered by count

def _is_date_valuesearch(self) -> bool:
return all(
is_date_property(_path[-1])
for _path in self.params.valuesearch_propertypath_set
)

def _can_use_nonnested_aggs(self):
raise RuntimeError('remove this')
return (
len(self.params.valuesearch_propertypath_set) == 1
and not self.params.valuesearch_textsegment_set
and all(
_filter.is_sameas_filter()
for _filter in self.params.valuesearch_filter_set
)
)

def _additional_cardsearch_filters(self) -> list[dict]:
return [{'terms': {'propertypaths_present': [
propertypath_as_keyword(_path)
for _path in self.params.valuesearch_propertypath_set
]}}]
return is_date_property(self.params.valuesearch_propertypath[-1])

def _valuesearch_nonnested_iri_aggs(self):
(_propertypath,) = self.params.valuesearch_propertypath_set
def _valuesearch_iri_aggs(self):
_propertypath = self.params.valuesearch_propertypath
_field = f'iri_by_propertypath.{propertypath_as_field_name(_propertypath)}'
_terms_agg: dict = {'field': _field}
_specific_iris = list(set(self.params.valuesearch_iris()))
Expand All @@ -621,87 +598,19 @@ def _valuesearch_nonnested_iri_aggs(self):
_terms_agg['size'] = len(_specific_iris)
return {'agg_valuesearch': {'terms': _terms_agg}}

def _valuesearch_nested_iri_aggs(self):
raise NotImplementedError('_valuesearch_nested_iri_aggs')
_nested_iri_bool = {
'filter': ...,
'must': [],
'must_not': [],
'should': [],
}
_nested_terms_agg = {
'field': 'nested_iri.iri_value',
# WARNING: terribly inefficient pagination (part one)
'size': cursor.start_index + cursor.page_size + 1,
}
_specific_iris = list(self.params.valuesearch_iris())
if _specific_iris:
_nested_iri_bool['filter'].append({'terms': {
'nested_iri.iri_value': _specific_iris,
}})
_nested_terms_agg['size'] = len(_specific_iris)
_nested_terms_agg['include'] = _specific_iris
_type_iris = list(self.params.valuesearch_type_iris())
if _type_iris:
_nested_iri_bool['filter'].append({'terms': {
'nested_iri.type_iri': _type_iris,
}})
_textq_builder = self._SimpleTextQueryBuilder('nested_iri.value_namelike_text')
for _textsegment in self.params.valuesearch_textsegment_set:
for _boolkey, _textqueries in _textq_builder.textsegment_boolparts(_textsegment).items():
_nested_iri_bool[_boolkey].extend(_textqueries)
return {
'in_nested_iri': {
'nested': {'path': 'nested_iri'},
'aggs': {
'value_at_propertypath': {
'filter': {'bool': _nested_iri_bool},
'aggs': {
'agg_iri_values': {
'terms': _nested_terms_agg,
'aggs': {
'type_iri': {'terms': {
'field': 'nested_iri.type_iri',
}},
'name_text': {'terms': {
'field': 'nested_iri.name_text.raw',
}},
'title_text': {'terms': {
'field': 'nested_iri.title_text.raw',
}},
'label_text': {'terms': {
'field': 'nested_iri.label_text.raw',
}},
},
},
},
},
},
},
}

def _valuesearch_date_aggs(self):
_propertypath = self.params.valuesearch_propertypath
_field = f'date_by_propertypath.{propertypath_as_field_name(_propertypath)}'
_aggs = {
'in_nested_date': {
'nested': {'path': 'nested_date'},
'agg_value_at_propertypath': {
'aggs': {
'value_at_propertypath': {
'filter': {'terms': {
'nested_date.suffuniq_path_from_focus': [
propertypath_as_keyword(_path)
for _path in self.params.valuesearch_propertypath_set
],
}},
'aggs': {
'count_by_year': {
'date_histogram': {
'field': 'nested_date.date_value',
'calendar_interval': 'year',
'format': 'yyyy',
'order': {'_key': 'desc'},
'min_doc_count': 1,
},
},
'count_by_year': {
'date_histogram': {
'field': _field,
'calendar_interval': 'year',
'format': 'yyyy',
'order': {'_key': 'desc'},
'min_doc_count': 1,
},
},
},
Expand Down
7 changes: 7 additions & 0 deletions share/search/index_strategy/trovesearch_nesterly.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,10 @@ def _gather_text_values(self, focus_iri, pathset) -> typing.Iterator[str]:
# override TrovesearchFlatteryIndexStrategy
class _ValuesearchQueryBuilder(FlatteryStrategy._ValuesearchQueryBuilder):
    """Valuesearch query builder that extends the flattery strategy's builder.

    Only `_additional_cardsearch_filters` is overridden here; everything
    else is inherited from `FlatteryStrategy._ValuesearchQueryBuilder`.
    """

    # override _CardsearchQueryBuilder
    def _additional_cardsearch_filters(self) -> list[dict]:
        """Return a single extra `term` filter for the cardsearch query.

        The filter matches the `propertypaths_present` field against the
        keyword form of `self.params.valuesearch_propertypath`.
        NOTE(review): presumably this limits the cardsearch to cards that
        have some value at the valuesearch propertypath -- confirm against
        the index mappings.
        """
        # TODO: consider
        _path_keyword = flattery.propertypath_as_keyword(
            self.params.valuesearch_propertypath
        )
        _presence_filter = {'term': {'propertypaths_present': _path_keyword}}
        return [_presence_filter]
6 changes: 3 additions & 3 deletions trove/trovesearch/search_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,7 +473,7 @@ def to_querydict(self) -> QueryDict:
class ValuesearchParams(CardsearchParams):
# includes fields from CardsearchParams, because a
# valuesearch is always in context of a cardsearch
valuesearch_propertypath_set: frozenset[tuple[str, ...]]
valuesearch_propertypath: tuple[str, ...]
valuesearch_textsegment_set: frozenset[Textsegment]
valuesearch_filter_set: frozenset[SearchFilter]

Expand All @@ -485,14 +485,14 @@ def parse_queryparams(cls, queryparams: QueryparamDict) -> dict:
raise trove_exceptions.MissingRequiredQueryParam('valueSearchPropertyPath')
return {
**super().parse_queryparams(queryparams),
'valuesearch_propertypath_set': _parse_propertypath_set(_raw_propertypath, allow_globs=False),
'valuesearch_propertypath': _parse_propertypath(_raw_propertypath, allow_globs=False),
'valuesearch_textsegment_set': Textsegment.from_queryparam_family(queryparams, 'valueSearchText'),
'valuesearch_filter_set': SearchFilter.from_queryparam_family(queryparams, 'valueSearchFilter'),
}

def to_querydict(self):
_querydict = super().to_querydict()
_querydict['valueSearchPropertyPath'] = propertypath_set_key(self.valuesearch_propertypath_set)
_querydict['valueSearchPropertyPath'] = propertypath_set_key(self.valuesearch_propertypath)
for _qp_name, _qp_value in Textsegment.queryparams_from_textsegments('valueSearchText', self.valuesearch_textsegment_set):
_querydict[_qp_name] = _qp_value
for _filter in self.valuesearch_filter_set:
Expand Down
2 changes: 1 addition & 1 deletion trove/trovesearch/trovesearch_gathering.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@

@trovesearch_by_indexstrategy.gatherer(TROVE.propertyPath, focustype_iris={TROVE.Valuesearch})
def gather_valuesearch_propertypath(focus, *, search_params, **kwargs):
yield from _multi_propertypath_twoples(search_params.valuesearch_propertypath_set)
yield from _single_propertypath_twoples(search_params.valuesearch_propertypath)


@trovesearch_by_indexstrategy.gatherer(TROVE.valueSearchFilter)
Expand Down

0 comments on commit 74efb4a

Please sign in to comment.