Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
aaxelb committed Aug 16, 2024
1 parent d05e623 commit 1a1e9b7
Show file tree
Hide file tree
Showing 5 changed files with 109 additions and 130 deletions.
94 changes: 86 additions & 8 deletions share/search/index_strategy/__init__.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,96 @@
from __future__ import annotations
import functools
from types import MappingProxyType

from django.conf import settings

from share.search.exceptions import IndexStrategyError
from share.models import FeatureFlag
from trove.trovesearch import search_params
from .sharev2_elastic5 import Sharev2Elastic5IndexStrategy
from .sharev2_elastic8 import Sharev2Elastic8IndexStrategy
from .trove_indexcard_flats import TroveIndexcardFlatsIndexStrategy
from .trovesearch_flattery import TrovesearchFlatteryIndexStrategy
from .trovesearch_nesterly import TrovesearchNesterlyIndexStrategy
from ._base import IndexStrategy
from ._strategy_selection import (
all_index_strategies,
get_index_strategy,
get_specific_index,
get_index_for_sharev2_search,
get_index_for_trovesearch,
)


# public names of this module, each listed exactly once
__all__ = (
    'IndexStrategy',
    'all_index_strategies',
    'get_index_for_sharev2_search',
    'get_index_for_trovesearch',
    'get_index_strategy',
    'get_specific_index',
)


@functools.cache
def all_index_strategies() -> MappingProxyType[str, IndexStrategy]:
    '''Return a read-only mapping from strategy name to index strategy.

    The mapping is built from `_iter_all_index_strategies` and wrapped in a
    `MappingProxyType`; because of `functools.cache`, the same mapping object
    is returned on later calls until `cache_clear()` is called.
    '''
    _strategies_by_name = {}
    for _each_strategy in _iter_all_index_strategies():
        _strategies_by_name[_each_strategy.name] = _each_strategy
    return MappingProxyType(_strategies_by_name)


def _iter_all_index_strategies():
# Generator of index strategy instances, each constructed with its name;
# `all_index_strategies` consumes it to build the name-to-strategy mapping.
# NOTE(review): indentation was lost in this view, so it is unclear whether
# the three trove strategies below are gated by ELASTICSEARCH8_URL or yielded
# unconditionally -- confirm against the real file before changing this.
# elasticsearch 5 strategy is only offered when its url setting is truthy
if settings.ELASTICSEARCH5_URL:
yield Sharev2Elastic5IndexStrategy(name='sharev2_elastic5')
# elasticsearch 8 strategy is only offered when its url setting is truthy
if settings.ELASTICSEARCH8_URL:
yield Sharev2Elastic8IndexStrategy(name='sharev2_elastic8')
yield TroveIndexcardFlatsIndexStrategy(name='trove_indexcard_flats')
yield TrovesearchFlatteryIndexStrategy(name='trovesearch_flattery')
yield TrovesearchNesterlyIndexStrategy(name='trovesearch_nesterly')


def get_index_strategy(strategyname: str) -> IndexStrategy:
    '''Look up an index strategy by name.

    Args:
        strategyname: a key of the mapping returned by `all_index_strategies`

    Returns:
        the strategy registered under that name

    Raises:
        IndexStrategyError: when no strategy has the given name; explicitly
            chained to the underlying `KeyError` so the cause stays visible
    '''
    try:
        return all_index_strategies()[strategyname]
    except KeyError as _error:
        raise IndexStrategyError(f'unknown index strategy "{strategyname}"') from _error


def get_specific_index(indexname_or_strategyname: str, *, for_search=False) -> IndexStrategy.SpecificIndex:
    '''Resolve a strategy name or a specific index name to a specific index.

    Args:
        indexname_or_strategyname: first tried as a strategy name; if no
            strategy has that name, each known strategy is asked whether it
            recognizes it as one of its specific index names
        for_search: only used when the name is a strategy name -- if true,
            return the strategy's default index for searching instead of
            its current index

    Raises:
        IndexStrategyError: when the name is neither a known strategy name
            nor recognized by any strategy as a specific index name, or when
            a known strategy fails to provide the requested index
    '''
    # keep the `try` around the name lookup only, so an error raised while
    # resolving a *known* strategy's index is not mistaken for an unknown name
    try:
        _strategy = get_index_strategy(indexname_or_strategyname)
    except IndexStrategyError:
        # not a strategy name; maybe a specific index name
        for _index_strategy in all_index_strategies().values():
            try:
                return _index_strategy.for_specific_index(indexname_or_strategyname)
            except IndexStrategyError:
                continue  # this strategy does not recognize the name; try the next
        raise IndexStrategyError(f'unrecognized name "{indexname_or_strategyname}"')
    if for_search:
        return _strategy.pls_get_default_for_searching()
    return _strategy.for_current_index()


def get_index_for_sharev2_search(requested_name=None) -> IndexStrategy.SpecificIndex:
    '''Choose the specific index to use for a sharev2 search.

    An explicitly requested name wins. Otherwise 'sharev2_elastic5' is used
    when an elasticsearch 5 url is configured and the ELASTIC_EIGHT_DEFAULT
    feature flag is not up; failing that, 'sharev2_elastic8' is used when an
    elasticsearch 8 url is configured.

    Raises:
        IndexStrategyError: when no name was requested and neither default applies
    '''
    if requested_name:
        return get_specific_index(requested_name, for_search=True)
    # the feature flag is only consulted when elasticsearch 5 is configured
    if (
        settings.ELASTICSEARCH5_URL
        and not FeatureFlag.objects.flag_is_up(FeatureFlag.ELASTIC_EIGHT_DEFAULT)
    ):
        return get_specific_index('sharev2_elastic5', for_search=True)
    if settings.ELASTICSEARCH8_URL:
        return get_specific_index('sharev2_elastic8', for_search=True)
    raise IndexStrategyError('no available index for sharev2 search')


def get_index_for_trovesearch(params: search_params.CardsearchParams) -> IndexStrategy.SpecificIndex:
    '''Choose the specific index to use for a trovesearch request.

    A strategy name given in the params wins. Otherwise the choice depends on
    the USE_FLATTERY_STRATEGY feature flag: when it is not up, use
    'trove_indexcard_flats'; when it is up, use 'trovesearch_flattery' if that
    strategy reports it works with the given params, else 'trovesearch_nesterly'.
    '''
    _requested_name = params.index_strategy_name
    if _requested_name:  # specific strategy requested
        _strategyname = _requested_name
    elif FeatureFlag.objects.flag_is_up(FeatureFlag.USE_FLATTERY_STRATEGY):
        if TrovesearchFlatteryIndexStrategy.works_with_params(params):
            _strategyname = 'trovesearch_flattery'
        else:
            _strategyname = 'trovesearch_nesterly'
    else:
        _strategyname = 'trove_indexcard_flats'
    return get_specific_index(_strategyname, for_search=True)
98 changes: 0 additions & 98 deletions share/search/index_strategy/_strategy_selection.py

This file was deleted.

36 changes: 16 additions & 20 deletions share/search/index_strategy/trovesearch_flattery.py
Original file line number Diff line number Diff line change
Expand Up @@ -595,23 +595,19 @@ def _valuesearch_iri_aggs(self):
if _specific_iris:
_terms_agg['include'] = _specific_iris
_terms_agg['size'] = len(_specific_iris)
return {'agg_valuesearch': {'terms': _terms_agg}}
return {'agg_valuesearch_iris': {'terms': _terms_agg}}

def _valuesearch_date_aggs(self):
_propertypath = self.params.valuesearch_propertypath
_field = f'date_by_propertypath.{propertypath_as_field_name(_propertypath)}'
_aggs = {
'agg_value_at_propertypath': {
'aggs': {
'agg_count_by_year': {
'date_histogram': {
'field': _field,
'calendar_interval': 'year',
'format': 'yyyy',
'order': {'_key': 'desc'},
'min_doc_count': 1,
},
},
'agg_valuesearch_dates': {
'date_histogram': {
'field': _field,
'calendar_interval': 'year',
'format': 'yyyy',
'order': {'_key': 'desc'},
'min_doc_count': 1,
},
},
}
Expand All @@ -626,9 +622,9 @@ def _valuesearch_response(
es8_response: dict,
cursor: '_SimpleCursor',
) -> ValuesearchResponse:
_iri_aggs = es8_response['aggregations'].get('in_nested_iri')
_iri_aggs = es8_response['aggregations'].get('agg_valuesearch_iris')
if _iri_aggs:
_buckets = _iri_aggs['agg_value_at_propertypath']['agg_iri_values']['buckets']
_buckets = _iri_aggs['buckets']
_bucket_count = len(_buckets)
# WARNING: terribly inefficient pagination (part two)
_page_end_index = cursor.start_index + cursor.page_size
Expand All @@ -650,8 +646,7 @@ def _valuesearch_response(
else: # assume date
_year_buckets = (
es8_response['aggregations']
['agg_value_at_propertypath']
['agg_count_by_year']
['agg_valuesearch_dates']
['buckets']
)
return ValuesearchResponse(
Expand All @@ -664,10 +659,11 @@ def _valuesearch_response(
def _valuesearch_iri_result(self, iri_bucket) -> ValuesearchResult:
    '''Build a `ValuesearchResult` from one bucket of the iri terms aggregation.

    `key` and `doc_count` are read directly from the bucket; the type and
    text entries are read with a default of `[]`, so a bucket without them
    does not raise `KeyError`.
    '''
    return ValuesearchResult(
        value_iri=iri_bucket['key'],
        # TODO: get type and text somehow
        value_type=_bucketlist(iri_bucket.get('type_iri', [])),
        name_text=_bucketlist(iri_bucket.get('name_text', [])),
        title_text=_bucketlist(iri_bucket.get('title_text', [])),
        label_text=_bucketlist(iri_bucket.get('label_text', [])),
        match_count=iri_bucket['doc_count'],
    )

Expand Down
2 changes: 1 addition & 1 deletion tests/share/bin/test_sharectl.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def test_purge(self, indexnames):
def _get_specific_index(indexname):
return mock_specific_indexes[indexname]

with mock.patch('share.bin.search.IndexStrategy.get_specific_index', wraps=_get_specific_index) as mock_get_specific:
with mock.patch('share.bin.search.index_strategy.get_specific_index', wraps=_get_specific_index) as mock_get_specific:
run_sharectl('search', 'purge', *indexnames)
assert mock_get_specific.mock_calls == [
mock.call(indexname)
Expand Down
9 changes: 6 additions & 3 deletions tests/share/search/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,12 @@
@contextlib.contextmanager
def patch_index_strategies(strategies: dict[str, index_strategy.IndexStrategy]):
    '''Context manager that makes `index_strategy.all_index_strategies()` return `strategies`.

    While the context is active, `index_strategy.all_index_strategies` is
    replaced by a mock whose return value is the given mapping. The cache of
    the real function is cleared before patching and again on exit (even when
    the wrapped code raises).
    '''
    index_strategy.all_index_strategies.cache_clear()
    try:
        with mock.patch.object(
            index_strategy,
            'all_index_strategies',
            return_value=strategies,
        ):
            yield
    finally:
        # the patch is already undone here, so this clears the real function's cache
        index_strategy.all_index_strategies.cache_clear()

0 comments on commit 1a1e9b7

Please sign in to comment.