From 6cba85052c808a058af7773ac0a853170b9ccbeb Mon Sep 17 00:00:00 2001 From: Till Prochaska <1512805+tillprochaska@users.noreply.github.com> Date: Fri, 16 Aug 2024 16:04:26 +0200 Subject: [PATCH] Implement search using Xapian --- .env.template | 1 - .github/workflows/backend.yml | 10 +- backend/Dockerfile | 9 +- backend/howtheyvote/api/query.py | 183 +++++++++++---- backend/howtheyvote/cli/system.py | 12 +- backend/howtheyvote/config.py | 5 +- backend/howtheyvote/data/stopwords.txt | 312 +++++++++++++++++++++++++ backend/howtheyvote/meili.py | 88 ------- backend/howtheyvote/search.py | 88 +++++++ backend/howtheyvote/store/index.py | 113 +++++---- backend/poetry.lock | 181 +------------- backend/pyproject.toml | 1 - backend/tests/api/test_query.py | 2 +- backend/tests/api/test_votes_api.py | 4 +- backend/tests/conftest.py | 9 +- docker-compose.override.yml | 6 - docker-compose.yml | 19 +- storage/index/.gitignore | 2 + 18 files changed, 635 insertions(+), 410 deletions(-) create mode 100644 backend/howtheyvote/data/stopwords.txt delete mode 100644 backend/howtheyvote/meili.py create mode 100644 backend/howtheyvote/search.py create mode 100644 storage/index/.gitignore diff --git a/.env.template b/.env.template index dc7184336..52715afc2 100644 --- a/.env.template +++ b/.env.template @@ -1,4 +1,3 @@ HTV_BACKEND_PUBLIC_URL=https://localhost/api HTV_FRONTEND_PUBLIC_URL=https://localhost CADDY_SITE_ADDRESS=localhost -MEILI_MASTER_KEY= diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 1db84a4c8..3e9a31e54 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -13,13 +13,6 @@ jobs: run: working-directory: ./backend - services: - meilisearch: - image: "getmeili/meilisearch:v1.3.1" - ports: ["7700:7700"] - env: - MEILI_MASTER_KEY: "1234567890" - steps: - name: Checkout repo uses: actions/checkout@v4 @@ -51,5 +44,4 @@ jobs: env: HTV_BACKEND_DATABASE_URI: "sqlite:///${{ github.workspace }}/storage/database/database.sqlite3" HTV_BACKEND_USERS_DATABASE_URI: "sqlite:///${{ github.workspace }}/storage/database/users.sqlite3" - MEILI_MASTER_KEY: "1234567890" - MEILI_URL: "http://localhost:7700" + HTV_SEARCH_INDEX_DIR: "${{ github.workspace }}/storage/index" diff --git a/backend/Dockerfile b/backend/Dockerfile index 74b49ad70..900b4a624 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.12-alpine3.19 +FROM python:3.12-alpine3.20 RUN apk --update add \ build-base \ @@ -13,7 +13,9 @@ RUN apk --update add \ make \ cargo \ sqlite \ - tmux + tmux \ + xapian-core \ + xapian-bindings-python3 RUN pip install poetry @@ -26,6 +28,9 @@ COPY poetry.lock poetry.lock RUN poetry env use python3.12 RUN poetry install +# Make Python packages installed via apk (e.g. xapian) available in venv +RUN echo "/usr/lib/python3.12/site-packages" > .venv/lib/python3.12/site-packages/system.pth + COPY . . # Install again in order to make the `htv` CLI script available diff --git a/backend/howtheyvote/api/query.py b/backend/howtheyvote/api/query.py index ed6387b2e..386d8312d 100644 --- a/backend/howtheyvote/api/query.py +++ b/backend/howtheyvote/api/query.py @@ -1,14 +1,34 @@ import copy +import datetime import enum from abc import ABC, abstractmethod from typing import Any, Generic, Self, TypedDict, TypeVar from sqlalchemy import desc, func, select from sqlalchemy.sql import ColumnElement +from xapian import ( + BM25Weight, + Database, + Enquire, + QueryParser, + ValuePostingSource, + ValueWeightPostingSource, + Weight, + sortable_unserialise, +) +from xapian import ( + Query as XapianQuery, +) from ..db import Session -from ..meili import get_index from ..models import BaseWithId +from ..search import ( + FIELD_TO_SLOT_MAPPING, + SLOT_IS_FEATURED, + SLOT_TIMESTAMP, + get_index, + get_stopper, +) T = TypeVar("T", bound=BaseWithId) @@ -168,64 +188,63 @@ def where(self, expression: ColumnElement[Any]) -> Self: return query -class MeilisearchSearchParams(TypedDict): - limit: int - offset: int - attributesToRetrieve: list[str] - filter: list[str] - sort: list[str] +class ValueDecayWeightPostingSource(ValuePostingSource): + # https://getting-started-with-xapian.readthedocs.io/en/latest/advanced/postingsource.html + + def set_max_diff(self, max_diff: float | int) -> None: + self.max_diff = max_diff + + def set_origin(self, origin: float | int) -> None: + self.origin = origin + + def get_weight(self) -> int: + value = sortable_unserialise(self.get_value()) + diff = self.origin - value + weight = 1 - min(1, diff / self.max_diff) + + return weight class SearchQuery(Query[T]): + BOOST_FEATURED = 0.075 + BOOST_PHRASE = 0.1 + BOOST_AGE = 0.25 + AGE_DECAY_DAYS = 365 + def __init__(self, model: type[T]): super().__init__(model) self._query: str | None = None def handle(self) -> QueryResponse[T]: - index = get_index(self.model) page = self.get_page() page_size = self.get_page_size() limit = self.get_limit() offset = self.get_offset() - params: MeilisearchSearchParams = { - # In order to determine if there is a next page, we fetch one additional - # result from the search index. - "limit": limit + 1, - "offset": offset, - # Retrieve only IDs from search index as everything else is fetched - # from the database - "attributesToRetrieve": ["id"], - "sort": [], - "filter": [], - } + with get_index(self.model) as index: + query = self._xapian_query(index) + enquire = Enquire(index) + enquire.set_query(query) + enquire.set_weighting_scheme(self._xapian_weight()) - sort = self.get_sort() - q = self.get_query() - - if sort or not q: - # Apply default sorting only if none is specified explicitly and - # no search query is given - if not sort: - sort_field = self.DEFAULT_SORT_FIELD - sort_order = self.DEFAULT_SORT_ORDER + if self.get_sort(): + field, order = self.get_sort() + slot = FIELD_TO_SLOT_MAPPING.get(field) + reverse = order == Order.DESC else: - sort_field, sort_order = sort - - params["sort"] = [f"{sort_field}:{sort_order.value}"] - - for field, value in self.get_filters().items(): - if isinstance(value, bool): - # Meilisearch represents booleans as integers - value = int(value) + slot = None - params["filter"].append(f"{field} = {value}") + if slot is not None: + enquire.set_sort_by_value(slot, reverse) + else: + enquire.set_sort_by_relevance_then_value(SLOT_TIMESTAMP, False) - res = index.search(q, params) + # Fetch one extra result to check if there is a next page + mset = enquire.get_mset(offset, limit + 1) # Based on the IDs fetched from the search index, fetch full records # from the database - ids = [int(hit["id"]) for hit in res["hits"]] + ids = [int(match.docid) for match in mset] # Remove the extra item fetched only to test if there is a next page ids = ids[:limit] @@ -237,11 +256,11 @@ def handle(self) -> QueryResponse[T]: results = sorted(results, key=lambda r: ids.index(int(r.id))) response: QueryResponse[T] = { - "total": res["estimatedTotalHits"], + "total": mset.get_matches_estimated(), "page": page, "page_size": page_size, "has_prev": page > 1, - "has_next": len(res["hits"]) > limit, + "has_next": mset.size() > limit, "results": results, } @@ -254,3 +273,85 @@ def query(self, query: str | None = None) -> Self: def get_query(self) -> str: return self._query or "" + + def _xapian_query_parser(self, index: Database) -> QueryParser: + parser = QueryParser() + parser.set_stopper(get_stopper()) + parser.set_database(index) + + return parser + + def _xapian_query(self, index: Database) -> XapianQuery: + parser = self._xapian_query_parser(index) + query = parser.parse_query(self.get_query()) + + if query.empty(): + query = XapianQuery.MatchAll + else: + query = XapianQuery( + XapianQuery.OP_AND_MAYBE, + query, + self._xapian_featured_subquery(), + ) + + query = XapianQuery( + XapianQuery.OP_AND_MAYBE, + query, + self._xapian_age_subquery(), + ) + + query = XapianQuery( + XapianQuery.OP_AND_MAYBE, + query, + self._xapian_phrase_subquery(index), + ) + + return query + + def _xapian_phrase_subquery(self, index: Database) -> XapianQuery: + # This is a phrase subquery, i.e. it matches documents that contain the terms of the + # search query in the original order. It's used to boost phrase matches even if + # a user hasn't explicitly specified a phrase query. + parser = self._xapian_query_parser(index) + parser.set_default_op(XapianQuery.OP_PHRASE) + query = parser.parse_query(self.get_query()) + + return XapianQuery( + XapianQuery.OP_SCALE_WEIGHT, + query, + self.BOOST_PHRASE, + ) + + def _xapian_featured_subquery(self) -> XapianQuery: + # This subquery matches documents that are featured. + return XapianQuery( + XapianQuery.OP_SCALE_WEIGHT, + XapianQuery(ValueWeightPostingSource(SLOT_IS_FEATURED)), + self.BOOST_FEATURED, + ) + + def _xapian_age_subquery(self) -> XapianQuery: + # This subquery assigns a decreasing weight based on age, i.e. documents + # that are newer get a higher weight. + now = datetime.datetime.now().timestamp() + max_diff = datetime.timedelta(days=self.AGE_DECAY_DAYS).total_seconds() + + age_source = ValueDecayWeightPostingSource(SLOT_TIMESTAMP) + age_source.set_max_diff(max_diff) + age_source.set_origin(now) + + return XapianQuery( + XapianQuery.OP_SCALE_WEIGHT, + XapianQuery(age_source), + self.BOOST_AGE, + ) + + def _xapian_weight(self) -> Weight: + # https://xapian.org/docs/apidoc/html/classXapian_1_1BM25Weight.html + k1 = 0 + k2 = 0 + k3 = 1 + b = 0 + min_normlen = 0.5 + + return BM25Weight(k1, k2, k3, b, min_normlen) diff --git a/backend/howtheyvote/cli/system.py b/backend/howtheyvote/cli/system.py index 0e192abc8..1edc60644 100644 --- a/backend/howtheyvote/cli/system.py +++ b/backend/howtheyvote/cli/system.py @@ -1,8 +1,7 @@ import click from ..db import migrate as _migrate -from ..meili import configure_indexes as _configure_indexes -from ..meili import delete_indexes as _delete_indexes +from ..search import delete_indexes as _delete_indexes @click.group() @@ -11,15 +10,9 @@ def system() -> None: pass -@system.command() -def configure_indexes() -> None: - """Configure Meilisearch indexes.""" - _configure_indexes() - - @system.command() def delete_indexes() -> None: - """Delete Meilisearch indexes.""" + """Delete search indexes.""" _delete_indexes() @@ -32,5 +25,4 @@ def migrate() -> None: @system.command() def upgrade() -> None: """Equivalent of running the `migrate` and `configure-indexes` subcommands.""" - _configure_indexes() _migrate() diff --git a/backend/howtheyvote/config.py b/backend/howtheyvote/config.py index 91d7d2a64..9a0b2a236 100644 --- a/backend/howtheyvote/config.py +++ b/backend/howtheyvote/config.py @@ -16,10 +16,6 @@ # File storage FILES_DIR = env.get("HTV_BACKEND_FILES_DIR", "/howtheyvote/files") -# Meilisearch -MEILI_URL = env.get("MEILI_URL") -MEILI_MASTER_KEY = env.get("MEILI_MASTER_KEY") - # Request configuration REQUEST_TIMEOUT = 10 REQUEST_SLEEP = 0.25 @@ -29,3 +25,4 @@ TIMEZONE = "Europe/Brussels" WORKER_PROMETHEUS_PORT = 3000 SEARCH_INDEX_PREFIX = env.get("HTV_SEARCH_INDEX_PREFIX", None) +SEARCH_INDEX_DIR = env.get("HTV_SEARCH_INDEX_DIR", "/howtheyvote/index") diff --git a/backend/howtheyvote/data/stopwords.txt b/backend/howtheyvote/data/stopwords.txt new file mode 100644 index 000000000..aee35c52d --- /dev/null +++ b/backend/howtheyvote/data/stopwords.txt @@ -0,0 +1,312 @@ + + | An English stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | Many of the forms below are quite rare (e.g. "yourselves") but included for + | completeness. + + | PRONOUNS FORMS + | 1st person sing + +i | subject, always in upper case of course + +me | object +my | possessive adjective + | the possessive pronoun `mine' is best suppressed, because of the + | sense of coal-mine etc. +myself | reflexive + | 1st person plural +we | subject + +| us | object + | care is required here because US = United States. It is usually + | safe to remove it if it is in lower case. +our | possessive adjective +ours | possessive pronoun +ourselves | reflexive + | second person (archaic `thou' forms not included) +you | subject and object +your | possessive adjective +yours | possessive pronoun +yourself | reflexive (singular) +yourselves | reflexive (plural) + | third person singular +he | subject +him | object +his | possessive adjective and pronoun +himself | reflexive + +she | subject +her | object and possessive adjective +hers | possessive pronoun +herself | reflexive + +it | subject and object +its | possessive adjective +itself | reflexive + | third person plural +they | subject +them | object +their | possessive adjective +theirs | possessive pronoun +themselves | reflexive + | other forms (demonstratives, interrogatives) +what +which +who +whom +this +that +these +those + + | VERB FORMS (using F.R. Palmer's nomenclature) + | BE +am | 1st person, present +is | -s form (3rd person, present) +are | present +was | 1st person, past +were | past +be | infinitive +been | past participle +being | -ing form + | HAVE +have | simple +has | -s form +had | past +having | -ing form + | DO +do | simple +does | -s form +did | past +doing | -ing form + + | The forms below are, I believe, best omitted, because of the significant + | homonym forms: + + | He made a WILL + | old tin CAN + | merry month of MAY + | a smell of MUST + | fight the good fight with all thy MIGHT + + | would, could, should, ought might however be included + + | | AUXILIARIES + | | WILL + |will + +would + + | | SHALL + |shall + +should + + | | CAN + |can + +could + + | | MAY + |may + |might + | | MUST + |must + | | OUGHT + +ought + + | COMPOUND FORMS, increasingly encountered nowadays in 'formal' writing + | pronoun + verb + +i'm +you're +he's +she's +it's +we're +they're +i've +you've +we've +they've +i'd +you'd +he'd +she'd +we'd +they'd +i'll +you'll +he'll +she'll +we'll +they'll + + | verb + negation + +isn't +aren't +wasn't +weren't +hasn't +haven't +hadn't +doesn't +don't +didn't + + | auxiliary + negation + +won't +wouldn't +shan't +shouldn't +can't +cannot +couldn't +mustn't + + | miscellaneous forms + +let's +that's +who's +what's +here's +there's +when's +where's +why's +how's + + | rarer forms + + | daren't needn't + + | doubtful forms + + | oughtn't mightn't + + | ARTICLES +a +an +the + + | THE REST (Overlap among prepositions, conjunctions, adverbs etc is so + | high, that classification is pointless.) +and +but +if +or +because +as +until +while + +of +at +by +for +with +about +against +between +into +through +during +before +after +above +below +to +from +up +down +in +out +on +off +over +under + +again +further +then +once + +here +there +when +where +why +how + +all +any +both +each +few +more +most +other +some +such + +no +nor +not +only +own +same +so +than +too +very + + | Just for the record, the following words are among the commonest in English + + | one + | every + | least + | less + | many + | now + | ever + | never + | say + | says + | said + | also + | get + | go + | goes + | just + | made + | make + | put + | see + | seen + | whether + | like + | well + | back + | even + | still + | way + | take + | since + | another + | however + | two + | three + | four + | five + | first + | second + | new + | old + | high + | long + diff --git a/backend/howtheyvote/meili.py b/backend/howtheyvote/meili.py deleted file mode 100644 index 22047bdb7..000000000 --- a/backend/howtheyvote/meili.py +++ /dev/null @@ -1,88 +0,0 @@ -import meilisearch -from meilisearch.index import Index -from structlog import get_logger - -from . import config -from .models import BaseWithId, Vote - -log = get_logger(__name__) - -meili = meilisearch.Client(config.MEILI_URL or "", config.MEILI_MASTER_KEY) - - -def index_name(name: str) -> str: - prefix = config.SEARCH_INDEX_PREFIX - - if not prefix: - return name - - return f"{prefix}-{name}" - - -def get_index(model_cls: type[BaseWithId]) -> Index: - name = index_name(model_cls.__table__.name) # type: ignore - - if model_cls != Vote: - raise ValueError(f'Cannot get index "{name}" for model "{model_cls.__name__}"') - - return meili.index(name) - - -votes_index = get_index(Vote) - - -def configure_indexes() -> None: - uid = votes_index.uid - - """Configure Meilisearch indexes.""" - log.info("Creating index.", uid=uid) - meili.create_index(uid, {"primaryKey": "id"}) - - log.info("Updating index settings.", uid=uid) - votes_index.update_displayed_attributes( - [ - "id", - "display_title", - "timestamp", - "reference", - "procedure_reference", - "description", - "is_featured", - "geo_areas", - ] - ) - votes_index.update_searchable_attributes( - [ - "display_title", - "reference", - "procedure_reference", - "geo_areas", - "keywords", - ] - ) - votes_index.update_sortable_attributes(["id", "timestamp"]) - votes_index.update_filterable_attributes(["is_featured"]) - votes_index.update_typo_tolerance( - { - "disableOnAttributes": ["reference", "procedure_reference"], - } - ) - votes_index.update_ranking_rules( - [ - "words", - "typo", - "proximity", - "attribute", - "sort", - "exactness", - # Promote featured and recent votes by default - "timestamp:desc", - "is_featured:desc", - ] - ) - - -def delete_indexes() -> None: - """Delete all Meilisearch indexes.""" - log.info("Deleting index", uid=votes_index.uid) - votes_index.delete() diff --git a/backend/howtheyvote/search.py b/backend/howtheyvote/search.py new file mode 100644 index 000000000..a3135d839 --- /dev/null +++ b/backend/howtheyvote/search.py @@ -0,0 +1,88 @@ +import enum +import pathlib +import shutil +from collections.abc import Iterator +from contextlib import contextmanager +from typing import Literal, overload + +from structlog import get_logger +from xapian import DB_CREATE_OR_OPEN, Database, WritableDatabase, SimpleStopper + +from . import config +from .data import DATA_DIR +from .models import BaseWithId + +log = get_logger(__name__) + + +class AccessType(enum.Enum): + READ = "READ" + WRITE = "WRITE" + + +PREFIX_REFERENCE = "XDR" +PREFIX_PROCEDURE_REFERENCE = "XPR" + +SLOT_TIMESTAMP = 0 +SLOT_IS_FEATURED = 1 + +FIELD_TO_SLOT_MAPPING = { + "timestamp": SLOT_TIMESTAMP, + "is_featured": SLOT_IS_FEATURED, +} + +BOOST_DISPLAY_TITLE = 15 +BOOST_EUROVOC_CONCEPTS = 2 +BOOST_GEO_AREAS = 2 + + +@overload +@contextmanager +def get_index( + model_cls: type[BaseWithId], + access_type: Literal[AccessType.READ], +) -> Iterator[Database]: ... + + +@overload +@contextmanager +def get_index( + model_cls: type[BaseWithId], + access_type: Literal[AccessType.WRITE], +) -> Iterator[WritableDatabase]: ... + + +@contextmanager +def get_index( + model_cls: type[BaseWithId], + access_type: AccessType = AccessType.READ, +) -> Iterator[Database | WritableDatabase]: + name = model_cls.__table__.name # type: ignore + path = str(pathlib.Path(config.SEARCH_INDEX_DIR).joinpath(name)) + + if access_type == AccessType.WRITE: + index = WritableDatabase(path, DB_CREATE_OR_OPEN) + else: + index = Database(path) + + try: + yield index + finally: + index.close() + + +def get_stopper() -> SimpleStopper: + path = DATA_DIR.joinpath("stopwords.txt") + return SimpleStopper(str(path)) + + +def delete_indexes() -> None: + """Delete all search indexes.""" + root = pathlib.Path(config.SEARCH_INDEX_DIR) + + for path in root.iterdir(): + if not path.is_dir(): + continue + + log.info("Deleting index", path=path.name) + shutil.rmtree(path) diff --git a/backend/howtheyvote/store/index.py b/backend/howtheyvote/store/index.py index e39f48380..2c852fe2e 100644 --- a/backend/howtheyvote/store/index.py +++ b/backend/howtheyvote/store/index.py @@ -1,13 +1,25 @@ from collections.abc import Iterable, Iterator -from typing import Literal, TypedDict, TypeVar, cast +from typing import TypeVar, cast from sqlalchemy.dialects.sqlite import insert from structlog import get_logger +from xapian import Document, TermGenerator, sortable_serialise from ..db import Session from ..helpers import chunks -from ..meili import votes_index from ..models import BaseWithId, Vote +from ..search import ( + BOOST_DISPLAY_TITLE, + BOOST_EUROVOC_CONCEPTS, + BOOST_GEO_AREAS, + PREFIX_PROCEDURE_REFERENCE, + PREFIX_REFERENCE, + SLOT_IS_FEATURED, + SLOT_TIMESTAMP, + AccessType, + get_index, + get_stopper, +) log = get_logger(__name__) @@ -64,60 +76,75 @@ def index_db(model_cls: type[RecordType], records: Iterable[RecordType]) -> None def index_search( model_cls: type[RecordType], records: Iterable[RecordType], - sync: bool = False, ) -> None: - # At the moment, only votes are indexed in Meilisearch + # At the moment, only votes are searchable if model_cls != Vote: return votes = cast(Iterable[Vote], records) - formatted_records = [_serialize_vote(vote) for vote in votes if vote.is_main] + filtered_votes = [vote for vote in votes if vote.is_main and vote.display_title] - if not len(formatted_records): + if not filtered_votes: log.warning("Skipping indexing to search index as list of records is empty") return - log.info("Writing aggregated records to search index", count=len(formatted_records)) + log.info("Indexing aggregated records", count=len(filtered_votes)) - # `Index.add_documents` requires `list[dict[str, any]]` which is incompatible with - # the `SerializedVote` typed dict. See https://github.com/python/mypy/issues/4976 - documents = [dict(td) for td in formatted_records] - task = votes_index.add_documents(documents) + with get_index(Vote, AccessType.WRITE) as index: + generator = TermGenerator() + generator.set_database(index) + generator.set_stopper(get_stopper()) + generator.set_stopper_strategy(TermGenerator.STOP_ALL) + generator.set_flags(TermGenerator.FLAG_SPELLING) - if sync: - # This is primarily used in tests - votes_index.wait_for_task(task.task_uid) + for vote in filtered_votes: + doc = _serialize_vote(vote, generator) + index.replace_document(int(vote.id), doc) -class SerializedVote(TypedDict): - id: int - timestamp: float - display_title: str | None - reference: str | None - procedure_reference: str | None - description: str | None - is_featured: Literal[0, 1] - geo_areas: list[str] - keywords: list[str] +def _serialize_vote(vote: Vote, generator: TermGenerator) -> Document: + doc = Document() + generator.set_document(doc) + generator.index_text(vote.display_title, BOOST_DISPLAY_TITLE) + generator.increase_termpos() -def _serialize_vote(vote: Vote) -> SerializedVote: - keywords = set() - + # Index EuroVoc concepts for full-text search for concept in vote.eurovoc_concepts: - keywords.add(concept.label) - keywords.update(concept.alt_labels) - keywords.update(bc.label for bc in concept.broader) - - return { - "id": vote.id, - # Meilisearch requires dates to be indexed as a numeric timestamp - "timestamp": vote.timestamp.timestamp(), - "display_title": vote.display_title, - "reference": vote.reference, - "procedure_reference": vote.procedure_reference, - "description": vote.description, - "is_featured": 1 if vote.is_featured else 0, - "geo_areas": [country.label for country in vote.geo_areas], - "keywords": list(keywords), - } + for term in set([concept.label, *concept.alt_labels]): + generator.index_text(term, BOOST_EUROVOC_CONCEPTS) + generator.increase_termpos() + + for broader in concept.broader: + for term in set([broader.label, *broader.alt_labels]): + # Index broader concepts, too, but do not boost them as this may + # cause too many high-ranking false positives + generator.index_text(term) + generator.increase_termpos() + + # Index geographic areas for full-text search + for geo_area in vote.geo_areas: + generator.index_text(geo_area.label, BOOST_GEO_AREAS) + generator.increase_termpos() + + # Index rapporteur name + if vote.rapporteur: + generator.index_text(vote.rapporteur) + + # Store timestamp and is_featured as sortable values for ranking + timestamp = sortable_serialise(vote.timestamp.timestamp()) + doc.add_value(SLOT_TIMESTAMP, timestamp) + + is_featured = sortable_serialise(int(vote.is_featured)) + doc.add_value(SLOT_IS_FEATURED, is_featured) + + # Store document and procedure references as boolean terms for filtering + if vote.reference: + term = f"{PREFIX_REFERENCE}{vote.reference.lower()}" + doc.add_boolean_term(term) + + if vote.procedure_reference: + term = f"{PREFIX_PROCEDURE_REFERENCE}{vote.procedure_reference.lower()}" + doc.add_boolean_term(term) + + return doc diff --git a/backend/poetry.lock b/backend/poetry.lock index 11ac166fb..2f7ddf98c 100644 --- a/backend/poetry.lock +++ b/backend/poetry.lock @@ -19,17 +19,6 @@ typing-extensions = ">=4" [package.extras] tz = ["backports.zoneinfo"] -[[package]] -name = "annotated-types" -version = "0.6.0" -description = "Reusable constraint types to use with typing.Annotated" -optional = false -python-versions = ">=3.8" -files = [ - {file = "annotated_types-0.6.0-py3-none-any.whl", hash = "sha256:0641064de18ba7a25dee8f96403ebc39113d0cb953a01429249d5c7564666a43"}, - {file = "annotated_types-0.6.0.tar.gz", hash = "sha256:563339e807e53ffd9c267e99fc6d9ea23eb8443c08f112651963e24e22f84a5d"}, -] - [[package]] name = "apispec" version = "6.6.1" @@ -113,23 +102,6 @@ files = [ {file = "cachetools-5.4.0.tar.gz", hash = "sha256:b8adc2e7c07f105ced7bc56dbb6dfbe7c4a00acce20e2227b3f355be89bc6827"}, ] -[[package]] -name = "camel-converter" -version = "3.1.1" -description = "Converts a string from snake case to camel case or camel case to snake case" -optional = false -python-versions = ">=3.8,<4.0" -files = [ - {file = "camel_converter-3.1.1-py3-none-any.whl", hash = "sha256:f9f4ad446e46cc317d612a435b653bcb8c3572f1fb2252b0620c5e4fd3b50ebf"}, - {file = "camel_converter-3.1.1.tar.gz", hash = "sha256:73c1e31801d0f7baf08fe2a44e6a712e685496e490cab3cd9ce7222845502ef7"}, -] - -[package.dependencies] -pydantic = {version = ">=1.8.2", optional = true, markers = "extra == \"pydantic\""} - -[package.extras] -pydantic = ["pydantic (>=1.8.2)"] - [[package]] name = "certifi" version = "2024.7.4" @@ -759,21 +731,6 @@ files = [ {file = "MarkupSafe-2.1.3.tar.gz", hash = "sha256:af598ed32d6ae86f1b747b82783958b1a4ab8f617b06fe68795c7f026abbdcad"}, ] -[[package]] -name = "meilisearch" -version = "0.31.4" -description = "The python client for Meilisearch API." -optional = false -python-versions = ">=3.8" -files = [ - {file = "meilisearch-0.31.4-py3-none-any.whl", hash = "sha256:a36f345334689e1880a72f22babadd49a4b583f260a47fe7794b4030760ea759"}, - {file = "meilisearch-0.31.4.tar.gz", hash = "sha256:c4d578640d073e2d6106b4d653f55d698d2520b51db57e3ce7375707e15c86d1"}, -] - -[package.dependencies] -camel-converter = {version = "*", extras = ["pydantic"]} -requests = "*" - [[package]] name = "mypy" version = "1.11.1" @@ -968,142 +925,6 @@ files = [ [package.extras] twisted = ["twisted"] -[[package]] -name = "pydantic" -version = "2.5.2" -description = "Data validation using Python type hints" -optional = false -python-versions = ">=3.7" -files = [ - {file = "pydantic-2.5.2-py3-none-any.whl", hash = "sha256:80c50fb8e3dcecfddae1adbcc00ec5822918490c99ab31f6cf6140ca1c1429f0"}, - {file = "pydantic-2.5.2.tar.gz", hash = "sha256:ff177ba64c6faf73d7afa2e8cad38fd456c0dbe01c9954e71038001cd15a6edd"}, -] - -[package.dependencies] -annotated-types = ">=0.4.0" -pydantic-core = "2.14.5" -typing-extensions = ">=4.6.1" - -[package.extras] -email = ["email-validator (>=2.0.0)"] - -[[package]] -name = "pydantic-core" -version = "2.14.5" -description = "" -optional = false -python-versions = ">=3.7" -files = [ - {file = "pydantic_core-2.14.5-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:7e88f5696153dc516ba6e79f82cc4747e87027205f0e02390c21f7cb3bd8abfd"}, - {file = "pydantic_core-2.14.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4641e8ad4efb697f38a9b64ca0523b557c7931c5f84e0fd377a9a3b05121f0de"}, - {file = "pydantic_core-2.14.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:774de879d212db5ce02dfbf5b0da9a0ea386aeba12b0b95674a4ce0593df3d07"}, - {file = "pydantic_core-2.14.5-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ebb4e035e28f49b6f1a7032920bb9a0c064aedbbabe52c543343d39341a5b2a3"}, - {file = "pydantic_core-2.14.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b53e9ad053cd064f7e473a5f29b37fc4cc9dc6d35f341e6afc0155ea257fc911"}, - {file = "pydantic_core-2.14.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8aa1768c151cf562a9992462239dfc356b3d1037cc5a3ac829bb7f3bda7cc1f9"}, - {file = "pydantic_core-2.14.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eac5c82fc632c599f4639a5886f96867ffced74458c7db61bc9a66ccb8ee3113"}, - {file = "pydantic_core-2.14.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d2ae91f50ccc5810b2f1b6b858257c9ad2e08da70bf890dee02de1775a387c66"}, - {file = "pydantic_core-2.14.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:6b9ff467ffbab9110e80e8c8de3bcfce8e8b0fd5661ac44a09ae5901668ba997"}, - {file = "pydantic_core-2.14.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:61ea96a78378e3bd5a0be99b0e5ed00057b71f66115f5404d0dae4819f495093"}, - {file = "pydantic_core-2.14.5-cp310-none-win32.whl", hash = "sha256:bb4c2eda937a5e74c38a41b33d8c77220380a388d689bcdb9b187cf6224c9720"}, - {file = "pydantic_core-2.14.5-cp310-none-win_amd64.whl", hash = "sha256:b7851992faf25eac90bfcb7bfd19e1f5ffa00afd57daec8a0042e63c74a4551b"}, - {file = "pydantic_core-2.14.5-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:4e40f2bd0d57dac3feb3a3aed50f17d83436c9e6b09b16af271b6230a2915459"}, - {file = "pydantic_core-2.14.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ab1cdb0f14dc161ebc268c09db04d2c9e6f70027f3b42446fa11c153521c0e88"}, - {file = "pydantic_core-2.14.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aae7ea3a1c5bb40c93cad361b3e869b180ac174656120c42b9fadebf685d121b"}, - {file = "pydantic_core-2.14.5-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:60b7607753ba62cf0739177913b858140f11b8af72f22860c28eabb2f0a61937"}, - {file = "pydantic_core-2.14.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2248485b0322c75aee7565d95ad0e16f1c67403a470d02f94da7344184be770f"}, - {file = "pydantic_core-2.14.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:823fcc638f67035137a5cd3f1584a4542d35a951c3cc68c6ead1df7dac825c26"}, - {file = "pydantic_core-2.14.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:96581cfefa9123accc465a5fd0cc833ac4d75d55cc30b633b402e00e7ced00a6"}, - {file = "pydantic_core-2.14.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a33324437018bf6ba1bb0f921788788641439e0ed654b233285b9c69704c27b4"}, - {file = "pydantic_core-2.14.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:9bd18fee0923ca10f9a3ff67d4851c9d3e22b7bc63d1eddc12f439f436f2aada"}, - {file = "pydantic_core-2.14.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:853a2295c00f1d4429db4c0fb9475958543ee80cfd310814b5c0ef502de24dda"}, - {file = "pydantic_core-2.14.5-cp311-none-win32.whl", hash = "sha256:cb774298da62aea5c80a89bd58c40205ab4c2abf4834453b5de207d59d2e1651"}, - {file = "pydantic_core-2.14.5-cp311-none-win_amd64.whl", hash = "sha256:e87fc540c6cac7f29ede02e0f989d4233f88ad439c5cdee56f693cc9c1c78077"}, - {file = "pydantic_core-2.14.5-cp311-none-win_arm64.whl", hash = "sha256:57d52fa717ff445cb0a5ab5237db502e6be50809b43a596fb569630c665abddf"}, - {file = "pydantic_core-2.14.5-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:e60f112ac88db9261ad3a52032ea46388378034f3279c643499edb982536a093"}, - {file = "pydantic_core-2.14.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6e227c40c02fd873c2a73a98c1280c10315cbebe26734c196ef4514776120aeb"}, - {file = "pydantic_core-2.14.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f0cbc7fff06a90bbd875cc201f94ef0ee3929dfbd5c55a06674b60857b8b85ed"}, - {file = "pydantic_core-2.14.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:103ef8d5b58596a731b690112819501ba1db7a36f4ee99f7892c40da02c3e189"}, - {file = "pydantic_core-2.14.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c949f04ecad823f81b1ba94e7d189d9dfb81edbb94ed3f8acfce41e682e48cef"}, - {file = "pydantic_core-2.14.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c1452a1acdf914d194159439eb21e56b89aa903f2e1c65c60b9d874f9b950e5d"}, - {file = "pydantic_core-2.14.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cb4679d4c2b089e5ef89756bc73e1926745e995d76e11925e3e96a76d5fa51fc"}, - {file = "pydantic_core-2.14.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cf9d3fe53b1ee360e2421be95e62ca9b3296bf3f2fb2d3b83ca49ad3f925835e"}, - {file = "pydantic_core-2.14.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:70f4b4851dbb500129681d04cc955be2a90b2248d69273a787dda120d5cf1f69"}, - {file = "pydantic_core-2.14.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:59986de5710ad9613ff61dd9b02bdd2f615f1a7052304b79cc8fa2eb4e336d2d"}, - {file = "pydantic_core-2.14.5-cp312-none-win32.whl", hash = "sha256:699156034181e2ce106c89ddb4b6504c30db8caa86e0c30de47b3e0654543260"}, - {file = "pydantic_core-2.14.5-cp312-none-win_amd64.whl", hash = "sha256:5baab5455c7a538ac7e8bf1feec4278a66436197592a9bed538160a2e7d11e36"}, - {file = "pydantic_core-2.14.5-cp312-none-win_arm64.whl", hash = "sha256:e47e9a08bcc04d20975b6434cc50bf82665fbc751bcce739d04a3120428f3e27"}, - {file = "pydantic_core-2.14.5-cp37-cp37m-macosx_10_7_x86_64.whl", hash = "sha256:af36f36538418f3806048f3b242a1777e2540ff9efaa667c27da63d2749dbce0"}, - {file = "pydantic_core-2.14.5-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:45e95333b8418ded64745f14574aa9bfc212cb4fbeed7a687b0c6e53b5e188cd"}, - {file = "pydantic_core-2.14.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e47a76848f92529879ecfc417ff88a2806438f57be4a6a8bf2961e8f9ca9ec7"}, - {file = "pydantic_core-2.14.5-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d81e6987b27bc7d101c8597e1cd2bcaa2fee5e8e0f356735c7ed34368c471550"}, - {file = "pydantic_core-2.14.5-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:34708cc82c330e303f4ce87758828ef6e457681b58ce0e921b6e97937dd1e2a3"}, - {file = "pydantic_core-2.14.5-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:652c1988019752138b974c28f43751528116bcceadad85f33a258869e641d753"}, - {file = "pydantic_core-2.14.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e4d090e73e0725b2904fdbdd8d73b8802ddd691ef9254577b708d413bf3006e"}, - {file = "pydantic_core-2.14.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5c7d5b5005f177764e96bd584d7bf28d6e26e96f2a541fdddb934c486e36fd59"}, - {file = "pydantic_core-2.14.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:a71891847f0a73b1b9eb86d089baee301477abef45f7eaf303495cd1473613e4"}, - {file = "pydantic_core-2.14.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:a717aef6971208f0851a2420b075338e33083111d92041157bbe0e2713b37325"}, - {file = "pydantic_core-2.14.5-cp37-none-win32.whl", hash = "sha256:de790a3b5aa2124b8b78ae5faa033937a72da8efe74b9231698b5a1dd9be3405"}, - {file = "pydantic_core-2.14.5-cp37-none-win_amd64.whl", hash = "sha256:6c327e9cd849b564b234da821236e6bcbe4f359a42ee05050dc79d8ed2a91588"}, - {file = "pydantic_core-2.14.5-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:ef98ca7d5995a82f43ec0ab39c4caf6a9b994cb0b53648ff61716370eadc43cf"}, - {file = "pydantic_core-2.14.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c6eae413494a1c3f89055da7a5515f32e05ebc1a234c27674a6956755fb2236f"}, - {file = "pydantic_core-2.14.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dcf4e6d85614f7a4956c2de5a56531f44efb973d2fe4a444d7251df5d5c4dcfd"}, - {file = "pydantic_core-2.14.5-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6637560562134b0e17de333d18e69e312e0458ee4455bdad12c37100b7cad706"}, - {file = "pydantic_core-2.14.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:77fa384d8e118b3077cccfcaf91bf83c31fe4dc850b5e6ee3dc14dc3d61bdba1"}, - {file = "pydantic_core-2.14.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:16e29bad40bcf97aac682a58861249ca9dcc57c3f6be22f506501833ddb8939c"}, - {file = "pydantic_core-2.14.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:531f4b4252fac6ca476fbe0e6f60f16f5b65d3e6b583bc4d87645e4e5ddde331"}, - {file = "pydantic_core-2.14.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:074f3d86f081ce61414d2dc44901f4f83617329c6f3ab49d2bc6c96948b2c26b"}, - {file = "pydantic_core-2.14.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:c2adbe22ab4babbca99c75c5d07aaf74f43c3195384ec07ccbd2f9e3bddaecec"}, - {file = "pydantic_core-2.14.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:0f6116a558fd06d1b7c2902d1c4cf64a5bd49d67c3540e61eccca93f41418124"}, - {file = "pydantic_core-2.14.5-cp38-none-win32.whl", hash = "sha256:fe0a5a1025eb797752136ac8b4fa21aa891e3d74fd340f864ff982d649691867"}, - {file = "pydantic_core-2.14.5-cp38-none-win_amd64.whl", hash = "sha256:079206491c435b60778cf2b0ee5fd645e61ffd6e70c47806c9ed51fc75af078d"}, - {file = "pydantic_core-2.14.5-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:a6a16f4a527aae4f49c875da3cdc9508ac7eef26e7977952608610104244e1b7"}, - {file = "pydantic_core-2.14.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:abf058be9517dc877227ec3223f0300034bd0e9f53aebd63cf4456c8cb1e0863"}, - {file = "pydantic_core-2.14.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:49b08aae5013640a3bfa25a8eebbd95638ec3f4b2eaf6ed82cf0c7047133f03b"}, - {file = "pydantic_core-2.14.5-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c2d97e906b4ff36eb464d52a3bc7d720bd6261f64bc4bcdbcd2c557c02081ed2"}, - {file = "pydantic_core-2.14.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3128e0bbc8c091ec4375a1828d6118bc20404883169ac95ffa8d983b293611e6"}, - {file = "pydantic_core-2.14.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:88e74ab0cdd84ad0614e2750f903bb0d610cc8af2cc17f72c28163acfcf372a4"}, - {file = "pydantic_core-2.14.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c339dabd8ee15f8259ee0f202679b6324926e5bc9e9a40bf981ce77c038553db"}, - {file = "pydantic_core-2.14.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:3387277f1bf659caf1724e1afe8ee7dbc9952a82d90f858ebb931880216ea955"}, - {file = "pydantic_core-2.14.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ba6b6b3846cfc10fdb4c971980a954e49d447cd215ed5a77ec8190bc93dd7bc5"}, - {file = "pydantic_core-2.14.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ca61d858e4107ce5e1330a74724fe757fc7135190eb5ce5c9d0191729f033209"}, - {file = "pydantic_core-2.14.5-cp39-none-win32.whl", hash = "sha256:ec1e72d6412f7126eb7b2e3bfca42b15e6e389e1bc88ea0069d0cc1742f477c6"}, - {file = "pydantic_core-2.14.5-cp39-none-win_amd64.whl", hash = "sha256:c0b97ec434041827935044bbbe52b03d6018c2897349670ff8fe11ed24d1d4ab"}, - {file = "pydantic_core-2.14.5-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:79e0a2cdbdc7af3f4aee3210b1172ab53d7ddb6a2d8c24119b5706e622b346d0"}, - {file = "pydantic_core-2.14.5-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:678265f7b14e138d9a541ddabbe033012a2953315739f8cfa6d754cc8063e8ca"}, - {file = "pydantic_core-2.14.5-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:95b15e855ae44f0c6341ceb74df61b606e11f1087e87dcb7482377374aac6abe"}, - {file = "pydantic_core-2.14.5-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:09b0e985fbaf13e6b06a56d21694d12ebca6ce5414b9211edf6f17738d82b0f8"}, - {file = "pydantic_core-2.14.5-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:3ad873900297bb36e4b6b3f7029d88ff9829ecdc15d5cf20161775ce12306f8a"}, - {file = "pydantic_core-2.14.5-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:2d0ae0d8670164e10accbeb31d5ad45adb71292032d0fdb9079912907f0085f4"}, - {file = "pydantic_core-2.14.5-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:d37f8ec982ead9ba0a22a996129594938138a1503237b87318392a48882d50b7"}, - {file = "pydantic_core-2.14.5-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:35613015f0ba7e14c29ac6c2483a657ec740e5ac5758d993fdd5870b07a61d8b"}, - {file = "pydantic_core-2.14.5-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:ab4ea451082e684198636565224bbb179575efc1658c48281b2c866bfd4ddf04"}, - {file = "pydantic_core-2.14.5-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ce601907e99ea5b4adb807ded3570ea62186b17f88e271569144e8cca4409c7"}, - {file = "pydantic_core-2.14.5-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fb2ed8b3fe4bf4506d6dab3b93b83bbc22237e230cba03866d561c3577517d18"}, - {file = "pydantic_core-2.14.5-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:70f947628e074bb2526ba1b151cee10e4c3b9670af4dbb4d73bc8a89445916b5"}, - {file = "pydantic_core-2.14.5-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:4bc536201426451f06f044dfbf341c09f540b4ebdb9fd8d2c6164d733de5e634"}, - {file = "pydantic_core-2.14.5-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f4791cf0f8c3104ac668797d8c514afb3431bc3305f5638add0ba1a5a37e0d88"}, - {file = "pydantic_core-2.14.5-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:038c9f763e650712b899f983076ce783175397c848da04985658e7628cbe873b"}, - {file = "pydantic_core-2.14.5-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:27548e16c79702f1e03f5628589c6057c9ae17c95b4c449de3c66b589ead0520"}, - {file = "pydantic_core-2.14.5-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c97bee68898f3f4344eb02fec316db93d9700fb1e6a5b760ffa20d71d9a46ce3"}, - {file = "pydantic_core-2.14.5-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b9b759b77f5337b4ea024f03abc6464c9f35d9718de01cfe6bae9f2e139c397e"}, - {file = "pydantic_core-2.14.5-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:439c9afe34638ace43a49bf72d201e0ffc1a800295bed8420c2a9ca8d5e3dbb3"}, - {file = "pydantic_core-2.14.5-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:ba39688799094c75ea8a16a6b544eb57b5b0f3328697084f3f2790892510d144"}, - {file = "pydantic_core-2.14.5-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:ccd4d5702bb90b84df13bd491be8d900b92016c5a455b7e14630ad7449eb03f8"}, - {file = "pydantic_core-2.14.5-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:81982d78a45d1e5396819bbb4ece1fadfe5f079335dd28c4ab3427cd95389944"}, - {file = "pydantic_core-2.14.5-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:7f8210297b04e53bc3da35db08b7302a6a1f4889c79173af69b72ec9754796b8"}, - {file = "pydantic_core-2.14.5-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:8c8a8812fe6f43a3a5b054af6ac2d7b8605c7bcab2804a8a7d68b53f3cd86e00"}, - {file = "pydantic_core-2.14.5-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:206ed23aecd67c71daf5c02c3cd19c0501b01ef3cbf7782db9e4e051426b3d0d"}, - {file = "pydantic_core-2.14.5-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c2027d05c8aebe61d898d4cffd774840a9cb82ed356ba47a90d99ad768f39789"}, - {file = "pydantic_core-2.14.5-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:40180930807ce806aa71eda5a5a5447abb6b6a3c0b4b3b1b1962651906484d68"}, - {file = "pydantic_core-2.14.5-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:615a0a4bff11c45eb3c1996ceed5bdaa2f7b432425253a7c2eed33bb86d80abc"}, - {file = "pydantic_core-2.14.5-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f5e412d717366e0677ef767eac93566582518fe8be923361a5c204c1a62eaafe"}, - {file = "pydantic_core-2.14.5-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:513b07e99c0a267b1d954243845d8a833758a6726a3b5d8948306e3fe14675e3"}, - {file = "pydantic_core-2.14.5.tar.gz", hash = "sha256:6d30226dfc816dd0fdf120cae611dd2215117e4f9b124af8c60ab9093b6e8e71"}, -] - -[package.dependencies] -typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" - [[package]] name = "pytest" version = "8.3.2" @@ -1707,4 +1528,4 @@ watchdog = ["watchdog (>=2.3)"] [metadata] lock-version = "2.0" python-versions = ">=3.12,<4.0" -content-hash = "15ce32d6ef3aefdd60acbceb6505d58c536ecde924222bc2900d137ab6473836" +content-hash = "b20d0e667b75d4d0abda503e4a131a1edc6d1e53d93629cf4d74aa2ef7a54270" diff --git a/backend/pyproject.toml b/backend/pyproject.toml index cc053cf78..0cbd11c90 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -25,7 +25,6 @@ click = "^8.1.3" structlog = "^24.4.0" flask = "^3.0.3" cachetools = "^5.4.0" -meilisearch = "^0.31.4" pillow = "^10.4.0" gunicorn = "^22.0.0" apispec = "^6.6.1" diff --git a/backend/tests/api/test_query.py b/backend/tests/api/test_query.py index fc410e3ab..0e36e4f47 100644 --- a/backend/tests/api/test_query.py +++ b/backend/tests/api/test_query.py @@ -35,7 +35,7 @@ def votes(db_session, search_index): db_session.add_all(votes) db_session.commit() - index_search(Vote, votes, sync=True) + index_search(Vote, votes) yield diff --git a/backend/tests/api/test_votes_api.py b/backend/tests/api/test_votes_api.py index 4973be9a3..0ecd39295 100644 --- a/backend/tests/api/test_votes_api.py +++ b/backend/tests/api/test_votes_api.py @@ -156,7 +156,7 @@ def test_votes_api_search(db_session, search_index, api): db_session.add_all([one, two]) db_session.commit() - index_search(Vote, [one, two], sync=True) + index_search(Vote, [one, two]) res = api.get("/api/votes/search") assert res.json["total"] == 2 @@ -202,7 +202,7 @@ def test_votes_api_search_references(db_session, search_index, api): db_session.add_all([one, two]) db_session.commit() - index_search(Vote, [one, two], sync=True) + index_search(Vote, [one, two]) res = api.get("/api/votes/search", query_string={"q": "A9-0043/2024"}) assert res.json["total"] == 1 diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py index 26bd85e14..07af98d05 100644 --- a/backend/tests/conftest.py +++ b/backend/tests/conftest.py @@ -8,8 +8,8 @@ import pytest import responses as responses_lib +from howtheyvote import config from howtheyvote.db import Session, engine, migrate, session_factory -from howtheyvote.meili import configure_indexes, delete_indexes from howtheyvote.wsgi import app as flask_app @@ -36,10 +36,9 @@ def db_session(migrations): @pytest.fixture() -def search_index(): - """Deletes and recreates search indexes before the test runs.""" - delete_indexes() - configure_indexes() +def search_index(tmp_path): + """Create temporary directory for search index.""" + config.SEARCH_INDEX_DIR = tmp_path yield diff --git a/docker-compose.override.yml b/docker-compose.override.yml index 1b0be88a1..91178dafe 100644 --- a/docker-compose.override.yml +++ b/docker-compose.override.yml @@ -27,12 +27,6 @@ services: stdin_open: true tty: true - meilisearch: - ports: - - "7700:7700" - stdin_open: true - tty: true - caddy: stdin_open: true tty: true diff --git a/docker-compose.yml b/docker-compose.yml index 7b8132494..43ee87a25 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,5 +1,4 @@ volumes: - meilisearch_data: {} caddy_data: {} caddy_config: {} @@ -29,7 +28,6 @@ services: backend: image: "ghcr.io/howtheyvote/howtheyvote-backend:main" depends_on: - - meilisearch - chromium networks: - "default" @@ -40,19 +38,17 @@ services: volumes: - "./storage/database:/howtheyvote/database" - "./storage/files:/howtheyvote/files" + - "./storage/index:/howtheyvote/index" restart: "unless-stopped" environment: HTV_BACKEND_PUBLIC_URL: "${HTV_BACKEND_PUBLIC_URL}" HTV_FRONTEND_PUBLIC_URL: "${HTV_FRONTEND_PUBLIC_URL}" HTV_FRONTEND_PRIVATE_URL: "http://frontend:8000" - MEILI_URL: "http://meilisearch:7700" - MEILI_MASTER_KEY: "${MEILI_MASTER_KEY}" worker: image: "ghcr.io/howtheyvote/howtheyvote-backend:main" command: "htv worker" depends_on: - - meilisearch - chromium ports: # Make Prometheus metrics available on host machine @@ -60,22 +56,11 @@ services: volumes: - "./storage/database:/howtheyvote/database" - "./storage/files:/howtheyvote/files" + - "./storage/index:/howtheyvote/index" restart: "unless-stopped" environment: HTV_BACKEND_PUBLIC_URL: "${HTV_BACKEND_PUBLIC_URL}" HTV_FRONTEND_PUBLIC_URL: "${HTV_FRONTEND_PUBLIC_URL}" - MEILI_URL: "http://meilisearch:7700" - MEILI_MASTER_KEY: "${MEILI_MASTER_KEY}" - - meilisearch: - image: "getmeili/meilisearch:v1.3.1" - expose: - - "7700" - volumes: - - "meilisearch_data:/meili_data" - restart: "unless-stopped" - environment: - MEILI_MASTER_KEY: "${MEILI_MASTER_KEY}" caddy: image: "caddy:2.7.6-alpine" diff --git a/storage/index/.gitignore b/storage/index/.gitignore new file mode 100644 index 000000000..d6b7ef32c --- /dev/null +++ b/storage/index/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore