From 8ee54c7498be6207da96b1e2c31d16d0d0f7453f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9gis=20Behmo?=
Date: Thu, 17 Oct 2024 23:03:41 +0200
Subject: [PATCH] feat: meilisearch backend for notes search

This is a very simple and basic backend. It is based on Django signals, just
like the Elasticsearch backend. But it is much simpler, in the sense that there
are just two signals: one for saving documents and one for deletion.

This backend is limited, in the sense that it does not support highlighting --
but that's probably not such a big deal.

To start using this backend, define the following settings:

    ES_DISABLED = True
    MEILISEARCH_ENABLED = True
    MEILISEARCH_URL = "http://meilisearch:7700"
    MEILISEARCH_API_KEY = "s3cr3t"
    MEILISEARCH_INDEX = "tutor_student_notes"
---
Reviewer note: meilisearch.py was amended during review (class-level
client/index cache, escaped filter values, ordered reindex). Hunk sizes and the
diffstat are updated accordingly, but the "index" blob id of that file is stale.

 notesapi/v1/views/__init__.py    |   5 +-
 notesapi/v1/views/meilisearch.py | 206 +++++++++++++++++++++++++++++++
 requirements/base.in             |   1 +
 requirements/base.txt            |  16 ++-
 requirements/test.txt            |  21 ++++
 5 files changed, 247 insertions(+), 2 deletions(-)
 create mode 100644 notesapi/v1/views/meilisearch.py

diff --git a/notesapi/v1/views/__init__.py b/notesapi/v1/views/__init__.py
index eef7e9b4..f1794265 100644
--- a/notesapi/v1/views/__init__.py
+++ b/notesapi/v1/views/__init__.py
@@ -13,7 +13,10 @@ def get_views_module():
     Import views from either mysql or elasticsearch backend
     """
     if settings.ES_DISABLED:
-        from . import common as backend_module
+        if getattr(settings, "MEILISEARCH_ENABLED", False):
+            from . import meilisearch as backend_module
+        else:
+            from . import common as backend_module
     else:
         from . import elasticsearch as backend_module
     return backend_module
diff --git a/notesapi/v1/views/meilisearch.py b/notesapi/v1/views/meilisearch.py
new file mode 100644
index 00000000..853d1b64
--- /dev/null
+++ b/notesapi/v1/views/meilisearch.py
@@ -0,0 +1,206 @@
+"""
+Meilisearch views to search for annotations.
+
+To enable this backend, define the following settings:
+
+ES_DISABLED = True
+MEILISEARCH_ENABLED = True
+
+Then check the Client class for more information about Meilisearch credential settings.
+
+When you start using this backend, you might want to re-index all your content. To do that, run:
+
+    ./manage.py shell -c "from notesapi.v1.views.meilisearch import reindex; reindex()"
+"""
+
+import traceback
+
+import meilisearch
+from django.conf import settings
+from django.core.paginator import Paginator
+from django.db.models import signals
+from django.dispatch import receiver
+
+from notesapi.v1.models import Note
+
+from .common import AnnotationSearchView as BaseAnnotationSearchView
+from .exceptions import SearchViewRuntimeError
+
+
+class Client:
+    """
+    Simple Meilisearch client class
+
+    It depends on the following Django settings:
+
+    - MEILISEARCH_URL
+    - MEILISEARCH_API_KEY
+    - MEILISEARCH_INDEX
+
+    The client and the index are cached at the class level, such that they are
+    shared by all instances: `Client()` can be instantiated wherever it is needed.
+    """
+
+    _CLIENT = None
+    _INDEX = None
+    FILTERABLES = ["user_id", "course_id"]
+
+    @property
+    def meilisearch_client(self) -> meilisearch.Client:
+        """
+        Return a meilisearch client.
+        """
+        # Assign on the class, not on `self`: a new instance is created at every
+        # call site, so an instance attribute would never be reused.
+        if Client._CLIENT is None:
+            Client._CLIENT = meilisearch.Client(
+                getattr(settings, "MEILISEARCH_URL", "http://meilisearch:7700"),
+                getattr(settings, "MEILISEARCH_API_KEY", ""),
+            )
+        return Client._CLIENT
+
+    @property
+    def meilisearch_index(self) -> meilisearch.index.Index:
+        """
+        Return the meilisearch index used to store annotations.
+
+        If the index does not exist, it is created. And if it does not have the right
+        filterable fields, then it is updated.
+        """
+        if Client._INDEX is None:
+            index_name = getattr(settings, "MEILISEARCH_INDEX", "student_notes")
+            try:
+                index = self.meilisearch_client.get_index(index_name)
+            except meilisearch.errors.MeilisearchApiError:
+                task = self.meilisearch_client.create_index(
+                    index_name, {"primaryKey": "id"}
+                )
+                self.meilisearch_client.wait_for_task(task.task_uid, timeout_in_ms=2000)
+                index = self.meilisearch_client.get_index(index_name)
+
+            # Checking filterable attributes
+            existing_filterables = set(index.get_filterable_attributes())
+            if not set(self.FILTERABLES).issubset(existing_filterables):
+                all_filterables = list(existing_filterables.union(self.FILTERABLES))
+                index.update_filterable_attributes(all_filterables)
+
+            # Cache the index only once the checks above have succeeded
+            Client._INDEX = index
+
+        return Client._INDEX
+
+
+def _quote_filter_value(value):
+    """
+    Return a value as a single-quoted string for use in a Meilisearch filter.
+
+    Backslashes and single quotes are escaped, such that the value cannot close the
+    quoted string early and inject extra filter conditions.
+    """
+    escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
+    return f"'{escaped}'"
+
+
+class AnnotationSearchView(BaseAnnotationSearchView):
+    """
+    Annotation search view where text search is delegated to Meilisearch.
+    """
+
+    def get_queryset(self):
+        """
+        Simple result filtering method based on text search.
+
+        We simply include in the query only those that match the text search query. Note
+        that this backend does not support highlighting (yet).
+        """
+        if not self.is_text_search:
+            return super().get_queryset()
+
+        queryset = Note.objects.filter(**self.query_params).order_by("-updated")
+
+        # Define meilisearch params. Filter values may contain quotes, so they are
+        # escaped before being inserted in the filter expressions.
+        filters = [
+            f"user_id = {_quote_filter_value(self.params['user'])}",
+            f"course_id = {_quote_filter_value(self.params['course_id'])}",
+        ]
+        # NOTE(review): offset/limit are applied to the search here; confirm that the
+        # base view does not paginate the resulting queryset a second time.
+        page_size = int(self.params["page_size"])
+        offset = (int(self.params["page"]) - 1) * page_size
+
+        # Perform search
+        search_results = Client().meilisearch_index.search(
+            self.params["text"],
+            {"offset": offset, "limit": page_size, "filter": filters},
+        )
+
+        # Limit to these ID
+        queryset = queryset.filter(id__in=[r["id"] for r in search_results["hits"]])
+        return queryset
+
+
+@receiver(signals.post_save, sender=Note)
+def on_note_save(sender, instance, **kwargs):  # pylint: disable=unused-argument
+    """
+    Create or update a document.
+    """
+    add_documents([instance])
+
+
+@receiver(signals.post_delete, sender=Note)
+def on_note_delete(sender, instance, **kwargs):  # pylint: disable=unused-argument
+    """
+    Delete a document.
+    """
+    Client().meilisearch_index.delete_document(instance.id)
+
+
+def reindex():
+    """
+    Re-index all notes, in batches of 100.
+    """
+    # An explicit ordering is required for the paginator to yield consistent pages
+    paginator = Paginator(Note.objects.order_by("id"), 100)
+    for page_number in paginator.page_range:
+        page = paginator.page(page_number)
+        add_documents(page.object_list)
+
+
+def add_documents(notes):
+    """
+    Convert some Note objects and insert them in the index.
+    """
+    documents_to_add = [
+        {
+            "id": note.id,
+            "user_id": note.user_id,
+            "course_id": note.course_id,
+            "text": note.text,
+        }
+        for note in notes
+    ]
+    if documents_to_add:
+        Client().meilisearch_index.add_documents(documents_to_add)
+
+
+def heartbeat():
+    """
+    Check that the meilisearch client is healthy.
+    """
+    if not Client().meilisearch_client.is_healthy():
+        raise SearchViewRuntimeError("meilisearch")
+
+
+def selftest():
+    """
+    Check that we can access the meilisearch index.
+    """
+    try:
+        # Fetch the index info explicitly, such that the server is contacted even
+        # when the index object is already cached.
+        return {"meilisearch": Client().meilisearch_index.fetch_info().created_at}
+    except meilisearch.errors.MeilisearchError as e:
+        raise SearchViewRuntimeError(
+            {"meilisearch_error": traceback.format_exc()}
+        ) from e
diff --git a/requirements/base.in b/requirements/base.in
index d0978e72..24974afc 100644
--- a/requirements/base.in
+++ b/requirements/base.in
@@ -10,6 +10,7 @@ elasticsearch-dsl
 django-elasticsearch-dsl
 django-elasticsearch-dsl-drf
 django-cors-headers
+meilisearch
 mysqlclient
 PyJWT
 gunicorn  # MIT
diff --git a/requirements/base.txt b/requirements/base.txt
index 05398f1d..cab434bd 100644
--- a/requirements/base.txt
+++ b/requirements/base.txt
@@ -4,12 +4,16 @@
 #
 #    make upgrade
 #
+annotated-types==0.7.0
+    # via pydantic
 asgiref==3.8.1
     # via django
 attrs==24.2.0
     # via
     #   jsonschema
     #   referencing
+camel-converter[pydantic]==4.0.1
+    # via meilisearch
 certifi==2024.8.30
     # via
     #   elasticsearch
@@ -103,6 +107,8 @@ jsonschema==4.23.0
     # via drf-spectacular
 jsonschema-specifications==2023.12.1
     # via jsonschema
+meilisearch==0.31.5
+    # via -r requirements/base.in
 mysqlclient==2.2.4
     # via -r requirements/base.in
 newrelic==10.0.0
@@ -123,6 +129,10 @@ psutil==6.0.0
     # via edx-django-utils
 pycparser==2.22
     # via cffi
+pydantic==2.9.2
+    # via camel-converter
+pydantic-core==2.23.4
+    # via pydantic
 pyjwt[crypto]==2.9.0
     # via
     #   -r requirements/base.in
@@ -152,6 +162,7 @@ requests==2.25.0
     #   -c requirements/constraints.txt
     #   -r requirements/base.in
     #   edx-drf-extensions
+    #   meilisearch
 rpds-py==0.20.0
     # via
     #   jsonschema
@@ -172,7 +183,10 @@ stevedore==5.3.0
     #   edx-django-utils
     #   edx-opaque-keys
 typing-extensions==4.12.2
-    # via edx-opaque-keys
+    # via
+    #   edx-opaque-keys
+    #   pydantic
+    #   pydantic-core
 uritemplate==4.1.1
     # via drf-spectacular
 urllib3==1.26.20
diff --git a/requirements/test.txt b/requirements/test.txt
index ea169672..06cd74b5 100644
--- a/requirements/test.txt
+++ b/requirements/test.txt
@@ -4,6 +4,10 @@
 #
 #    make upgrade
 #
+annotated-types==0.7.0
+    # via
+    #   -r requirements/base.txt
+    #   pydantic
 asgiref==3.8.1
     # via
     #   -r requirements/base.txt
@@ -18,6 +22,10 @@ attrs==24.2.0
     #   -r requirements/base.txt
     #   jsonschema
     #   referencing
+camel-converter[pydantic]==4.0.1
+    # via
+    #   -r requirements/base.txt
+    #   meilisearch
 certifi==2024.8.30
     # via
     #   -r requirements/base.txt
@@ -171,6 +179,8 @@ lazy-object-proxy==1.10.0
     # via astroid
 markupsafe==2.1.5
     # via jinja2
+meilisearch==0.31.5
+    # via -r requirements/base.txt
 more-itertools==5.0.0
     # via
     #   -c requirements/constraints.txt
@@ -215,6 +225,14 @@ pycparser==2.22
     # via
     #   -r requirements/base.txt
     #   cffi
+pydantic==2.9.2
+    # via
+    #   -r requirements/base.txt
+    #   camel-converter
+pydantic-core==2.23.4
+    # via
+    #   -r requirements/base.txt
+    #   pydantic
 pygments==2.18.0
     # via diff-cover
 pyjwt[crypto]==2.9.0
@@ -269,6 +287,7 @@ requests==2.25.0
     #   -c requirements/constraints.txt
     #   -r requirements/base.txt
     #   edx-drf-extensions
+    #   meilisearch
 rpds-py==0.20.0
     # via
     #   -r requirements/base.txt
@@ -309,6 +328,8 @@ typing-extensions==4.12.2
     #   -r requirements/base.txt
     #   edx-opaque-keys
     #   faker
+    #   pydantic
+    #   pydantic-core
 uritemplate==4.1.1
     # via
     #   -r requirements/base.txt