From ef60540fa524db0483a3dfe8c66d333febf9395c Mon Sep 17 00:00:00 2001 From: qasimgulzar Date: Tue, 11 Jun 2024 17:26:29 +0500 Subject: [PATCH 01/13] feat: added new auto suggest endpoint. --- search/api.py | 37 +++++++++++++++++++++++++++++++++++++ search/urls.py | 1 + search/views.py | 14 +++++++++++++- 3 files changed, 51 insertions(+), 1 deletion(-) diff --git a/search/api.py b/search/api.py index 43d2ed18..22c5a9a8 100644 --- a/search/api.py +++ b/search/api.py @@ -1,6 +1,8 @@ """ search business logic implementations """ from datetime import datetime + +import meilisearch from django.conf import settings from eventtracking import tracker as track @@ -158,3 +160,38 @@ def course_discovery_search(search_term=None, size=20, from_=0, field_dictionary ) return results + + +def auto_suggest_search_api(q, course_id, limit=30): + response = {"total": 0, "results": []} + + match (getattr(settings,"MEILISEARCH_ENABLED",False)): + case True: + client = meilisearch.Client(settings.MEILISEARCH_URL, settings.MEILISEARCH_API_KEY) + index_name = settings.MEILISEARCH_INDEX_PREFIX + "studio_content" + results = client.index(index_name).search(q, { + "facets": ["block_type", "tags"], "filter": [ + f"context_key='{course_id}'" + ], "limit": limit + }) + results = list(map(lambda it: { + "id": it["id"], + "display_name": it["display_name"], + "usage_key": it["usage_key"], + }, results["hits"])) + case False: + searcher = SearchEngine.get_search_engine( + getattr(settings, "COURSEWARE_CONTENT_INDEX_NAME", "courseware_content") + ) + results = searcher.search( + query_string=q, + size=limit, + field_dictionary={"course": course_id} + ) + + results = list(map(lambda it: { + "id": it["_id"], + "display_name": it["data"]["content"]["display_name"], + }, results["results"])) + + return results diff --git a/search/urls.py b/search/urls.py index 10439d54..e9600fbf 100644 --- a/search/urls.py +++ b/search/urls.py @@ -11,5 +11,6 @@ urlpatterns = [ path('', views.do_search, name='do_search'), re_path(r'^{}$'.format(COURSE_ID_PATTERN), views.do_search, name='do_search'), + re_path(r'^{}/auto_suggest_search$'.format(COURSE_ID_PATTERN), views.auto_suggest_search, name='auto_suggest_search'), path('course_discovery/', views.course_discovery, name='course_discovery'), ] diff --git a/search/views.py b/search/views.py index cda536b2..f7d8b297 100644 --- a/search/views.py +++ b/search/views.py @@ -3,13 +3,14 @@ import logging +import meilisearch from django.conf import settings from django.http import JsonResponse from django.utils.translation import gettext as _ from django.views.decorators.http import require_POST from eventtracking import tracker as track -from .api import perform_search, course_discovery_search, course_discovery_filter_fields +from .api import perform_search, course_discovery_search, course_discovery_filter_fields, auto_suggest_search_api from .initializer import SearchInitializer # log appears to be standard name used for logger @@ -219,3 +220,14 @@ def course_discovery(request): ) return JsonResponse(results, status=status_code) + + +def auto_suggest_search(request, course_id=None): + q = request.GET.get('q', None) + limit = request.GET.get('limit', 30) + + search_results = auto_suggest_search_api(q, course_id=course_id, limit=limit) + + return JsonResponse({ + "results": search_results + }) From c60f5772f51ea8e2f8d3d9a146d58bbcbe278adf Mon Sep 17 00:00:00 2001 From: qasimgulzar Date: Tue, 11 Jun 2024 18:14:44 +0500 Subject: [PATCH 02/13] fix: code improvements --- search/api.py | 62 
++++++++++++++++++++++++------------------------- search/views.py | 8 +++---- 2 files changed, 34 insertions(+), 36 deletions(-) diff --git a/search/api.py b/search/api.py index 22c5a9a8..86a4cb85 100644 --- a/search/api.py +++ b/search/api.py @@ -162,36 +162,36 @@ def course_discovery_search(search_term=None, size=20, from_=0, field_dictionary return results -def auto_suggest_search_api(q, course_id, limit=30): +def auto_suggest_search_api(term, course_id, limit=30): response = {"total": 0, "results": []} - match (getattr(settings,"MEILISEARCH_ENABLED",False)): - case True: - client = meilisearch.Client(settings.MEILISEARCH_URL, settings.MEILISEARCH_API_KEY) - index_name = settings.MEILISEARCH_INDEX_PREFIX + "studio_content" - results = client.index(index_name).search(q, { - "facets": ["block_type", "tags"], "filter": [ - f"context_key='{course_id}'" - ], "limit": limit - }) - results = list(map(lambda it: { - "id": it["id"], - "display_name": it["display_name"], - "usage_key": it["usage_key"], - }, results["hits"])) - case False: - searcher = SearchEngine.get_search_engine( - getattr(settings, "COURSEWARE_CONTENT_INDEX_NAME", "courseware_content") - ) - results = searcher.search( - query_string=q, - size=limit, - field_dictionary={"course": course_id} - ) - - results = list(map(lambda it: { - "id": it["_id"], - "display_name": it["data"]["content"]["display_name"], - }, results["results"])) - - return results + if getattr(settings,"MEILISEARCH_ENABLED",False): + client = meilisearch.Client(settings.MEILISEARCH_URL, settings.MEILISEARCH_API_KEY) + index_name = settings.MEILISEARCH_INDEX_PREFIX + "studio_content" + results = client.index(index_name).search(term, { + "facets": ["block_type", "tags"], "filter": [ + f"context_key='{course_id}'" + ], "limit": limit + }) + results = list(map(lambda it: { + "id": it["id"], + "display_name": it["display_name"], + "usage_key": it["usage_key"], + }, results["hits"])) + else: + searcher = SearchEngine.get_search_engine( + getattr(settings, "COURSEWARE_CONTENT_INDEX_NAME", "courseware_content") + ) + results = searcher.search( + query_string=term, + size=limit, + field_dictionary={"course": course_id} + ) + + results = list(map(lambda it: { + "id": it["_id"], + "display_name": it["data"]["content"]["display_name"], + }, results["results"])) + + response.update(results=results) + return response diff --git a/search/views.py b/search/views.py index f7d8b297..331e7b5e 100644 --- a/search/views.py +++ b/search/views.py @@ -223,11 +223,9 @@ def course_discovery(request): def auto_suggest_search(request, course_id=None): - q = request.GET.get('q', None) + term = request.GET.get('term', None) limit = request.GET.get('limit', 30) - search_results = auto_suggest_search_api(q, course_id=course_id, limit=limit) + search_results = auto_suggest_search_api(term, course_id=course_id, limit=limit) - return JsonResponse({ - "results": search_results - }) + return JsonResponse(search_results) From d2b92fc482fc765bc589700f59d6824450e505a0 Mon Sep 17 00:00:00 2001 From: qasimgulzar Date: Tue, 11 Jun 2024 18:19:45 +0500 Subject: [PATCH 03/13] fix: code improvements --- search/api.py | 114 +++++++++++++++++++++++++++++++++++++----------- search/views.py | 15 +++++++ 2 files changed, 103 insertions(+), 26 deletions(-) diff --git a/search/api.py b/search/api.py index 86a4cb85..1bb496a0 100644 --- a/search/api.py +++ b/search/api.py @@ -162,36 +162,98 @@ def course_discovery_search(search_term=None, size=20, from_=0, field_dictionary return results +def 
_elasticsearch_auto_suggest_search_api(term, course_id, limit=30): + """ + Perform an auto-suggest search using the Elasticsearch search engine. + + Args: + term (str): The search term. + course_id (str): The ID of the course to filter the search results. + limit (int, optional): The maximum number of results to return. Defaults to 30. + + Returns: + list: A list of dictionaries containing the search results with 'id', 'display_name', and 'usage_key'. + """ + # Create a client instance for MeiliSearch + client = meilisearch.Client(settings.MEILISEARCH_URL, settings.MEILISEARCH_API_KEY) + + # Define the index name + index_name = settings.MEILISEARCH_INDEX_PREFIX + "studio_content" + + # Perform the search with specified facets and filters + results = client.index(index_name).search(term, { + "facets": ["block_type", "tags"], + "filter": [f"context_key='{course_id}'"], + "limit": limit + }) + + # Process the search hits to extract relevant fields + results = list(map(lambda it: { + "id": it["id"], + "display_name": it["display_name"], + "usage_key": it["usage_key"], + }, results["hits"])) + + return results + + +def _meilisearch_auto_suggest_search_api(term, course_id, limit=30): + """ + Perform an auto-suggest search using the MeiliSearch search engine. + + Args: + term (str): The search term. + course_id (str): The ID of the course to filter the search results. + limit (int, optional): The maximum number of results to return. Defaults to 30. + + Returns: + list: A list of dictionaries containing the search results with 'id' and 'display_name'. + """ + # Get the search engine instance + searcher = SearchEngine.get_search_engine( + getattr(settings, "COURSEWARE_CONTENT_INDEX_NAME", "courseware_content") + ) + + # Perform the search with the specified query string, size, and field dictionary + results = searcher.search( + query_string=term, + size=limit, + field_dictionary={"course": course_id} + ) + + # Process the search results to extract relevant fields + results = list(map(lambda it: { + "id": it["_id"], + "display_name": it["data"]["content"]["display_name"], + }, results["results"])) + + return results + + def auto_suggest_search_api(term, course_id, limit=30): + """ + Perform an auto-suggest search using either Elasticsearch or MeiliSearch based on configuration. + + Args: + term (str): The search term. + course_id (str): The ID of the course to filter the search results. + limit (int, optional): The maximum number of results to return. Defaults to 30. + + Returns: + dict: A dictionary with 'total' number of results and a list of 'results'. 
+ """ + # Initialize response dictionary response = {"total": 0, "results": []} - if getattr(settings,"MEILISEARCH_ENABLED",False): - client = meilisearch.Client(settings.MEILISEARCH_URL, settings.MEILISEARCH_API_KEY) - index_name = settings.MEILISEARCH_INDEX_PREFIX + "studio_content" - results = client.index(index_name).search(term, { - "facets": ["block_type", "tags"], "filter": [ - f"context_key='{course_id}'" - ], "limit": limit - }) - results = list(map(lambda it: { - "id": it["id"], - "display_name": it["display_name"], - "usage_key": it["usage_key"], - }, results["hits"])) + # Check which search engine to use based on settings + if getattr(settings, "MEILISEARCH_ENABLED", False): + # Use Elasticsearch if MEILISEARCH_ENABLED is set to True + results = _elasticsearch_auto_suggest_search_api(term, course_id, limit) else: - searcher = SearchEngine.get_search_engine( - getattr(settings, "COURSEWARE_CONTENT_INDEX_NAME", "courseware_content") - ) - results = searcher.search( - query_string=term, - size=limit, - field_dictionary={"course": course_id} - ) - - results = list(map(lambda it: { - "id": it["_id"], - "display_name": it["data"]["content"]["display_name"], - }, results["results"])) + # Use MeiliSearch otherwise + results = _meilisearch_auto_suggest_search_api(term, course_id, limit) + # Update response with the search results response.update(results=results) + return response diff --git a/search/views.py b/search/views.py index 331e7b5e..6d033f1d 100644 --- a/search/views.py +++ b/search/views.py @@ -223,9 +223,24 @@ def course_discovery(request): def auto_suggest_search(request, course_id=None): + """ + Django view to perform an auto-suggest search and return the results as a JSON response. + + Args: + request (HttpRequest): The HTTP request object. + course_id (str, optional): The ID of the course to filter the search results. Defaults to None. + + Returns: + JsonResponse: A JSON response containing the search results. + """ + # Get the search term from the request parameters, defaulting to None if not provided term = request.GET.get('term', None) + + # Get the limit for the number of results from the request parameters, defaulting to 30 if not provided limit = request.GET.get('limit', 30) + # Call the search API function to perform the search search_results = auto_suggest_search_api(term, course_id=course_id, limit=limit) + # Return the search results as a JSON response return JsonResponse(search_results) From f9d2de1b852c72d5ba011292452c72c36723d92b Mon Sep 17 00:00:00 2001 From: qasimgulzar Date: Wed, 12 Jun 2024 13:26:46 +0500 Subject: [PATCH 04/13] fix: code improvements --- search/api.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/search/api.py b/search/api.py index 1bb496a0..c74ff4a0 100644 --- a/search/api.py +++ b/search/api.py @@ -162,7 +162,7 @@ def course_discovery_search(search_term=None, size=20, from_=0, field_dictionary return results -def _elasticsearch_auto_suggest_search_api(term, course_id, limit=30): +def _meilisearch_auto_suggest_search_api(term, course_id, limit=30): """ Perform an auto-suggest search using the Elasticsearch search engine. @@ -197,9 +197,9 @@ def _elasticsearch_auto_suggest_search_api(term, course_id, limit=30): return results -def _meilisearch_auto_suggest_search_api(term, course_id, limit=30): +def _elasticsearch_auto_suggest_search_api(term, course_id, limit=30): """ - Perform an auto-suggest search using the MeiliSearch search engine. 
+ Perform an auto-suggest search using either Elasticsearch or MeiliSearch based on configuration. Args: term (str): The search term. @@ -207,8 +207,9 @@ def _meilisearch_auto_suggest_search_api(term, course_id, limit=30): limit (int, optional): The maximum number of results to return. Defaults to 30. Returns: - list: A list of dictionaries containing the search results with 'id' and 'display_name'. + list: A list of dictionaries containing the search results with 'id', 'display_name' and 'usage_key'. """ + # Get the search engine instance searcher = SearchEngine.get_search_engine( getattr(settings, "COURSEWARE_CONTENT_INDEX_NAME", "courseware_content") @@ -225,6 +226,7 @@ def _meilisearch_auto_suggest_search_api(term, course_id, limit=30): results = list(map(lambda it: { "id": it["_id"], "display_name": it["data"]["content"]["display_name"], + "usage_key": it["_id"], }, results["results"])) return results @@ -232,7 +234,7 @@ def _meilisearch_auto_suggest_search_api(term, course_id, limit=30): def auto_suggest_search_api(term, course_id, limit=30): """ - Perform an auto-suggest search using either Elasticsearch or MeiliSearch based on configuration. + Perform an auto-suggest search using the MeiliSearch search engine. Args: term (str): The search term. @@ -240,18 +242,18 @@ def auto_suggest_search_api(term, course_id, limit=30): limit (int, optional): The maximum number of results to return. Defaults to 30. Returns: - dict: A dictionary with 'total' number of results and a list of 'results'. + list: A list of dictionaries containing the search results with 'id', 'display_name' and 'usage_key'. """ # Initialize response dictionary - response = {"total": 0, "results": []} + response = {"results": []} # Check which search engine to use based on settings if getattr(settings, "MEILISEARCH_ENABLED", False): - # Use Elasticsearch if MEILISEARCH_ENABLED is set to True - results = _elasticsearch_auto_suggest_search_api(term, course_id, limit) - else: # Use MeiliSearch otherwise results = _meilisearch_auto_suggest_search_api(term, course_id, limit) + else: + # Use Elasticsearch if MEILISEARCH_ENABLED is set to True + results = _elasticsearch_auto_suggest_search_api(term, course_id, limit) # Update response with the search results response.update(results=results) From 62ac4b073b62b5166f7bb4995a8959f4c9b2e65c Mon Sep 17 00:00:00 2001 From: qasimgulzar Date: Thu, 13 Jun 2024 15:19:22 +0500 Subject: [PATCH 05/13] =?UTF-8?q?=F0=9F=A7=AAadded=20testcase=20for=20new?= =?UTF-8?q?=20auto-suggest=20view.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- edxsearch/settings.py | 20 +++++++++++++++++++- search/tests/test_views.py | 19 +++++++++++++++++-- search/tests/utils.py | 9 +++++++++ 3 files changed, 45 insertions(+), 3 deletions(-) diff --git a/edxsearch/settings.py b/edxsearch/settings.py index c6aee223..b73afbf7 100644 --- a/edxsearch/settings.py +++ b/edxsearch/settings.py @@ -26,7 +26,7 @@ # This is just a container for running tests DEBUG = True -ALLOWED_HOSTS = [] +ALLOWED_HOSTS = ['*'] TEMPLATES = [ { @@ -99,3 +99,21 @@ # https://docs.djangoproject.com/en/1.6/howto/static-files/ STATIC_URL = '/static/' +################### Using ElasticSearch ################### + +SEARCH_ENGINE = os.getenv('SEARCH_ENGINE', 'search.elastic.ElasticSearchEngine') + +################### Using Meilisearch (Beta) ################### + +# Enable Studio search features (powered by Meilisearch) (beta, off by default) +MEILISEARCH_ENABLED = False +# Meilisearch URL that the 
python backend can use. Often points to another docker container or k8s service. +MEILISEARCH_URL = os.getenv('MEILISEARCH_URL', 'http://localhost:7700') +# URL that browsers (end users) can use to reach Meilisearch. Should be HTTPS in production. +MEILISEARCH_PUBLIC_URL = os.getenv('MEILISEARCH_PUBLIC_URL', 'http://localhost:7700') +# To support multi-tenancy, you can prefix all indexes with a common key like "sandbox7-" +# and use a restricted tenant token in place of an API key, so that this Open edX instance +# can only use the index(es) that start with this prefix. +# See https://www.meilisearch.com/docs/learn/security/tenant_tokens +MEILISEARCH_INDEX_PREFIX = os.getenv('MEILISEARCH_INDEX_PREFIX', '') +MEILISEARCH_API_KEY = os.getenv('MEILISEARCH_API_KEY', 'masterKey') diff --git a/search/tests/test_views.py b/search/tests/test_views.py index 7c3aff46..85acc290 100644 --- a/search/tests/test_views.py +++ b/search/tests/test_views.py @@ -4,7 +4,7 @@ from unittest.mock import patch, call import ddt -from django.urls import Resolver404, resolve +from django.urls import Resolver404, resolve, reverse from django.test import TestCase from django.test.utils import override_settings from waffle.testutils import override_switch @@ -13,7 +13,7 @@ from search.search_engine_base import DEFAULT_ELASTIC_SEARCH_SWITCH from search.elastic import ElasticSearchEngine from search.tests.mock_search_engine import MockSearchEngine -from search.tests.utils import post_request, SearcherMixin, TEST_INDEX_NAME +from search.tests.utils import post_request, SearcherMixin, TEST_INDEX_NAME, get_request # Any class that inherits from TestCase will cause too-many-public-methods pylint error @@ -497,3 +497,18 @@ def test_valid_search(self, query, course_id, result_count): code, results = post_request({"search_string": query}, course_id) self.assertTrue(199 < code < 300) self.assertEqual(results["total"], result_count) + + +class TestAutoSuggestView(TestCase): + @override_settings(MEILISEARCH_ENABLED=True) + def test_valid_search_with_meilisearch(self): + endpoint = reverse('auto_suggest_search', args={'course-v1:Demo+DM101+2024'}) + print(endpoint) + status, results = get_request(f'{endpoint}?term=open') + self.assertTrue(status == 200) + + @override_settings(MEILISEARCH_ENABLED=False) + def test_valid_search_with_elastic(self): + endpoint = reverse('auto_suggest_search', args={'course-v1:Demo+DM101+2024'}) + status, results = get_request(f'{endpoint}?term=open') + self.assertTrue(status == 200) diff --git a/search/tests/utils.py b/search/tests/utils.py index 5d38d616..ad3c53df 100644 --- a/search/tests/utils.py +++ b/search/tests/utils.py @@ -83,3 +83,12 @@ class ErroringElasticImpl(Elasticsearch): def search(self, **kwargs): # pylint: disable=arguments-differ """ this will definitely fail """ raise exceptions.ElasticsearchException("This search operation failed") + + +def get_request(url): + """ + Helper method to get the request and process the response + """ + response = Client().post(url) + + return getattr(response, "status_code", 500), json.loads(getattr(response, "content", None).decode('utf-8')) From b0e810935a8a0711bcbabe40731d498f4618ce95 Mon Sep 17 00:00:00 2001 From: qasimgulzar Date: Thu, 13 Jun 2024 15:32:17 +0500 Subject: [PATCH 06/13] =?UTF-8?q?=F0=9F=93=8Ddependency=20added?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- requirements/base.in | 1 + requirements/base.txt | 41 ++++++++++++++++++++----------- requirements/testing.txt | 52 
+++++++++++++++++++++++++++------------- 3 files changed, 64 insertions(+), 30 deletions(-) diff --git a/requirements/base.in b/requirements/base.in index 034b08a6..2b1c9f5e 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -15,3 +15,4 @@ Django # Web application framework elasticsearch>=7.8.0,<8.0.0 edx-toggles event-tracking +meilisearch==0.31.1 diff --git a/requirements/base.txt b/requirements/base.txt index 78cb9cd8..11c64c5f 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -1,28 +1,31 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# make upgrade +# pip-compile --output-file=requirements/base.txt requirements/base.in # amqp==5.2.0 # via kombu +annotated-types==0.7.0 + # via pydantic asgiref==3.7.2 # via django attrs==23.2.0 # via openedx-events -backports-zoneinfo[tzdata]==0.2.1 ; python_version < "3.9" - # via - # -c requirements/constraints.txt - # celery - # kombu billiard==4.2.0 # via celery +camel-converter[pydantic]==3.1.2 + # via meilisearch celery==5.3.6 # via event-tracking certifi==2024.2.2 - # via elasticsearch + # via + # elasticsearch + # requests cffi==1.16.0 # via pynacl +charset-normalizer==3.3.2 + # via requests click==8.1.7 # via # celery @@ -76,12 +79,16 @@ event-tracking==2.3.0 # via -r requirements/base.in fastavro==1.9.4 # via openedx-events +idna==3.7 + # via requests jinja2==3.1.3 # via code-annotations kombu==5.3.5 # via celery markupsafe==2.1.5 # via jinja2 +meilisearch==0.31.1 + # via -r requirements/base.in newrelic==9.6.0 # via edx-django-utils openedx-events==9.5.2 @@ -94,6 +101,10 @@ psutil==5.9.8 # via edx-django-utils pycparser==2.21 # via cffi +pydantic==2.7.4 + # via camel-converter +pydantic-core==2.18.4 + # via pydantic pymongo==3.13.0 # via # edx-opaque-keys @@ -110,6 +121,8 @@ pytz==2024.1 # event-tracking pyyaml==6.0.1 # via code-annotations +requests==2.32.3 + # via meilisearch six==1.16.0 # via # event-tracking @@ -125,15 +138,15 @@ text-unidecode==1.3 # via python-slugify typing-extensions==4.9.0 # via - # asgiref # edx-opaque-keys - # kombu + # pydantic + # pydantic-core tzdata==2024.1 - # via - # backports-zoneinfo - # celery + # via celery urllib3==1.26.18 - # via elasticsearch + # via + # elasticsearch + # requests vine==5.1.0 # via # amqp diff --git a/requirements/testing.txt b/requirements/testing.txt index 9a1d1263..ce8d6e69 100644 --- a/requirements/testing.txt +++ b/requirements/testing.txt @@ -1,13 +1,17 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# make upgrade +# pip-compile --output-file=requirements/testing.txt requirements/testing.in # amqp==5.2.0 # via # -r requirements/base.txt # kombu +annotated-types==0.7.0 + # via + # -r requirements/base.txt + # pydantic asgiref==3.7.2 # via # -r requirements/base.txt @@ -16,16 +20,14 @@ attrs==23.2.0 # via # -r requirements/base.txt # openedx-events -backports-zoneinfo[tzdata]==0.2.1 ; python_version < "3.9" +billiard==4.2.0 # via - # -c requirements/constraints.txt # -r requirements/base.txt # celery - # kombu -billiard==4.2.0 +camel-converter[pydantic]==3.1.2 # via # -r requirements/base.txt - # celery + # meilisearch celery==5.3.6 # via # -r requirements/base.txt @@ -34,10 +36,15 @@ certifi==2024.2.2 # via # -r requirements/base.txt # elasticsearch + # requests cffi==1.16.0 # via # -r requirements/base.txt # pynacl 
+charset-normalizer==3.3.2 + # via + # -r requirements/base.txt + # requests click==8.1.7 # via # -r requirements/base.txt @@ -71,6 +78,7 @@ ddt==1.3.1 # via # -c requirements/constraints.txt # -r requirements/testing.in +django==3.2.24 # via # -c https://raw.githubusercontent.com/edx/edx-lint/master/edx_lint/files/common_constraints.txt # -r requirements/base.txt @@ -110,12 +118,14 @@ elasticsearch==7.13.4 # -r requirements/base.txt event-tracking==2.3.0 # via -r requirements/base.txt -exceptiongroup==1.2.0 - # via pytest fastavro==1.9.4 # via # -r requirements/base.txt # openedx-events +idna==3.7 + # via + # -r requirements/base.txt + # requests iniconfig==2.0.0 # via pytest jinja2==3.1.3 @@ -130,6 +140,8 @@ markupsafe==2.1.5 # via # -r requirements/base.txt # jinja2 +meilisearch==0.31.1 + # via -r requirements/base.txt mock==5.1.0 # via -r requirements/testing.in newrelic==9.6.0 @@ -160,6 +172,14 @@ pycparser==2.21 # via # -r requirements/base.txt # cffi +pydantic==2.7.4 + # via + # -r requirements/base.txt + # camel-converter +pydantic-core==2.18.4 + # via + # -r requirements/base.txt + # pydantic pymongo==3.13.0 # via # -r requirements/base.txt @@ -190,6 +210,10 @@ pyyaml==6.0.1 # via # -r requirements/base.txt # code-annotations +requests==2.32.3 + # via + # -r requirements/base.txt + # meilisearch six==1.16.0 # via # -r requirements/base.txt @@ -209,25 +233,21 @@ text-unidecode==1.3 # via # -r requirements/base.txt # python-slugify -tomli==2.0.1 - # via - # coverage - # pytest typing-extensions==4.9.0 # via # -r requirements/base.txt - # asgiref # edx-opaque-keys - # kombu + # pydantic + # pydantic-core tzdata==2024.1 # via # -r requirements/base.txt - # backports-zoneinfo # celery urllib3==1.26.18 # via # -r requirements/base.txt # elasticsearch + # requests vine==5.1.0 # via # -r requirements/base.txt From 1e4261ad66291b255fa35b7a1c3f16103576c8e7 Mon Sep 17 00:00:00 2001 From: qasimgulzar Date: Thu, 20 Jun 2024 17:32:43 +0500 Subject: [PATCH 07/13] add meilisearch backend --- search/elastic.py | 2 +- search/meilisearch.py | 309 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 310 insertions(+), 1 deletion(-) create mode 100644 search/meilisearch.py diff --git a/search/elastic.py b/search/elastic.py index 6a942ce5..c6bf7db1 100644 --- a/search/elastic.py +++ b/search/elastic.py @@ -244,7 +244,7 @@ class ElasticSearchEngine(SearchEngine): """ ElasticSearch implementation of SearchEngine abstraction """ - + backend_name = "elasticsearch" @staticmethod def get_cache_item_name(index_name): """ diff --git a/search/meilisearch.py b/search/meilisearch.py new file mode 100644 index 00000000..6f3561ac --- /dev/null +++ b/search/meilisearch.py @@ -0,0 +1,309 @@ +""" +Meilisearch implementation for courseware search index +""" +import copy +import hashlib +import logging +from datetime import datetime + +from django.conf import settings +from django.core.cache import cache +from meilisearch import Client, errors +from opaque_keys.edx.keys import UsageKey + +from search.search_engine_base import SearchEngine +from search.utils import ValueRange, _is_iterable + +# log appears to be standard name used for logger +log = logging.getLogger(__name__) + +RESERVED_CHARACTERS = "+=> str: + return hashlib.md5(f"{id}".encode('utf-8')).hexdigest() + + +def sanitized_id(source: dict, create_usage_key=True) -> dict: + if "id" not in source: + return source + + try: + usage_key = UsageKey.from_string(source['id']) + if create_usage_key: + source["usage_key"] = source["id"] + source["id"] = 
usage_key.block_id + except Exception as ex: + source["id"] = sanitize_id(source["id"]) + log.info(f"{str(ex)} - {source['id']} - {type(ex)}") + + return source + + +def filter_builder(_filters: list[dict]) -> list[str]: + if not _filters: + return [] + str_filters = [] + + for f in _filters: + if "id" in f: + f.update(**sanitized_id(f.copy(), create_usage_key=False)) + for key, val in f.items(): + str_filters.append(f"{key}=val") + + return [ + " OR ".join(str_filters) + ] + + +def serialize_datetimes(source): + """ + Recursively convert all datetime objects in a dictionary to strings. + """ + if isinstance(source.get("id"), str): + source.update(**sanitized_id(source.copy())) + + for key, value in source.items(): + if isinstance(value, datetime): + source[key] = value.isoformat() + elif isinstance(value, dict): + serialize_datetimes(value) + elif isinstance(value, list): + for item in value: + if isinstance(item, datetime): + item = item.isoformat() + elif isinstance(item, dict): + serialize_datetimes(item) + return source + + +def _translate_hits(ms_response): + """ + Provide result set in our desired format from Meilisearch results. + """ + + def translate_result(result): + """ + Any conversion from Meilisearch result syntax into our search engine syntax + """ + translated_result = copy.copy(result) + translated_result["data"] = translated_result.pop("data", {}) + translated_result["score"] = translated_result.pop("_score", 1.0) + return translated_result + + results = list(map(translate_result, ms_response["hits"])) + response = { + "took": ms_response["processingTimeMs"], + "total": ms_response["estimatedTotalHits"], + "max_score": max(result["score"] for result in results) if results else None, + "results": results, + } + if "aggregations" in ms_response: + response["aggs"] = ms_response["aggregations"] + + return response + + +def _get_filter_field(field_name, field_value): + """ + Return field to apply into filter. + """ + filter_query_field = {field_name: field_value} + if isinstance(field_value, ValueRange): + filter_query_field = { + field_name: [field_value.lower, field_value.upper] + } + elif _is_iterable(field_value): + filter_query_field = { + field_name: field_value, + } + return filter_query_field + + +def _process_field_queries(field_dictionary): + """ + Prepare Meilisearch query which must be in the Meilisearch record set. + """ + return [ + _get_filter_field(field, field_value) + for field, field_value in field_dictionary.items() + ] + + +def _process_filters(filter_dictionary): + """ + Build list for filtering. + """ + for field, value in filter_dictionary.items(): + if value: + yield _get_filter_field(field, value) + + +def _process_exclude_dictionary(exclude_dictionary): + """ + Build a list of term fields which will be excluded from result set. + """ + for exclude_property, exclude_values in exclude_dictionary.items(): + if not isinstance(exclude_values, list): + exclude_values = (exclude_values,) + yield from ( + {exclude_property: exclude_value} + for exclude_value in exclude_values + ) + + +def _process_aggregation_terms(aggregation_terms): + """ + Meilisearch does not support aggregations natively as Elasticsearch. 
+ """ + return aggregation_terms + + +class MeiliSearchEngine(SearchEngine): + """ + Meilisearch implementation of SearchEngine abstraction + """ + backend_name = "meilisearch" + + @staticmethod + def get_cache_item_name(index_name): + """ + Name-formatter for cache_item_name + """ + return f"meili_search_mappings_{index_name}" + + @classmethod + def get_mappings(cls, index_name): + """ + Fetch mapped-items structure from cache + """ + return cache.get(cls.get_cache_item_name(index_name), {}) + + @classmethod + def set_mappings(cls, index_name, mappings): + """ + Set new mapped-items structure into cache + """ + cache.set(cls.get_cache_item_name(index_name), mappings) + + @classmethod + def log_indexing_error(cls, indexing_errors): + """ + Logs indexing errors and raises a general Meilisearch Exception + """ + raise errors.MeilisearchApiError(', '.join(map(str, indexing_errors))) + + @property + def mappings(self): + """ + Get mapping of current index. + + Mappings format in Meilisearch is different from Elasticsearch. + """ + mapping = MeiliSearchEngine.get_mappings(self._prefixed_index_name) + if not mapping: + # Assuming Meilisearch mappings are pre-defined elsewhere + mapping = {} # Update this if there's a way to fetch mappings + if mapping: + MeiliSearchEngine.set_mappings(self._prefixed_index_name, mapping) + return mapping + + def _clear_mapping(self): + """ + Remove the cached mappings. + """ + MeiliSearchEngine.set_mappings(self._prefixed_index_name, {}) + + def __init__(self, index=None): + super().__init__(index) + MEILISEARCH_URL = getattr(settings, "MEILISEARCH_URL", 'http://127.0.0.1:7700') + MEILISEARCH_API_KEY = getattr(settings, "MEILISEARCH_API_KEY", "masterKey") + self._ms = Client(MEILISEARCH_URL, MEILISEARCH_API_KEY) + self._index = self._ms.index(self._prefixed_index_name) + # Ensure index exists + try: + self._index.fetch_info() + except errors.MeilisearchApiError: + self._ms.create_index(self._prefixed_index_name) + + @property + def _prefixed_index_name(self): + """ + Property that returns the defined index_name with the configured prefix. + """ + prefix = getattr(settings, "MEILISEARCH_INDEX_PREFIX", "") + return prefix + self.index_name + + def _check_mappings(self, body): + """ + Meilisearch doesn't require explicit mappings like Elasticsearch. + """ + pass + + def index(self, sources, **kwargs): + """ + Implements call to add documents to the Meilisearch index. + """ + try: + serialized_sources = list(map(lambda s: serialize_datetimes(s), sources)) + self._index.add_documents(serialized_sources, primary_key='id') + except errors.MeilisearchApiError as ex: + log.exception("Error during Meilisearch bulk operation.") + raise + + def remove(self, doc_ids, **kwargs): + """ + Implements call to remove the documents from the index. + """ + try: + for doc_id in doc_ids: + log.debug("Removing document with id %s", doc_id) + self._index.delete_document(doc_id) + except errors.MeilisearchApiError as ex: + log.exception("An error occurred while removing documents from the index.") + raise + + def search(self, + query_string=None, + field_dictionary=None, + filter_dictionary=None, + exclude_dictionary=None, + aggregation_terms=None, + exclude_ids=None, + use_field_match=False, + log_search_params=False, + **kwargs): + """ + Implements call to search the index for the desired content. 
+ """ + + log.debug("searching index with %s", query_string) + filters = [] + + if query_string: + query_string = query_string.translate( + query_string.maketrans("", "", RESERVED_CHARACTERS) + ) + + if field_dictionary: + filters.extend(filter_builder(_process_field_queries(field_dictionary))) + + if filter_dictionary: + filters.extend(filter_builder(_process_filters(filter_dictionary))) + + if exclude_dictionary: + exclude_filters = list(_process_exclude_dictionary(exclude_dictionary)) + filters.extend(filter_builder(exclude_filters)) + search_params = { + "filter": filters, + } + if log_search_params: + log.info(f"full meili search body {search_params}") + + try: + ms_response = self._index.search(query_string, search_params) + except errors.MeilisearchApiError as ex: + log.exception("error while searching index - %r", ex) + raise + + return _translate_hits(ms_response) From 4bccabb3f9451410ed6822d09e0ee97f1924c886 Mon Sep 17 00:00:00 2001 From: qasimgulzar Date: Thu, 11 Jul 2024 11:30:23 +0500 Subject: [PATCH 08/13] fix: add meilisearch backend --- search/meilisearch.py | 124 +++++++++++++++++++++++++++++++++--------- 1 file changed, 98 insertions(+), 26 deletions(-) diff --git a/search/meilisearch.py b/search/meilisearch.py index 6f3561ac..98205779 100644 --- a/search/meilisearch.py +++ b/search/meilisearch.py @@ -9,22 +9,62 @@ from django.conf import settings from django.core.cache import cache from meilisearch import Client, errors +from opaque_keys import InvalidKeyError from opaque_keys.edx.keys import UsageKey - from search.search_engine_base import SearchEngine from search.utils import ValueRange, _is_iterable # log appears to be standard name used for logger log = logging.getLogger(__name__) +prefix = getattr(settings, "MEILISEARCH_INDEX_PREFIX", "") RESERVED_CHARACTERS = "+=> str: - return hashlib.md5(f"{id}".encode('utf-8')).hexdigest() +INDEX_SETTINGS = { + f"{prefix}library_index": { + "filterableAttributes": [ + "library", + "id" + ], + "facets": [ + "library" + ] + }, + f"{prefix}courseware_content": { + "filterableAttributes": [ + "id", + "course", + "org" + ], + "facets": [ + "org" + ] + }, + f"{prefix}course_info": { + "filterableAttributes": [ + "id", + "org", + "course", + "start", + "enrollment_start" + ], + "facets": [ + "org", + ] + }, +} + + +def sanitize_id(_id: str | int) -> str: + return hashlib.md5(f"{_id}".encode('utf-8')).hexdigest() def sanitized_id(source: dict, create_usage_key=True) -> dict: + """ + Sanitize the Id key to avoid restricted objects + :param source: + :param create_usage_key: + :return: + """ if "id" not in source: return source @@ -33,14 +73,40 @@ def sanitized_id(source: dict, create_usage_key=True) -> dict: if create_usage_key: source["usage_key"] = source["id"] source["id"] = usage_key.block_id - except Exception as ex: + except (Exception, InvalidKeyError) as ex: # pylint: disable=broad-except source["id"] = sanitize_id(source["id"]) log.info(f"{str(ex)} - {source['id']} - {type(ex)}") return source +def build_filter(key, val): + """ + This function is making meilisearch compatible filters + :param key: name of attribute + :param val: value of attribute + :return: + """ + if isinstance(val, list): + if len(val) == 2 and type(val[0]) in (datetime, type(None)) and type(val[1]) in (datetime, type(None)): + f = "" + if val[0]: + f += f"{str(key).lower()} > \"{val[0].isoformat(timespec='milliseconds')}\"" + if val[1]: + f += f" and {str(key).lower()} >= \"{val[1].isoformat(timespec='milliseconds')}\"" + return f"({f})" + + elif 
isinstance(val, dict): + log.info('Dict Filter Not Handled') + return f"{str(key).lower()}='{val}'" + + def filter_builder(_filters: list[dict]) -> list[str]: + """ + Create meilisearch compatible filter queries + :param _filters: + :return: + """ if not _filters: return [] str_filters = [] @@ -49,8 +115,7 @@ def filter_builder(_filters: list[dict]) -> list[str]: if "id" in f: f.update(**sanitized_id(f.copy(), create_usage_key=False)) for key, val in f.items(): - str_filters.append(f"{key}=val") - + str_filters.append(build_filter(key, val)) return [ " OR ".join(str_filters) ] @@ -87,7 +152,7 @@ def translate_result(result): Any conversion from Meilisearch result syntax into our search engine syntax """ translated_result = copy.copy(result) - translated_result["data"] = translated_result.pop("data", {}) + translated_result["data"] = {**translated_result} translated_result["score"] = translated_result.pop("_score", 1.0) return translated_result @@ -98,8 +163,11 @@ def translate_result(result): "max_score": max(result["score"] for result in results) if results else None, "results": results, } - if "aggregations" in ms_response: - response["aggs"] = ms_response["aggregations"] + if "facetDistribution" in ms_response: + aggs = {} + for key, terms in ms_response["facetDistribution"].items(): + aggs[key] = {"terms": terms} + response["aggs"] = aggs return response @@ -186,13 +254,6 @@ def set_mappings(cls, index_name, mappings): """ cache.set(cls.get_cache_item_name(index_name), mappings) - @classmethod - def log_indexing_error(cls, indexing_errors): - """ - Logs indexing errors and raises a general Meilisearch Exception - """ - raise errors.MeilisearchApiError(', '.join(map(str, indexing_errors))) - @property def mappings(self): """ @@ -214,7 +275,7 @@ def _clear_mapping(self): """ MeiliSearchEngine.set_mappings(self._prefixed_index_name, {}) - def __init__(self, index=None): + def __init__(self, index=None, options=None): super().__init__(index) MEILISEARCH_URL = getattr(settings, "MEILISEARCH_URL", 'http://127.0.0.1:7700') MEILISEARCH_API_KEY = getattr(settings, "MEILISEARCH_API_KEY", "masterKey") @@ -224,28 +285,27 @@ def __init__(self, index=None): try: self._index.fetch_info() except errors.MeilisearchApiError: - self._ms.create_index(self._prefixed_index_name) + self._ms.create_index(self._prefixed_index_name, options=options) + self.update_settings() @property def _prefixed_index_name(self): """ Property that returns the defined index_name with the configured prefix. """ - prefix = getattr(settings, "MEILISEARCH_INDEX_PREFIX", "") return prefix + self.index_name def _check_mappings(self, body): """ Meilisearch doesn't require explicit mappings like Elasticsearch. """ - pass def index(self, sources, **kwargs): """ Implements call to add documents to the Meilisearch index. 
""" try: - serialized_sources = list(map(lambda s: serialize_datetimes(s), sources)) + serialized_sources = list(map(serialize_datetimes, sources)) self._index.add_documents(serialized_sources, primary_key='id') except errors.MeilisearchApiError as ex: log.exception("Error during Meilisearch bulk operation.") @@ -279,6 +339,7 @@ def search(self, log.debug("searching index with %s", query_string) filters = [] + filterables = INDEX_SETTINGS.get(self._prefixed_index_name, {}).get('filterableAttributes', []) if query_string: query_string = query_string.translate( @@ -289,13 +350,15 @@ def search(self, filters.extend(filter_builder(_process_field_queries(field_dictionary))) if filter_dictionary: + filter_dictionary = { + fl: filter_dictionary.get(fl, None) for fl in filterables if + filter_dictionary.get(fl, None) + } filters.extend(filter_builder(_process_filters(filter_dictionary))) - if exclude_dictionary: - exclude_filters = list(_process_exclude_dictionary(exclude_dictionary)) - filters.extend(filter_builder(exclude_filters)) search_params = { "filter": filters, + "facets": INDEX_SETTINGS.get(self._prefixed_index_name, {}).get('facets', []) } if log_search_params: log.info(f"full meili search body {search_params}") @@ -307,3 +370,12 @@ def search(self, raise return _translate_hits(ms_response) + + def update_settings(self): + """ + update index specific settings + :return: + """ + # Define filterable attributes + + return self._index.update_settings(INDEX_SETTINGS.get(self._prefixed_index_name, {})) From edee7aea0100c6418af0a12417c0a7b8e25d72ae Mon Sep 17 00:00:00 2001 From: qasimgulzar Date: Thu, 11 Jul 2024 13:30:55 +0500 Subject: [PATCH 09/13] fix: added searchable --- search/meilisearch.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/search/meilisearch.py b/search/meilisearch.py index 98205779..71265874 100644 --- a/search/meilisearch.py +++ b/search/meilisearch.py @@ -51,6 +51,12 @@ "org", ] }, + f"{prefix}course_team_index": { + "filterableAttributes": [ + "course_id" + ] + }, + "facets": [] } From e64a5bc1ccac99dc4e9bd083705928dad4375a2c Mon Sep 17 00:00:00 2001 From: qasimgulzar Date: Thu, 11 Jul 2024 17:36:48 +0500 Subject: [PATCH 10/13] fix: added searchable --- search/meilisearch.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/search/meilisearch.py b/search/meilisearch.py index 71265874..45af6593 100644 --- a/search/meilisearch.py +++ b/search/meilisearch.py @@ -53,7 +53,9 @@ }, f"{prefix}course_team_index": { "filterableAttributes": [ - "course_id" + "topic_id", + "organization_protected", + "course_id", ] }, "facets": [] From 3a240df12c0b888ef97433cc9ccb241ce384a2d5 Mon Sep 17 00:00:00 2001 From: qasimgulzar Date: Mon, 15 Jul 2024 11:34:11 +0500 Subject: [PATCH 11/13] fix: remove auto suggest --- edxsearch/settings.py | 4 +- search/api.py | 101 ------------------------------------- search/meilisearch.py | 46 ++--------------- search/tests/test_views.py | 15 ------ search/tests/utils.py | 9 ---- search/urls.py | 1 - search/views.py | 27 +--------- 7 files changed, 5 insertions(+), 198 deletions(-) diff --git a/edxsearch/settings.py b/edxsearch/settings.py index b73afbf7..1c2cfd7e 100644 --- a/edxsearch/settings.py +++ b/edxsearch/settings.py @@ -26,7 +26,7 @@ # This is just a container for running tests DEBUG = True -ALLOWED_HOSTS = ['*'] +ALLOWED_HOSTS = [] TEMPLATES = [ { @@ -105,8 +105,6 @@ ################### Using Meilisearch (Beta) ################### -# Enable Studio search features (powered by Meilisearch) (beta, off by default) 
-MEILISEARCH_ENABLED = False # Meilisearch URL that the python backend can use. Often points to another docker container or k8s service. MEILISEARCH_URL = os.getenv('MEILISEARCH_URL', 'http://localhost:7700') # URL that browsers (end users) can use to reach Meilisearch. Should be HTTPS in production. diff --git a/search/api.py b/search/api.py index c74ff4a0..43d2ed18 100644 --- a/search/api.py +++ b/search/api.py @@ -1,8 +1,6 @@ """ search business logic implementations """ from datetime import datetime - -import meilisearch from django.conf import settings from eventtracking import tracker as track @@ -160,102 +158,3 @@ def course_discovery_search(search_term=None, size=20, from_=0, field_dictionary ) return results - - -def _meilisearch_auto_suggest_search_api(term, course_id, limit=30): - """ - Perform an auto-suggest search using the Elasticsearch search engine. - - Args: - term (str): The search term. - course_id (str): The ID of the course to filter the search results. - limit (int, optional): The maximum number of results to return. Defaults to 30. - - Returns: - list: A list of dictionaries containing the search results with 'id', 'display_name', and 'usage_key'. - """ - # Create a client instance for MeiliSearch - client = meilisearch.Client(settings.MEILISEARCH_URL, settings.MEILISEARCH_API_KEY) - - # Define the index name - index_name = settings.MEILISEARCH_INDEX_PREFIX + "studio_content" - - # Perform the search with specified facets and filters - results = client.index(index_name).search(term, { - "facets": ["block_type", "tags"], - "filter": [f"context_key='{course_id}'"], - "limit": limit - }) - - # Process the search hits to extract relevant fields - results = list(map(lambda it: { - "id": it["id"], - "display_name": it["display_name"], - "usage_key": it["usage_key"], - }, results["hits"])) - - return results - - -def _elasticsearch_auto_suggest_search_api(term, course_id, limit=30): - """ - Perform an auto-suggest search using either Elasticsearch or MeiliSearch based on configuration. - - Args: - term (str): The search term. - course_id (str): The ID of the course to filter the search results. - limit (int, optional): The maximum number of results to return. Defaults to 30. - - Returns: - list: A list of dictionaries containing the search results with 'id', 'display_name' and 'usage_key'. - """ - - # Get the search engine instance - searcher = SearchEngine.get_search_engine( - getattr(settings, "COURSEWARE_CONTENT_INDEX_NAME", "courseware_content") - ) - - # Perform the search with the specified query string, size, and field dictionary - results = searcher.search( - query_string=term, - size=limit, - field_dictionary={"course": course_id} - ) - - # Process the search results to extract relevant fields - results = list(map(lambda it: { - "id": it["_id"], - "display_name": it["data"]["content"]["display_name"], - "usage_key": it["_id"], - }, results["results"])) - - return results - - -def auto_suggest_search_api(term, course_id, limit=30): - """ - Perform an auto-suggest search using the MeiliSearch search engine. - - Args: - term (str): The search term. - course_id (str): The ID of the course to filter the search results. - limit (int, optional): The maximum number of results to return. Defaults to 30. - - Returns: - list: A list of dictionaries containing the search results with 'id', 'display_name' and 'usage_key'. 
- """ - # Initialize response dictionary - response = {"results": []} - - # Check which search engine to use based on settings - if getattr(settings, "MEILISEARCH_ENABLED", False): - # Use MeiliSearch otherwise - results = _meilisearch_auto_suggest_search_api(term, course_id, limit) - else: - # Use Elasticsearch if MEILISEARCH_ENABLED is set to True - results = _elasticsearch_auto_suggest_search_api(term, course_id, limit) - - # Update response with the search results - response.update(results=results) - - return response diff --git a/search/meilisearch.py b/search/meilisearch.py index 45af6593..46a62706 100644 --- a/search/meilisearch.py +++ b/search/meilisearch.py @@ -1,5 +1,5 @@ """ -Meilisearch implementation for courseware search index +Meilisearch backend """ import copy import hashlib @@ -248,45 +248,10 @@ def get_cache_item_name(index_name): """ return f"meili_search_mappings_{index_name}" - @classmethod - def get_mappings(cls, index_name): - """ - Fetch mapped-items structure from cache - """ - return cache.get(cls.get_cache_item_name(index_name), {}) - - @classmethod - def set_mappings(cls, index_name, mappings): - """ - Set new mapped-items structure into cache - """ - cache.set(cls.get_cache_item_name(index_name), mappings) - - @property - def mappings(self): - """ - Get mapping of current index. - - Mappings format in Meilisearch is different from Elasticsearch. - """ - mapping = MeiliSearchEngine.get_mappings(self._prefixed_index_name) - if not mapping: - # Assuming Meilisearch mappings are pre-defined elsewhere - mapping = {} # Update this if there's a way to fetch mappings - if mapping: - MeiliSearchEngine.set_mappings(self._prefixed_index_name, mapping) - return mapping - - def _clear_mapping(self): - """ - Remove the cached mappings. - """ - MeiliSearchEngine.set_mappings(self._prefixed_index_name, {}) - def __init__(self, index=None, options=None): super().__init__(index) - MEILISEARCH_URL = getattr(settings, "MEILISEARCH_URL", 'http://127.0.0.1:7700') - MEILISEARCH_API_KEY = getattr(settings, "MEILISEARCH_API_KEY", "masterKey") + MEILISEARCH_URL = getattr(settings, "MEILISEARCH_URL") + MEILISEARCH_API_KEY = getattr(settings, "MEILISEARCH_API_KEY") self._ms = Client(MEILISEARCH_URL, MEILISEARCH_API_KEY) self._index = self._ms.index(self._prefixed_index_name) # Ensure index exists @@ -303,11 +268,6 @@ def _prefixed_index_name(self): """ return prefix + self.index_name - def _check_mappings(self, body): - """ - Meilisearch doesn't require explicit mappings like Elasticsearch. - """ - def index(self, sources, **kwargs): """ Implements call to add documents to the Meilisearch index. 
diff --git a/search/tests/test_views.py b/search/tests/test_views.py index 85acc290..65861dde 100644 --- a/search/tests/test_views.py +++ b/search/tests/test_views.py @@ -497,18 +497,3 @@ def test_valid_search(self, query, course_id, result_count): code, results = post_request({"search_string": query}, course_id) self.assertTrue(199 < code < 300) self.assertEqual(results["total"], result_count) - - -class TestAutoSuggestView(TestCase): - @override_settings(MEILISEARCH_ENABLED=True) - def test_valid_search_with_meilisearch(self): - endpoint = reverse('auto_suggest_search', args={'course-v1:Demo+DM101+2024'}) - print(endpoint) - status, results = get_request(f'{endpoint}?term=open') - self.assertTrue(status == 200) - - @override_settings(MEILISEARCH_ENABLED=False) - def test_valid_search_with_elastic(self): - endpoint = reverse('auto_suggest_search', args={'course-v1:Demo+DM101+2024'}) - status, results = get_request(f'{endpoint}?term=open') - self.assertTrue(status == 200) diff --git a/search/tests/utils.py b/search/tests/utils.py index ad3c53df..5d38d616 100644 --- a/search/tests/utils.py +++ b/search/tests/utils.py @@ -83,12 +83,3 @@ class ErroringElasticImpl(Elasticsearch): def search(self, **kwargs): # pylint: disable=arguments-differ """ this will definitely fail """ raise exceptions.ElasticsearchException("This search operation failed") - - -def get_request(url): - """ - Helper method to get the request and process the response - """ - response = Client().post(url) - - return getattr(response, "status_code", 500), json.loads(getattr(response, "content", None).decode('utf-8')) diff --git a/search/urls.py b/search/urls.py index e9600fbf..10439d54 100644 --- a/search/urls.py +++ b/search/urls.py @@ -11,6 +11,5 @@ urlpatterns = [ path('', views.do_search, name='do_search'), re_path(r'^{}$'.format(COURSE_ID_PATTERN), views.do_search, name='do_search'), - re_path(r'^{}/auto_suggest_search$'.format(COURSE_ID_PATTERN), views.auto_suggest_search, name='auto_suggest_search'), path('course_discovery/', views.course_discovery, name='course_discovery'), ] diff --git a/search/views.py b/search/views.py index 6d033f1d..cda536b2 100644 --- a/search/views.py +++ b/search/views.py @@ -3,14 +3,13 @@ import logging -import meilisearch from django.conf import settings from django.http import JsonResponse from django.utils.translation import gettext as _ from django.views.decorators.http import require_POST from eventtracking import tracker as track -from .api import perform_search, course_discovery_search, course_discovery_filter_fields, auto_suggest_search_api +from .api import perform_search, course_discovery_search, course_discovery_filter_fields from .initializer import SearchInitializer # log appears to be standard name used for logger @@ -220,27 +219,3 @@ def course_discovery(request): ) return JsonResponse(results, status=status_code) - - -def auto_suggest_search(request, course_id=None): - """ - Django view to perform an auto-suggest search and return the results as a JSON response. - - Args: - request (HttpRequest): The HTTP request object. - course_id (str, optional): The ID of the course to filter the search results. Defaults to None. - - Returns: - JsonResponse: A JSON response containing the search results. 
- """ - # Get the search term from the request parameters, defaulting to None if not provided - term = request.GET.get('term', None) - - # Get the limit for the number of results from the request parameters, defaulting to 30 if not provided - limit = request.GET.get('limit', 30) - - # Call the search API function to perform the search - search_results = auto_suggest_search_api(term, course_id=course_id, limit=limit) - - # Return the search results as a JSON response - return JsonResponse(search_results) From 2fe2800f8c98c12a1f58898c2467e0b098754939 Mon Sep 17 00:00:00 2001 From: qasimgulzar Date: Mon, 15 Jul 2024 11:35:06 +0500 Subject: [PATCH 12/13] fix: remove auto suggest --- search/tests/test_views.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/search/tests/test_views.py b/search/tests/test_views.py index 65861dde..7c3aff46 100644 --- a/search/tests/test_views.py +++ b/search/tests/test_views.py @@ -4,7 +4,7 @@ from unittest.mock import patch, call import ddt -from django.urls import Resolver404, resolve, reverse +from django.urls import Resolver404, resolve from django.test import TestCase from django.test.utils import override_settings from waffle.testutils import override_switch @@ -13,7 +13,7 @@ from search.search_engine_base import DEFAULT_ELASTIC_SEARCH_SWITCH from search.elastic import ElasticSearchEngine from search.tests.mock_search_engine import MockSearchEngine -from search.tests.utils import post_request, SearcherMixin, TEST_INDEX_NAME, get_request +from search.tests.utils import post_request, SearcherMixin, TEST_INDEX_NAME # Any class that inherits from TestCase will cause too-many-public-methods pylint error From 4450a37fdc44aabefd121ea55720884598584295 Mon Sep 17 00:00:00 2001 From: qasimgulzar Date: Mon, 15 Jul 2024 11:40:59 +0500 Subject: [PATCH 13/13] fix: move index settings to LMS settings --- search/meilisearch.py | 43 +------------------------------------------ 1 file changed, 1 insertion(+), 42 deletions(-) diff --git a/search/meilisearch.py b/search/meilisearch.py index 46a62706..7b4e5054 100644 --- a/search/meilisearch.py +++ b/search/meilisearch.py @@ -19,47 +19,7 @@ prefix = getattr(settings, "MEILISEARCH_INDEX_PREFIX", "") RESERVED_CHARACTERS = "+=> str: @@ -83,7 +43,6 @@ def sanitized_id(source: dict, create_usage_key=True) -> dict: source["id"] = usage_key.block_id except (Exception, InvalidKeyError) as ex: # pylint: disable=broad-except source["id"] = sanitize_id(source["id"]) - log.info(f"{str(ex)} - {source['id']} - {type(ex)}") return source