From 6a240d91ecd5294a81c8c92b572cd384aeba087b Mon Sep 17 00:00:00 2001
From: Vinicius
Date: Sat, 13 Apr 2024 11:16:26 -0300
Subject: [PATCH] fix: store data in index to increase speed

---
Search results are assembled from fields stored in the search index
(as_search_result in search_views.py) instead of loading every Dataset
and its relations from the database through per-model properties.

 bd_api/apps/api/v1/models.py         |  59 ---------
 bd_api/apps/api/v1/search_indexes.py | 188 +++++++++++++++++++++++----
 bd_api/apps/api/v1/search_views.py   |  92 ++++++++++++-
 3 files changed, 253 insertions(+), 86 deletions(-)

diff --git a/bd_api/apps/api/v1/models.py b/bd_api/apps/api/v1/models.py
index 97d945da..63f27784 100644
--- a/bd_api/apps/api/v1/models.py
+++ b/bd_api/apps/api/v1/models.py
@@ -353,13 +353,6 @@ class Meta:
         verbose_name_plural = "Tags"
         ordering = ["slug"]
 
-    @property
-    def as_search_result(self):
-        return {
-            "name": self.name,
-            "slug": self.slug,
-        }
-
 
 class Theme(BaseModel):
     """Theme model"""
@@ -383,13 +376,6 @@ class Meta:
         verbose_name_plural = "Themes"
         ordering = ["slug"]
 
-    @property
-    def as_search_result(self):
-        return {
-            "name": self.name,
-            "slug": self.slug,
-        }
-
 
 class Organization(BaseModel):
     """Organization model"""
@@ -440,17 +426,6 @@ def has_picture(self):
             return True
         return False
 
-    @property
-    def as_search_result(self):
-        return {
-            "id": self.pk,
-            "name": self.name,
-            "slug": self.slug,
-            "description": self.description,
-            "picture": getattr(self.picture, "name", None),
-            "website": self.website,
-        }
-
 
 class Status(BaseModel):
     """Status model"""
@@ -681,33 +656,6 @@ def raw_data_source_last_updated_at(self):
         ]  # fmt: skip
         return max(updates) if updates else None
 
-    @property
-    def as_search_result(self):
-        return {
-            "updated_at": self.updated_at,
-            "id": self.id,
-            "slug": self.slug,
-            "name": self.name,
-            "temporal_coverage": [self.coverage],
-            "organization": [self.organization.as_search_result],
-            "tags": [t.as_search_result for t in self.tags.all()],
-            "themes": [t.as_search_result for t in self.themes.all()],
-            "entities": self.entities,
-            "contains_open_data": self.contains_open_data,
-            "contains_closed_data": self.contains_closed_data,
-            "contains_tables": self.contains_tables,
-            "contains_raw_data_sources": self.contains_raw_data_sources,
-            "contains_information_requests": self.contains_information_requests,
-            "n_tables": self.n_tables,
-            "n_raw_data_sources": self.n_raw_data_sources,
-            "n_information_requests": self.n_information_requests,
-            "first_table_id": self.first_table_id,
-            "first_open_table_id": self.first_open_table_id,
-            "first_closed_table_id": self.first_closed_table_id,
-            "first_raw_data_source_id": self.first_raw_data_source_id,
-            "first_information_request_id": self.first_information_request_id,
-        }
-
 
 class Update(BaseModel):
     id = models.UUIDField(primary_key=True, default=uuid4)
@@ -1488,13 +1436,6 @@ class Meta:
         verbose_name_plural = "Entities"
         ordering = ["slug"]
 
-    @property
-    def as_search_result(self):
-        return {
-            "name": self.name,
-            "slug": self.slug,
-        }
-
 
 class ObservationLevel(BaseModel):
     """Model definition for ObservationLevel."""
diff --git a/bd_api/apps/api/v1/search_indexes.py b/bd_api/apps/api/v1/search_indexes.py
index d59e1c38..9c6536fe 100644
--- a/bd_api/apps/api/v1/search_indexes.py
+++ b/bd_api/apps/api/v1/search_indexes.py
@@ -7,40 +7,184 @@
 class DatasetIndex(indexes.SearchIndex, indexes.Indexable):
     text = indexes.CharField(document=True, use_template=True)
 
-    dataset = indexes.CharField(model_attr="slug", null=True, faceted=True)
-    dataset_name = indexes.CharField(model_attr="name", null=True)
-    dataset_description = indexes.CharField(model_attr="description", null=True)
+    updated_at = indexes.DateTimeField(model_attr="updated_at")
 
-    table = indexes.MultiValueField(model_attr="tables__slug", null=True, faceted=True)
-    table_names = indexes.MultiValueField(model_attr="tables__name", null=True)
-    table_descriptions = indexes.MultiValueField(model_attr="tables__description", null=True)
+    dataset_slug = indexes.CharField(
+        model_attr="slug",
+        indexed=False,
+        null=True,
+    )
+    dataset_name = indexes.CharField(
+        model_attr="name",
+        indexed=False,
+        null=True,
+    )
+    dataset_description = indexes.CharField(
+        model_attr="description",
+        indexed=False,
+        null=True,
+    )
+
+    table_id = indexes.MultiValueField(
+        model_attr="tables__pk",
+        indexed=False,
+        null=True,
+    )
+    table_slug = indexes.MultiValueField(
+        model_attr="tables__slug",
+        indexed=False,
+        null=True,
+    )
+    table_name = indexes.MultiValueField(
+        model_attr="tables__name",
+        indexed=False,
+        null=True,
+    )
+    table_description = indexes.MultiValueField(
+        model_attr="tables__description", indexed=False, null=True
+    )
+
+    organization_pk = indexes.MultiValueField(
+        model_attr="organization__pk",
+        faceted=True,
+        indexed=False,
+        null=True,
+    )
+    organization_slug = indexes.MultiValueField(
+        model_attr="organization__slug",
+        faceted=True,
+        indexed=False,
+        null=True,
+    )
+    organization_name = indexes.MultiValueField(
+        model_attr="organization__name",
+        indexed=False,
+        null=True,
+    )
+    organization_picture = indexes.MultiValueField(
+        model_attr="organization__picture",
+        indexed=False,
+        null=True,
+    )
+    organization_website = indexes.MultiValueField(
+        model_attr="organization__website",
+        indexed=False,
+        null=True,
+    )
+    organization_description = indexes.MultiValueField(
+        model_attr="organization__description",
+        indexed=False,
+        null=True,
+    )
+
+    tag_slug = indexes.MultiValueField(
+        model_attr="tags__slug",
+        faceted=True,
+        indexed=False,
+        null=True,
+    )
+    tag_name = indexes.MultiValueField(
+        model_attr="tags__name",
+        indexed=False,
+        null=True,
+    )
 
-    organization = indexes.CharField(model_attr="organization__slug", null=True, faceted=True)
-    organization_names = indexes.CharField(model_attr="organization__name", null=True)
-    organization_descriptions = indexes.CharField(model_attr="organization__description", null=True)
+    theme_slug = indexes.MultiValueField(
+        model_attr="themes__slug",
+        faceted=True,
+        indexed=False,
+        null=True,
+    )
+    theme_name = indexes.MultiValueField(
+        model_attr="themes__name",
+        indexed=False,
+        null=True,
+    )
 
-    tag = indexes.MultiValueField(model_attr="tags__slug", null=True, faceted=True)
-    tag_names = indexes.MultiValueField(model_attr="tags__name", null=True)
+    entity_slug = indexes.MultiValueField(
+        model_attr="tables__observation_levels__entity__slug",
+        faceted=True,
+        indexed=False,
+        null=True,
+    )
+    entity_name = indexes.MultiValueField(
+        model_attr="tables__observation_levels__entity__name",
+        faceted=True,
+        indexed=False,
+        null=True,
+    )
 
-    theme = indexes.MultiValueField(model_attr="themes__slug", null=True, faceted=True)
-    theme_names = indexes.MultiValueField(model_attr="themes__name", null=True)
+    temporal_coverage = indexes.MultiValueField(
+        model_attr="coverage",
+        indexed=False,
+    )
 
-    entity = indexes.MultiValueField(
-        model_attr="tables__observation_levels__entity__slug", null=True, faceted=True
+    contains_open_data = indexes.BooleanField(
+        model_attr="contains_open_data",
+        indexed=False,
+    )
+    contains_closed_data = indexes.BooleanField(
+        model_attr="contains_closed_data",
+        indexed=False,
+    )
+
+    contains_tables = indexes.BooleanField(
+        model_attr="contains_tables",
+        indexed=False,
+    )
+    contains_raw_data_sources = indexes.BooleanField(
+        model_attr="contains_raw_data_sources",
+        indexed=False,
     )
-    entity_names = indexes.MultiValueField(
-        model_attr="tables__observation_levels__entity__name", null=True, faceted=True
+    contains_information_requests = indexes.BooleanField(
+        model_attr="contains_information_requests",
+        indexed=False,
     )
 
-    contains_open_data = indexes.BooleanField(model_attr="contains_open_data")
-    contains_closed_data = indexes.BooleanField(model_attr="contains_closed_data")
+    n_tables = indexes.IntegerField(
+        model_attr="n_tables",
+        indexed=False,
+    )
+    n_raw_data_sources = indexes.IntegerField(
+        model_attr="n_raw_data_sources",
+        indexed=False,
+    )
+    n_information_requests = indexes.IntegerField(
+        model_attr="n_information_requests",
+        indexed=False,
+    )
 
-    contains_tables = indexes.BooleanField(model_attr="contains_tables")
-    contains_raw_data_sources = indexes.BooleanField(model_attr="contains_raw_data_sources")
-    contains_information_requests = indexes.BooleanField(model_attr="contains_information_requests")
+    first_table_id = indexes.CharField(
+        model_attr="first_table_id",
+        indexed=False,
+        null=True,
+    )
+    first_open_table_id = indexes.CharField(
+        model_attr="first_open_table_id",
+        indexed=False,
+        null=True,
+    )
+    first_closed_table_id = indexes.CharField(
+        model_attr="first_closed_table_id",
+        indexed=False,
+        null=True,
+    )
+    first_raw_data_source_id = indexes.CharField(
+        model_attr="first_raw_data_source_id",
+        indexed=False,
+        null=True,
+    )
+    first_information_request_id = indexes.CharField(
+        model_attr="first_information_request_id",
+        indexed=False,
+        null=True,
+    )
 
     def get_model(self):
         return Dataset
 
     def index_queryset(self, using=None):
         return self.get_model().objects.exclude(status__slug="under_review").all()
+
+    def prepare_organization_picture(self, obj):
+        return getattr(obj.organization.picture, "name", None)
diff --git a/bd_api/apps/api/v1/search_views.py b/bd_api/apps/api/v1/search_views.py
index ce37f900..89dd9486 100644
--- a/bd_api/apps/api/v1/search_views.py
+++ b/bd_api/apps/api/v1/search_views.py
@@ -2,6 +2,7 @@
 from django.http import JsonResponse
 from haystack.forms import FacetedSearchForm
 from haystack.generic_views import FacetedSearchView
+from haystack.models import SearchResult
 from haystack.query import SearchQuerySet
 
 from bd_api.apps.api.v1.models import Entity, Organization, Tag, Theme
@@ -51,10 +52,10 @@ def get_facets(self, sqs: SearchQuerySet):
                     }
                 )
         for key, model in [
-            ("tag", Tag),
-            ("theme", Theme),
-            ("entity", Entity),
-            ("organization", Organization),
+            ("tag_slug", Tag),
+            ("theme_slug", Theme),
+            ("entity_slug", Entity),
+            ("organization_slug", Organization),
         ]:
             m = model.objects.values("slug", "name")
             m = {mi["slug"]: mi["name"] for mi in m.all()}
@@ -67,4 +68,85 @@ def key(r):
             return (r.contains_tables, r.score)
 
         results = sorted(sqs.all(), key=key, reverse=True)
-        return [r.object.as_search_result for r in results]
+        return [as_search_result(r) for r in results]
+
+
+def as_search_result(result: SearchResult):
+    """Build the API payload for one search hit from fields stored in the index.
+
+    Multi-value fields are declared ``null=True`` in the index and may be
+    ``None`` here, so each one falls back to a list before being zipped.
+    """
+    organization = []
+    for pk, slug, name, picture, website, description in zip(
+        result.organization_pk or [],
+        result.organization_slug or [],
+        result.organization_name or [],
+        # Optional attributes: pad with None so the organization is not dropped.
+        result.organization_picture or [None],
+        result.organization_website or [None],
+        result.organization_description or [None],
+    ):
+        organization.append(
+            {
+                "pk": pk,
+                "slug": slug,
+                "name": name,
+                "picture": picture,
+                "website": website,
+                "description": description,
+            }
+        )
+
+    tags = []
+    for slug, name in zip(result.tag_slug or [], result.tag_name or []):
+        tags.append(
+            {
+                "slug": slug,
+                "name": name,
+            }
+        )
+
+    themes = []
+    for slug, name in zip(result.theme_slug or [], result.theme_name or []):
+        themes.append(
+            {
+                "slug": slug,
+                "name": name,
+            }
+        )
+
+    entities = []
+    for slug, name in zip(result.entity_slug or [], result.entity_name or []):
+        entities.append(
+            {
+                "slug": slug,
+                "name": name,
+            }
+        )
+
+    return {
+        "updated_at": result.updated_at,
+        # The index defines no "dataset_pk" field; use the hit's own primary key.
+        "id": result.pk,
+        "slug": result.dataset_slug,
+        "name": result.dataset_name,
+        "organization": organization,
+        "tags": tags,
+        "themes": themes,
+        "entities": entities,
+        "temporal_coverage": result.temporal_coverage,
+        "contains_open_data": result.contains_open_data,
+        "contains_closed_data": result.contains_closed_data,
+        "contains_tables": result.contains_tables,
+        "contains_raw_data_sources": result.contains_raw_data_sources,
+        "contains_information_requests": result.contains_information_requests,
+        "n_tables": result.n_tables,
+        "n_raw_data_sources": result.n_raw_data_sources,
+        "n_information_requests": result.n_information_requests,
+        "first_table_id": result.first_table_id,
+        "first_open_table_id": result.first_open_table_id,
+        "first_closed_table_id": result.first_closed_table_id,
+        "first_raw_data_source_id": result.first_raw_data_source_id,
+        "first_information_request_id": result.first_information_request_id,
+    }