diff --git a/backend/apps/api/v1/admin.py b/backend/apps/api/v1/admin.py index 74940767..f35445c4 100644 --- a/backend/apps/api/v1/admin.py +++ b/backend/apps/api/v1/admin.py @@ -214,7 +214,7 @@ class DateTimeRangeInline(admin.StackedInline): extra = 0 show_change_link = True fields = [ - "unit", + "units", ] @@ -548,7 +548,7 @@ class TableAdmin(OrderedInlineModelAdminMixin, TabbedTranslationAdmin): "partitions", "created_at", "updated_at", - "coverage_datetime_unit", + "coverage_datetime_units", ] search_fields = [ "name", @@ -736,6 +736,15 @@ def queryset(self, request, queryset): return queryset.filter(key__isnull=False) +class UnitsInline(admin.TabularInline): + model = DateTimeRange.units.through + extra = 0 + fields = ["column"] + raw_id_fields = ["column"] + verbose_name = "Unit" + verbose_name_plural = "Units" + + class DateTimeRangeAdmin(admin.ModelAdmin): list_display = ["__str__", "coverage"] readonly_fields = ["id"] @@ -753,8 +762,12 @@ class DateTimeRangeAdmin(admin.ModelAdmin): "end_hour", "end_minute", "end_second", + "units", ] + inlines = [UnitsInline] + raw_id_fields = ["coverage"] + class CoverageAdmin(admin.ModelAdmin): readonly_fields = ["id"] diff --git a/backend/apps/api/v1/migrations/0036_datetimerange_unit.py b/backend/apps/api/v1/migrations/0036_datetimerange_unit.py deleted file mode 100644 index 7243660e..00000000 --- a/backend/apps/api/v1/migrations/0036_datetimerange_unit.py +++ /dev/null @@ -1,25 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by Django 4.2.13 on 2024-07-03 00:43 - -import django.db.models.deletion -from django.db import migrations, models - - -class Migration(migrations.Migration): - dependencies = [ - ("v1", "0035_alter_poll_entity_alter_poll_frequency_and_more"), - ] - - operations = [ - migrations.AddField( - model_name="datetimerange", - name="unit", - field=models.ForeignKey( - blank=True, - null=True, - on_delete=django.db.models.deletion.SET_NULL, - related_name="datetime_ranges", - to="v1.column", - ), - ), - ] diff --git a/backend/apps/api/v1/migrations/0036_datetimerange_units.py b/backend/apps/api/v1/migrations/0036_datetimerange_units.py new file mode 100644 index 00000000..b70314a9 --- /dev/null +++ b/backend/apps/api/v1/migrations/0036_datetimerange_units.py @@ -0,0 +1,18 @@ +# Generated by Django 4.2.13 on 2024-09-23 01:46 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('v1', '0035_alter_poll_entity_alter_poll_frequency_and_more'), + ] + + operations = [ + migrations.AddField( + model_name='datetimerange', + name='units', + field=models.ManyToManyField(blank=True, related_name='datetime_ranges', to='v1.column'), + ), + ] diff --git a/backend/apps/api/v1/models.py b/backend/apps/api/v1/models.py index bfba80f4..dc6bdcc8 100644 --- a/backend/apps/api/v1/models.py +++ b/backend/apps/api/v1/models.py @@ -979,12 +979,16 @@ def data_cleaned_by_info(self) -> dict: } @property - def coverage_datetime_unit(self) -> str: + def coverage_datetime_units(self) -> str: units = [] for coverage in self.coverages.all(): for datetime_range in coverage.datetime_ranges.all(): - units.append(datetime_range.unit.name) - most_common_unit = max(set(units), key=units.count) + units.extend([unit.name for unit in datetime_range.units.all()]) + + if not units: + return None + + most_common_unit = list(set(units)) return most_common_unit def get_similarity_of_area(self, other: "Table"): @@ -1066,7 +1070,6 @@ def clean(self): """ Clean method for Table model - Coverages must not overlap - - Temporal coverage units must refer to the same column. """ errors = {} try: @@ -1103,16 +1106,6 @@ def clean(self): except ValueError: pass - def all_same(items): - return all(x == items[0] for x in items) - units = [] - for coverage in self.coverages.all(): - for datetime_range in coverage.datetime_ranges.all(): - if datetime_range.unit: - units.append(datetime_range.unit.id) - if not all_same(units): - errors['datetime_range_units'] = f"Datetime range units do not refer all to the same column." - if errors: raise ValidationError(errors) return super().clean() @@ -1688,9 +1681,10 @@ class DateTimeRange(BaseModel): end_minute = models.IntegerField(blank=True, null=True) end_second = models.IntegerField(blank=True, null=True) interval = models.IntegerField(blank=True, null=True) - unit = models.ForeignKey( - "Column", on_delete=models.SET_NULL, related_name="datetime_ranges", - null=True, blank=True + units = models.ManyToManyField( + "Column", + related_name="datetime_ranges", + blank=True, ) is_closed = models.BooleanField("Is Closed", default=False) @@ -1770,13 +1764,22 @@ def get_similarity_of_datetime(self, other: "DateTimeRange"): return 0 def clean(self): + errors = {} try: if self.since and self.until and self.since > self.until: - raise ValidationError("Start date must be less than or equal to end date") + errors['date_range'] = "Start date must be less than or equal to end date" if self.since and self.until and not self.interval: - raise ValidationError("Interval must exist in ranges with start and end dates") - except ValueError as exp: - raise ValidationError(str(exp)) + errors['interval'] = "Interval must exist in ranges with start and end dates" + + # Add validation for units + #for unit in self.units.all(): + # if unit.bigquery_type.name not in ['DATE', 'DATETIME', 'TIME', 'TIMESTAMP']: + # errors['units'] = f"Column '{unit.name}' is not a valid datetime unit" + except Exception as e: + errors['general'] = f"An error occurred: {str(e)}" + + if errors: + raise ValidationError(errors) return super().clean() diff --git a/backend/apps/api/v1/schemas.py b/backend/apps/api/v1/schemas.py index b118028e..64ee7cac 100644 --- a/backend/apps/api/v1/schemas.py +++ b/backend/apps/api/v1/schemas.py @@ -6,26 +6,33 @@ class Organization(BaseModel): id: str - name: str slug: str + name_pt: str + name_en: str + name_es: str picture: Optional[str] - website: Optional[str] - description: Optional[str] class Theme(BaseModel): - name: str slug: str + name_pt: str + name_en: str + name_es: str class Tag(BaseModel): - name: str slug: str + name_pt: str + name_en: str + name_es: str class Entity(BaseModel): - name: str keyword: str + slug: str + name_pt: str + name_en: str + name_es: str class RawDataSource(BaseModel): diff --git a/backend/apps/api/v1/search_engines.py b/backend/apps/api/v1/search_engines.py index aa17a408..3140bdf7 100644 --- a/backend/apps/api/v1/search_engines.py +++ b/backend/apps/api/v1/search_engines.py @@ -23,14 +23,19 @@ def __init__(self, *args, **kwargs): "tokenizer": "edgengram", "filter": ["asciifolding", "lowercase"], }, + "snowball_pt": { + "type": "snowball", + "language": "Portuguese", + "filter": ["asciifolding"], + }, "snowball_en": { "type": "snowball", "language": "English", "filter": ["asciifolding"], }, - "snowball_pt": { + "snowball_es": { "type": "snowball", - "language": "Portuguese", + "language": "Spanish", "filter": ["asciifolding"], }, } @@ -63,6 +68,7 @@ def build_schema(self, fields): "edgengram": {"type": "text", "analyzer": "edgengram"}, "snowball_pt": {"type": "text", "analyzer": "snowball_pt"}, "snowball_en": {"type": "text", "analyzer": "snowball_en"}, + "snowball_es": {"type": "text", "analyzer": "snowball_es"}, } mapping.update({field_class.index_fieldname: field_mapping}) return (content_field_name, mapping) diff --git a/backend/apps/api/v1/search_indexes.py b/backend/apps/api/v1/search_indexes.py index 173d14e1..3488d837 100644 --- a/backend/apps/api/v1/search_indexes.py +++ b/backend/apps/api/v1/search_indexes.py @@ -17,13 +17,34 @@ class DatasetIndex(indexes.SearchIndex, indexes.Indexable): model_attr="slug", indexed=False, ) - dataset_name = indexes.CharField( - model_attr="name", + dataset_name_pt = indexes.CharField( + model_attr="name_pt", + null=True, indexed=False, ) - dataset_description = indexes.CharField( - model_attr="description", - default="", + dataset_name_en = indexes.CharField( + model_attr="name_en", + null=True, + indexed=False, + ) + dataset_name_es = indexes.CharField( + model_attr="name_es", + null=True, + indexed=False, + ) + dataset_description_pt = indexes.CharField( + model_attr="description_pt", + null=True, + indexed=False, + ) + dataset_description_en = indexes.CharField( + model_attr="description_en", + null=True, + indexed=False, + ) + dataset_description_es = indexes.CharField( + model_attr="description_es", + null=True, indexed=False, ) @@ -35,13 +56,34 @@ class DatasetIndex(indexes.SearchIndex, indexes.Indexable): model_attr="tables__slug", indexed=False, ) - table_name = indexes.MultiValueField( - model_attr="tables__name", + table_name_pt = indexes.MultiValueField( + model_attr="tables__name_pt", + null=True, indexed=False, ) - table_description = indexes.MultiValueField( - model_attr="tables__description", - default="", + table_name_en = indexes.MultiValueField( + model_attr="tables__name_en", + null=True, + indexed=False, + ) + table_name_es = indexes.MultiValueField( + model_attr="tables__name_es", + null=True, + indexed=False, + ) + table_description_pt = indexes.MultiValueField( + model_attr="tables__description_pt", + null=True, + indexed=False, + ) + table_description_en = indexes.MultiValueField( + model_attr="tables__description_en", + null=True, + indexed=False, + ) + table_description_es = indexes.MultiValueField( + model_attr="tables__description_es", + null=True, indexed=False, ) @@ -55,8 +97,22 @@ class DatasetIndex(indexes.SearchIndex, indexes.Indexable): faceted=True, indexed=False, ) - organization_name = indexes.MultiValueField( - model_attr="organization__name", + organization_name_pt = indexes.MultiValueField( + model_attr="organization__name_pt", + null=True, + faceted=True, + indexed=False, + ) + organization_name_en = indexes.MultiValueField( + model_attr="organization__name_en", + null=True, + faceted=True, + indexed=False, + ) + organization_name_es = indexes.MultiValueField( + model_attr="organization__name_es", + null=True, + faceted=True, indexed=False, ) organization_picture = indexes.MultiValueField( @@ -69,9 +125,19 @@ class DatasetIndex(indexes.SearchIndex, indexes.Indexable): default="", indexed=False, ) - organization_description = indexes.MultiValueField( - model_attr="organization__description", - default="", + organization_description_pt = indexes.MultiValueField( + model_attr="organization__description_pt", + null=True, + indexed=False, + ) + organization_description_en = indexes.MultiValueField( + model_attr="organization__description_en", + null=True, + indexed=False, + ) + organization_description_es = indexes.MultiValueField( + model_attr="organization__description_es", + null=True, indexed=False, ) @@ -81,37 +147,72 @@ class DatasetIndex(indexes.SearchIndex, indexes.Indexable): faceted=True, indexed=False, ) - tag_name = indexes.MultiValueField( - model_attr="tags__name", - default="", + tag_name_pt = indexes.MultiValueField( + model_attr="tags__name_pt", + null=True, + faceted=True, + indexed=False, + ) + tag_name_en = indexes.MultiValueField( + model_attr="tags__name_en", + null=True, + faceted=True, + indexed=False, + ) + tag_name_es = indexes.MultiValueField( + model_attr="tags__name_es", + null=True, + faceted=True, indexed=False, ) - theme_slug = indexes.MultiValueField( model_attr="themes__slug", default="", faceted=True, indexed=False, ) - theme_name = indexes.MultiValueField( - model_attr="themes__name", - default="", + theme_name_pt = indexes.MultiValueField( + model_attr="themes__name_pt", + null=True, + faceted=True, + indexed=False, + ) + theme_name_en = indexes.MultiValueField( + model_attr="themes__name_en", + null=True, + faceted=True, + indexed=False, + ) + theme_name_es = indexes.MultiValueField( + model_attr="themes__name_es", + null=True, + faceted=True, indexed=False, ) - entity_slug = indexes.MultiValueField( model_attr="tables__observation_levels__entity__slug", default="", faceted=True, indexed=False, ) - entity_name = indexes.MultiValueField( - model_attr="tables__observation_levels__entity__name", - default="", + entity_name_pt = indexes.MultiValueField( + model_attr="tables__observation_levels__entity__name_pt", + null=True, + faceted=True, + indexed=False, + ) + entity_name_en = indexes.MultiValueField( + model_attr="tables__observation_levels__entity__name_en", + null=True, + faceted=True, + indexed=False, + ) + entity_name_es = indexes.MultiValueField( + model_attr="tables__observation_levels__entity__name_es", + null=True, faceted=True, indexed=False, ) - temporal_coverage = indexes.MultiValueField( default="", model_attr="coverage", diff --git a/backend/apps/api/v1/search_views.py b/backend/apps/api/v1/search_views.py index c418d8e8..b52a0a8e 100644 --- a/backend/apps/api/v1/search_views.py +++ b/backend/apps/api/v1/search_views.py @@ -10,6 +10,8 @@ from backend.apps.api.v1.models import Entity, Organization, Tag, Theme +import logging +logger = logging.getLogger(__name__) class DatasetSearchForm(FacetedSearchForm): load_all: bool = True @@ -19,6 +21,7 @@ def __init__(self, *args, **kwargs): self.theme = kwargs.pop("theme", None) or [] self.organization = kwargs.pop("organization", None) or [] self.observation_level = kwargs.pop("observation_level", None) or [] + self.locale = kwargs.pop("locale", "pt") super().__init__(*args, **kwargs) def search(self): @@ -30,9 +33,8 @@ def search(self): self.searchqueryset .auto_query(q) .filter_and(**{"text.edgengram": q}) - .filter_or(**{"text.snowball_pt": q}) - .filter_or(**{"text.snowball_en": q}) - ) # fmt: skip + .filter_or(**{f"text.snowball_{self.locale}": q}) + ) else: sqs = self.no_query_found() @@ -82,6 +84,10 @@ def page_size(self): except (TypeError, ValueError): return 10 + @property + def locale(self): + return self.request.GET.get('locale', 'pt') + def get_form_kwargs(self): kwargs = super().get_form_kwargs() kwargs.update({"contains": self.request.GET.getlist("contains")}) @@ -89,6 +95,7 @@ def get_form_kwargs(self): kwargs.update({"theme": self.request.GET.getlist("theme")}) kwargs.update({"organization": self.request.GET.getlist("organization")}) kwargs.update({"observation_level": self.request.GET.getlist("observation_level")}) + kwargs.update({"locale": self.locale}) return kwargs def get(self, request, *args, **kwargs): @@ -100,6 +107,7 @@ def get(self, request, *args, **kwargs): "count": sqs.count(), "results": self.get_results(sqs), "aggregations": self.get_facets(sqs), + "locale": self.locale, } ) @@ -127,11 +135,16 @@ def get_facets(self, sqs: SearchQuerySet, facet_size=22): ("entity_slug", "observation_levels", Entity), ("organization_slug", "organizations", Organization), ]: - to_name = model.objects.values("slug", "name") - to_name = {e["slug"]: e["name"] for e in to_name.all()} + to_name = model.objects.values("slug", f"name_{self.locale}", "name") + to_name = {e["slug"]: { + "name": e[f"name_{self.locale}"] or e["name"] or e["slug"], + "fallback": e[f"name_{self.locale}"] is None + } for e in to_name.all()} facets[key_front] = facets.pop(key_back, None) for field in facets[key_front] or []: - field["name"] = to_name.get(field["key"], "") + translated_name = to_name.get(field["key"], {}) + field["name"] = translated_name.get("name", field["key"]) + field["fallback"] = translated_name.get("fallback", True) return facets def get_results(self, sqs: SearchQuerySet): @@ -142,55 +155,52 @@ def key(r): since = (self.page - 1) * self.page_size results = sorted(sqs.all(), key=key, reverse=True) - return [as_search_result(r) for r in results[since:until]] + return [as_search_result(r, self.locale) for r in results[since:until]] -def as_search_result(result: SearchResult): - tag = [] - for slug, name in zip(result.tag_slug or [], result.tag_name or []): - tag.append( +def as_search_result(result: SearchResult, locale='pt'): + + tags = [] + for slug, name in zip(result.tag_slug or [], getattr(result, f"tag_name_{locale}") or []): + tags.append( { "slug": slug, "name": name, } ) - theme = [] - for slug, name in zip(result.theme_slug or [], result.theme_name or []): - theme.append( + themes = [] + for slug, name in zip(result.theme_slug or [], getattr(result, f"theme_name_{locale}") or []): + themes.append( { "slug": slug, "name": name, } ) - entity = [] - for slug, name in zip(result.entity_slug or [], result.entity_name or []): - entity.append( + entities = [] + for slug, name in zip(result.entity_slug or [], getattr(result, f"entity_name_{locale}") or []): + entities.append( { "slug": slug, "name": name, } ) - organization = [] - for pk, slug, name, picture, website, description in zip( + organizations = [] + for pk, slug, name, picture in zip( result.organization_id or [], result.organization_slug or [], - result.organization_name or [], + [(getattr(result, f"organization_name_{locale}") or []) or result.organization_name or result.organization_slug], result.organization_picture or [], - result.organization_website or [], - result.organization_description or [], ): picture = storage.url(picture) - organization.append( + organizations.append( { "id": pk, "slug": slug, "name": name, "picture": picture, - "website": website, - "description": description, } ) @@ -198,12 +208,12 @@ def as_search_result(result: SearchResult): "updated_at": result.updated_at, "id": result.dataset_id, "slug": result.dataset_slug, - "name": result.dataset_name, - "description": result.dataset_description, - "tags": tag, - "themes": theme, - "entities": entity, - "organizations": organization, + "name": getattr(result, f"dataset_name_{locale}") or result.dataset_name or result.dataset_slug, + "description": getattr(result, f"dataset_description_{locale}") or result.dataset_description, + "tags": tags, + "themes": themes, + "entities": entities, + "organizations": organizations, "temporal_coverages": result.temporal_coverage, "contains_open_data": result.contains_open_data, "contains_closed_data": result.contains_closed_data,