diff --git a/bd_api/apps/account/urls.py b/bd_api/apps/account/urls.py index 9d62942d..83bba52c 100644 --- a/bd_api/apps/account/urls.py +++ b/bd_api/apps/account/urls.py @@ -10,22 +10,22 @@ urlpatterns = [ path( - "account_activate//", + "account/account_activate//", AccountActivateView.as_view(), name="activate", ), path( - "account_activate_confirm///", + "account/account_activate_confirm///", AccountActivateConfirmView.as_view(), name="activate", ), path( - "password_reset//", + "account/password_reset//", PasswordResetView.as_view(), name="password_reset", ), path( - "password_reset_confirm///", + "account/password_reset_confirm///", PasswordResetConfirmView.as_view(), name="password_reset_confirm", ), diff --git a/bd_api/apps/api/v1/search_engines.py b/bd_api/apps/api/v1/search_engines.py index 138e7077..3deb6668 100644 --- a/bd_api/apps/api/v1/search_engines.py +++ b/bd_api/apps/api/v1/search_engines.py @@ -13,93 +13,33 @@ class ASCIIFoldingElasticBackend(es_backend.Elasticsearch7SearchBackend, metaclass=ABCMeta): def __init__(self, *args, **kwargs): - super(ASCIIFoldingElasticBackend, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) analyzer = { - "ascii_ngram_analyser": { - "type": "custom", - "tokenizer": "standard", - "filter": ["asciifolding", "lowercase", "haystack_edgengram"], - }, - "standard_analyzer": { - "type": "custom", + "ascii_analyzer": { "tokenizer": "standard", - "filter": ["asciifolding", "lowercase"], + "filter": ["standard", "asciifolding", "lowercase"], }, "ngram_analyzer": { "type": "custom", - "tokenizer": "standard", - "filter": ["asciifolding", "lowercase", "haystack_ngram"], + "tokenizer": "lowercase", + "filter": ["haystack_ngram", "asciifolding"], }, "edgengram_analyzer": { "type": "custom", - "tokenizer": "my_tokenizer", - "filter": ["asciifolding", "lowercase"], - }, - } - tokenizer = { - "standard": {"type": "standard"}, - "lowercase": {"type": "lowercase"}, - "my_tokenizer": { - "type": "edge_ngram", - "min_gram": 3, - "max_gram": 15, - "token_chars": ["letter", "digit"], + "tokenizer": "lowercase", + "filter": ["haystack_edgengram", "asciifolding"], }, } - filter = { - "haystack_ngram": { - "type": "ngram", - "min_gram": 4, - "max_gram": 5, - }, - "haystack_edgengram": { - "type": "edge_ngram", - "min_gram": 2, - "max_gram": 15, - }, - } - - self.DEFAULT_SETTINGS["settings"]["analysis"]["tokenizer"] = tokenizer self.DEFAULT_SETTINGS["settings"]["analysis"]["analyzer"] = analyzer - self.DEFAULT_SETTINGS["settings"]["analysis"]["filter"] = filter def build_schema(self, fields): - content_field_name, mapping = super(ASCIIFoldingElasticBackend, self).build_schema(fields) - - for field_name, field_class in fields.items(): + content_field_name, mapping = super().build_schema(fields) + for field_class in fields.values(): field_mapping = mapping[field_class.index_fieldname] - if field_mapping["type"] == "text" and field_class.indexed: if not hasattr(field_class, "facet_for"): if field_class.field_type not in ("ngram", "edge_ngram"): - field_mapping["analyzer"] = "ascii_ngram_analyser" - field_mapping["fields"] = { - "exact": { - "type": "text", - "analyzer": "standard_analyzer", - }, - "keyword": { - "type": "keyword", - "ignore_above": 256, - }, - } - else: - field_mapping["analyzer"] = "standard_analyzer" - field_mapping["fields"] = { - "ngram": { - "type": "text", - "analyzer": "ngram_analyzer", - }, - "edgengram": { - "type": "text", - "analyzer": "edgengram_analyzer", - }, - "exact": { - "type": "text", - "analyzer": "standard_analyzer", - }, - } - + field_mapping["analyzer"] = "ascii_analyzer" mapping.update({field_class.index_fieldname: field_mapping}) return (content_field_name, mapping) diff --git a/bd_api/apps/api/v1/search_indexes.py b/bd_api/apps/api/v1/search_indexes.py index 0d399613..26de8977 100644 --- a/bd_api/apps/api/v1/search_indexes.py +++ b/bd_api/apps/api/v1/search_indexes.py @@ -1,196 +1,29 @@ # -*- coding: utf-8 -*- from haystack import indexes -from .models import Dataset - - -def list2dict(data, keys: list[str]): - """Turn multiple lists into a list of dicts - - ``` - keys = ["name", "age"] - data = {"name": ["jose", "maria"], "age": [18, 27]} - dict = [{"name": "jose", "age": 18}, {"name": "maria", "age": 27}] - ``` - """ - multivalues = zip(data.get(key, []) for key in keys) - return [dict(zip(keys, values)) for values in multivalues] +from bd_api.apps.api.v1.models import Dataset class DatasetIndex(indexes.SearchIndex, indexes.Indexable): - updated_at = indexes.DateTimeField(model_attr="updated_at") - text = indexes.CharField(document=True, use_template=True) - slug = indexes.CharField(model_attr="slug") - name = indexes.EdgeNgramField(model_attr="name") - description = indexes.EdgeNgramField(model_attr="description", null=True) - organization_id = indexes.CharField(model_attr="organization__id", null=True) - organization_slug = indexes.CharField(model_attr="organization__slug") - organization_name = indexes.EdgeNgramField(model_attr="organization__name") + organization_slug = indexes.CharField(model_attr="organization__slug", null=True) + organization_name = indexes.CharField(model_attr="organization__name", null=True) organization_description = indexes.CharField(model_attr="organization__description", null=True) - organization_picture = indexes.CharField(model_attr="organization__picture", null=True) - organization_website = indexes.CharField(model_attr="organization__website", null=True) - - table_ids = indexes.MultiValueField(model_attr="tables__id", null=True) - table_slugs = indexes.MultiValueField(model_attr="tables__slug", null=True) - table_names = indexes.EdgeNgramField(model_attr="tables__name", null=True) - table_descriptions = indexes.EdgeNgramField(model_attr="tables__description", null=True) - table_is_closed = indexes.MultiValueField(model_attr="tables__is_closed", null=True) - themes_name = indexes.MultiValueField(model_attr="themes__name", null=True) - themes_slug = indexes.MultiValueField(model_attr="themes__slug", null=True) - themes_keyword = indexes.MultiValueField( - model_attr="themes__slug", null=True, indexed=True, stored=True - ) + dataset_slug = indexes.CharField(model_attr="slug", null=True) + dataset_name = indexes.CharField(model_attr="name", null=True) + dataset_description = indexes.CharField(model_attr="description", null=True) - tags_name = indexes.MultiValueField(model_attr="tags__name", null=True) - tags_slug = indexes.MultiValueField(model_attr="tags__slug", null=True) - tags_keyword = indexes.MultiValueField( - model_attr="tags__slug", null=True, indexed=True, stored=True - ) + table_slugs = indexes.MultiValueField(model_attr="tables__slug", null=True) + table_names = indexes.MultiValueField(model_attr="tables__name", null=True) + table_descriptions = indexes.MultiValueField(model_attr="tables__description", null=True) - coverage = indexes.MultiValueField(model_attr="coverage", null=True) - observation_levels_name = indexes.MultiValueField( - model_attr="tables__observation_levels__entity__name", null=True - ) - observation_levels_keyword = indexes.MultiValueField( - model_attr="tables__observation_levels__entity__slug", null=True - ) - raw_data_sources = indexes.MultiValueField(model_attr="raw_data_sources__id", null=True) - information_requests = indexes.MultiValueField(model_attr="information_requests__id", null=True) - is_closed = indexes.BooleanField(model_attr="is_closed") - contains_tables = indexes.BooleanField(model_attr="contains_tables") - contains_closed_data = indexes.BooleanField(model_attr="contains_closed_data") - contains_open_data = indexes.BooleanField(model_attr="contains_open_data") - contains_raw_data_sources = indexes.BooleanField(model_attr="contains_raw_data_sources") - contains_information_requests = indexes.BooleanField(model_attr="contains_information_requests") + tag_names = indexes.MultiValueField(model_attr="tags__name", null=True) + tag_slugs = indexes.MultiValueField(model_attr="tags__slug", null=True, faceted=True) - status_slug = indexes.MultiValueField(model_attr="status__slug", null=True) + theme_names = indexes.MultiValueField(model_attr="themes__name", null=True) + theme_slugs = indexes.MultiValueField(model_attr="themes__slug", null=True, faceted=True) def get_model(self): return Dataset - - def index_queryset(self, using=None): - return self.get_model().objects.all() - - def prepare(self, obj): - data = super().prepare(obj) - data = self._prepare_tags(obj, data) - data = self._prepare_table(obj, data) - data = self._prepare_theme(obj, data) - data = self._prepare_coverage(obj, data) - data = self._prepare_metadata(obj, data) - data = self._prepare_organization(obj, data) - data = self._prepare_raw_data_source(obj, data) - data = self._prepare_observation_level(obj, data) - data = self._prepare_information_request(obj, data) - return data - - def _prepare_tags(self, obj, data): - if tags := data.get("tags_slug", []): - data["tags"] = [] - for i, _ in enumerate(tags): - data["tags"].append( - { - "name": data["tags_name"][i], - "keyword": data["tags_keyword"][i], - } - ) - return data - - def _prepare_table(self, obj, data): - if table_ids := data.get("table_ids", []): - published_tables = obj.tables.exclude(status__slug__in=["under_review"]) - data["n_tables"] = published_tables.count() - data["first_table_id"] = table_ids[0] - if published_tables.first(): - data["first_table_id"] = published_tables.first().id - - data["tables"] = [] - for i, _ in enumerate(table_ids): - data["tables"].append( - { - "id": data["table_ids"][i], - "name": data["table_names"][i], - "slug": data["table_slugs"][i], - "is_closed": data["table_is_closed"][i], - } - ) - data["total_tables"] = len(table_ids) - else: - data["n_tables"] = 0 - data["total_tables"] = 0 - return data - - def _prepare_theme(self, obj, data): - if themes_slug := data.get("themes_slug", []): - data["themes"] = [] - for i, _ in enumerate(themes_slug): - data["themes"].append( - { - "name": data["themes_name"][i], - "keyword": data["themes_keyword"][i], - } - ) - return data - - def _prepare_coverage(self, obj, data): - coverage = data.get("coverage", "") - if coverage == " - ": - data["coverage"] = "" - return data - - def _prepare_metadata(self, obj, data): - data["status"] = data.get("status__slug", "") - data["is_closed"] = data.get("is_closed", False) - data["contains_tables"] = data.get("contains_tables", False) - data["contains_open_data"] = data.get("contains_open_data", False) - data["contains_closed_data"] = data.get("contains_closed_data", False) - data["contains_raw_data_sources"] = data.get("contains_raw_data_sources", False) - data["contains_information_requests"] = data.get("contains_information_requests", False) - return data - - def _prepare_organization(self, obj, data): - organization_picture = "" - if obj.organization and obj.organization.picture and obj.organization.picture.name: - organization_picture = obj.organization.picture.name - data["organization"] = { - "id": data.get("organization_id", ""), - "name": data.get("organization_name", ""), - "slug": data.get("organization_slug", ""), - "picture": organization_picture, - "website": data.get("organization_website", ""), - "description": data.get("organization_description", ""), - } - return data - - def _prepare_raw_data_source(self, obj, data): - if raw_data_sources := data.get("raw_data_sources", []): - data["n_raw_data_sources"] = len(raw_data_sources) - data["first_raw_data_source_id"] = raw_data_sources[0] - else: - data["n_raw_data_sources"] = 0 - data["first_raw_data_source_id"] = "" - return data - - def _prepare_observation_level(self, obj, data): - if observation_levels_name := data.get("observation_levels_name", []): - data["observation_levels"] = [] - for i, _ in enumerate(observation_levels_name): - data["observation_levels"].append( - { - "name": data["observation_levels_name"][i], - "keyword": data["observation_levels_keyword"][i], - } - ) - return data - - def _prepare_information_request(self, obj, data): - if information_requests := data.get("information_requests", []): - data["n_information_requests"] = len(information_requests) - data["first_information_request_id"] = information_requests[0] - else: - data["n_information_requests"] = 0 - data["first_information_request_id"] = "" - return data diff --git a/bd_api/apps/api/v1/search_views.py b/bd_api/apps/api/v1/search_views.py index 15eaf1d6..0f357fa0 100644 --- a/bd_api/apps/api/v1/search_views.py +++ b/bd_api/apps/api/v1/search_views.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- from django.http import JsonResponse -from haystack.forms import SearchForm -from haystack.generic_views import SearchView +from haystack.forms import FacetedSearchForm +from haystack.generic_views import FacetedSearchView -class DatasetSearchForm(SearchForm): +class DatasetSearchForm(FacetedSearchForm): """Dataset search form Note that `load_all=True` avoids lazy loading and possible N+1 problem @@ -16,13 +16,21 @@ class DatasetSearchForm(SearchForm): def query(self): return self.cleaned_data + @property + def facet(self): + return self.sqs.facet_counts() + @property def result(self): return [p.pk for p in self.sqs] @property def response(self): - return {"query": self.query, "result": self.result} + return { + "query": self.query, + "facet": self.facet, + "result": self.result, + } def search(self): self.sqs = super().search() @@ -34,8 +42,9 @@ def no_query_found(self): return self.searchqueryset.all() -class DatasetSearchView(SearchView): +class DatasetSearchView(FacetedSearchView): form_class = DatasetSearchForm + facet_fields = ["tag_slugs", "theme_slugs"] def get(self, request, *args, **kwargs): form = self.get_form() diff --git a/bd_api/apps/api/v1/templates/search/indexes/v1/dataset_text.txt b/bd_api/apps/api/v1/templates/search/indexes/v1/dataset_text.txt index 7fabb196..7132269b 100644 --- a/bd_api/apps/api/v1/templates/search/indexes/v1/dataset_text.txt +++ b/bd_api/apps/api/v1/templates/search/indexes/v1/dataset_text.txt @@ -1,14 +1,17 @@ +{{ object.organization.slug }} +{{ object.organization.name }} +{{ object.organization.description }} + {{ object.slug }} {{ object.name }} {{ object.description }} -{{ object.organization_slug }} -{{ object.organization_name }} -{{ object.organization_description }} -{{ object.table_slugs }} -{{ object.table_names }} -{{ object.table_descriptions }} -{{ object.column_names }} -{{ object.column_descriptions }} -{{ object.themes }} -{{ object.tags }} -{{ object.is_closed }} + +{% for table in object.tables.all %} {{ table.slug }} {% endfor %} +{% for table in object.tables.all %} {{ table.name }} {% endfor %} +{% for table in object.tables.all %} {{ table.description }} {% endfor %} + +{% for tag in object.tags.all %} {{ tag.name }} {% endfor %} +{% for tag in object.tags.all %} {{ tag.slug }} {% endfor %} + +{% for theme in object.themes.all %} {{ theme.name }} {% endfor %} +{% for theme in object.themes.all %} {{ theme.slug }} {% endfor %} diff --git a/bd_api/apps/api/v1/urls.py b/bd_api/apps/api/v1/urls.py index bec38a9a..8bb35a44 100644 --- a/bd_api/apps/api/v1/urls.py +++ b/bd_api/apps/api/v1/urls.py @@ -1,23 +1,27 @@ # -*- coding: utf-8 -*- from django.http import HttpResponseRedirect -from django.urls import path +from django.urls import include, path from django.views.decorators.csrf import csrf_exempt from graphene_file_upload.django import FileUploadGraphQLView +from bd_api.apps.api.v1.search_views import DatasetSearchView +from bd_api.apps.api.v1.views import DatasetRedirectView + def redirect_to_v1(request): - return HttpResponseRedirect("/api/v1/") + return HttpResponseRedirect("/api/v1/graphql") -def redirect_to_v1_graphql(request): - return HttpResponseRedirect("/api/v1/graphql") +def graphql_view(): + return csrf_exempt(FileUploadGraphQLView.as_view(graphiql=True)) urlpatterns = [ - path("", redirect_to_v1), - path("v1/", redirect_to_v1_graphql), - path( - "v1/graphql", - csrf_exempt(FileUploadGraphQLView.as_view(graphiql=True)), - ), + path("api", redirect_to_v1), + path("api/v1/", redirect_to_v1), + path("api/v1/graphql", graphql_view()), + path("search/", DatasetSearchView.as_view()), + path("search/debug/", include("haystack.urls")), + path("dataset/", DatasetRedirectView.as_view()), + path("dataset_redirect/", DatasetRedirectView.as_view()), ] diff --git a/bd_api/apps/api/v1/views.py b/bd_api/apps/api/v1/views.py index a048edbc..05816fd2 100644 --- a/bd_api/apps/api/v1/views.py +++ b/bd_api/apps/api/v1/views.py @@ -1,531 +1,12 @@ # -*- coding: utf-8 -*- from __future__ import annotations -from json import dumps from urllib.parse import urlparse -from django.conf import settings -from django.core.files.storage import default_storage as storage -from django.http import HttpResponseBadRequest, HttpResponseRedirect, JsonResponse, QueryDict +from django.http import HttpResponseRedirect from django.views import View -from elasticsearch import Elasticsearch -from haystack.forms import ModelSearchForm -from haystack.generic_views import SearchView - -from bd_api.apps.api.v1.models import CloudTable, Dataset, Entity, Organization, Theme - - -class DatasetSearchView(SearchView): - def get(self, request, *args, **kwargs): - """ - Handles GET requests and instantiates a blank version of the form. - """ - # Get request arguments - req_args: QueryDict = request.GET.copy() - q = req_args.get("q", None) - es = Elasticsearch(settings.HAYSTACK_CONNECTIONS["default"]["URL"]) - page_size = int(req_args.get("page_size", 10)) - page = int(req_args.get("page", 1)) - # As counts are paginated, we need to get the total number of results - agg_page_size = 1000 - - if not q: - # If query is empty, query all datasets - query = {"match_all": {}} - # Factor to multiply the number of tables by - # Has no effect if no query is passed - n_table_factor = 0 - else: - # If query is not empty, query datasets and tables - query = { - "bool": { - "should": [ - { - "match": { - "description.exact": { - "query": q, - "operator": "AND", - "boost": 10, - } - } - }, - { - "match": { - "name.edgengram": { - "query": q, - "operator": "AND", - } - } - }, - { - "match": { - "table_names.edgengram": { - "query": q, - "operator": "AND", - } - } - }, - { - "match": { - "organization_name.edgengram": { - "query": q, - "operator": "AND", - } - } - }, - ] - } - } - n_table_factor = 2 - - all_filters = [] - - if "organization" in req_args: - all_filters.append( - {"match": {"organization.slug.keyword": req_args.get("organization")}} - ) - - if "theme" in req_args: - filter_theme = [ - {"match": {"themes_slug.keyword": theme}} for theme in req_args.getlist("theme") - ] - for t in filter_theme: - all_filters.append(t) - - if "tag" in req_args: - filter_tag = [{"match": {"tags_slug.keyword": tag}} for tag in req_args.getlist("tag")] - for t in filter_tag: - all_filters.append(t) - - if "contains_table" in req_args: - all_filters.append({"match": {"contains_tables": req_args.get("contains_table")}}) - - if "observation_level" in req_args: - all_filters.append( - {"match": {"observation_levels.keyword": req_args.get("observation_level")}} - ) - - if "datasets_with" or "contains" in req_args: - if "datasets_with" in req_args: - options = req_args.getlist("datasets_with") - else: - options = req_args.getlist("contains") - if "tables" in options: - all_filters.append({"match": {"contains_tables": True}}) - if "closed_data" in options: - all_filters.append({"match": {"contains_closed_data": True}}) - if "open_data" in options: - all_filters.append({"match": {"contains_open_data": True}}) - if "raw_data_sources" in options: - all_filters.append({"match": {"contains_raw_data_sources": True}}) - if "information_requests" in options: - all_filters.append({"match": {"contains_information_requests": True}}) - - raw_query = { - "from": (page - 1) * page_size, - "size": page_size, - "query": { - "function_score": { - "query": { - "bool": { - "must": [ - query, - { - "bool": { - "must": all_filters, - "must_not": [ - {"match": {"status_slug.exact": "under_review"}} - ], - } - }, - ] - } - }, - "functions": [ - { - "field_value_factor": { - "field": "contains_tables", - "modifier": "square", - "factor": 8, - "missing": 0, - } - }, - { - "field_value_factor": { - "field": "n_tables", - "modifier": "square", - "factor": n_table_factor, - "missing": 0, - } - }, - ], - "score_mode": "sum", - "boost_mode": "multiply", - } - }, - "aggs": { - "themes_keyword_counts": { - "terms": { - "field": "themes_slug.keyword", - "size": agg_page_size, - } - }, - "is_closed_counts": { - "terms": { - "field": "is_closed", - "size": agg_page_size, - } - }, - "organization_counts": { - "terms": { - "field": "organization_slug.keyword", - "size": agg_page_size, - } - }, - "tags_slug_counts": { - "terms": { - "field": "tags_slug.keyword", - "size": agg_page_size, - } - }, - "temporal_coverage_counts": { - "terms": { - "field": "coverage.keyword", - "size": agg_page_size, - } - }, - "observation_levels_counts": { - "terms": { - "field": "observation_levels_keyword.keyword", - "size": agg_page_size, - } - }, - "contains_tables_counts": { - "terms": { - "field": "contains_tables", - "size": agg_page_size, - } - }, - "contains_closed_data_counts": { - "terms": { - "field": "contains_closed_data", - "size": agg_page_size, - } - }, - "contains_open_data_counts": { - "terms": { - "field": "contains_open_data", - "size": agg_page_size, - } - }, - "contains_raw_data_sources_counts": { - "terms": { - "field": "contains_raw_data_sources", - "size": agg_page_size, - } - }, - "contains_information_requests_counts": { - "terms": { - "field": "contains_information_requests", - "size": agg_page_size, - } - }, - }, - "sort": [ - {"_score": {"order": "desc"}}, - {"updated_at": {"order": "desc"}}, - ], - } - - form_class = self.get_form_class() - form: ModelSearchForm = self.get_form(form_class) - if not form.is_valid(): - return HttpResponseBadRequest(dumps({"error": "Invalid form"})) - self.queryset = es.search( - index=settings.HAYSTACK_CONNECTIONS["default"]["INDEX_NAME"], body=raw_query - ) - context = self.get_context_data( - **{ - self.form_name: form, - "query": form.cleaned_data.get(self.search_field), - "object_list": self.queryset, - } - ) - - # Get total number of results - count = context["object_list"].get("hits").get("total").get("value") - - # Get results from elasticsearch - es_results = context["object_list"].get("hits").get("hits") - - # Clean results - res = [] - for idx, result in enumerate(es_results): - r = result.get("_source") - cleaned_results = { - "id": r.get("django_id"), - "slug": r.get("slug"), - "name": r.get("name"), - } - - if r.get("updated_at"): - cleaned_results["updated_at"] = r.get("updated_at") - - # organization - organization = r.get("organization", []) - # soon this will become a many-to-many relationship - # for now, we just put the organization within a list - organization = [organization] if organization else [] - if len(organization) > 0: - cleaned_results["organization"] = [] - for _, org in enumerate(organization): - if "picture" in org: - picture = storage.url(org["picture"]) - else: - picture = "" - d = { - "id": org["id"], - "name": org["name"], - "slug": org["slug"], - "picture": picture, - "website": org["website"], - "description": org["description"], - } - cleaned_results["organization"].append(d) - - # themes - if r.get("themes"): - cleaned_results["themes"] = [] - for theme in r.get("themes"): - d = {"name": theme["name"], "slug": theme["keyword"]} - cleaned_results["themes"].append(d) - # tags - if r.get("tags"): - cleaned_results["tags"] = [] - for tag in r.get("tags"): - d = {"name": tag["name"], "slug": tag["keyword"]} - cleaned_results["tags"].append(d) - - # tables - if r.get("tables"): - if len(tables := r.get("tables")) > 0: - cleaned_results["n_tables"] = r.get("n_tables") - cleaned_results["first_table_id"] = r.get("first_table_id") - cleaned_results["first_closed_table_id"] = None - for table in tables: - if table["is_closed"]: - cleaned_results["first_closed_table_id"] = table["id"] - break - - # observation levels - if r.get("observation_levels"): - cleaned_results["entities"] = r.get("observation_levels") - - # raw data sources - cleaned_results["n_raw_data_sources"] = r.get("n_raw_data_sources", 0) - cleaned_results["first_raw_data_source_id"] = r.get("first_raw_data_source_id", []) - - # information requests - cleaned_results["n_information_requests"] = r.get("n_information_requests", 0) - cleaned_results["first_information_request_id"] = r.get( - "first_information_request_id", [] - ) - - # temporal coverage - coverage = r.get("coverage") - if coverage: - if coverage[0] == " - ": - coverage = "" - elif "inf" in coverage[0]: - coverage = coverage.replace("inf", "") - cleaned_results["temporal_coverage"] = coverage - del r["coverage"] - else: - cleaned_results["temporal_coverage"] = "" - - # boolean fields - cleaned_results["is_closed"] = r.get("is_closed", False) - cleaned_results["contains_tables"] = r.get("contains_tables", False) - cleaned_results["contains_closed_data"] = r.get("contains_closed_data", False) - cleaned_results["contains_open_data"] = r.get("contains_open_data", False) - - res.append(cleaned_results) - - # Aggregations - agg = context["object_list"].get("aggregations") - organization_counts = agg["organization_counts"]["buckets"] - themes_slug_counts = agg["themes_keyword_counts"]["buckets"] - tags_slug_counts = agg["tags_slug_counts"]["buckets"] - # temporal_coverage_counts = agg["temporal_coverage_counts"]["buckets"] - observation_levels_counts = agg["observation_levels_counts"]["buckets"] - is_closed_counts = agg["is_closed_counts"]["buckets"] - contains_tables_counts = agg["contains_tables_counts"]["buckets"] - contains_closed_data_counts = agg["contains_closed_data_counts"]["buckets"] - contains_open_data_counts = agg["contains_open_data_counts"]["buckets"] - contains_information_requests_counts = agg["contains_information_requests_counts"][ - "buckets" - ] - contains_raw_data_sources_counts = agg["contains_raw_data_sources_counts"]["buckets"] - - # Getting data from DB to aggregate - orgs = Organization.objects.all().values("slug", "name", "picture") - orgs_dict = {} - for org in orgs: - slug = str(org.pop("slug")) - orgs_dict[slug] = org - - themes = Theme.objects.all().values("slug", "name") - themes_dict = {} - for theme in themes: - slug = str(theme.pop("slug")) - themes_dict[slug] = theme - - entities = Entity.objects.all().values("slug", "name") - entities_dict = {} - for entity in entities: - slug = str(entity.pop("slug")) - entities_dict[slug] = entity - - # Return results - aggregations = dict() - if organization_counts: - agg_organizations = [ - { - "key": org["key"], - "count": org["doc_count"], - "name": orgs_dict.get(org["key"]).get("name") - if orgs_dict.get(org["key"]) - else org["key"], - } - for org in organization_counts - ] - aggregations["organizations"] = agg_organizations - - if themes_slug_counts: - agg_themes = [ - { - "key": theme["key"], - "count": theme["doc_count"], - "name": themes_dict[theme["key"]]["name"], - } - for idx, theme in enumerate(themes_slug_counts) - ] - aggregations["themes"] = agg_themes - - if tags_slug_counts: - agg_tags = [ - { - "key": tag["key"], - "count": tag["doc_count"], - "name": tag["key"], - } - for tag in tags_slug_counts - ] - aggregations["tags"] = agg_tags - - if observation_levels_counts: - agg_observation_levels = [ - { - "key": observation_level["key"], - "count": observation_level["doc_count"], - "name": entities_dict[observation_level["key"]]["name"], - } - for idx, observation_level in enumerate(observation_levels_counts) - ] - aggregations["observation_levels"] = agg_observation_levels - - if is_closed_counts: - agg_is_closed = [ - { - "key": is_closed["key"], - "count": is_closed["doc_count"], - "name": "closed" if is_closed["key"] == 0 else "open", - } - for idx, is_closed in enumerate(is_closed_counts) - ] - aggregations["is_closed"] = agg_is_closed - - if contains_tables_counts: - agg_contains_tables = [ - { - "key": contains_tables["key"], - "count": contains_tables["doc_count"], - "name": "tabelas tratadas" - if contains_tables["key"] == 1 - else "sem tabelas tratadas", - } - for idx, contains_tables in enumerate(contains_tables_counts) - ] - aggregations["contains_tables"] = agg_contains_tables - - if contains_closed_data_counts: - agg_contains_closed_data = [ - { - "key": contains_closed_data["key"], - "count": contains_closed_data["doc_count"], - "name": "dados fechados" - if contains_closed_data["key"] == 1 - else "sem dados fechados", - } - for idx, contains_closed_data in enumerate(contains_closed_data_counts) - ] - aggregations["contains_closed_data"] = agg_contains_closed_data - - if contains_open_data_counts: - agg_contains_open_data = [ - { - "key": contains_open_data["key"], - "count": contains_open_data["doc_count"], - "name": "dados abertos" - if contains_open_data["key"] == 1 - else "sem dados abertos", - } - for idx, contains_open_data in enumerate(contains_open_data_counts) - ] - aggregations["contains_open_data"] = agg_contains_open_data - - if contains_information_requests_counts: - agg_contains_information_requests = [ - { - "key": contains_information_requests["key"], - "count": contains_information_requests["doc_count"], - "name": "pedidos lai" - if contains_information_requests["key"] == 1 - else "sem pedidos lai", - } - for idx, contains_information_requests in enumerate( - contains_information_requests_counts - ) - ] - aggregations["contains_information_requests"] = agg_contains_information_requests - - if contains_raw_data_sources_counts: - agg_contains_raw_data_sources = [ - { - "key": contains_raw_data_sources["key"], - "count": contains_raw_data_sources["doc_count"], - "name": "fontes originais" - if contains_raw_data_sources["key"] == 1 - else "sem fontes originais", - } - for idx, contains_raw_data_sources in enumerate(contains_raw_data_sources_counts) - ] - aggregations["contains_raw_data_sources"] = agg_contains_raw_data_sources - - results = {"count": count, "results": res, "aggregations": aggregations} - max_score = context["object_list"].get("hits").get("max_score") # noqa - - return JsonResponse( - results, - status=200 if len(results) > 0 else 204, - ) - - def get_context_data(self, **kwargs): - kwargs.setdefault("view", self) - if self.extra_context is not None: - kwargs.update(self.extra_context) - return kwargs +from bd_api.apps.api.v1.models import CloudTable, Dataset URL_MAPPING = { "localhost:8080": "http://localhost:3000", diff --git a/bd_api/apps/payment/urls.py b/bd_api/apps/payment/urls.py index 70ade363..b1e5d7de 100644 --- a/bd_api/apps/payment/urls.py +++ b/bd_api/apps/payment/urls.py @@ -1,4 +1,4 @@ # -*- coding: utf-8 -*- from django.urls import include, path -urlpatterns = [path("", include("djstripe.urls", namespace="payment"))] +urlpatterns = [path("payment/", include("djstripe.urls", namespace="payment"))] diff --git a/bd_api/urls.py b/bd_api/urls.py index 63d5bbb7..c33764d9 100644 --- a/bd_api/urls.py +++ b/bd_api/urls.py @@ -18,29 +18,12 @@ from django.conf.urls.static import static from django.contrib import admin from django.urls import include, path -from django.views.decorators.csrf import csrf_exempt -from graphene_file_upload.django import FileUploadGraphQLView - -from bd_api.apps.api.v1.search_views import DatasetSearchView as DatasetSearchV2View -from bd_api.apps.api.v1.views import DatasetRedirectView -from bd_api.apps.api.v1.views import DatasetSearchView as DatasetSearchV1View - - -def graphql_view(): - return csrf_exempt(FileUploadGraphQLView.as_view(graphiql=True)) - urlpatterns = [ path("admin/", admin.site.urls), path("", include("bd_api.apps.core.urls")), - path("api/", include("bd_api.apps.api.v1.urls")), - path("api/graphql/", graphql_view()), - path("account/", include("bd_api.apps.account.urls")), - path("search/", DatasetSearchV1View.as_view()), - path("search/v2/", DatasetSearchV2View.as_view()), - path("search/debug/", include("haystack.urls")), - path("dataset/", DatasetRedirectView.as_view()), - path("dataset_redirect/", DatasetRedirectView.as_view()), - path("payment/", include("bd_api.apps.payment.urls")), + path("", include("bd_api.apps.api.v1.urls")), + path("", include("bd_api.apps.account.urls")), + path("", include("bd_api.apps.payment.urls")), ] urlpatterns += static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)