Skip to content

Commit

Permalink
feat: merge exporter and data_registry apps
Browse files Browse the repository at this point in the history
  • Loading branch information
yolile committed Nov 9, 2023
1 parent 128dfc5 commit af64b79
Show file tree
Hide file tree
Showing 10 changed files with 62 additions and 66 deletions.
3 changes: 1 addition & 2 deletions core/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,12 @@
"django.contrib.humanize",
"data_registry",
"markdownx",
"exporter",
]

MIDDLEWARE = [
"django.middleware.security.SecurityMiddleware",
# Add before GZipMiddleware to modify its response.
"exporter.middleware.ContentEncodingMiddleware",
"data_registry.middleware.ContentEncodingMiddleware",
# This site is not affected by BREACH.
# https://docs.djangoproject.com/en/4.2/ref/middleware/#django.middleware.gzip.GZipMiddleware
"django.middleware.gzip.GZipMiddleware",
Expand Down
1 change: 0 additions & 1 deletion core/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

urlpatterns = [
path("", include("data_registry.urls"), name="data-registry"),
path("", include("exporter.urls"), name="exporter"),
path("admin/", admin.site.urls),
path("markdownx/", include("markdownx.urls")),
]
Expand Down
File renamed without changes.
1 change: 1 addition & 0 deletions data_registry/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
path("", views.index, name="index"),
path("search/", views.search, name="search"),
path("publication/<int:id>", views.detail, name="detail"),
path("publication/<int:id>/download", views.download_export, name="download"),
# https://code.djangoproject.com/ticket/26556
path("i18n/setlang/", i18n.set_language, name="set-language"),
# Uncomment after re-integrating Spoonbill.
Expand Down
36 changes: 34 additions & 2 deletions data_registry/views.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import re
import string
from collections import defaultdict
from datetime import date, datetime, timedelta
Expand All @@ -11,20 +12,21 @@
from django.contrib.postgres.aggregates import ArrayAgg
from django.db.models import Count, OuterRef, Q, Subquery
from django.db.models.functions import Substr
from django.http.response import HttpResponse, JsonResponse
from django.http.response import FileResponse, HttpResponse, HttpResponseBadRequest, HttpResponseNotFound, JsonResponse
from django.shortcuts import get_object_or_404, redirect, render
from django.utils.translation import get_language, get_language_from_request
from django.utils.translation import gettext as _

from data_registry.models import Collection, Job
from data_registry.util import collection_queryset
from exporter.util import Export
from exporter.util import Export, TaskStatus

logger = logging.getLogger(__name__)

alphabets = defaultdict(lambda: string.ascii_uppercase)
# https://en.wikipedia.org/wiki/Cyrillic_script_in_Unicode#Basic_Cyrillic_alphabet
alphabets["ru"] = "АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ"
EXPORT_PATTERN = re.compile(r"\A(full|\d{4})\.(jsonl\.gz|csv\.tar\.gz|xlsx)\Z")


def index(request):
Expand Down Expand Up @@ -174,6 +176,36 @@ def spiders(request):
return JsonResponse(json.get("spiders"), safe=False)


def download_export(request, id):
    """
    Return an exported file as a FileResponse object.

    The file is selected by the ``name`` query string parameter, which must match
    ``EXPORT_PATTERN`` (``full`` or a four-digit year, followed by a supported suffix).

    :param request: the HTTP request; ``request.GET["name"]`` is the export's basename
    :param id: the primary key of the collection, looked up via ``collection_queryset(request)``
    :returns: a 400 response if ``name`` is missing or does not match ``EXPORT_PATTERN``;
        a 404 response if the collection is not found, has no active job, or the export's
        status is not ``TaskStatus.COMPLETED``; otherwise, a FileResponse served as an attachment
    """
    # Default to "" so that a request without the parameter fails validation with a 400,
    # instead of passing None to the regular expression (TypeError, hence a 500).
    name = request.GET.get("name", "")

    # Guard against path traversal.
    if not EXPORT_PATTERN.match(name):
        return HttpResponseBadRequest("The name query string parameter is invalid")

    collection = get_object_or_404(collection_queryset(request), id=id)

    active_job = collection.job.filter(active=True).first()
    if not active_job:
        return HttpResponseNotFound("This OCDS dataset is not available for download")

    export = Export(active_job.id, basename=name)
    if export.status != TaskStatus.COMPLETED:
        return HttpResponseNotFound("File not found")

    # The file handle is handed to FileResponse, so it is not wrapped in a `with` block here.
    return FileResponse(
        export.path.open("rb"),
        as_attachment=True,
        filename=f"{collection.source_id}_{name}",
        # Set Content-Encoding to skip GZipMiddleware. (ContentEncodingMiddleware removes the empty header.)
        # https://docs.djangoproject.com/en/4.2/ref/middleware/#module-django.middleware.gzip
        headers={"Content-Encoding": ""},
    )


def excel_data(request, job_id, job_range=None):
job = Job.objects.get(id=job_id)
export = Export(job_id)
Expand Down
5 changes: 0 additions & 5 deletions exporter/apps.py

This file was deleted.

7 changes: 0 additions & 7 deletions exporter/urls.py

This file was deleted.

40 changes: 0 additions & 40 deletions exporter/views.py

This file was deleted.

2 changes: 1 addition & 1 deletion tests/data_registry/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def setUpTestData(cls):
@patch("exporter.util.Export.get_files")
def test_detail(self, get_files):
get_files.return_value = {"jsonl": {"by_year": [{"year": 2022, "size": 1}]}}
url = f"/publication/{self.collection.id}/download?name=2022.jsonl.gz"
url = f"/en/publication/{self.collection.id}/download?name=2022.jsonl.gz"

with self.assertNumQueries(2):
response = Client().get(f"/en/publication/{self.collection.id}")
Expand Down
33 changes: 25 additions & 8 deletions tests/exporter/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,32 +18,49 @@ def setUp(cls):
)
cls.job = cls.collection.job.create(
active=True,
id=2,
)
cls.collection_no_job = Collection.objects.create(
id=3,
title="Test",
source_id="abc",
public=True,
)
cls.collection_no_job.job.create(
active=True,
id=4,
)

def test_collection_not_found(self):
# The name parameter is valid, so the request gets past name validation; the single
# expected query is the collection lookup, which should produce the 404.
# NOTE(review): presumably no fixture collection has ID 10 - confirm against setUp.
with self.assertNumQueries(1):
response = Client().get("/en/publication/10/download?name=2000.jsonl.gz")

self.assertEqual(response.status_code, 404)

def test_download_export_invalid_suffix(self):
with self.assertNumQueries(0):
response = Client().get("/publication/2/download?name=invalid")
response = Client().get(f"/en/publication/{self.collection.id}/download?name=invalid")

self.assertEqual(response.status_code, 400)
self.assertEqual(response.content, b"The name query string parameter is invalid")

def test_download_export_empty_parameter(self):
with self.assertNumQueries(0):
response = Client().get("/publication/2/download?name=")
response = Client().get(f"/en/publication/{self.collection.id}/download?name=")

self.assertEqual(response.status_code, 400)
self.assertEqual(response.content, b"The name query string parameter is invalid")

def test_download_export_waiting(self):
with self.assertNumQueries(1):
response = Client().get("/publication/1/download?name=2000.jsonl.gz")

with self.assertNumQueries(2):
response = Client().get(f"/en/publication/{self.collection_no_job.id}/download?name=2000.jsonl.gz")
self.assertEqual(response.status_code, 404)
self.assertEqual(response.content, b"File not found")

@patch("exporter.util.Export.lockfile", new_callable=PropertyMock)
def test_download_export_running(self, exists):
with self.assertNumQueries(2):
response = Client().get("/publication/2/download?name=2000.jsonl.gz")
response = Client().get(f"/en/publication/{self.collection.id}/download?name=2000.jsonl.gz")

self.assertEqual(response.status_code, 404)
self.assertEqual(response.content, b"File not found")
Expand All @@ -57,7 +74,7 @@ def test_download_export_completed(self):
with self.subTest(suffix=suffix):
with self.assertNumQueries(2):
response = Client().get(
f"/publication/2/download?name=2000.{suffix}",
f"/en/publication/{self.collection.id}/download?name=2000.{suffix}",
HTTP_ACCEPT_ENCODING="gzip",
)
self.assertEqual(response.status_code, 200)
Expand All @@ -70,7 +87,7 @@ def test_download_export_completed(self):
"Content-Type": content_type,
"Cross-Origin-Opener-Policy": "same-origin",
"Referrer-Policy": "same-origin",
"Vary": "Accept-Language, Cookie",
"Vary": "Cookie",
"X-Content-Type-Options": "nosniff",
"X-Frame-Options": "DENY",
},
Expand Down

0 comments on commit af64b79

Please sign in to comment.