From 7a0dcca602b3bd7d0897ce773a524ac631aa15ba Mon Sep 17 00:00:00 2001 From: Javier Romero Castro Date: Wed, 20 Sep 2023 11:01:39 +0200 Subject: [PATCH] service: display tombstone for deleted record's files * closes https://github.com/inveniosoftware/invenio-rdm-records/issues/1479 --- invenio_records_resources/resources/errors.py | 53 ++++++++- invenio_records_resources/services/errors.py | 9 ++ .../services/files/service.py | 15 ++- tests/mock_module/api.py | 31 ++++++ tests/resources/test_files_resource.py | 101 ++++++++++++++++++ tests/services/files/test_file_service.py | 92 ++++++++++++++++ 6 files changed, 292 insertions(+), 9 deletions(-) diff --git a/invenio_records_resources/resources/errors.py b/invenio_records_resources/resources/errors.py index ac93278a..49e532cc 100644 --- a/invenio_records_resources/resources/errors.py +++ b/invenio_records_resources/resources/errors.py @@ -9,11 +9,14 @@ # details. """Common Errors handling for Resources.""" +import json from json import JSONDecodeError import marshmallow as ma -from flask import jsonify, make_response, request, url_for -from flask_resources import HTTPJSONException, create_error_handler +from flask import g, jsonify, make_response, request, url_for +from flask_resources import HTTPJSONException as _HTTPJSONException +from flask_resources import create_error_handler +from flask_resources.serializers.json import JSONEncoder from invenio_i18n import lazy_gettext as _ from invenio_pidstore.errors import ( PIDAlreadyExists, @@ -36,11 +39,12 @@ FileKeyNotFoundError, PermissionDeniedError, QuerystringValidationError, + RecordDeletedException, RevisionIdMismatchError, ) -class HTTPJSONValidationException(HTTPJSONException): +class HTTPJSONValidationException(_HTTPJSONException): """HTTP exception serializing to JSON and reflecting Marshmallow errors.""" description = "A validation error occurred." @@ -50,7 +54,7 @@ def __init__(self, exception): super().__init__(code=400, errors=validation_error_to_list_errors(exception)) -class HTTPJSONSearchRequestError(HTTPJSONException): +class HTTPJSONSearchRequestError(_HTTPJSONException): """HTTP exception responsible for mapping search engine errors.""" causes_responses = { @@ -70,6 +74,36 @@ def __init__(self, error): super().__init__(code=500, description=_("Internal server error")) +class HTTPJSONException(_HTTPJSONException): + """HTTPJSONException that supports setting some extra body fields.""" + + def __init__(self, code=None, errors=None, **kwargs): + """Constructor.""" + description = kwargs.pop("description", None) + response = kwargs.pop("response", None) + super().__init__(code, errors, description=description, response=response) + self._extra_fields = kwargs + + def get_body(self, environ=None, scope=None): + """Get the response body.""" + body = { + "status": self.code, + "message": self.get_description(environ), + **self._extra_fields, + } + + errors = self.get_errors() + if errors: + body["errors"] = errors + + # TODO: Revisit how to integrate error monitoring services. See issue #56 + # Temporarily kept for expediency and backward-compatibility + if self.code and (self.code >= 500) and hasattr(g, "sentry_event_id"): + body["error_id"] = str(g.sentry_event_id) + + return json.dumps(body, cls=JSONEncoder) + + def create_pid_redirected_error_handler(): """Creates an error handler for `PIDRedirectedError` error.""" @@ -186,4 +220,15 @@ class ErrorHandlersMixin: search.exceptions.RequestError: create_error_handler( lambda e: HTTPJSONSearchRequestError(e) ), + RecordDeletedException: create_error_handler( + lambda e: ( + HTTPJSONException(code=404, description=_("Record not found")) + if not e.record.tombstone.is_visible + else HTTPJSONException( + code=410, + description=_("Record deleted"), + tombstone=e.record.tombstone.dump(), + ) + ) + ), } diff --git a/invenio_records_resources/services/errors.py b/invenio_records_resources/services/errors.py index 1eae0c39..ea1d3270 100644 --- a/invenio_records_resources/services/errors.py +++ b/invenio_records_resources/services/errors.py @@ -69,3 +69,12 @@ def __init__(self, recid, file_key): ) self.recid = recid self.file_key = file_key + + +class RecordDeletedException(Exception): + """Exception denoting that the record was deleted.""" + + def __init__(self, record, result_item=None): + """Constructor.""" + self.record = record + self.result_item = result_item diff --git a/invenio_records_resources/services/files/service.py b/invenio_records_resources/services/files/service.py index 50f8442e..1e452982 100644 --- a/invenio_records_resources/services/files/service.py +++ b/invenio_records_resources/services/files/service.py @@ -10,7 +10,7 @@ """File Service API.""" from ..base import LinksTemplate, Service -from ..errors import FileKeyNotFoundError +from ..errors import FileKeyNotFoundError, RecordDeletedException from ..records.schema import ServiceSchemaWrapper from ..uow import RecordCommitOp, unit_of_work @@ -68,10 +68,11 @@ def _get_record(self, id_, identity, action, file_key=None): # # High-level API # - def list_files(self, identity, id_): + def list_files(self, identity, id_, with_deleted=False): """List the files of a record.""" record = self._get_record(id_, identity, "read_files") - + if record.deletion_status.is_deleted and not with_deleted: + raise RecordDeletedException(record) self.run_components("list_files", id_, identity, record) return self.file_result_list( @@ -119,12 +120,14 @@ def update_file_metadata(self, identity, id_, file_key, data, uow=None): links_tpl=self.file_links_item_tpl(id_), ) - def read_file_metadata(self, identity, id_, file_key): + def read_file_metadata(self, identity, id_, file_key, with_deleted=False): """Read the metadata of a file. :raises FileKeyNotFoundError: If the record has no file for the ``file_key`` """ record = self._get_record(id_, identity, "read_files", file_key=file_key) + if record.deletion_status.is_deleted and not with_deleted: + raise RecordDeletedException(record) self.run_components("read_file_metadata", identity, id_, file_key, record) @@ -259,12 +262,14 @@ def set_file_content( links_tpl=self.file_links_item_tpl(id_), ) - def get_file_content(self, identity, id_, file_key): + def get_file_content(self, identity, id_, file_key, with_deleted=False): """Retrieve file content. :raises FileKeyNotFoundError: If the record has no file for the ``file_key`` """ record = self._get_record(id_, identity, "get_content_files", file_key=file_key) + if record.deletion_status.is_deleted and not with_deleted: + raise RecordDeletedException(record) self.run_components("get_file_content", identity, id_, file_key, record) diff --git a/tests/mock_module/api.py b/tests/mock_module/api.py index 1cebbad3..2e13ca41 100644 --- a/tests/mock_module/api.py +++ b/tests/mock_module/api.py @@ -37,6 +37,33 @@ class FileRecord(FileRecordBase): record_cls = None # is defined below +class DeletionStatus: + """Mock of Deletion status field.""" + + is_deleted = False + + +class Tombstone: + """Mock of Tombstone system field.""" + + is_visible = True + citation_text = "citation text" + removal_date = "2023-09-20T08:19:58.431539" + removed_by = {"user": "system"} + note = "spam" + + def dump(self): + data = { + "note": self.note, + "removed_by": self.removed_by, + "removal_date": self.removal_date, + "citation_text": self.citation_text, + "is_visible": self.is_visible, + } + + return data + + class Record(RecordBase): """Example record API.""" @@ -63,6 +90,10 @@ class Record(RecordBase): ] ) + deletion_status = DeletionStatus() + + tombstone = Tombstone() + class RecordWithRelations(Record): """Example record API with relations.""" diff --git a/tests/resources/test_files_resource.py b/tests/resources/test_files_resource.py index 94493c35..98e3dcf8 100644 --- a/tests/resources/test_files_resource.py +++ b/tests/resources/test_files_resource.py @@ -31,6 +31,12 @@ def service(): return RecordService(ServiceWithFilesConfig) +@pytest.fixture(scope="function") +def service_with_deleted_records(): + ServiceWithFilesConfig.record_cls.deletion_status.is_deleted = True + return RecordService(ServiceWithFilesConfig) + + @pytest.fixture(scope="module") def record_resource(service): """Record Resource.""" @@ -484,3 +490,98 @@ def add(self, fp, *args, **kwargs): files.sort() assert files == ["f1.pdf", "f2.pdf", "f3.pdf"] assert all(f.closed for f in captured_fps) + + +# This test has to be the last one as it's overriding the service +def test_files_api_flow_for_deleted_record( + client, search_clear, headers, input_data, location, service_with_deleted_records +): + """Test deleted record files.""" + # Initialize a draft + res = client.post("/mocks", headers=headers, json=input_data) + assert res.status_code == 201 + id_ = res.json["id"] + assert res.json["links"]["files"].endswith(f"/api/mocks/{id_}/files") + + # Initialize files upload + res = client.post( + f"/mocks/{id_}/files", + headers=headers, + json=[ + {"key": "test.pdf", "title": "Test file"}, + ], + ) + assert res.status_code == 201 + res_file = res.json["entries"][0] + assert res_file["key"] == "test.pdf" + assert res_file["status"] == "pending" + assert res_file["metadata"] == {"title": "Test file"} + assert res_file["links"]["self"].endswith(f"/api/mocks/{id_}/files/test.pdf") + assert res_file["links"]["content"].endswith( + f"/api/mocks/{id_}/files/test.pdf/content" + ) + assert res_file["links"]["commit"].endswith( + f"/api/mocks/{id_}/files/test.pdf/commit" + ) + + # Get the file metadata + res = client.get(f"/mocks/{id_}/files/test.pdf", headers=headers) + assert res.status_code == 410 + assert res.json["message"] == "Record deleted" + assert res.json["tombstone"]["note"] == "spam" + assert res.json["tombstone"]["removed_by"]["user"] == "system" + + # Upload a file + res = client.put( + f"/mocks/{id_}/files/test.pdf/content", + headers={ + "content-type": "application/octet-stream", + "accept": "application/json", + }, + data=BytesIO(b"testfile"), + ) + assert res.status_code == 200 + assert res.json["status"] == "pending" + + # Commit the uploaded file + res = client.post(f"/mocks/{id_}/files/test.pdf/commit", headers=headers) + assert res.status_code == 200 + assert res.json["status"] == "completed" + + # Get the file metadata + res = client.get(f"/mocks/{id_}/files/test.pdf", headers=headers) + assert res.status_code == 410 + + # Read a file's content + res = client.get(f"/mocks/{id_}/files/test.pdf/content", headers=headers) + assert res.status_code == 410 + assert res.json["message"] == "Record deleted" + assert res.json["tombstone"]["note"] == "spam" + assert res.json["tombstone"]["removed_by"]["user"] == "system" + + # Update file metadata + res = client.put( + f"/mocks/{id_}/files/test.pdf", headers=headers, json={"title": "New title"} + ) + assert res.status_code == 200 + assert res.json["key"] == "test.pdf" + assert res.json["status"] == "completed" + assert res.json["metadata"] == {"title": "New title"} + + # Get all files + res = client.get(f"/mocks/{id_}/files", headers=headers) + assert res.status_code == 410 + assert res.json["message"] == "Record deleted" + assert res.json["tombstone"]["note"] == "spam" + assert res.json["tombstone"]["removed_by"]["user"] == "system" + + # Delete a file + res = client.delete(f"/mocks/{id_}/files/test.pdf", headers=headers) + assert res.status_code == 204 + + # Get all files + res = client.get(f"/mocks/{id_}/files", headers=headers) + assert res.status_code == 410 + assert res.json["message"] == "Record deleted" + assert res.json["tombstone"]["note"] == "spam" + assert res.json["tombstone"]["removed_by"]["user"] == "system" diff --git a/tests/services/files/test_file_service.py b/tests/services/files/test_file_service.py index f8d13b82..a37ef95d 100644 --- a/tests/services/files/test_file_service.py +++ b/tests/services/files/test_file_service.py @@ -18,6 +18,7 @@ from invenio_records_resources.services.errors import ( FileKeyNotFoundError, PermissionDeniedError, + RecordDeletedException, ) # @@ -49,6 +50,12 @@ def __exit__(self, *args): return MockRequest() +@pytest.fixture(scope="function") +def service_with_deleted_records(file_service): + file_service.record_cls.deletion_status.is_deleted = True + return file_service + + # # Local files # @@ -450,3 +457,88 @@ def test_read_not_committed_external_file( # Retrieve file with pytest.raises(PermissionDeniedError): file_service.get_file_content(identity_simple, recid, "article.txt") + + +# This test should go as last as it's modifying the service with scope module. +def test_deleted_records_file_flow( + service_with_deleted_records, location, example_file_record, identity_simple +): + """Test the lifecycle of a file. + + - Initialize file saving + - Save 1 files + - Commit the files + - List files of the record (ERROR) + - Read file metadata (ERROR) + - Retrieve a file (ERROR) + - Delete a file + """ + file_service = service_with_deleted_records + recid = example_file_record["id"] + file_to_initialise = [ + { + "key": "article.txt", + "checksum": "md5:c785060c866796cc2a1708c997154c8e", + "size": 17, # 2kB + "metadata": { + "description": "Published article PDF.", + }, + } + ] + # Initialize file saving + result = file_service.init_files(identity_simple, recid, file_to_initialise) + assert result.to_dict()["entries"][0]["key"] == file_to_initialise[0]["key"] + # for to_file in to_files: + content = BytesIO(b"test file content") + result = file_service.set_file_content( + identity_simple, + recid, + file_to_initialise[0]["key"], + content, + content.getbuffer().nbytes, + ) + # TODO figure response for succesfully saved file + assert result.to_dict()["key"] == file_to_initialise[0]["key"] + + result = file_service.commit_file(identity_simple, recid, "article.txt") + # TODO currently there is no status in the json between the initialisation + # and the commiting. + assert result.to_dict()["key"] == file_to_initialise[0]["key"] + + # List files + with pytest.raises(RecordDeletedException): + file_service.list_files(identity_simple, recid) + + result = file_service.list_files(identity_simple, recid, with_deleted=True) + assert result.to_dict()["entries"][0]["key"] == file_to_initialise[0]["key"] + assert result.to_dict()["entries"][0]["storage_class"] == "L" + assert "uri" not in result.to_dict()["entries"][0] + + # Read file metadata + with pytest.raises(RecordDeletedException): + file_service.read_file_metadata(identity_simple, recid, "article.txt") + + result = file_service.read_file_metadata( + identity_simple, recid, "article.txt", with_deleted=True + ) + assert result.to_dict()["key"] == file_to_initialise[0]["key"] + assert result.to_dict()["storage_class"] == "L" + assert "uri" not in result.to_dict() + + # Retrieve file + with pytest.raises(RecordDeletedException): + file_service.get_file_content(identity_simple, recid, "article.txt") + + result = file_service.get_file_content( + identity_simple, recid, "article.txt", with_deleted=True + ) + assert result.file_id == "article.txt" + + # Delete file + result = file_service.delete_file(identity_simple, recid, "article.txt") + assert result.file_id == "article.txt" + + # Assert deleted + result = file_service.list_files(identity_simple, recid, with_deleted=True) + assert result.entries + assert len(list(result.entries)) == 0