From 86ff1fa43c3852717877bb74858321cd817756e4 Mon Sep 17 00:00:00 2001 From: Javier Romero Castro Date: Wed, 20 Sep 2023 11:01:39 +0200 Subject: [PATCH] service: display tombstone for deleted record's files * closes https://github.com/inveniosoftware/invenio-rdm-records/issues/1479 --- invenio_records_resources/resources/errors.py | 54 ++++- .../resources/files/config.py | 3 + .../resources/files/resource.py | 17 +- invenio_records_resources/services/errors.py | 9 + .../services/files/service.py | 26 ++- tests/conftest.py | 47 ++++ tests/mock_module/api.py | 33 +++ tests/mock_module/permissions.py | 1 + tests/resources/test_files_resource.py | 206 ++++++++++++++++++ tests/services/files/test_file_service.py | 107 +++++++++ 10 files changed, 493 insertions(+), 10 deletions(-) diff --git a/invenio_records_resources/resources/errors.py b/invenio_records_resources/resources/errors.py index ac93278a..033b46df 100644 --- a/invenio_records_resources/resources/errors.py +++ b/invenio_records_resources/resources/errors.py @@ -9,11 +9,14 @@ # details. """Common Errors handling for Resources.""" +import json from json import JSONDecodeError import marshmallow as ma -from flask import jsonify, make_response, request, url_for -from flask_resources import HTTPJSONException, create_error_handler +from flask import g, jsonify, make_response, request, url_for +from flask_resources import HTTPJSONException as _HTTPJSONException +from flask_resources import create_error_handler +from flask_resources.serializers.json import JSONEncoder from invenio_i18n import lazy_gettext as _ from invenio_pidstore.errors import ( PIDAlreadyExists, @@ -36,11 +39,12 @@ FileKeyNotFoundError, PermissionDeniedError, QuerystringValidationError, + RecordDeletedException, RevisionIdMismatchError, ) -class HTTPJSONValidationException(HTTPJSONException): +class HTTPJSONValidationException(_HTTPJSONException): """HTTP exception serializing to JSON and reflecting Marshmallow errors.""" description = "A validation error occurred." @@ -50,7 +54,7 @@ def __init__(self, exception): super().__init__(code=400, errors=validation_error_to_list_errors(exception)) -class HTTPJSONSearchRequestError(HTTPJSONException): +class HTTPJSONSearchRequestError(_HTTPJSONException): """HTTP exception responsible for mapping search engine errors.""" causes_responses = { @@ -70,6 +74,37 @@ def __init__(self, error): super().__init__(code=500, description=_("Internal server error")) +class HTTPJSONException(_HTTPJSONException): + """HTTPJSONException that supports setting some extra body fields.""" + + def __init__(self, code=None, errors=None, **kwargs): + """Constructor.""" + description = kwargs.pop("description", None) + response = kwargs.pop("response", None) + super().__init__(code, errors, description=description, response=response) + self._extra_fields = kwargs + + def get_body(self, environ=None, scope=None): + """Get the response body.""" + body = { + "status": self.code, + "message": self.get_description(environ), + **self._extra_fields, + } + + errors = self.get_errors() + if errors: + body["errors"] = errors + + # TODO: Revisit how to integrate error monitoring services. + # See issue https://github.com/inveniosoftware/flask-resources/issues/56 + # Temporarily kept for expediency and backward-compatibility + if self.code and (self.code >= 500) and hasattr(g, "sentry_event_id"): + body["error_id"] = str(g.sentry_event_id) + + return json.dumps(body, cls=JSONEncoder) + + def create_pid_redirected_error_handler(): """Creates an error handler for `PIDRedirectedError` error.""" @@ -186,4 +221,15 @@ class ErrorHandlersMixin: search.exceptions.RequestError: create_error_handler( lambda e: HTTPJSONSearchRequestError(e) ), + RecordDeletedException: create_error_handler( + lambda e: ( + HTTPJSONException(code=404, description=_("Record not found")) + if not e.record.tombstone.is_visible + else HTTPJSONException( + code=410, + description=_("Record deleted"), + tombstone=e.record.tombstone.dump(), + ) + ) + ), } diff --git a/invenio_records_resources/resources/files/config.py b/invenio_records_resources/resources/files/config.py index ed0660e3..641eeb2c 100644 --- a/invenio_records_resources/resources/files/config.py +++ b/invenio_records_resources/resources/files/config.py @@ -9,6 +9,7 @@ """File resource configuration.""" +import marshmallow as ma from flask_resources import ResourceConfig @@ -27,3 +28,5 @@ class FileResourceConfig(ResourceConfig): "item-commit": "/files//commit", "list-archive": "/files-archive", } + + request_extra_args = {"include_deleted": ma.fields.Boolean()} diff --git a/invenio_records_resources/resources/files/resource.py b/invenio_records_resources/resources/files/resource.py index 52f2dfa7..081241d8 100644 --- a/invenio_records_resources/resources/files/resource.py +++ b/invenio_records_resources/resources/files/resource.py @@ -28,6 +28,7 @@ from zipstream import ZIP_STORED, ZipStream from ..errors import ErrorHandlersMixin +from ..records.resource import request_extra_args from .parser import RequestStreamParser # @@ -84,13 +85,16 @@ def create_url_rules(self): ] return url_rules + @request_extra_args @request_view_args @response_handler(many=True) def search(self): """List files.""" + include_deleted = resource_requestctx.args.get("include_deleted", False) files = self.service.list_files( g.identity, resource_requestctx.view_args["pid_value"], + include_deleted=include_deleted, ) return files.to_dict(), 200 @@ -116,14 +120,17 @@ def create(self): ) return item.to_dict(), 201 + @request_extra_args @request_view_args @response_handler() def read(self): """Read a single file.""" + include_deleted = resource_requestctx.args.get("include_deleted", False) item = self.service.read_file_metadata( g.identity, resource_requestctx.view_args["pid_value"], resource_requestctx.view_args["key"], + include_deleted=include_deleted, ) return item.to_dict(), 200 @@ -164,13 +171,16 @@ def create_commit(self): ) return item.to_dict(), 200 + @request_extra_args @request_view_args def read_content(self): """Read file content.""" + include_deleted = resource_requestctx.args.get("include_deleted", False) item = self.service.get_file_content( g.identity, resource_requestctx.view_args["pid_value"], resource_requestctx.view_args["key"], + include_deleted=include_deleted, ) # emit file download stats event @@ -181,12 +191,15 @@ def read_content(self): return item.send_file(), 200 + @request_extra_args @request_view_args def read_archive(self): """Read a zipped version of all files.""" id_ = resource_requestctx.view_args["pid_value"] - files = self.service.list_files(g.identity, id_) - + include_deleted = resource_requestctx.args.get("include_deleted", False) + files = self.service.list_files( + g.identity, id_, include_deleted=include_deleted + ) # emit file download stats events for each file emitter = current_stats.get_event_emitter("file-download") for f in files._results: diff --git a/invenio_records_resources/services/errors.py b/invenio_records_resources/services/errors.py index 1eae0c39..ea1d3270 100644 --- a/invenio_records_resources/services/errors.py +++ b/invenio_records_resources/services/errors.py @@ -69,3 +69,12 @@ def __init__(self, recid, file_key): ) self.recid = recid self.file_key = file_key + + +class RecordDeletedException(Exception): + """Exception denoting that the record was deleted.""" + + def __init__(self, record, result_item=None): + """Constructor.""" + self.record = record + self.result_item = result_item diff --git a/invenio_records_resources/services/files/service.py b/invenio_records_resources/services/files/service.py index 50f8442e..11eea6af 100644 --- a/invenio_records_resources/services/files/service.py +++ b/invenio_records_resources/services/files/service.py @@ -10,7 +10,7 @@ """File Service API.""" from ..base import LinksTemplate, Service -from ..errors import FileKeyNotFoundError +from ..errors import FileKeyNotFoundError, RecordDeletedException from ..records.schema import ServiceSchemaWrapper from ..uow import RecordCommitOp, unit_of_work @@ -65,13 +65,27 @@ def _get_record(self, id_, identity, action, file_key=None): return record + def _ensure_record_not_deleted(self, record, identity, include_deleted): + """Ensure that the record exists (not deleted) or raise.""" + if record.is_draft: + return + if record.deletion_status.is_deleted and not include_deleted: + raise RecordDeletedException(record) + if record.deletion_status.is_deleted and include_deleted: + can_read_deleted = self.check_permission( + identity, "read_deleted_files", record=record + ) + if not can_read_deleted: + raise RecordDeletedException(record) + # # High-level API # - def list_files(self, identity, id_): + def list_files(self, identity, id_, include_deleted=False): """List the files of a record.""" record = self._get_record(id_, identity, "read_files") + self._ensure_record_not_deleted(record, identity, include_deleted) self.run_components("list_files", id_, identity, record) return self.file_result_list( @@ -119,13 +133,15 @@ def update_file_metadata(self, identity, id_, file_key, data, uow=None): links_tpl=self.file_links_item_tpl(id_), ) - def read_file_metadata(self, identity, id_, file_key): + def read_file_metadata(self, identity, id_, file_key, include_deleted=False): """Read the metadata of a file. :raises FileKeyNotFoundError: If the record has no file for the ``file_key`` """ record = self._get_record(id_, identity, "read_files", file_key=file_key) + self._ensure_record_not_deleted(record, identity, include_deleted) + self.run_components("read_file_metadata", identity, id_, file_key, record) return self.file_result_item( @@ -259,13 +275,15 @@ def set_file_content( links_tpl=self.file_links_item_tpl(id_), ) - def get_file_content(self, identity, id_, file_key): + def get_file_content(self, identity, id_, file_key, include_deleted=False): """Retrieve file content. :raises FileKeyNotFoundError: If the record has no file for the ``file_key`` """ record = self._get_record(id_, identity, "get_content_files", file_key=file_key) + self._ensure_record_not_deleted(record, identity, include_deleted) + self.run_components("get_file_content", identity, id_, file_key, record) return self.file_result_item( diff --git a/tests/conftest.py b/tests/conftest.py index 134e817f..330d8335 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -15,6 +15,8 @@ import pytest from flask_principal import Identity, Need, UserNeed +from invenio_access import ActionRoles, superuser_access +from invenio_accounts.models import Role from invenio_app.factory import create_api as _create_api from mock_module.config import MockFileServiceConfig, ServiceConfig @@ -100,3 +102,48 @@ def identity_simple(): i.provides.add(UserNeed(1)) i.provides.add(Need(method="system_role", value="any_user")) return i + + +@pytest.fixture() +def superuser_role_need(db): + """Store 1 role with 'superuser-access' ActionNeed. + + WHY: This is needed because expansion of ActionNeed is + done on the basis of a User/Role being associated with that Need. + If no User/Role is associated with that Need (in the DB), the + permission is expanded to an empty list. + """ + role = Role(name="superuser-access") + db.session.add(role) + + action_role = ActionRoles.create(action=superuser_access, role=role) + db.session.add(action_role) + + db.session.commit() + + return action_role.need + + +@pytest.fixture() +def superuser(UserFixture, app, db, superuser_role_need): + """Superuser.""" + u = UserFixture( + email="superuser@inveniosoftware.org", + password="superuser", + ) + u.create(app, db) + + datastore = app.extensions["security"].datastore + _, role = datastore._prepare_role_modify_args(u.user, "superuser-access") + + datastore.add_role_to_user(u.user, role) + db.session.commit() + return u + + +@pytest.fixture() +def superuser_identity(superuser_role_need): + """Superuser identity fixture.""" + identity = Identity(1) + identity.provides.add(superuser_role_need) + return identity diff --git a/tests/mock_module/api.py b/tests/mock_module/api.py index 1cebbad3..945b4724 100644 --- a/tests/mock_module/api.py +++ b/tests/mock_module/api.py @@ -37,6 +37,34 @@ class FileRecord(FileRecordBase): record_cls = None # is defined below +class DeletionStatus: + """Mock of Deletion status field.""" + + is_deleted = False + + +class Tombstone: + """Mock of Tombstone system field.""" + + is_visible = True + citation_text = "citation text" + removal_date = "2023-09-20T08:19:58.431539" + removed_by = {"user": "system"} + note = "spam" + + def dump(self): + """Dump the values.""" + data = { + "note": self.note, + "removed_by": self.removed_by, + "removal_date": self.removal_date, + "citation_text": self.citation_text, + "is_visible": self.is_visible, + } + + return data + + class Record(RecordBase): """Example record API.""" @@ -63,6 +91,10 @@ class Record(RecordBase): ] ) + deletion_status = DeletionStatus() + + tombstone = Tombstone() + class RecordWithRelations(Record): """Example record API with relations.""" @@ -87,6 +119,7 @@ class RecordWithFiles(Record): files = FilesField(store=False, file_cls=FileRecord) bucket_id = ModelField() bucket = ModelField(dump=False) + is_draft = False FileRecord.record_cls = RecordWithFiles diff --git a/tests/mock_module/permissions.py b/tests/mock_module/permissions.py index 8b08c512..6a4eb15d 100644 --- a/tests/mock_module/permissions.py +++ b/tests/mock_module/permissions.py @@ -31,3 +31,4 @@ class PermissionPolicy(RecordPermissionPolicy): can_read_files = [AnyUser(), SystemProcess()] can_update_files = [AnyUser(), SystemProcess()] can_delete_files = [AnyUser(), SystemProcess()] + can_read_deleted_files = [SystemProcess()] diff --git a/tests/resources/test_files_resource.py b/tests/resources/test_files_resource.py index 94493c35..a9e760e3 100644 --- a/tests/resources/test_files_resource.py +++ b/tests/resources/test_files_resource.py @@ -14,6 +14,7 @@ from unittest.mock import patch import pytest +from flask import current_app from mock_module.config import ServiceWithFilesConfig from mock_module.resource import ( CustomDisabledUploadFileResourceConfig, @@ -484,3 +485,208 @@ def add(self, fp, *args, **kwargs): files.sort() assert files == ["f1.pdf", "f2.pdf", "f3.pdf"] assert all(f.closed for f in captured_fps) + + +def test_files_api_flow_for_deleted_record( + client, search_clear, headers, input_data, location +): + """Test deleted record files.""" + # Initialize a draft + # Hack to override the mock service config and set records as deleted + current_app.extensions["invenio-records-resources"].registry._services[ + "mock-records" + ].record_cls.deletion_status.is_deleted = True + res = client.post("/mocks", headers=headers, json=input_data) + assert res.status_code == 201 + id_ = res.json["id"] + assert res.json["links"]["files"].endswith(f"/api/mocks/{id_}/files") + + # Initialize files upload + res = client.post( + f"/mocks/{id_}/files", + headers=headers, + json=[ + {"key": "test.pdf", "title": "Test file"}, + ], + ) + assert res.status_code == 201 + res_file = res.json["entries"][0] + assert res_file["key"] == "test.pdf" + assert res_file["status"] == "pending" + assert res_file["metadata"] == {"title": "Test file"} + assert res_file["links"]["self"].endswith(f"/api/mocks/{id_}/files/test.pdf") + assert res_file["links"]["content"].endswith( + f"/api/mocks/{id_}/files/test.pdf/content" + ) + assert res_file["links"]["commit"].endswith( + f"/api/mocks/{id_}/files/test.pdf/commit" + ) + + # Get the file metadata + res = client.get(f"/mocks/{id_}/files/test.pdf", headers=headers) + assert res.status_code == 410 + assert res.json["message"] == "Record deleted" + assert res.json["tombstone"]["note"] == "spam" + assert res.json["tombstone"]["removed_by"]["user"] == "system" + + # Upload a file + res = client.put( + f"/mocks/{id_}/files/test.pdf/content", + headers={ + "content-type": "application/octet-stream", + "accept": "application/json", + }, + data=BytesIO(b"testfile"), + ) + assert res.status_code == 200 + assert res.json["status"] == "pending" + + # Commit the uploaded file + res = client.post(f"/mocks/{id_}/files/test.pdf/commit", headers=headers) + assert res.status_code == 200 + assert res.json["status"] == "completed" + + # Get the file metadata + res = client.get(f"/mocks/{id_}/files/test.pdf", headers=headers) + assert res.status_code == 410 + + # Read a file's content + res = client.get(f"/mocks/{id_}/files/test.pdf/content", headers=headers) + assert res.status_code == 410 + assert res.json["message"] == "Record deleted" + assert res.json["tombstone"]["note"] == "spam" + assert res.json["tombstone"]["removed_by"]["user"] == "system" + + # Update file metadata + res = client.put( + f"/mocks/{id_}/files/test.pdf", headers=headers, json={"title": "New title"} + ) + assert res.status_code == 200 + assert res.json["key"] == "test.pdf" + assert res.json["status"] == "completed" + assert res.json["metadata"] == {"title": "New title"} + + # Get all files + res = client.get(f"/mocks/{id_}/files", headers=headers) + assert res.status_code == 410 + assert res.json["message"] == "Record deleted" + assert res.json["tombstone"]["note"] == "spam" + assert res.json["tombstone"]["removed_by"]["user"] == "system" + + # Delete a file + res = client.delete(f"/mocks/{id_}/files/test.pdf", headers=headers) + assert res.status_code == 204 + + # Get all files + res = client.get(f"/mocks/{id_}/files", headers=headers) + assert res.status_code == 410 + assert res.json["message"] == "Record deleted" + assert res.json["tombstone"]["note"] == "spam" + assert res.json["tombstone"]["removed_by"]["user"] == "system" + + # Hack to override the mock service config to set it back to the default value + current_app.extensions["invenio-records-resources"].registry._services[ + "mock-records" + ].record_cls.deletion_status.is_deleted = False + + +def test_files_superuser_access_to_deleted_record( + client, + search_clear, + headers, + input_data, + location, + superuser, +): + """Test deleted record files with superuser access.""" + superuser_client = superuser.login(client) + + # Hack to override the mock service config and set records as deleted + current_app.extensions["invenio-records-resources"].registry._services[ + "mock-records" + ].record_cls.deletion_status.is_deleted = True + res = superuser_client.post("/mocks", headers=headers, json=input_data) + assert res.status_code == 201 + id_ = res.json["id"] + assert res.json["links"]["files"].endswith(f"/api/mocks/{id_}/files") + + # Initialize files upload + res = superuser_client.post( + f"/mocks/{id_}/files", + headers=headers, + json=[ + {"key": "test.pdf", "title": "Test file"}, + ], + ) + assert res.status_code == 201 + res_file = res.json["entries"][0] + assert res_file["key"] == "test.pdf" + assert res_file["status"] == "pending" + assert res_file["metadata"] == {"title": "Test file"} + assert res_file["links"]["self"].endswith(f"/api/mocks/{id_}/files/test.pdf") + assert res_file["links"]["content"].endswith( + f"/api/mocks/{id_}/files/test.pdf/content" + ) + assert res_file["links"]["commit"].endswith( + f"/api/mocks/{id_}/files/test.pdf/commit" + ) + + # Upload a file + res = superuser_client.put( + f"/mocks/{id_}/files/test.pdf/content", + headers={ + "content-type": "application/octet-stream", + "accept": "application/json", + }, + data=BytesIO(b"testfile"), + ) + assert res.status_code == 200 + assert res.json["status"] == "pending" + + # Commit the uploaded file + res = superuser_client.post(f"/mocks/{id_}/files/test.pdf/commit", headers=headers) + assert res.status_code == 200 + assert res.json["status"] == "completed" + + # Get the files-archive + res = superuser_client.get( + f"/mocks/{id_}/files-archive?include_deleted=0", headers=headers + ) + assert res.status_code == 410 + + res = superuser_client.get( + f"/mocks/{id_}/files-archive?include_deleted=1", headers=headers + ) + assert res.status_code == 200 + res.close() + + # Get the file metadata + res = superuser_client.get( + f"/mocks/{id_}/files/test.pdf?include_deleted=0", headers=headers + ) + assert res.status_code == 410 + + res = superuser_client.get( + f"/mocks/{id_}/files/test.pdf?include_deleted=1", headers=headers + ) + assert res.status_code == 200 + + # Read a file's content + res = superuser_client.get( + f"/mocks/{id_}/files/test.pdf/content?include_deleted=0", headers=headers + ) + assert res.status_code == 410 + assert res.json["message"] == "Record deleted" + assert res.json["tombstone"]["note"] == "spam" + assert res.json["tombstone"]["removed_by"]["user"] == "system" + + res = superuser_client.get( + f"/mocks/{id_}/files/test.pdf/content?include_deleted=1", headers=headers + ) + assert res.status_code == 200 + + current_app.extensions["invenio-records-resources"].registry._services[ + "mock-records" + ].record_cls.deletion_status.is_deleted = False + + superuser.logout(client) diff --git a/tests/services/files/test_file_service.py b/tests/services/files/test_file_service.py index f8d13b82..64c73926 100644 --- a/tests/services/files/test_file_service.py +++ b/tests/services/files/test_file_service.py @@ -12,12 +12,14 @@ from unittest.mock import patch import pytest +from flask import current_app from invenio_access.permissions import system_identity from marshmallow import ValidationError from invenio_records_resources.services.errors import ( FileKeyNotFoundError, PermissionDeniedError, + RecordDeletedException, ) # @@ -450,3 +452,108 @@ def test_read_not_committed_external_file( # Retrieve file with pytest.raises(PermissionDeniedError): file_service.get_file_content(identity_simple, recid, "article.txt") + + +def test_deleted_records_file_flow( + file_service, location, example_file_record, identity_simple, superuser_identity +): + """Test the lifecycle of a file. + + - Initialize file saving + - Save 1 files + - Commit the files + - List files of the record (ERROR) + - Read file metadata (ERROR) + - Retrieve a file (ERROR) + - Delete a file + """ + # Hack to override the mock service config and set records as deleted + current_app.extensions["invenio-records-resources"].registry._services[ + "mock-records" + ].record_cls.deletion_status.is_deleted = True + recid = example_file_record["id"] + file_to_initialise = [ + { + "key": "article.txt", + "checksum": "md5:c785060c866796cc2a1708c997154c8e", + "size": 17, # 2kB + "metadata": { + "description": "Published article PDF.", + }, + } + ] + # Initialize file saving + result = file_service.init_files(identity_simple, recid, file_to_initialise) + assert result.to_dict()["entries"][0]["key"] == file_to_initialise[0]["key"] + # for to_file in to_files: + content = BytesIO(b"test file content") + result = file_service.set_file_content( + identity_simple, + recid, + file_to_initialise[0]["key"], + content, + content.getbuffer().nbytes, + ) + # TODO figure response for succesfully saved file + assert result.to_dict()["key"] == file_to_initialise[0]["key"] + + result = file_service.commit_file(identity_simple, recid, "article.txt") + # TODO currently there is no status in the json between the initialisation + # and the commiting. + assert result.to_dict()["key"] == file_to_initialise[0]["key"] + + # List files + with pytest.raises(RecordDeletedException): + file_service.list_files(identity_simple, recid) + + with pytest.raises(RecordDeletedException): + file_service.list_files(identity_simple, recid, include_deleted=True) + + result = file_service.list_files(superuser_identity, recid, include_deleted=True) + assert result.to_dict()["entries"][0]["key"] == file_to_initialise[0]["key"] + assert result.to_dict()["entries"][0]["storage_class"] == "L" + assert "uri" not in result.to_dict()["entries"][0] + + # Read file metadata + with pytest.raises(RecordDeletedException): + file_service.read_file_metadata(identity_simple, recid, "article.txt") + + with pytest.raises(RecordDeletedException): + file_service.read_file_metadata( + identity_simple, recid, "article.txt", include_deleted=True + ) + + result = file_service.read_file_metadata( + superuser_identity, recid, "article.txt", include_deleted=True + ) + assert result.to_dict()["key"] == file_to_initialise[0]["key"] + assert result.to_dict()["storage_class"] == "L" + assert "uri" not in result.to_dict() + + # Retrieve file + with pytest.raises(RecordDeletedException): + file_service.get_file_content(identity_simple, recid, "article.txt") + + with pytest.raises(RecordDeletedException): + file_service.get_file_content( + identity_simple, recid, "article.txt", include_deleted=True + ) + + result = file_service.get_file_content( + superuser_identity, recid, "article.txt", include_deleted=True + ) + assert result.file_id == "article.txt" + + # Delete file + result = file_service.delete_file(identity_simple, recid, "article.txt") + assert result.file_id == "article.txt" + + # Assert deleted + result = file_service.list_files(superuser_identity, recid, include_deleted=True) + assert result.entries + assert len(list(result.entries)) == 0 + + # Hack to override the mock service config to set it back to the default value + current_app.extensions["invenio-records-resources"].registry._services[ + "mock-records" + ].record_cls.deletion_status.is_deleted = False