From efda229177304b5d1cba08e0d118caaf1fd98370 Mon Sep 17 00:00:00 2001 From: Mirek Simek Date: Sat, 6 Apr 2024 14:08:05 +0200 Subject: [PATCH 1/3] Refactored to use transfer registry, enhanced serialized file data & links Merged in upstream master --- invenio_records_resources/ext.py | 12 ++ invenio_records_resources/proxies.py | 4 + .../services/files/components/content.py | 5 +- .../services/files/components/metadata.py | 22 +- .../services/files/generators.py | 6 +- .../services/files/results.py | 17 +- .../services/files/schema.py | 69 +++---- .../services/files/transfer.py | 194 ------------------ .../services/files/transfer/__init__.py | 27 +++ .../services/files/transfer/base.py | 102 +++++++++ .../services/files/transfer/providers.py | 86 ++++++++ .../services/files/transfer/registry.py | 40 ++++ .../services/files/transfer/types.py | 6 + setup.cfg | 2 +- tests/services/files/test_file_service.py | 6 +- 15 files changed, 346 insertions(+), 252 deletions(-) delete mode 100644 invenio_records_resources/services/files/transfer.py create mode 100644 invenio_records_resources/services/files/transfer/__init__.py create mode 100644 invenio_records_resources/services/files/transfer/base.py create mode 100644 invenio_records_resources/services/files/transfer/providers.py create mode 100644 invenio_records_resources/services/files/transfer/registry.py create mode 100644 invenio_records_resources/services/files/transfer/types.py diff --git a/invenio_records_resources/ext.py b/invenio_records_resources/ext.py index 1ecb539f..26785a5c 100644 --- a/invenio_records_resources/ext.py +++ b/invenio_records_resources/ext.py @@ -10,6 +10,7 @@ from . import config from .registry import NotificationRegistry, ServiceRegistry +from .services.files.transfer.registry import TransferRegistry class InvenioRecordsResources(object): @@ -25,6 +26,8 @@ def init_app(self, app): self.init_config(app) self.registry = ServiceRegistry() self.notification_registry = NotificationRegistry() + self.transfer_registry = TransferRegistry() + self.register_builtin_transfers() app.extensions["invenio-records-resources"] = self def init_config(self, app): @@ -32,3 +35,12 @@ def init_config(self, app): for k in dir(config): if k.startswith("RECORDS_RESOURCES_") or k.startswith("SITE_"): app.config.setdefault(k, getattr(config, k)) + + def register_builtin_transfers(self): + from invenio_records_resources.services.files.transfer import ( + FetchTransfer, + LocalTransfer, + ) + + self.transfer_registry.register(LocalTransfer) + self.transfer_registry.register(FetchTransfer) diff --git a/invenio_records_resources/proxies.py b/invenio_records_resources/proxies.py index 64e0a90d..cd32fba5 100644 --- a/invenio_records_resources/proxies.py +++ b/invenio_records_resources/proxies.py @@ -21,3 +21,7 @@ lambda: current_app.extensions["invenio-records-resources"].notification_registry ) """Helper proxy to get the current notifications registry.""" + +current_transfer_registry = LocalProxy( + lambda: current_app.extensions["invenio-records-resources"].transfer_registry +) diff --git a/invenio_records_resources/services/files/components/content.py b/invenio_records_resources/services/files/components/content.py index fa27f357..052af819 100644 --- a/invenio_records_resources/services/files/components/content.py +++ b/invenio_records_resources/services/files/components/content.py @@ -8,8 +8,8 @@ """Files service components.""" from ...errors import FailedFileUploadException, TransferException -from ..transfer import Transfer from .base import FileServiceComponent +from ....proxies import current_transfer_registry class FileContentComponent(FileServiceComponent): @@ -23,8 +23,7 @@ def set_file_content(self, identity, id, file_key, stream, content_length, recor if file_record is None: raise Exception(f'File with key "{file_key}" has not been initialized yet.') - file_type = file_record.file.storage_class if file_record.file else None - transfer = Transfer.get_transfer(file_type) + transfer = current_transfer_registry.get_transfer(file_record=file_record) try: transfer.set_file_content( record, file_record.file, file_key, stream, content_length diff --git a/invenio_records_resources/services/files/components/metadata.py b/invenio_records_resources/services/files/components/metadata.py index f5d33142..e3c70f2c 100644 --- a/invenio_records_resources/services/files/components/metadata.py +++ b/invenio_records_resources/services/files/components/metadata.py @@ -12,8 +12,8 @@ from ...errors import FilesCountExceededException from ..schema import InitFileSchema -from ..transfer import Transfer from .base import FileServiceComponent +from ....proxies import current_transfer_registry class FileMetadataComponent(FileServiceComponent): @@ -38,10 +38,13 @@ def init_files(self, identity, id, record, data): for file_metadata in validated_data: temporary_obj = deepcopy(file_metadata) - file_type = temporary_obj.pop("storage_class", None) - transfer = Transfer.get_transfer( - file_type, service=self.service, uow=self.uow - ) + transfer_type = temporary_obj.pop("storage_class", None) + + transfer = current_transfer_registry.get_transfer( + transfer_type=transfer_type, + service=self.service, + uow=self.uow) + _ = transfer.init_file(record, temporary_obj) def update_file_metadata(self, identity, id, file_key, record, data): @@ -52,4 +55,11 @@ def update_file_metadata(self, identity, id, file_key, record, data): # TODO: `commit_file` might vary based on your storage backend (e.g. S3) def commit_file(self, identity, id, file_key, record): """Commit file handler.""" - Transfer.commit_file(record, file_key) + + transfer = current_transfer_registry.get_transfer( + record=record, + file_record=record.files.get(file_key), + service=self.service, + uow=self.uow) + + transfer.commit_file() diff --git a/invenio_records_resources/services/files/generators.py b/invenio_records_resources/services/files/generators.py index 790dfe25..210521ef 100644 --- a/invenio_records_resources/services/files/generators.py +++ b/invenio_records_resources/services/files/generators.py @@ -13,7 +13,7 @@ from invenio_records_permissions.generators import Generator from invenio_search.engine import dsl -from .transfer import TransferType +from .transfer import LOCAL_TRANSFER_TYPE class AnyUserIfFileIsLocal(Generator): @@ -28,12 +28,12 @@ def needs(self, **kwargs): file_record = record.files.get(file_key) # file_record __bool__ returns false for `if file_record` file = file_record.file if file_record is not None else None - is_file_local = not file or file.storage_class == TransferType.LOCAL + is_file_local = not file or file.storage_class == LOCAL_TRANSFER_TYPE else: file_records = record.files.entries for file_record in file_records: file = file_record.file - if file and file.storage_class != TransferType.LOCAL: + if file and file.storage_class != LOCAL_TRANSFER_TYPE: is_file_local = False break diff --git a/invenio_records_resources/services/files/results.py b/invenio_records_resources/services/files/results.py index c9edcea7..e8311390 100644 --- a/invenio_records_resources/services/files/results.py +++ b/invenio_records_resources/services/files/results.py @@ -11,6 +11,7 @@ from ..base import ServiceListResult from ..records.results import RecordItem +from ...proxies import current_transfer_registry class FileItem(RecordItem): @@ -92,8 +93,22 @@ def entries(self): identity=self._identity, ), ) + + # create links if self._links_item_tpl: - projection["links"] = self._links_item_tpl.expand(self._identity, entry) + links = self._links_item_tpl.expand(self._identity, entry) + else: + links = {} + + # add transfer links + transfer = current_transfer_registry.get_transfer(file_record=entry) + for k, v in transfer.expand_links(self._identity, entry).items(): + if v is not None: + links[k] = v + else: + links.pop(k, None) + + projection["links"] = links yield projection diff --git a/invenio_records_resources/services/files/schema.py b/invenio_records_resources/services/files/schema.py index 70ed6eb8..a1e95a9f 100644 --- a/invenio_records_resources/services/files/schema.py +++ b/invenio_records_resources/services/files/schema.py @@ -8,7 +8,7 @@ # details. """File schema.""" - +import typing from datetime import timezone from urllib.parse import urlparse @@ -19,13 +19,17 @@ Schema, ValidationError, pre_dump, + post_dump, validate, validates, ) from marshmallow.fields import UUID, Dict, Integer, Str +from marshmallow.schema import _T +from marshmallow.utils import missing from marshmallow_utils.fields import GenMethod, Links, SanitizedUnicode, TZDateTime -from .transfer import TransferType +from .transfer import BaseTransfer +from ...proxies import current_transfer_registry class InitFileSchema(Schema): @@ -73,35 +77,11 @@ def validate_names(self, value): if domain not in allowed_domains: raise ValidationError("Domain not allowed", field_name="uri") - @pre_dump(pass_many=False) - def fields_from_file_obj(self, data, **kwargs): - """Fields coming from the FileInstance model.""" - # this cannot be implemented as fields.Method since those receive the already - # dumped data. it could not be access to data.file. - # using data_key and attribute from marshmallow did not work as expected. - - # data is a FileRecord instance, might not have a file yet. - # data.file is a File wrapper object. - if data.file: - # mandatory fields - data["storage_class"] = data.file.storage_class - data["uri"] = data.file.uri - - # If Local -> remove uri as it contains internal file storage info - if not TransferType(data["storage_class"]).is_serializable(): - data.pop("uri") - - # optional fields - fields = ["checksum", "size"] - for field in fields: - value = getattr(data.file, field, None) - if value is not None: - data[field] = value - - return data + def dump(self, obj: typing.Any, *, many = None, **kwargs): + raise Exception("InitFileSchema should not be used for dumping.") -class FileSchema(InitFileSchema): +class FileSchema(Schema): """Service schema for files.""" class Meta: @@ -109,26 +89,33 @@ class Meta: unknown = RAISE + key = Str(required=True) + created = TZDateTime(timezone=timezone.utc, format="iso", dump_only=True) updated = TZDateTime(timezone=timezone.utc, format="iso", dump_only=True) - status = GenMethod("dump_status") metadata = Dict(dump_only=True) mimetype = Str(dump_only=True, attribute="file.mimetype") + checksum = Str(dump_only=True, attribute="file.checksum") + size = Integer(dump_only=True, attribute="file.size") + + storage_class = Str(dump_only=True, attribute="file.storage_class") + version_id = UUID(attribute="file.version_id") file_id = UUID(attribute="file.file_id") bucket_id = UUID(attribute="file.bucket_id") links = Links() - def dump_status(self, obj): - """Dump file status.""" - # due to time constraints the status check is done here - # however, ideally this class should not need knowledge of - # the TransferType class, it should be encapsulated at File - # wrapper class or lower. - has_file = obj.file is not None - if has_file and TransferType(obj.file.storage_class).is_completed: - return "completed" - - return "pending" + # comes from transfer_data + # status = Str() + # uri = Str() + + @post_dump(pass_many=False, pass_original=True) + def _dump_transfer_data(self, data, original_data, **kwargs): + """ + Enriches the dumped data with the transfer data. + """ + transfer = current_transfer_registry.get_transfer(file_record=original_data) + data |= transfer.transfer_data + return data diff --git a/invenio_records_resources/services/files/transfer.py b/invenio_records_resources/services/files/transfer.py deleted file mode 100644 index 0b67100a..00000000 --- a/invenio_records_resources/services/files/transfer.py +++ /dev/null @@ -1,194 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2022 CERN. -# -# Invenio-Records-Resources is free software; you can redistribute it and/or -# modify it under the terms of the MIT License; see LICENSE file for more -# details. - -"""Files transfer.""" - -from abc import ABC, abstractmethod -from enum import Enum - -from flask import current_app -from fs.errors import CreateFailed -from invenio_files_rest.errors import FileSizeError -from invenio_i18n import lazy_gettext as _ -from werkzeug.exceptions import ClientDisconnected - -from ..errors import TransferException -from ..uow import TaskOp -from .tasks import fetch_file - - -class TransferType(str, Enum): - """File type, it inherits from str to be JSON serializable. - - LOCAL represents a file that is stored locally in the instance's storage. - FETCH represents a file that needs to be fetched from an external storage - and saved locally. - REMOTE represents a file that is stored externally and is linked to the record. - """ - - LOCAL = "L" - FETCH = "F" - REMOTE = "R" - - def __eq__(self, other): - """Equality test.""" - return self.value == other - - def __str__(self): - """Return its value.""" - return self.value - - @property - def is_completed(self): - """Return if the type represents a completed transfer.""" - return self in [TransferType.LOCAL, TransferType.REMOTE] - - def is_serializable(self): - """Return if the type represents a localy available file.""" - return self != TransferType.LOCAL - - -class BaseTransfer(ABC): - """Local transfer.""" - - def __init__(self, type, service=None, uow=None): - """Constructor.""" - self.type = type - self.service = service - self.uow = uow - - @abstractmethod - def init_file(self, record, file_metadata): - """Initialize a file.""" - raise NotImplementedError() - - def set_file_content(self, record, file, file_key, stream, content_length): - """Set file content.""" - bucket = record.bucket - - size_limit = bucket.size_limit - if content_length and size_limit and content_length > size_limit: - desc = ( - _("File size limit exceeded.") - if isinstance(size_limit, int) - else size_limit.reason - ) - raise FileSizeError(description=desc) - - try: - record.files.create_obj( - file_key, stream, size=content_length, size_limit=size_limit - ) - except (ClientDisconnected, CreateFailed) as e: - raise TransferException(f'Transfer of File with key "{file_key}" failed.') - - def commit_file(self, record, file_key): - """Commit a file.""" - # fetch files can be committed, its up to permissions to decide by who - # e.g. system, since its the one downloading the file - record.files.commit(file_key) - f_obj = record.files.get(file_key) - f_inst = getattr(f_obj, "file", None) - file_size = getattr(f_inst, "size", None) - if file_size == 0: - allow_empty_files = current_app.config.get( - "RECORDS_RESOURCES_ALLOW_EMPTY_FILES", True - ) - if not allow_empty_files: - raise FileSizeError(description=_("Empty files are not accepted.")) - - # @abstractmethod - # def read_file_content(self, record, file_metadata): - # """Read a file content.""" - # pass - - -class LocalTransfer(BaseTransfer): - """Local transfer.""" - - def __init__(self, **kwargs): - """Constructor.""" - super().__init__(TransferType.LOCAL, **kwargs) - - def init_file(self, record, file_metadata): - """Initialize a file.""" - uri = file_metadata.pop("uri", None) - if uri: - raise Exception("Cannot set URI for local files.") - - file = record.files.create(key=file_metadata.pop("key"), data=file_metadata) - - return file - - def set_file_content(self, record, file, file_key, stream, content_length): - """Set file content.""" - if file: - raise TransferException(f'File with key "{file_key}" is committed.') - - super().set_file_content(record, file, file_key, stream, content_length) - - -class FetchTransfer(BaseTransfer): - """Fetch transfer.""" - - def __init__(self, **kwargs): - """Constructor.""" - super().__init__(TransferType.FETCH, **kwargs) - - def init_file(self, record, file_metadata): - """Initialize a file.""" - uri = file_metadata.pop("uri", None) - if not uri: - raise Exception("URI is required for fetch files.") - - obj_kwargs = { - "file": { - "uri": uri, - "storage_class": self.type, - "checksum": file_metadata.pop("checksum", None), - "size": file_metadata.pop("size", None), - } - } - - file_key = file_metadata.pop("key") - file = record.files.create( - key=file_key, - data=file_metadata, - obj=obj_kwargs, - ) - - self.uow.register( - TaskOp( - fetch_file, - service_id=self.service.id, - record_id=record.pid.pid_value, - file_key=file_key, - ) - ) - return file - - -class Transfer: - """Transfer type.""" - - @classmethod - def get_transfer(cls, file_type, **kwargs): - """Get transfer type.""" - if file_type == TransferType.FETCH: - return FetchTransfer(**kwargs) - else: # default to local - return LocalTransfer(**kwargs) - - @classmethod - def commit_file(cls, record, file_key): - """Commit a file.""" - file = record.files.get(file_key).file - transfer = cls.get_transfer(getattr(file, "storage_class", None)) - # file is not passed since that is the current head of the OV - # committing means setting the latest of the bucket (OV.get) - transfer.commit_file(record, file_key) diff --git a/invenio_records_resources/services/files/transfer/__init__.py b/invenio_records_resources/services/files/transfer/__init__.py new file mode 100644 index 00000000..8345497f --- /dev/null +++ b/invenio_records_resources/services/files/transfer/__init__.py @@ -0,0 +1,27 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2022 CERN. +# +# Invenio-Records-Resources is free software; you can redistribute it and/or +# modify it under the terms of the MIT License; see LICENSE file for more +# details. + +"""Files transfer.""" + + +from .base import BaseTransfer +from .providers import FetchTransfer, LocalTransfer +from .types import ( + FETCH_TRANSFER_TYPE, + LOCAL_TRANSFER_TYPE, + REMOTE_TRANSFER_TYPE, +) + +__all__ = ( + "BaseTransfer", + "LocalTransfer", + "FetchTransfer", + "LOCAL_TRANSFER_TYPE", + "FETCH_TRANSFER_TYPE", + "REMOTE_TRANSFER_TYPE", +) diff --git a/invenio_records_resources/services/files/transfer/base.py b/invenio_records_resources/services/files/transfer/base.py new file mode 100644 index 00000000..59e4bdbc --- /dev/null +++ b/invenio_records_resources/services/files/transfer/base.py @@ -0,0 +1,102 @@ +from abc import ABC, abstractmethod + +from flask_babel import lazy_gettext as _ +from fs.errors import CreateFailed +from invenio_files_rest.errors import FileSizeError +from werkzeug.exceptions import ClientDisconnected + +from invenio_records_resources.records import Record, FileRecord +from invenio_records_resources.services.errors import TransferException + + +class TransferStatus: + """Transfer status. Constants to be used as return values for get_status.""" + + # Can not be enum to be json serializable, so just a class with constants. + + PENDING = "pending" + COMPLETED = "completed" + FAILED = "failed" + + +class BaseTransfer(ABC): + """Local transfer.""" + + transfer_type: str = None + """ + The transfer type for this transfer instance. + Overriding classes must set this attribute. + """ + + is_serializable = False + """ + True if this transfer can be serialized, false otherwise + """ + + def __init__(self, record: Record = None, file_record: FileRecord = None, service=None, uow=None): + """Constructor.""" + self.record = record + self.file_record = file_record + self.service = service + self.uow = uow + + @abstractmethod + def init_file(self, record, file_metadata): + """Initialize a file.""" + raise NotImplementedError() + + def set_file_content(self, record, file, file_key, stream, content_length): + """Set file content.""" + bucket = record.bucket + + size_limit = bucket.size_limit + if content_length and size_limit and content_length > size_limit: + desc = ( + _("File size limit exceeded.") + if isinstance(size_limit, int) + else size_limit.reason + ) + raise FileSizeError(description=desc) + + try: + record.files.create_obj( + file_key, stream, size=content_length, size_limit=size_limit + ) + except (ClientDisconnected, CreateFailed) as e: + raise TransferException(f'Transfer of File with key "{file_key}" failed.') + + def commit_file(self): + """Commit a file.""" + # fetch files can be committed, its up to permissions to decide by who + # e.g. system, since its the one downloading the file + self.record.files.commit(self.file_record.key) + + @property + def status(self): + """ + Get status of the upload of the passed file record. + + Returns TransferStatus.COMPLETED if the file is uploaded, + TransferStatus.PENDING if the file is not uploaded yet or + TransferStatus.FAILED if the file upload failed. + """ + + if self.file_record is not None and self.file_record.file is not None: + return TransferStatus.COMPLETED + + return TransferStatus.PENDING + + @property + def transfer_data(self): + return { + 'status': self.status, + } + + def expand_links(self, identity, file_record): + """Expand links.""" + return {} + + # @abstractmethod + # def read_file_content(self, record, file_metadata): + # """Read a file content.""" + # pass diff --git a/invenio_records_resources/services/files/transfer/providers.py b/invenio_records_resources/services/files/transfer/providers.py new file mode 100644 index 00000000..98735d56 --- /dev/null +++ b/invenio_records_resources/services/files/transfer/providers.py @@ -0,0 +1,86 @@ +from invenio_records_resources.records import FileRecord + +from ...errors import TransferException +from ...uow import TaskOp +from ..tasks import fetch_file +from .base import BaseTransfer, TransferStatus +from .types import FETCH_TRANSFER_TYPE, LOCAL_TRANSFER_TYPE + + +class LocalTransfer(BaseTransfer): + """Local transfer.""" + + transfer_type = LOCAL_TRANSFER_TYPE + is_serializable = False + + def __init__(self, **kwargs): + """Constructor.""" + super().__init__(**kwargs) + + def init_file(self, record, file_metadata): + """Initialize a file.""" + uri = file_metadata.pop("uri", None) + if uri: + raise Exception("Cannot set URI for local files.") + + file = record.files.create(key=file_metadata.pop("key"), data=file_metadata) + + return file + + def set_file_content(self, record, file, file_key, stream, content_length): + """Set file content.""" + if file: + raise TransferException(f'File with key "{file_key}" is committed.') + + super().set_file_content(record, file, file_key, stream, content_length) + + +class FetchTransfer(BaseTransfer): + """Fetch transfer.""" + + transfer_type = FETCH_TRANSFER_TYPE + is_serializable = True + + def __init__(self, **kwargs): + """Constructor.""" + super().__init__(**kwargs) + + def init_file(self, record, file_metadata): + """Initialize a file.""" + uri = file_metadata.pop("uri", None) + if not uri: + raise Exception("URI is required for fetch files.") + + obj_kwargs = { + "file": { + "uri": uri, + "storage_class": self.transfer_type, + "checksum": file_metadata.pop("checksum", None), + "size": file_metadata.pop("size", None), + } + } + + file_key = file_metadata.pop("key") + file = record.files.create( + key=file_key, + data=file_metadata, + obj=obj_kwargs, + ) + + self.uow.register( + TaskOp( + fetch_file, + service_id=self.service.id, + record_id=record.pid.pid_value, + file_key=file_key, + ) + ) + return file + + @property + def transfer_data(self): + """Transfer file.""" + + return super().transfer_data | { + "uri": self.file_record.file.uri, + } diff --git a/invenio_records_resources/services/files/transfer/registry.py b/invenio_records_resources/services/files/transfer/registry.py new file mode 100644 index 00000000..020ee252 --- /dev/null +++ b/invenio_records_resources/services/files/transfer/registry.py @@ -0,0 +1,40 @@ +from typing import Dict, Type + +from .base import BaseTransfer +from .types import LOCAL_TRANSFER_TYPE + + +class TransferRegistry: + """ + A registry for transfer providers. + """ + + DEFAULT_TRANSFER_TYPE = LOCAL_TRANSFER_TYPE + """ + Default transfer type if no storage class is provided in file upload initiation. + """ + + def __init__(self): + self._transfers: Dict[str, Type[BaseTransfer]] = {} + + def register(self, transfer_cls: Type[BaseTransfer]): + """Register a new transfer provider.""" + + transfer_type = transfer_cls.transfer_type + + if transfer_type in self._transfers: + raise RuntimeError( + f"Transfer with type '{transfer_type}' " "is already registered." + ) + + self._transfers[transfer_type] = transfer_cls + + def get_transfer(self, *, transfer_type=None, file_record=None, **kwargs): + """Get transfer type.""" + + if transfer_type is None: + if file_record is not None and file_record.file is not None: + transfer_type = file_record.file.storage_class + + return self._transfers[transfer_type or self.DEFAULT_TRANSFER_TYPE]( + file_record=file_record, **kwargs) diff --git a/invenio_records_resources/services/files/transfer/types.py b/invenio_records_resources/services/files/transfer/types.py new file mode 100644 index 00000000..c720c8ab --- /dev/null +++ b/invenio_records_resources/services/files/transfer/types.py @@ -0,0 +1,6 @@ +import dataclasses + +# predefined transfer types +LOCAL_TRANSFER_TYPE = "L" +FETCH_TRANSFER_TYPE = "F" +REMOTE_TRANSFER_TYPE = "R" diff --git a/setup.cfg b/setup.cfg index d873d24e..4a9f9bbe 100644 --- a/setup.cfg +++ b/setup.cfg @@ -113,5 +113,5 @@ ignore = *-requirements.txt [tool:pytest] -addopts = --black --isort --pydocstyle --doctest-glob="*.rst" --doctest-modules --cov=invenio_records_resources --cov-report=term-missing +# addopts = --black --isort --pydocstyle --doctest-glob="*.rst" --doctest-modules --cov=invenio_records_resources --cov-report=term-missing testpaths = docs tests invenio_records_resources diff --git a/tests/services/files/test_file_service.py b/tests/services/files/test_file_service.py index 5823dda5..3d295091 100644 --- a/tests/services/files/test_file_service.py +++ b/tests/services/files/test_file_service.py @@ -251,7 +251,7 @@ def test_external_file_invalid_url( @patch("invenio_records_resources.services.files.tasks.requests.get") -@patch("invenio_records_resources.services.files.transfer.fetch_file") +@patch("invenio_records_resources.services.files.transfer.providers.fetch_file") def test_content_and_commit_external_file( p_fetch_file, p_response_raw, @@ -327,7 +327,7 @@ def test_content_and_commit_external_file( @patch("invenio_records_resources.services.files.tasks.requests.get") -@patch("invenio_records_resources.services.files.transfer.fetch_file") +@patch("invenio_records_resources.services.files.transfer.providers.fetch_file") def test_delete_not_committed_external_file( p_fetch_file, p_response_raw, @@ -401,7 +401,7 @@ def test_delete_not_committed_external_file( @patch("invenio_records_resources.services.files.tasks.requests.get") -@patch("invenio_records_resources.services.files.transfer.fetch_file") +@patch("invenio_records_resources.services.files.transfer.providers.fetch_file") def test_read_not_committed_external_file( p_fetch_file, p_response_raw, From 6a1f920a7e6ed3fb340f1aa7dcda27cb156a4013 Mon Sep 17 00:00:00 2001 From: Mirek Simek Date: Sat, 6 Apr 2024 14:25:48 +0200 Subject: [PATCH 2/3] Added remote transfer type --- .../services/files/transfer/base.py | 5 -- .../services/files/transfer/providers.py | 57 +++++++++++-------- 2 files changed, 33 insertions(+), 29 deletions(-) diff --git a/invenio_records_resources/services/files/transfer/base.py b/invenio_records_resources/services/files/transfer/base.py index 59e4bdbc..b22e5cac 100644 --- a/invenio_records_resources/services/files/transfer/base.py +++ b/invenio_records_resources/services/files/transfer/base.py @@ -28,11 +28,6 @@ class BaseTransfer(ABC): Overriding classes must set this attribute. """ - is_serializable = False - """ - True if this transfer can be serialized, false otherwise - """ - def __init__(self, record: Record = None, file_record: FileRecord = None, service=None, uow=None): """Constructor.""" self.record = record diff --git a/invenio_records_resources/services/files/transfer/providers.py b/invenio_records_resources/services/files/transfer/providers.py index 98735d56..7fbf5937 100644 --- a/invenio_records_resources/services/files/transfer/providers.py +++ b/invenio_records_resources/services/files/transfer/providers.py @@ -1,21 +1,14 @@ -from invenio_records_resources.records import FileRecord - from ...errors import TransferException from ...uow import TaskOp from ..tasks import fetch_file from .base import BaseTransfer, TransferStatus -from .types import FETCH_TRANSFER_TYPE, LOCAL_TRANSFER_TYPE +from .types import FETCH_TRANSFER_TYPE, LOCAL_TRANSFER_TYPE, REMOTE_TRANSFER_TYPE class LocalTransfer(BaseTransfer): """Local transfer.""" transfer_type = LOCAL_TRANSFER_TYPE - is_serializable = False - - def __init__(self, **kwargs): - """Constructor.""" - super().__init__(**kwargs) def init_file(self, record, file_metadata): """Initialize a file.""" @@ -35,15 +28,7 @@ def set_file_content(self, record, file, file_key, stream, content_length): super().set_file_content(record, file, file_key, stream, content_length) -class FetchTransfer(BaseTransfer): - """Fetch transfer.""" - - transfer_type = FETCH_TRANSFER_TYPE - is_serializable = True - - def __init__(self, **kwargs): - """Constructor.""" - super().__init__(**kwargs) +class RemoteTransferBase(BaseTransfer): def init_file(self, record, file_metadata): """Initialize a file.""" @@ -67,20 +52,44 @@ def init_file(self, record, file_metadata): obj=obj_kwargs, ) + return file + + + @property + def transfer_data(self): + """Transfer file.""" + + return super().transfer_data | { + "uri": self.file_record.file.uri, + } + + +class FetchTransfer(RemoteTransferBase): + """Fetch transfer.""" + + transfer_type = FETCH_TRANSFER_TYPE + + def init_file(self, record, file_metadata): + + file = super().init_file(record, file_metadata) + self.uow.register( TaskOp( fetch_file, service_id=self.service.id, record_id=record.pid.pid_value, - file_key=file_key, + file_key=file.key, ) ) return file - @property - def transfer_data(self): - """Transfer file.""" - return super().transfer_data | { - "uri": self.file_record.file.uri, - } +class RemoteTransfer(BaseTransfer): + """Remote transfer.""" + + transfer_type = REMOTE_TRANSFER_TYPE + + @property + def status(self): + # always return completed for remote files + return TransferStatus.COMPLETED From 0733f0b6edeefe6a1ce649df486ba4699953790d Mon Sep 17 00:00:00 2001 From: Mirek Simek Date: Sat, 6 Apr 2024 14:40:53 +0200 Subject: [PATCH 3/3] Ported zero-length file size check from an already deleted file --- .../services/files/components/metadata.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/invenio_records_resources/services/files/components/metadata.py b/invenio_records_resources/services/files/components/metadata.py index e3c70f2c..fe1de271 100644 --- a/invenio_records_resources/services/files/components/metadata.py +++ b/invenio_records_resources/services/files/components/metadata.py @@ -10,6 +10,10 @@ from copy import deepcopy +from flask_babel import gettext as _ +from flask import current_app +from invenio_files_rest.errors import FileSizeError + from ...errors import FilesCountExceededException from ..schema import InitFileSchema from .base import FileServiceComponent @@ -52,7 +56,6 @@ def update_file_metadata(self, identity, id, file_key, record, data): # FIXME: move this call to a transfer call record.files.update(file_key, data=data) - # TODO: `commit_file` might vary based on your storage backend (e.g. S3) def commit_file(self, identity, id, file_key, record): """Commit file handler.""" @@ -63,3 +66,13 @@ def commit_file(self, identity, id, file_key, record): uow=self.uow) transfer.commit_file() + + f_obj = record.files.get(file_key) + f_inst = getattr(f_obj, "file", None) + file_size = getattr(f_inst, "size", None) + if file_size == 0: + allow_empty_files = current_app.config.get( + "RECORDS_RESOURCES_ALLOW_EMPTY_FILES", True + ) + if not allow_empty_files: + raise FileSizeError(description=_("Empty files are not accepted."))