diff --git a/Pipfile b/Pipfile index 49dca939..48189638 100644 --- a/Pipfile +++ b/Pipfile @@ -17,7 +17,7 @@ sentry-sdk = ">=1.45,<2.0.0" zenodo_rdm = {editable="True", path="./site"} zenodo_legacy = {editable="True", path="./legacy"} # TODO: Remove once we fix PyPI package issues -invenio-swh = {git = "https://github.com/inveniosoftware/invenio-swh", ref = "v0.10.3"} +invenio-swh = {git = "https://github.com/inveniosoftware/invenio-swh", ref = "v0.11.0"} jsonschema = ">=4.17.0,<4.18.0" # due to compatibility issues with alpha ipython = "!=8.1.0" uwsgi = ">=2.0" diff --git a/invenio.cfg b/invenio.cfg index 50929839..f53c79d6 100644 --- a/invenio.cfg +++ b/invenio.cfg @@ -1064,3 +1064,49 @@ COMMUNITIES_SHOW_BROWSE_MENU_ENTRY = True JOBS_ADMINISTRATION_ENABLED = True """Enable Jobs administration view.""" + +from invenio_app_rdm.config import APP_RDM_RECORD_EXPORTERS as default_exporters + +APP_RDM_RECORD_EXPORTERS = { + **default_exporters, + "bibtex": { + "name": _("BibTeX"), + "serializer": ("zenodo_rdm.serializers:ZenodoBibtexSerializer"), + "params": {}, + "content-type": "application/x-bibtex", + "filename": "{id}.bib" + }, + "codemeta": { + "name": _("Codemeta"), + "serializer": "zenodo_rdm.serializers:ZenodoCodemetaSerializer", + "params": {}, + "content-type": "application/ld+json", + "filename": "{id}.json", + }, + "datacite-json": { + "name": _("DataCite JSON"), + "serializer": ( + "zenodo_rdm.serializers:ZenodoDataciteJSONSerializer" + ), + "params": {"options": {"indent": 2, "sort_keys": True}}, + "content-type": "application/vnd.datacite.datacite+json", + "filename": "{id}.json", + }, + "datacite-xml": { + "name": _("DataCite XML"), + "serializer": ( + "zenodo_rdm.serializers:ZenodoDataciteXMLSerializer" + ), + "params": {}, + "content-type": "application/vnd.datacite.datacite+xml", + "filename": "{id}.xml", + }, + "cff": { + "name": _("Citation File Format"), + "serializer": "zenodo_rdm.serializers:ZenodoCFFSerializer", + "params": {}, + 
"content-type": "application/x-yaml", + "filename": "{id}.yaml", + }, + +} \ No newline at end of file diff --git a/site/zenodo_rdm/api.py b/site/zenodo_rdm/api.py index e5e49e18..cc8cf794 100644 --- a/site/zenodo_rdm/api.py +++ b/site/zenodo_rdm/api.py @@ -11,6 +11,7 @@ from invenio_pidstore.providers.recordid import RecordIdProvider from invenio_rdm_records.records.api import RDMDraft, RDMParent, RDMRecord from invenio_records_resources.records.systemfields import PIDField +from invenio_swh.records.systemfields import SWHSysField class DraftRecordIdProvider(RecordIdProvider): @@ -50,6 +51,8 @@ class ZenodoRDMRecord(RDMRecord): ZenodoRDMParent, create=False, soft_delete=False, hard_delete=False ) + swh = SWHSysField("swh") + class ZenodoRDMDraft(RDMDraft): """Zenodo RDMDraft API class.""" diff --git a/site/zenodo_rdm/config.py b/site/zenodo_rdm/config.py index fad10757..cdf7272f 100644 --- a/site/zenodo_rdm/config.py +++ b/site/zenodo_rdm/config.py @@ -6,6 +6,7 @@ # under the terms of the MIT License; see LICENSE file for more details. 
"""Custom code config.""" + from .params import ZenodoArgsSchema, ZenodoSearchOptions from .redirector import ( communities_detail_view_function, @@ -27,6 +28,7 @@ redirect_records_search_slash, search_view_function, ) +from .schema import ZenodoRecordSchema # I18N_TRANSLATIONS_PATHS = [os.path.abspath("./site/zenodo_rdm/translations")] @@ -370,3 +372,7 @@ def lock_edit_record_published_files(service, identity, record=None, draft=None) "//cdnjs.cloudflare.com/ajax/libs/mathjax/3.2.2/es5/tex-mml-chtml.js" "?config=TeX-AMS-MML_HTMLorMML" ) + + +RDM_RECORD_SCHEMA = ZenodoRecordSchema +"""Base record schema.""" diff --git a/site/zenodo_rdm/legacy/serializers/schemas/zenodojson.py b/site/zenodo_rdm/legacy/serializers/schemas/zenodojson.py index 64bf3dae..d53b23f7 100644 --- a/site/zenodo_rdm/legacy/serializers/schemas/zenodojson.py +++ b/site/zenodo_rdm/legacy/serializers/schemas/zenodojson.py @@ -177,6 +177,8 @@ class ZenodoSchema(common.LegacySchema): files = fields.Method("dump_files", dump_only=True) metadata = fields.Nested(MetadataSchema) + swh = fields.Dict(dump_only=True) + owners = fields.Method("dump_owners") def dump_owners(self, obj): diff --git a/site/zenodo_rdm/schema.py b/site/zenodo_rdm/schema.py new file mode 100644 index 00000000..54482624 --- /dev/null +++ b/site/zenodo_rdm/schema.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2024 CERN. +# +# Zenodo-RDM is free software; you can redistribute it and/or modify +# it under the terms of the MIT License; see LICENSE file for more details. +"""Zenodo-RDM service schema.""" + +from invenio_rdm_records.services.schemas import RDMRecordSchema +from marshmallow import Schema, fields + + +class SWHSchema(Schema): + """Software Heritage schema.""" + + swhid = fields.Str() + + +class ZenodoRecordSchema(RDMRecordSchema): + """Zenodo service schema. + + This schema subclasses the base schema and extends it with Zenodo-specific + fields. 
+ """ + + swh = fields.Nested(SWHSchema, dump_only=True) diff --git a/site/zenodo_rdm/serializers/__init__.py b/site/zenodo_rdm/serializers/__init__.py new file mode 100644 index 00000000..9e631dab --- /dev/null +++ b/site/zenodo_rdm/serializers/__init__.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2024 CERN. +# +# ZenodoRDM is free software; you can redistribute it and/or modify +# it under the terms of the MIT License; see LICENSE file for more details. +"""Zenodo serializers.""" + +from .bibtex import ZenodoBibtexSerializer +from .cff import ZenodoCFFSerializer +from .codemeta import ZenodoCodemetaSerializer +from .datacite import ZenodoDataciteJSONSerializer, ZenodoDataciteXMLSerializer + +__all__ = ( + "ZenodoBibtexSerializer", + "ZenodoCodemetaSerializer", + "ZenodoDataciteJSONSerializer", + "ZenodoDataciteXMLSerializer", + "ZenodoCFFSerializer", +) diff --git a/site/zenodo_rdm/serializers/bibtex.py b/site/zenodo_rdm/serializers/bibtex.py new file mode 100644 index 00000000..f8ef291d --- /dev/null +++ b/site/zenodo_rdm/serializers/bibtex.py @@ -0,0 +1,41 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2024 CERN. +# +# ZenodoRDM is free software; you can redistribute it and/or modify +# it under the terms of the MIT License; see LICENSE file for more details. 
"""Zenodo bibtex serializer."""

from flask_resources import BaseListSchema, MarshmallowSerializer
from flask_resources.serializers import SimpleSerializer
from invenio_rdm_records.resources.serializers import BibtexSerializer
from invenio_rdm_records.resources.serializers.bibtex.schema import BibTexSchema
from marshmallow import fields, missing


class ZenodoBibtexSchema(BibTexSchema):
    """BibTeX schema extended with the record's Software Heritage ID.

    Adds a ``swhid`` method field on top of the fields inherited from
    ``BibTexSchema``.
    """

    swhid = fields.Method("get_swhid")

    def get_swhid(self, obj):
        """Return the record's SWHID, or ``missing`` when there is none.

        :param obj: Record projection (dict-like) being serialized. The value
            is read from ``obj["swh"]["swhid"]``.
        :returns: The SWHID value, or ``marshmallow.missing`` so that the
            field is left out of the dump.
        """
        # ``swh`` may be absent *or* present with a null value; ``or {}``
        # covers both, whereas ``obj.get("swh", {})`` only covers the former
        # and would raise AttributeError on ``None``.
        swh = obj.get("swh") or {}
        return swh.get("swhid") or missing


class ZenodoBibtexSerializer(MarshmallowSerializer):
    """Marshmallow-based serializer producing BibTeX via ``ZenodoBibtexSchema``."""

    def __init__(self, **options):
        """Initialize serializer.

        :param options: Accepted so the constructor can be called like the
            other serializers; the values are not forwarded to the parent.
        """
        super().__init__(
            format_serializer_cls=SimpleSerializer,
            object_schema_cls=ZenodoBibtexSchema,
            list_schema_cls=BaseListSchema,
            encoder=self.bibtex_tostring,
        )

    @classmethod
    def bibtex_tostring(cls, record):
        """Return ``record`` unchanged.

        Used as the ``encoder`` of the format serializer.
        NOTE(review): presumably the schema output is already the final
        BibTeX text, which is why no conversion happens here - confirm
        against ``BibTexSchema``.
        """
        return record
"""Zenodo CFF serializer."""

import yaml
from flask_resources import BaseListSchema, MarshmallowSerializer
from flask_resources.serializers import SimpleSerializer
from invenio_rdm_records.resources.serializers.cff.schema import CFFSchema
from marshmallow import missing


class ZenodoCFFSchema(CFFSchema):
    """Zenodo CFF (Citation File Format) schema.

    Extends the identifiers produced by ``CFFSchema`` with the record's
    Software Heritage ID when one is present.
    """

    def get_identifiers(self, obj):
        """Get identifiers.

        :param obj: Record projection (dict-like) being serialized.
        :returns: The parent schema's identifiers plus, when the record has
            an SWHID, an extra ``{"value": <swhid>, "type": "swh"}`` entry;
            ``marshmallow.missing`` when the resulting list is empty.
        """
        # Copy so the list handed back by the parent is never mutated; a
        # falsy parent result (e.g. ``missing``) becomes an empty list.
        identifiers = list(super().get_identifiers(obj) or [])
        # ``swh`` may be absent or explicitly null; ``or {}`` handles both.
        swhid = (obj.get("swh") or {}).get("swhid")
        if swhid:
            identifiers.append({"value": swhid, "type": "swh"})
        return identifiers or missing


class ZenodoCFFSerializer(MarshmallowSerializer):
    """Zenodo CFF serializer (YAML output by default)."""

    def __init__(self, **options):
        """Initialize serializer.

        :param options: Extra keyword arguments forwarded to the parent
            constructor. ``encoder`` may be supplied to override the default
            ``yaml.dump``.
        """
        # ``encoder`` is passed explicitly below, so it has to be removed
        # from ``options`` first; otherwise a caller-supplied encoder would
        # be given twice and Python raises
        # ``TypeError: got multiple values for keyword argument 'encoder'``.
        encoder = options.pop("encoder", yaml.dump)
        super().__init__(
            format_serializer_cls=SimpleSerializer,
            object_schema_cls=ZenodoCFFSchema,
            list_schema_cls=BaseListSchema,
            encoder=encoder,
            **options,
        )
"""Zenodo codemeta serializer."""

from flask import current_app
from flask_resources import BaseListSchema, MarshmallowSerializer
from flask_resources.serializers import JSONSerializer
from idutils import normalize_doi, to_url
from invenio_rdm_records.contrib.codemeta.processors import CodemetaDumper
from invenio_rdm_records.resources.serializers.codemeta.schema import CodemetaSchema
from marshmallow import fields, missing


class ZenodoCodemetaSchema(CodemetaSchema):
    """Zenodo Codemeta schema.

    Declares ``identifier`` as a method field so that it can carry the
    Software Heritage ID next to the DOI.
    """

    identifier = fields.Method("get_identifiers")

    def get_identifiers(self, obj):
        """Compute the "identifier".

        Each entry is a dict with the keys ``@type``, ``value`` and
        ``propertyID``:

        - the DOI, with ``propertyID`` set to the DOI expressed as a URL;
        - the Software Heritage persistent identifier (SWHID), with
          ``propertyID`` set to ``<SWH_UI_BASE_URL>/<swhid>``.

        :param obj: Record projection (dict-like) being serialized.
        :returns: A single dict when exactly one identifier is present, a
            list when both are, and ``marshmallow.missing`` when neither is.
        :raises KeyError: If the record has an SWHID but ``SWH_UI_BASE_URL``
            is not set in the application config.
        """
        # Every level may be absent or explicitly null; ``or {}`` handles
        # both, whereas ``.get(key, {})`` would still return ``None`` for a
        # null value and fail on the next ``.get``.
        pids = obj.get("pids") or {}
        doi = (pids.get("doi") or {}).get("identifier")
        swhid = (obj.get("swh") or {}).get("swhid")

        identifiers = []
        if doi:
            doi_url = to_url(normalize_doi(doi), "doi")
            identifiers.append({"@type": "doi", "value": doi, "propertyID": doi_url})
        if swhid:
            swh_url = f"{current_app.config['SWH_UI_BASE_URL']}/{swhid}"
            identifiers.append(
                {"@type": "swhid", "value": swhid, "propertyID": swh_url}
            )

        # A lone identifier is emitted as a bare object rather than a
        # one-element list.
        if len(identifiers) == 1:
            return identifiers[0]
        return identifiers or missing


class ZenodoCodemetaSerializer(MarshmallowSerializer):
    """Zenodo Codemeta serializer (JSON output)."""

    def __init__(self, **options):
        """Initialize serializer.

        :param options: Extra keyword arguments forwarded unchanged to the
            parent constructor.
        """
        super().__init__(
            format_serializer_cls=JSONSerializer,
            object_schema_cls=ZenodoCodemetaSchema,
            list_schema_cls=BaseListSchema,
            schema_kwargs={"dumpers": [CodemetaDumper()]},  # Order matters
            **options,
        )
#
# ZenodoRDM is free software; you can redistribute it and/or modify
# it under the terms of the MIT License; see LICENSE file for more details.
"""Zenodo datacite serializer."""

from datacite import schema43
from flask import current_app
from flask_resources import BaseListSchema, MarshmallowSerializer
from flask_resources.serializers import JSONSerializer, SimpleSerializer
from invenio_rdm_records.contrib.journal.processors import JournalDataciteDumper
from invenio_rdm_records.resources.serializers.datacite.schema import DataCite43Schema
from marshmallow import missing


class ZenodoDataciteSchema(DataCite43Schema):
    """Zenodo Datacite schema.

    Extends the related identifiers produced by ``DataCite43Schema`` with a
    link to the record's Software Heritage archive entry.
    """

    def get_related_identifiers(self, obj):
        """Get related identifiers.

        :param obj: Record projection (dict-like) being serialized.
        :returns: The parent schema's related identifiers plus, when the
            record has an SWHID, a ``URL`` / ``IsIdenticalTo`` entry pointing
            at ``<SWH_UI_BASE_URL>/<swhid>``; ``marshmallow.missing`` when
            the resulting list is empty.
        :raises KeyError: If the record has an SWHID but ``SWH_UI_BASE_URL``
            is not set in the application config.
        """
        # Copy so the list handed back by the parent is never mutated; a
        # falsy parent result (e.g. ``missing``) becomes an empty list.
        related = list(super().get_related_identifiers(obj) or [])
        # ``swh`` may be absent or explicitly null; ``or {}`` handles both.
        swhid = (obj.get("swh") or {}).get("swhid")
        if swhid:
            swh_url = f"{current_app.config['SWH_UI_BASE_URL']}/{swhid}"
            related.append(
                {
                    "relatedIdentifier": swh_url,
                    "relatedIdentifierType": "URL",
                    "relationType": "IsIdenticalTo",
                }
            )
        return related or missing


class ZenodoDataciteJSONSerializer(MarshmallowSerializer):
    """Zenodo Datacite JSON serializer."""

    def __init__(self, **options):
        """Instantiate serializer.

        :param options: Extra keyword arguments forwarded unchanged to the
            parent constructor.
        """
        super().__init__(
            format_serializer_cls=JSONSerializer,
            object_schema_cls=ZenodoDataciteSchema,
            list_schema_cls=BaseListSchema,
            schema_kwargs={"dumpers": [JournalDataciteDumper()]},  # Order matters
            **options,
        )


class ZenodoDataciteXMLSerializer(MarshmallowSerializer):
    """Zenodo Datacite XML serializer."""

    def __init__(self, **options):
        """Instantiate serializer.

        :param options: Only ``encoder`` is read (default
            ``schema43.tostring``); any other key is accepted but not
            forwarded to the parent constructor.
        """
        encoder = options.get("encoder", schema43.tostring)
        super().__init__(
            format_serializer_cls=SimpleSerializer,
            object_schema_cls=ZenodoDataciteSchema,
            list_schema_cls=BaseListSchema,
            schema_kwargs={"dumpers": [JournalDataciteDumper()]},  # Order matters
            encoder=encoder,
        )