Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

record: added swh field and serializers #1069

Merged
merged 9 commits into from
Nov 28, 2024
Merged
2 changes: 1 addition & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ sentry-sdk = ">=1.45,<2.0.0"
zenodo_rdm = {editable="True", path="./site"}
zenodo_legacy = {editable="True", path="./legacy"}
# TODO: Remove once we fix PyPI package issues
invenio-swh = {git = "https://github.com/inveniosoftware/invenio-swh", ref = "v0.10.3"}
invenio-swh = {git = "https://github.com/inveniosoftware/invenio-swh", ref = "v0.11.0"}
jsonschema = ">=4.17.0,<4.18.0" # due to compatibility issues with alpha
ipython = "!=8.1.0"
uwsgi = ">=2.0"
Expand Down
46 changes: 46 additions & 0 deletions invenio.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -1064,3 +1064,49 @@ COMMUNITIES_SHOW_BROWSE_MENU_ENTRY = True

JOBS_ADMINISTRATION_ENABLED = True
"""Enable Jobs administration view."""

from invenio_app_rdm.config import APP_RDM_RECORD_EXPORTERS as default_exporters

# Start from the default exporters and replace/add the formats listed below so
# that they are rendered by the serializers from ``zenodo_rdm.serializers``.
APP_RDM_RECORD_EXPORTERS = {
    **default_exporters,
    "bibtex": {
        "name": _("BibTeX"),
        "serializer": "zenodo_rdm.serializers:ZenodoBibtexSerializer",
        "params": {},
        "content-type": "application/x-bibtex",
        "filename": "{id}.bib",
    },
    "codemeta": {
        "name": _("Codemeta"),
        "serializer": "zenodo_rdm.serializers:ZenodoCodemetaSerializer",
        "params": {},
        "content-type": "application/ld+json",
        "filename": "{id}.json",
    },
    "datacite-json": {
        "name": _("DataCite JSON"),
        "serializer": "zenodo_rdm.serializers:ZenodoDataciteJSONSerializer",
        # Pretty-print the JSON output with a stable key order.
        "params": {"options": {"indent": 2, "sort_keys": True}},
        "content-type": "application/vnd.datacite.datacite+json",
        "filename": "{id}.json",
    },
    "datacite-xml": {
        "name": _("DataCite XML"),
        "serializer": "zenodo_rdm.serializers:ZenodoDataciteXMLSerializer",
        "params": {},
        "content-type": "application/vnd.datacite.datacite+xml",
        "filename": "{id}.xml",
    },
    "cff": {
        "name": _("Citation File Format"),
        "serializer": "zenodo_rdm.serializers:ZenodoCFFSerializer",
        "params": {},
        "content-type": "application/x-yaml",
        "filename": "{id}.yaml",
    },
}
3 changes: 3 additions & 0 deletions site/zenodo_rdm/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from invenio_pidstore.providers.recordid import RecordIdProvider
from invenio_rdm_records.records.api import RDMDraft, RDMParent, RDMRecord
from invenio_records_resources.records.systemfields import PIDField
from invenio_swh.records.systemfields import SWHSysField


class DraftRecordIdProvider(RecordIdProvider):
Expand Down Expand Up @@ -50,6 +51,8 @@ class ZenodoRDMRecord(RDMRecord):
ZenodoRDMParent, create=False, soft_delete=False, hard_delete=False
)

swh = SWHSysField("swh")


class ZenodoRDMDraft(RDMDraft):
"""Zenodo RDMDraft API class."""
Expand Down
6 changes: 6 additions & 0 deletions site/zenodo_rdm/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
# under the terms of the MIT License; see LICENSE file for more details.
"""Custom code config."""


from .params import ZenodoArgsSchema, ZenodoSearchOptions
from .redirector import (
communities_detail_view_function,
Expand All @@ -27,6 +28,7 @@
redirect_records_search_slash,
search_view_function,
)
from .schema import ZenodoRecordSchema

# I18N_TRANSLATIONS_PATHS = [os.path.abspath("./site/zenodo_rdm/translations")]

Expand Down Expand Up @@ -370,3 +372,7 @@ def lock_edit_record_published_files(service, identity, record=None, draft=None)
"//cdnjs.cloudflare.com/ajax/libs/mathjax/3.2.2/es5/tex-mml-chtml.js"
"?config=TeX-AMS-MML_HTMLorMML"
)


RDM_RECORD_SCHEMA = ZenodoRecordSchema
slint marked this conversation as resolved.
Show resolved Hide resolved
"""Base record schema."""
2 changes: 2 additions & 0 deletions site/zenodo_rdm/legacy/serializers/schemas/zenodojson.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,8 @@ class ZenodoSchema(common.LegacySchema):
files = fields.Method("dump_files", dump_only=True)
metadata = fields.Nested(MetadataSchema)

swh = fields.Dict(dump_only=True)

owners = fields.Method("dump_owners")

def dump_owners(self, obj):
Expand Down
26 changes: 26 additions & 0 deletions site/zenodo_rdm/schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2024 CERN.
#
# Zenodo-RDM is free software; you can redistribute it and/or modify
# it under the terms of the MIT License; see LICENSE file for more details.
"""Zenodo-RDM service schema."""

from invenio_rdm_records.services.schemas import RDMRecordSchema
from marshmallow import Schema, fields


class SWHSchema(Schema):
    """Schema for the Software Heritage (``swh``) data of a record."""

    # Software Heritage identifier, handled as a plain string.
    swhid = fields.String()


class ZenodoRecordSchema(RDMRecordSchema):
    """Zenodo record service schema.

    Builds on the base RDM record schema and adds the Zenodo-specific fields
    declared below.
    """

    # Software Heritage data. ``dump_only`` means the field is emitted when
    # serializing a record but is not accepted as input.
    swh = fields.Nested(SWHSchema, dump_only=True)
20 changes: 20 additions & 0 deletions site/zenodo_rdm/serializers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2024 CERN.
#
# ZenodoRDM is free software; you can redistribute it and/or modify
# it under the terms of the MIT License; see LICENSE file for more details.
"""Zenodo serializers."""

from .bibtex import ZenodoBibtexSerializer
from .cff import ZenodoCFFSerializer
from .codemeta import ZenodoCodemetaSerializer
from .datacite import ZenodoDataciteJSONSerializer, ZenodoDataciteXMLSerializer

# Public API of the package: the serializer classes imported above.
__all__ = (
    "ZenodoBibtexSerializer",
    "ZenodoCodemetaSerializer",
    "ZenodoDataciteJSONSerializer",
    "ZenodoDataciteXMLSerializer",
    "ZenodoCFFSerializer",
)
41 changes: 41 additions & 0 deletions site/zenodo_rdm/serializers/bibtex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2024 CERN.
#
# ZenodoRDM is free software; you can redistribute it and/or modify
# it under the terms of the MIT License; see LICENSE file for more details.
"""Zenodo bibtex serializer."""

from flask_resources import BaseListSchema, MarshmallowSerializer
from flask_resources.serializers import SimpleSerializer
from invenio_rdm_records.resources.serializers import BibtexSerializer
from invenio_rdm_records.resources.serializers.bibtex.schema import BibTexSchema
from marshmallow import fields, missing


class ZenodoBibtexSchema(BibTexSchema):
    """BibTeX schema extended with the record's Software Heritage ID."""

    swhid = fields.Method("get_swhid")

    def get_swhid(self, obj):
        """Return the record's ``swh.swhid`` value.

        ``missing`` is returned when the value is absent or empty, so the
        field is left out of the dumped output.
        """
        swh_data = obj.get("swh", {})
        identifier = swh_data.get("swhid")
        if not identifier:
            return missing
        return identifier
slint marked this conversation as resolved.
Show resolved Hide resolved


class ZenodoBibtexSerializer(MarshmallowSerializer):
    """Serializer producing BibTeX output through ``ZenodoBibtexSchema``."""

    def __init__(self, **options):
        """Set up the serializer.

        ``options`` is accepted but not forwarded to the parent constructor.
        """
        init_kwargs = {
            "format_serializer_cls": SimpleSerializer,
            "object_schema_cls": ZenodoBibtexSchema,
            "list_schema_cls": BaseListSchema,
            "encoder": self.bibtex_tostring,
        }
        super().__init__(**init_kwargs)

    @classmethod
    def bibtex_tostring(cls, record):
        """Encode a BibTeX record; the record is returned unchanged."""
        return record
40 changes: 40 additions & 0 deletions site/zenodo_rdm/serializers/cff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2024 CERN.
#
# ZenodoRDM is free software; you can redistribute it and/or modify
# it under the terms of the MIT License; see LICENSE file for more details.
"""Zenodo CFF serializer."""

import yaml
from flask_resources import BaseListSchema, MarshmallowSerializer
from flask_resources.serializers import SimpleSerializer
from invenio_rdm_records.resources.serializers.cff.schema import CFFSchema
from marshmallow import missing


class ZenodoCFFSchema(CFFSchema):
    """Zenodo CFF schema.

    Extends the base CFF schema so that the record's Software Heritage ID,
    when present, is added to the identifiers.
    """

    def get_identifiers(self, obj):
        """Get identifiers, including the SWHID when the record has one.

        :param obj: record mapping; ``obj["swh"]["swhid"]`` is read if present.
        :returns: the parent's identifiers plus an entry of type ``swh`` for
            the SWHID, or ``missing`` when the resulting list is empty.
        """
        # A falsy result from the parent is treated as "no identifiers yet".
        ret = super().get_identifiers(obj) or []
        swhid = obj.get("swh", {}).get("swhid")
        if swhid:
            ret.append({"value": swhid, "type": "swh"})
        return ret or missing


class ZenodoCFFSerializer(MarshmallowSerializer):
    """Zenodo CFF serializer."""

    def __init__(self, **options):
        """Initialize serializer.

        :param options: extra keyword arguments forwarded to the parent
            constructor. ``encoder`` defaults to ``yaml.dump`` when it is not
            supplied.
        """
        # Fill in the default inside ``options`` rather than passing
        # ``encoder`` as a separate keyword: passing it both explicitly and
        # through ``**options`` raises ``TypeError`` (duplicate keyword)
        # whenever the caller provides its own encoder.
        options.setdefault("encoder", yaml.dump)
        super().__init__(
            format_serializer_cls=SimpleSerializer,
            object_schema_cls=ZenodoCFFSchema,
            list_schema_cls=BaseListSchema,
            **options,
        )
54 changes: 54 additions & 0 deletions site/zenodo_rdm/serializers/codemeta.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2024 CERN.
#
# ZenodoRDM is free software; you can redistribute it and/or modify
# it under the terms of the MIT License; see LICENSE file for more details.
"""Zenodo codemeta serializer."""

from flask import current_app
from flask_resources import BaseListSchema, MarshmallowSerializer
from flask_resources.serializers import JSONSerializer
from idutils import normalize_doi, to_url
from invenio_rdm_records.contrib.codemeta.processors import CodemetaDumper
from invenio_rdm_records.resources.serializers.codemeta.schema import CodemetaSchema
from marshmallow import fields, missing


class ZenodoCodemetaSchema(CodemetaSchema):
    """Codemeta schema whose ``identifier`` also carries the SWHID."""

    identifier = fields.Method("get_identifiers")

    def get_identifiers(self, obj):
        """Build the value of the "identifier" field.

        The DOI (with its URL form as ``propertyID``) and the Software
        Heritage ID (with its URL under ``SWH_UI_BASE_URL``) are collected.
        A single identifier is returned as one entry, several as a list, and
        ``missing`` when there is none.
        """
        identifiers = []

        doi = obj.get("pids", {}).get("doi", {}).get("identifier")
        if doi:
            doi_url = to_url(normalize_doi(doi), "doi")
            identifiers.append(
                {"@type": "doi", "value": doi, "propertyID": doi_url}
            )

        swhid = obj.get("swh", {}).get("swhid")
        if swhid:
            base_url = current_app.config["SWH_UI_BASE_URL"]
            identifiers.append(
                {
                    "@type": "swhid",
                    "value": swhid,
                    "propertyID": f"{base_url}/{swhid}",
                }
            )

        if not identifiers:
            return missing
        if len(identifiers) == 1:
            return identifiers[0]
        return identifiers


class ZenodoCodemetaSerializer(MarshmallowSerializer):
    """Serializer producing Codemeta JSON through ``ZenodoCodemetaSchema``."""

    def __init__(self, **options):
        """Set up the JSON formatter, the schemas and the Codemeta dumper.

        Extra keyword ``options`` are forwarded to the parent constructor.
        """
        dumpers = [CodemetaDumper()]  # Order matters
        super().__init__(
            format_serializer_cls=JSONSerializer,
            object_schema_cls=ZenodoCodemetaSchema,
            list_schema_cls=BaseListSchema,
            schema_kwargs={"dumpers": dumpers},
            **options,
        )
63 changes: 63 additions & 0 deletions site/zenodo_rdm/serializers/datacite.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2024 CERN.
#
# ZenodoRDM is free software; you can redistribute it and/or modify
# it under the terms of the MIT License; see LICENSE file for more details.
"""Zenodo datacite serializer."""

from datacite import schema43
from flask import current_app
from flask_resources import BaseListSchema, MarshmallowSerializer
from flask_resources.serializers import JSONSerializer, SimpleSerializer
from invenio_rdm_records.contrib.journal.processors import JournalDataciteDumper
from invenio_rdm_records.resources.serializers.datacite.schema import DataCite43Schema
from marshmallow import missing


class ZenodoDataciteSchema(DataCite43Schema):
    """DataCite schema that also links the record's Software Heritage ID."""

    def get_related_identifiers(self, obj):
        """Return the related identifiers, plus the SWHID URL if available.

        The parent's result is extended with an ``IsIdenticalTo`` URL entry
        built from ``SWH_UI_BASE_URL`` and the record's ``swh.swhid``.
        ``missing`` is returned when the final list is empty.
        """
        related = super().get_related_identifiers(obj) or []

        swhid = obj.get("swh", {}).get("swhid")
        if not swhid:
            return related or missing

        base_url = current_app.config["SWH_UI_BASE_URL"]
        related.append(
            {
                "relatedIdentifier": f"{base_url}/{swhid}",
                "relatedIdentifierType": "URL",
                "relationType": "IsIdenticalTo",
            }
        )
        return related or missing


class ZenodoDataciteJSONSerializer(MarshmallowSerializer):
    """Serializer producing DataCite JSON through ``ZenodoDataciteSchema``."""

    def __init__(self, **options):
        """Set up the JSON formatter, the schemas and the journal dumper.

        Extra keyword ``options`` are forwarded to the parent constructor.
        """
        dumpers = [JournalDataciteDumper()]  # Order matters
        super().__init__(
            format_serializer_cls=JSONSerializer,
            object_schema_cls=ZenodoDataciteSchema,
            list_schema_cls=BaseListSchema,
            schema_kwargs={"dumpers": dumpers},
            **options,
        )


class ZenodoDataciteXMLSerializer(MarshmallowSerializer):
    """Serializer producing DataCite XML through ``ZenodoDataciteSchema``."""

    def __init__(self, **options):
        """Set up the serializer.

        Only ``options["encoder"]`` is used (defaulting to
        ``schema43.tostring``); other options are not forwarded to the parent
        constructor.
        """
        dumpers = [JournalDataciteDumper()]  # Order matters
        super().__init__(
            format_serializer_cls=SimpleSerializer,
            object_schema_cls=ZenodoDataciteSchema,
            list_schema_cls=BaseListSchema,
            schema_kwargs={"dumpers": dumpers},
            encoder=options.get("encoder", schema43.tostring),
        )
Loading