Skip to content

Commit

Permalink
legacy: fix grants serialization
Browse files Browse the repository at this point in the history
  • Loading branch information
slint committed Sep 16, 2023
1 parent 8d3220f commit d4411dc
Show file tree
Hide file tree
Showing 6 changed files with 103 additions and 113 deletions.
35 changes: 33 additions & 2 deletions legacy/zenodo_legacy/funders.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,9 @@
"10.13039/501100000038": "01h531d29",
"10.13039/100000001": "021nxhr62",
"10.13039/501100003246": "04jsz6e67",
# NOTE: RCUK was succeeded by UKRI. All awards/grants were transferred, so
# we're also remapping the funder IDs to point to UKRI (001aqnf71)
# NOTE: RCUK (10.13039/501100000690) was succeeded by UKRI (10.13039/100014013).
# All awards/grants were transferred, so we're also remapping the funder IDs to
# point to the UKRI ROR ID (001aqnf71).
"10.13039/501100000690": "001aqnf71",
"10.13039/100014013": "001aqnf71",
"10.13039/501100001602": "0271asj38",
Expand All @@ -39,4 +40,34 @@
"10.13039/501100006364": "03m8vkq32",
}

# Short display acronyms of the supported funders, keyed by funder DOI.
FUNDER_ACRONYMS = {
    # Agence Nationale de la Recherche; "ASAP" belongs to 10.13039/100018231.
    "10.13039/501100001665": "ANR",
    "10.13039/501100002341": "AKA",
    "10.13039/501100000923": "ARC",
    "10.13039/100018231": "ASAP",
    "10.13039/501100000024": "CIHR",
    "10.13039/501100000780": "EC",
    "10.13039/501100000806": "EEA",
    "10.13039/501100001871": "FCT",
    "10.13039/501100002428": "FWF",
    "10.13039/501100004488": "HRZZ",
    "10.13039/501100004564": "MESTD",
    "10.13039/501100000925": "NHMRC",
    "10.13039/100000002": "NIH",
    "10.13039/501100000038": "NSERC",
    "10.13039/100000001": "NSF",
    "10.13039/501100003246": "NWO",
    # RCUK and UKRI keep their own acronyms even though both DOIs are mapped
    # to the same ROR ID.
    "10.13039/501100000690": "RCUK",
    "10.13039/100014013": "UKRI",
    "10.13039/501100001602": "SFI",
    "10.13039/501100001711": "SNSF",
    "10.13039/100001345": "SSHRC",
    "10.13039/501100004410": "TUBITAK",
    "10.13039/100004440": "WT",
    "10.13039/501100006364": "INCa",
}


# Reverse lookup (ROR ID -> funder DOI). When several DOIs share one ROR ID,
# the entry listed last in FUNDER_DOI_TO_ROR wins.
FUNDER_ROR_TO_DOI = dict(zip(FUNDER_DOI_TO_ROR.values(), FUNDER_DOI_TO_ROR.keys()))
# NOTE: RCUK and UKRI share the ROR ID 001aqnf71; pin it explicitly so that it
# always resolves to the UKRI DOI.
FUNDER_ROR_TO_DOI.update({"001aqnf71": "10.13039/100014013"})
2 changes: 0 additions & 2 deletions site/tests/legacy/deposits/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@

import pytest

from zenodo_rdm.legacy.resources import LegacyRecordResourceConfig


@pytest.fixture(scope="function")
def test_data():
Expand Down
2 changes: 0 additions & 2 deletions site/tests/legacy/deposits/test_rest_api_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,6 @@ def test_input_output(
ignored_keys = set()

# doi is returned as a top level key (and not inside metadata)
# TODO: Verify behaviour and fix
# assert data["doi"] == test_data["metadata"]["doi"]
ignored_keys.add("doi")

differences = list(
Expand Down
81 changes: 41 additions & 40 deletions site/zenodo_rdm/legacy/serializers/schemas/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from invenio_communities.proxies import current_communities
from marshmallow import Schema, fields, missing, post_dump, pre_dump
from marshmallow_utils.fields import EDTFDateString, SanitizedHTML, SanitizedUnicode
from zenodo_legacy.funders import FUNDER_ROR_TO_DOI
from zenodo_legacy.funders import FUNDER_ACRONYMS, FUNDER_ROR_TO_DOI
from zenodo_legacy.licenses import rdm_to_legacy


Expand Down Expand Up @@ -270,51 +270,52 @@ def dump_access_right(self, obj):

return legacy_access

def _funder(self, funder):
"""Serialize RDM funder into Zenodo legacy funder."""
legacy_funder = {"name": funder["name"]}
def _grant(self, award, funder):
"""Serialize an RDM award and funder into a legacy Zenodo grant."""
funder_id = funder.get("id")
funder_id = FUNDER_ROR_TO_DOI.get(funder_id, funder_id)
award_number = award.get("number")
if not (funder_id and award_number):
return

grant = {
"code": award_number,
"internal_id": f"{funder_id}::{award_number}",
"funder": {"name": funder["name"]},
}

for identifier in funder.get("identifiers"):
# Add more funder fields
for identifier in funder.get("identifiers", []):
scheme = identifier["scheme"]

if scheme == "doi":
legacy_funder["doi"] = identifier["identifier"]

value = funder.get("country")
if value:
legacy_funder["country"] = value

return legacy_funder

def _award(self, award):
"""Serialize an RDM award into a legacy Zenodo grant."""
funder_ror = award["funder"]["id"]
funder_doi_or_ror = FUNDER_ROR_TO_DOI.get(funder_ror, funder_ror)
legacy_grant = {
"code": award["number"],
"internal_id": f"{funder_doi_or_ror}::{award['id']}",
}

try:
title = award["title"].get("en", next(iter(award["title"])))
legacy_grant["title"] = title
except StopIteration:
pass

value = award.get("acronym")
if value:
legacy_grant["acronym"] = value

for identifier in award.get("identifiers"):
grant["funder"]["doi"] = identifier["identifier"]
if "doi" not in grant["funder"] and funder_id.startswith("10.13039/"):
grant["funder"]["doi"] = funder_id
country = funder.get("country")
if country:
grant["funder"]["country"] = country
acronym = FUNDER_ACRONYMS.get(funder_id) or funder.get("acronym")
if acronym:
grant["funder"]["acronym"] = acronym

# Add more award fields
i18n_title = award.get("title") or {}
title = i18n_title.get("en") or next(iter(i18n_title.values()), None)
if title:
grant["title"] = title

for key in ("acronym", "program"):
value = award.get(key)
if value:
grant[key] = value

for identifier in award.get("identifiers", []):
scheme = identifier["scheme"]

if scheme == "url":
legacy_grant["url"] = identifier["identifier"]

grant["url"] = identifier["identifier"]
if scheme == "doi":
legacy_grant["doi"] = identifier["doi"]

return legacy_grant
grant["doi"] = identifier["doi"]
return grant

@post_dump(pass_original=True)
def dump_additional_descriptions(self, result, original, **kwargs):
Expand Down
46 changes: 10 additions & 36 deletions site/zenodo_rdm/legacy/serializers/schemas/legacyjson.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,8 @@

"""Zenodo legacy format serializer schemas."""

from invenio_access.permissions import system_identity
from invenio_pidstore.errors import PIDDeletedError, PIDDoesNotExistError
from invenio_records_resources.proxies import current_service_registry
from marshmallow import fields, missing, post_dump, pre_dump
from marshmallow_utils.fields import SanitizedUnicode
from zenodo_legacy.funders import FUNDER_ROR_TO_DOI

from . import common

Expand All @@ -24,44 +20,22 @@ class MetadataSchema(common.MetadataSchema):

def dump_grants(self, obj):
    """Dump grants from funding field.

    Every funding entry is serialized with ``self._grant`` and reduced to a
    legacy ``{"id": ...}`` reference.

    :param obj: Record metadata dict; its ``funding`` list is read.
    :returns: A list of ``{"id": ...}`` dicts, or ``missing`` when there is
        no funding or no entry could be serialized.
    """
    funding = obj.get("funding")
    if not funding:
        return missing

    grants = []
    for funding_item in funding:
        # Either side may be absent (e.g. funder-only funding); pass empty
        # dicts so the entry is skipped rather than failing on ``None``.
        legacy_grant = self._grant(
            funding_item.get("award") or {},
            funding_item.get("funder") or {},
        )
        if not legacy_grant:
            continue
        # FP7 grants are referenced by their bare code, all others by the
        # internal ID built by ``_grant``.
        if legacy_grant.get("program") == "FP7":
            grant_id = legacy_grant["code"]
        else:
            grant_id = legacy_grant["internal_id"]
        grants.append({"id": grant_id})
    return grants or missing

license = SanitizedUnicode()

Expand Down
50 changes: 19 additions & 31 deletions site/zenodo_rdm/legacy/serializers/schemas/zenodojson.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@

"""Zenodo serializer schemas."""

from marshmallow import Schema, fields, missing, post_dump, pre_dump
from marshmallow_utils.fields import EDTFDateString, SanitizedHTML, SanitizedUnicode
from marshmallow import Schema, fields, missing
from marshmallow_utils.fields import SanitizedUnicode

from . import common

Expand Down Expand Up @@ -65,34 +65,6 @@ class ThesisSchema(Schema):
supervisors = fields.Nested(common.CreatorSchema, many=True)


class FunderSchema(Schema):
    """Serialization schema of a legacy funder."""

    doi = fields.Str()
    name = fields.Str(dump_only=True)
    acronyms = fields.List(fields.Str(), dump_only=True)
    # Computed on dump by ``get_funder_url``.
    links = fields.Method("get_funder_url", dump_only=True)

    def get_funder_url(self, obj):
        """Return the links dict holding the funder's ``self`` API link."""
        funder_link = common.api_link_for("funder", id=obj["doi"])
        return {"self": funder_link}


class GrantSchema(Schema):
    """Serialization schema of a legacy grant."""

    title = fields.Str(dump_only=True)
    code = fields.Str()
    program = fields.Str(dump_only=True)
    acronym = fields.Str(dump_only=True)
    funder = fields.Nested(FunderSchema)
    # Computed on dump by ``get_grant_url``.
    links = fields.Method("get_grant_url", dump_only=True)

    def get_grant_url(self, obj):
        """Return the links dict holding the grant's ``self`` API link."""
        grant_link = common.api_link_for("grant", id=obj["internal_id"])
        return {"self": grant_link}


class FilesSchema(Schema):
"""Files metadata schema."""

Expand Down Expand Up @@ -125,10 +97,26 @@ class MetadataSchema(common.MetadataSchema):
alternate_identifiers = fields.Method("dump_alternate_identifiers")

license = fields.Nested({"id": fields.Function(lambda x: x)})
grants = fields.Nested(GrantSchema, many=True)
grants = fields.Method("dump_grants")
communities = fields.Method("dump_communities")
relations = fields.Method("dump_relations")

def dump_grants(self, obj):
    """Dump grants from funding field.

    :param obj: Record metadata dict; its ``funding`` list is read.
    :returns: A list with the legacy grant produced by ``self._grant`` for
        each funding entry that could be serialized, or ``missing`` when
        there is no funding or nothing could be serialized.
    """
    funding = obj.get("funding")
    if not funding:
        return missing

    grants = []
    for funding_item in funding:
        # Either side may be absent (e.g. funder-only funding); pass empty
        # dicts so the entry is skipped rather than failing on ``None``.
        legacy_grant = self._grant(
            funding_item.get("award") or {},
            funding_item.get("funder") or {},
        )
        if legacy_grant:
            grants.append(legacy_grant)
    return grants or missing

def dump_communities(self, obj):
"""Dump communities."""
community_slugs = obj.get("_communities", [])
Expand Down

0 comments on commit d4411dc

Please sign in to comment.