Skip to content

Commit

Permalink
legacy: fix grants serialization
Browse files Browse the repository at this point in the history
  • Loading branch information
slint committed Sep 16, 2023
1 parent 2c5151c commit 9d3586d
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 76 deletions.
35 changes: 33 additions & 2 deletions legacy/zenodo_legacy/funders.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,9 @@
"10.13039/501100000038": "01h531d29",
"10.13039/100000001": "021nxhr62",
"10.13039/501100003246": "04jsz6e67",
# NOTE: RCUK was succeeded by UKRI. All awards/grants were transferred, so
# we're also remapping the funder IDs to point to UKRI (001aqnf71)
# NOTE: RCUK (10.13039/501100000690) was succeeded by UKRI (10.13039/100014013).
# All awards/grants were transferred, so we're also remapping the funder IDs to
# point to the UKRI ROR ID (001aqnf71).
"10.13039/501100000690": "001aqnf71",
"10.13039/100014013": "001aqnf71",
"10.13039/501100001602": "0271asj38",
Expand All @@ -39,4 +40,34 @@
"10.13039/501100006364": "03m8vkq32",
}

# Short acronyms for the known funders, keyed by funder DOI (the same
# "10.13039/..." identifiers used as keys elsewhere in this module).
FUNDER_ACRONYMS = {
    # Fixed: this DOI was labelled "ASAP", duplicating the entry for
    # 10.13039/100018231 below; it identifies ANR.
    "10.13039/501100001665": "ANR",
    "10.13039/501100002341": "AKA",
    "10.13039/501100000923": "ARC",
    "10.13039/100018231": "ASAP",
    "10.13039/501100000024": "CIHR",
    "10.13039/501100000780": "EC",
    "10.13039/501100000806": "EEA",
    "10.13039/501100001871": "FCT",
    "10.13039/501100002428": "FWF",
    "10.13039/501100004488": "HRZZ",
    "10.13039/501100004564": "MESTD",
    "10.13039/501100000925": "NHMRC",
    "10.13039/100000002": "NIH",
    "10.13039/501100000038": "NSERC",
    "10.13039/100000001": "NSF",
    "10.13039/501100003246": "NWO",
    "10.13039/501100000690": "RCUK",
    "10.13039/100014013": "UKRI",
    "10.13039/501100001602": "SFI",
    "10.13039/501100001711": "SNSF",
    "10.13039/100001345": "SSHRC",
    "10.13039/501100004410": "TUBITAK",
    "10.13039/100004440": "WT",
    "10.13039/501100006364": "INCa",
}


# Reverse lookup: ROR ID -> funder DOI. When several DOIs share one ROR ID,
# the plain inversion keeps whichever DOI comes last in FUNDER_DOI_TO_ROR.
FUNDER_ROR_TO_DOI = dict(
    (ror_id, funder_doi) for funder_doi, ror_id in FUNDER_DOI_TO_ROR.items()
)
# NOTE: We want to always resolve to the UKRI award, so pin the shared ROR ID
# explicitly instead of relying on the ordering of the source mapping.
FUNDER_ROR_TO_DOI.update({"001aqnf71": "10.13039/100014013"})
2 changes: 0 additions & 2 deletions site/tests/legacy/deposits/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@

import pytest

from zenodo_rdm.legacy.resources import LegacyRecordResourceConfig


@pytest.fixture(scope="function")
def test_data():
Expand Down
75 changes: 38 additions & 37 deletions site/zenodo_rdm/legacy/serializers/schemas/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from invenio_communities.proxies import current_communities
from marshmallow import Schema, fields, missing, post_dump, pre_dump
from marshmallow_utils.fields import EDTFDateString, SanitizedHTML, SanitizedUnicode
from zenodo_legacy.funders import FUNDER_ROR_TO_DOI
from zenodo_legacy.funders import FUNDER_ROR_TO_DOI, FUNDER_ACRONYMS
from zenodo_legacy.licenses import rdm_to_legacy


Expand Down Expand Up @@ -270,51 +270,52 @@ def dump_access_right(self, obj):

return legacy_access

def _grant(self, award, funder):
    """Serialize an RDM award and funder into a legacy Zenodo grant.

    :param award: RDM award dict. Reads ``number`` and, when present,
        ``title``, ``acronym``, ``program`` and ``identifiers``. ``None`` is
        treated as an empty award.
    :param funder: RDM funder dict. Reads ``id`` and ``name`` and, when
        present, ``country``, ``acronym`` and ``identifiers``. ``None`` is
        treated as an empty funder.
    :returns: The legacy grant dict, or ``None`` when either the funder ID or
        the award number is missing (both are needed for ``internal_id``).
    :raises KeyError: If a funder with an ID has no ``name``.
    """
    award = award or {}
    funder = funder or {}

    # Prefer the funder DOI; IDs without a known mapping are kept as they are.
    funder_id = funder.get("id")
    funder_id = FUNDER_ROR_TO_DOI.get(funder_id, funder_id)
    award_number = award.get("number")
    if not (funder_id and award_number):
        return None

    legacy_funder = {"name": funder["name"]}
    grant = {
        "code": award_number,
        "internal_id": f"{funder_id}::{award_number}",
        "funder": legacy_funder,
    }

    # Add more funder fields
    # "identifiers" may be absent or None, so fall back to an empty list.
    for identifier in funder.get("identifiers") or []:
        if identifier["scheme"] == "doi":
            legacy_funder["doi"] = identifier["identifier"]
    # No explicit DOI identifier: reuse the resolved ID when it is a funder DOI.
    if "doi" not in legacy_funder and funder_id.startswith("10.13039/"):
        legacy_funder["doi"] = funder_id

    country = funder.get("country")
    if country:
        legacy_funder["country"] = country

    acronym = FUNDER_ACRONYMS.get(funder_id) or funder.get("acronym")
    if acronym:
        legacy_funder["acronym"] = acronym

    # Add more award fields
    # Titles are keyed by language: prefer English, else take any available one.
    i18n_title = award.get("title") or {}
    title = i18n_title.get("en") or next(iter(i18n_title.values()), None)
    if title:
        grant["title"] = title

    for key in ("acronym", "program"):
        value = award.get(key)
        if value:
            grant[key] = value

    for identifier in award.get("identifiers") or []:
        scheme = identifier["scheme"]
        if scheme == "url":
            grant["url"] = identifier["identifier"]
        elif scheme == "doi":
            # The value is stored under "identifier"; there is no "doi" key.
            grant["doi"] = identifier["identifier"]

    return grant

@post_dump(pass_original=True)
def dump_additional_descriptions(self, result, original, **kwargs):
Expand Down
38 changes: 3 additions & 35 deletions site/zenodo_rdm/legacy/serializers/schemas/legacyjson.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,8 @@

"""Zenodo legacy format serializer schemas."""

from invenio_access.permissions import system_identity
from invenio_pidstore.errors import PIDDeletedError, PIDDoesNotExistError
from invenio_records_resources.proxies import current_service_registry
from marshmallow import fields, missing, post_dump, pre_dump
from marshmallow_utils.fields import SanitizedUnicode
from zenodo_legacy.funders import FUNDER_ROR_TO_DOI

from . import common

Expand All @@ -24,43 +20,15 @@ class MetadataSchema(common.MetadataSchema):

def dump_grants(self, obj):
    """Dump grants from funding field.

    :param obj: Metadata dict; its ``funding`` entry is a list of items,
        each optionally holding an ``award`` and a ``funder``.
    :returns: A list of legacy grants, or ``missing`` when there is no
        funding or none of the funding items can be serialized as a grant.
    """
    grants = []
    for funding_item in obj.get("funding") or []:
        award = funding_item.get("award")
        funder = funding_item.get("funder")
        # Funder-only funding is not supported in the legacy API: a grant
        # needs both an award and a funder.
        if not (award and funder):
            continue

        grant = self._grant(award, funder)
        # _grant yields nothing when the funder ID or award number is missing.
        if grant:
            grants.append(grant)

    return grants or missing

license = SanitizedUnicode()
Expand Down

0 comments on commit 9d3586d

Please sign in to comment.