From 9d3586d3636a2e8b72492b72e7359b95782061e5 Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Sat, 16 Sep 2023 18:23:29 +0200 Subject: [PATCH] legacy: fix grants serialization --- legacy/zenodo_legacy/funders.py | 35 ++++++++- site/tests/legacy/deposits/conftest.py | 2 - .../legacy/serializers/schemas/common.py | 75 ++++++++++--------- .../legacy/serializers/schemas/legacyjson.py | 38 +--------- 4 files changed, 74 insertions(+), 76 deletions(-) diff --git a/legacy/zenodo_legacy/funders.py b/legacy/zenodo_legacy/funders.py index 5424711b1..a640db2aa 100644 --- a/legacy/zenodo_legacy/funders.py +++ b/legacy/zenodo_legacy/funders.py @@ -27,8 +27,9 @@ "10.13039/501100000038": "01h531d29", "10.13039/100000001": "021nxhr62", "10.13039/501100003246": "04jsz6e67", - # NOTE: RCUK was succeeded by UKRI. All awards/grants were transferred, so - # we're also remapping the funder IDs to point to UKRI (001aqnf71) + # NOTE: RCUK (10.13039/501100000690) was succeeded by UKRI (10.13039/100014013). + # All awards/grants were transferred, so we're also remapping the funder IDs to + # point to the UKRI ROR ID (001aqnf71). 
"10.13039/501100000690": "001aqnf71", "10.13039/100014013": "001aqnf71", "10.13039/501100001602": "0271asj38", @@ -39,4 +40,34 @@ "10.13039/501100006364": "03m8vkq32", } +FUNDER_ACRONYMS = { + "10.13039/501100001665": "ASAP", + "10.13039/501100002341": "AKA", + "10.13039/501100000923": "ARC", + "10.13039/100018231": "ASAP", + "10.13039/501100000024": "CIHR", + "10.13039/501100000780": "EC", + "10.13039/501100000806": "EEA", + "10.13039/501100001871": "FCT", + "10.13039/501100002428": "FWF", + "10.13039/501100004488": "HRZZ", + "10.13039/501100004564": "MESTD", + "10.13039/501100000925": "NHMRC", + "10.13039/100000002": "NIH", + "10.13039/501100000038": "NSERC", + "10.13039/100000001": "NSF", + "10.13039/501100003246": "NWO", + "10.13039/501100000690": "RCUK", + "10.13039/100014013": "UKRI", + "10.13039/501100001602": "SFI", + "10.13039/501100001711": "SNSF", + "10.13039/100001345": "SSHRC", + "10.13039/501100004410": "TUBITAK", + "10.13039/100004440": "WT", + "10.13039/501100006364": "INCa", +} + + FUNDER_ROR_TO_DOI = {v: k for k, v in FUNDER_DOI_TO_ROR.items()} +# NOTE: We want to always resolve to the UKRI award +FUNDER_ROR_TO_DOI["001aqnf71"] = "10.13039/100014013" diff --git a/site/tests/legacy/deposits/conftest.py b/site/tests/legacy/deposits/conftest.py index 6141239f9..ed234da4e 100644 --- a/site/tests/legacy/deposits/conftest.py +++ b/site/tests/legacy/deposits/conftest.py @@ -10,8 +10,6 @@ import pytest -from zenodo_rdm.legacy.resources import LegacyRecordResourceConfig - @pytest.fixture(scope="function") def test_data(): diff --git a/site/zenodo_rdm/legacy/serializers/schemas/common.py b/site/zenodo_rdm/legacy/serializers/schemas/common.py index 813af39d6..9530bd187 100644 --- a/site/zenodo_rdm/legacy/serializers/schemas/common.py +++ b/site/zenodo_rdm/legacy/serializers/schemas/common.py @@ -10,7 +10,7 @@ from invenio_communities.proxies import current_communities from marshmallow import Schema, fields, missing, post_dump, pre_dump from 
marshmallow_utils.fields import EDTFDateString, SanitizedHTML, SanitizedUnicode -from zenodo_legacy.funders import FUNDER_ROR_TO_DOI +from zenodo_legacy.funders import FUNDER_ROR_TO_DOI, FUNDER_ACRONYMS from zenodo_legacy.licenses import rdm_to_legacy @@ -270,51 +270,52 @@ def dump_access_right(self, obj): return legacy_access - def _funder(self, funder): - """Serialize RDM funder into Zenodo legacy funder.""" - legacy_funder = {"name": funder["name"]} + def _grant(self, award, funder): + """Serialize an RDM award and funder into a legacy Zenodo grant.""" + funder_id = funder.get("id") + funder_id = FUNDER_ROR_TO_DOI.get(funder_id, funder_id) + award_number = award.get("number") + if not (funder_id and award_number): + return + + grant = { + "code": award_number, + "internal_id": f"{funder_id}::{award_number}", + "funder": {"name": funder["name"]}, + } + # Add more funder fields for identifier in funder.get("identifiers"): scheme = identifier["scheme"] - if scheme == "doi": - legacy_funder["doi"] = identifier["identifier"] - - value = funder.get("country") - if value: - legacy_funder["country"] = value - - return legacy_funder - - def _award(self, award): - """Serialize an RDM award into a legacy Zenodo grant.""" - funder_ror = award["funder"]["id"] - funder_doi_or_ror = FUNDER_ROR_TO_DOI.get(funder_ror, funder_ror) - legacy_grant = { - "code": award["number"], - "internal_id": f"{funder_doi_or_ror}::{award['id']}", - } - - try: - title = award["title"].get("en", next(iter(award["title"]))) - legacy_grant["title"] = title - except StopIteration: - pass - - value = award.get("acronym") - if value: - legacy_grant["acronym"] = value + grant["funder"]["doi"] = identifier["identifier"] + if "doi" not in grant["funder"] and funder_id.startswith("10.13039/"): + grant["funder"]["doi"] = funder_id + country = funder.get("country") + if country: + grant["funder"]["country"] = country + acronym = FUNDER_ACRONYMS.get(funder_id) or funder.get("acronym") + if acronym: + 
grant["funder"]["acronym"] = acronym + + # Add more award fields + i18n_title = award.get("title") or {} + title = i18n_title.get("en") or next(iter(i18n_title.values()), None) + if title: + grant["title"] = title + + for key in ("acronym", "program"): + value = award.get(key) + if value: + grant[key] = value for identifier in award.get("identifiers"): scheme = identifier["scheme"] - if scheme == "url": - legacy_grant["url"] = identifier["identifier"] - + grant["url"] = identifier["identifier"] if scheme == "doi": - legacy_grant["doi"] = identifier["doi"] - - return legacy_grant + grant["doi"] = identifier["doi"] + return grant @post_dump(pass_original=True) def dump_additional_descriptions(self, result, original, **kwargs): diff --git a/site/zenodo_rdm/legacy/serializers/schemas/legacyjson.py b/site/zenodo_rdm/legacy/serializers/schemas/legacyjson.py index befa12f0c..65b261820 100644 --- a/site/zenodo_rdm/legacy/serializers/schemas/legacyjson.py +++ b/site/zenodo_rdm/legacy/serializers/schemas/legacyjson.py @@ -7,12 +7,8 @@ """Zenodo legacy format serializer schemas.""" -from invenio_access.permissions import system_identity -from invenio_pidstore.errors import PIDDeletedError, PIDDoesNotExistError -from invenio_records_resources.proxies import current_service_registry from marshmallow import fields, missing, post_dump, pre_dump from marshmallow_utils.fields import SanitizedUnicode -from zenodo_legacy.funders import FUNDER_ROR_TO_DOI from . 
import common @@ -24,43 +20,15 @@ class MetadataSchema(common.MetadataSchema): def dump_grants(self, obj): """Dump grants from funding field.""" - funding = obj.get("funding") + funding = obj.get("funding", []) if not funding: return missing ret = [] for funding_item in funding: award = funding_item.get("award") - - # in case there are multiple funding entries, service calls could be - # optimized calling read_many - aid = award.get("id") - if aid: - a_service = current_service_registry.get("awards") - try: - award = a_service.read(system_identity, aid).to_dict() - except (PIDDeletedError, PIDDoesNotExistError): - # funder only funding, or custom awards are not supported in the - # legacy API - return missing - - # we are ignoring funding.funder.id in favour of the awards.funder.id - fid = award["funder"]["id"] - f_service = current_service_registry.get("funders") - # every vocabulary award must be linked to a vocabulary funder - # therefore this read call cannot fail - funder = f_service.read(system_identity, fid).to_dict() - - # No custom funder/awards in legacy therefore it would always resolve - # the read ops above. - legacy_grant = self._award(award) - legacy_grant["funder"] = self._funder(funder) - - award_number = award["number"] - funder_doi = FUNDER_ROR_TO_DOI.get(funder["id"]) - serialized_grant = {"id": f"{funder_doi}::{award_number}"} - ret.append(serialized_grant) - + funder = funding_item.get("funder") + ret.append(self._grant(award, funder)) return ret license = SanitizedUnicode()