diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 97271a37..2d35bc87 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -32,14 +32,14 @@ jobs: matrix: package-dir: ["site", "legacy"] python-version: [3.9] - db-service: [postgresql13] + db-service: [postgresql14] search-service: [opensearch2] ignore-failure: [false] include: - package-dir: "migrator" ignore-failure: true python-version: 3.9 - db-service: postgresql13 + db-service: postgresql14 env: DB: ${{ matrix.db-service }} SEARCH: ${{ matrix.search-service }} diff --git a/legacy/zenodo_legacy/funders.py b/legacy/zenodo_legacy/funders.py index 5424711b..a640db2a 100644 --- a/legacy/zenodo_legacy/funders.py +++ b/legacy/zenodo_legacy/funders.py @@ -27,8 +27,9 @@ "10.13039/501100000038": "01h531d29", "10.13039/100000001": "021nxhr62", "10.13039/501100003246": "04jsz6e67", - # NOTE: RCUK was succeeded by UKRI. All awards/grants were transferred, so - # we're also remapping the funder IDs to point to UKRI (001aqnf71) + # NOTE: RCUK (10.13039/501100000690) was succeeded by UKRI (10.13039/100014013). + # All awards/grants were transferred, so we're also remapping the funder IDs to + # point to the UKRI ROR ID (001aqnf71). 
"10.13039/501100000690": "001aqnf71", "10.13039/100014013": "001aqnf71", "10.13039/501100001602": "0271asj38", @@ -39,4 +40,34 @@ "10.13039/501100006364": "03m8vkq32", } +FUNDER_ACRONYMS = { + "10.13039/501100001665": "ANR", + "10.13039/501100002341": "AKA", + "10.13039/501100000923": "ARC", + "10.13039/100018231": "ASAP", + "10.13039/501100000024": "CIHR", + "10.13039/501100000780": "EC", + "10.13039/501100000806": "EEA", + "10.13039/501100001871": "FCT", + "10.13039/501100002428": "FWF", + "10.13039/501100004488": "HRZZ", + "10.13039/501100004564": "MESTD", + "10.13039/501100000925": "NHMRC", + "10.13039/100000002": "NIH", + "10.13039/501100000038": "NSERC", + "10.13039/100000001": "NSF", + "10.13039/501100003246": "NWO", + "10.13039/501100000690": "RCUK", + "10.13039/100014013": "UKRI", + "10.13039/501100001602": "SFI", + "10.13039/501100001711": "SNSF", + "10.13039/100001345": "SSHRC", + "10.13039/501100004410": "TUBITAK", + "10.13039/100004440": "WT", + "10.13039/501100006364": "INCa", +} + + FUNDER_ROR_TO_DOI = {v: k for k, v in FUNDER_DOI_TO_ROR.items()} +# NOTE: We want to always resolve to the UKRI award +FUNDER_ROR_TO_DOI["001aqnf71"] = "10.13039/100014013" diff --git a/site/tests/legacy/deposits/conftest.py b/site/tests/legacy/deposits/conftest.py index 6141239f..ed234da4 100644 --- a/site/tests/legacy/deposits/conftest.py +++ b/site/tests/legacy/deposits/conftest.py @@ -10,8 +10,6 @@ import pytest -from zenodo_rdm.legacy.resources import LegacyRecordResourceConfig - @pytest.fixture(scope="function") def test_data(): diff --git a/site/tests/legacy/deposits/test_rest_api_metadata.py b/site/tests/legacy/deposits/test_rest_api_metadata.py index 0e26443f..51f2083b 100644 --- a/site/tests/legacy/deposits/test_rest_api_metadata.py +++ b/site/tests/legacy/deposits/test_rest_api_metadata.py @@ -69,8 +69,6 @@ def test_input_output( ignored_keys = set() # doi is returned as a top level key (and not inside metadata) - # TODO: Verify behaviour and fix - # assert 
data["doi"] == test_data["metadata"]["doi"] ignored_keys.add("doi") differences = list( diff --git a/site/zenodo_rdm/legacy/serializers/schemas/common.py b/site/zenodo_rdm/legacy/serializers/schemas/common.py index 813af39d..adc898d6 100644 --- a/site/zenodo_rdm/legacy/serializers/schemas/common.py +++ b/site/zenodo_rdm/legacy/serializers/schemas/common.py @@ -10,7 +10,7 @@ from invenio_communities.proxies import current_communities from marshmallow import Schema, fields, missing, post_dump, pre_dump from marshmallow_utils.fields import EDTFDateString, SanitizedHTML, SanitizedUnicode -from zenodo_legacy.funders import FUNDER_ROR_TO_DOI +from zenodo_legacy.funders import FUNDER_ACRONYMS, FUNDER_ROR_TO_DOI from zenodo_legacy.licenses import rdm_to_legacy @@ -270,51 +270,52 @@ def dump_access_right(self, obj): return legacy_access - def _funder(self, funder): - """Serialize RDM funder into Zenodo legacy funder.""" - legacy_funder = {"name": funder["name"]} + def _grant(self, award, funder): + """Serialize an RDM award and funder into a legacy Zenodo grant.""" + funder_id = (funder or {}).get("id") + funder_id = FUNDER_ROR_TO_DOI.get(funder_id, funder_id) + award_number = (award or {}).get("number") + if not (funder_id and award_number): + return + + grant = { + "code": award_number, + "internal_id": f"{funder_id}::{award_number}", + "funder": {"name": funder["name"]}, + } - for identifier in funder.get("identifiers"): + # Add more funder fields + for identifier in funder.get("identifiers", []): scheme = identifier["scheme"] - if scheme == "doi": - legacy_funder["doi"] = identifier["identifier"] - - value = funder.get("country") - if value: - legacy_funder["country"] = value - - return legacy_funder - - def _award(self, award): - """Serialize an RDM award into a legacy Zenodo grant.""" - funder_ror = award["funder"]["id"] - funder_doi_or_ror = FUNDER_ROR_TO_DOI.get(funder_ror, funder_ror) - legacy_grant = { - "code": award["number"], - "internal_id": 
f"{funder_doi_or_ror}::{award['id']}", - } - - try: - title = award["title"].get("en", next(iter(award["title"]))) - legacy_grant["title"] = title - except StopIteration: - pass - - value = award.get("acronym") - if value: - legacy_grant["acronym"] = value - - for identifier in award.get("identifiers"): + grant["funder"]["doi"] = identifier["identifier"] + if "doi" not in grant["funder"] and funder_id.startswith("10.13039/"): + grant["funder"]["doi"] = funder_id + country = funder.get("country") + if country: + grant["funder"]["country"] = country + acronym = FUNDER_ACRONYMS.get(funder_id) or funder.get("acronym") + if acronym: + grant["funder"]["acronym"] = acronym + + # Add more award fields + i18n_title = award.get("title") or {} + title = i18n_title.get("en") or next(iter(i18n_title.values()), None) + if title: + grant["title"] = title + + for key in ("acronym", "program"): + value = award.get(key) + if value: + grant[key] = value + + for identifier in award.get("identifiers", []): scheme = identifier["scheme"] - if scheme == "url": - legacy_grant["url"] = identifier["identifier"] - + grant["url"] = identifier["identifier"] if scheme == "doi": - legacy_grant["doi"] = identifier["doi"] - - return legacy_grant + grant["doi"] = identifier["identifier"] + return grant @post_dump(pass_original=True) def dump_additional_descriptions(self, result, original, **kwargs): diff --git a/site/zenodo_rdm/legacy/serializers/schemas/legacyjson.py b/site/zenodo_rdm/legacy/serializers/schemas/legacyjson.py index befa12f0..149fe417 100644 --- a/site/zenodo_rdm/legacy/serializers/schemas/legacyjson.py +++ b/site/zenodo_rdm/legacy/serializers/schemas/legacyjson.py @@ -7,12 +7,8 @@ """Zenodo legacy format serializer schemas.""" -from invenio_access.permissions import system_identity -from invenio_pidstore.errors import PIDDeletedError, PIDDoesNotExistError -from invenio_records_resources.proxies import current_service_registry from marshmallow import fields, missing, post_dump, pre_dump 
from marshmallow_utils.fields import SanitizedUnicode -from zenodo_legacy.funders import FUNDER_ROR_TO_DOI from . import common @@ -24,44 +20,22 @@ class MetadataSchema(common.MetadataSchema): def dump_grants(self, obj): """Dump grants from funding field.""" - funding = obj.get("funding") + funding = obj.get("funding", []) if not funding: return missing ret = [] for funding_item in funding: award = funding_item.get("award") - - # in case there are multiple funding entries, service calls could be - # optimized calling read_many - aid = award.get("id") - if aid: - a_service = current_service_registry.get("awards") - try: - award = a_service.read(system_identity, aid).to_dict() - except (PIDDeletedError, PIDDoesNotExistError): - # funder only funding, or custom awards are not supported in the - # legacy API - return missing - - # we are ignoring funding.funder.id in favour of the awards.funder.id - fid = award["funder"]["id"] - f_service = current_service_registry.get("funders") - # every vocabulary award must be linked to a vocabulary funder - # therefore this read call cannot fail - funder = f_service.read(system_identity, fid).to_dict() - - # No custom funder/awards in legacy therefore it would always resolve - # the read ops above. 
- legacy_grant = self._award(award) - legacy_grant["funder"] = self._funder(funder) - - award_number = award["number"] - funder_doi = FUNDER_ROR_TO_DOI.get(funder["id"]) - serialized_grant = {"id": f"{funder_doi}::{award_number}"} - ret.append(serialized_grant) - - return ret + funder = funding_item.get("funder") + legacy_grant = self._grant(award, funder) + if not legacy_grant: + continue + grant_id = legacy_grant["internal_id"] + if legacy_grant.get("program") == "FP7": + grant_id = legacy_grant["code"] + ret.append({"id": grant_id}) + return ret or missing license = SanitizedUnicode() diff --git a/site/zenodo_rdm/legacy/serializers/schemas/zenodojson.py b/site/zenodo_rdm/legacy/serializers/schemas/zenodojson.py index 9dd5f099..580f9a45 100644 --- a/site/zenodo_rdm/legacy/serializers/schemas/zenodojson.py +++ b/site/zenodo_rdm/legacy/serializers/schemas/zenodojson.py @@ -7,8 +7,8 @@ """Zenodo serializer schemas.""" -from marshmallow import Schema, fields, missing, post_dump, pre_dump -from marshmallow_utils.fields import EDTFDateString, SanitizedHTML, SanitizedUnicode +from marshmallow import Schema, fields, missing +from marshmallow_utils.fields import SanitizedUnicode from . 
import common @@ -65,34 +65,6 @@ class ThesisSchema(Schema): supervisors = fields.Nested(common.CreatorSchema, many=True) -class FunderSchema(Schema): - """Schema for a funder.""" - - doi = fields.Str() - name = fields.Str(dump_only=True) - acronyms = fields.List(fields.Str(), dump_only=True) - links = fields.Method("get_funder_url", dump_only=True) - - def get_funder_url(self, obj): - """Get grant url.""" - return dict(self=common.api_link_for("funder", id=obj["doi"])) - - -class GrantSchema(Schema): - """Schema for a grant.""" - - title = fields.Str(dump_only=True) - code = fields.Str() - program = fields.Str(dump_only=True) - acronym = fields.Str(dump_only=True) - funder = fields.Nested(FunderSchema) - links = fields.Method("get_grant_url", dump_only=True) - - def get_grant_url(self, obj): - """Get grant url.""" - return dict(self=common.api_link_for("grant", id=obj["internal_id"])) - - class FilesSchema(Schema): """Files metadata schema.""" @@ -125,10 +97,26 @@ class MetadataSchema(common.MetadataSchema): alternate_identifiers = fields.Method("dump_alternate_identifiers") license = fields.Nested({"id": fields.Function(lambda x: x)}) - grants = fields.Nested(GrantSchema, many=True) + grants = fields.Method("dump_grants") communities = fields.Method("dump_communities") relations = fields.Method("dump_relations") + def dump_grants(self, obj): + """Dump grants from funding field.""" + funding = obj.get("funding", []) + if not funding: + return missing + + ret = [] + for funding_item in funding: + award = funding_item.get("award") + funder = funding_item.get("funder") + legacy_grant = self._grant(award, funder) + if not legacy_grant: + continue + ret.append(legacy_grant) + return ret or missing + def dump_communities(self, obj): """Dump communities.""" community_slugs = obj.get("_communities", [])