Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

legacy: fix grants serialization #510

Merged
merged 2 commits into from
Sep 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,14 @@ jobs:
matrix:
package-dir: ["site", "legacy"]
python-version: [3.9]
db-service: [postgresql13]
db-service: [postgresql14]
search-service: [opensearch2]
ignore-failure: [false]
include:
- package-dir: "migrator"
ignore-failure: true
python-version: 3.9
db-service: postgresql13
db-service: postgresql14
env:
DB: ${{ matrix.db-service }}
SEARCH: ${{ matrix.search-service }}
Expand Down
35 changes: 33 additions & 2 deletions legacy/zenodo_legacy/funders.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,9 @@
"10.13039/501100000038": "01h531d29",
"10.13039/100000001": "021nxhr62",
"10.13039/501100003246": "04jsz6e67",
# NOTE: RCUK was succeeded by UKRI. All awards/grants were transferred, so
# we're also remapping the funder IDs to point to UKRI (001aqnf71)
# NOTE: RCUK (10.13039/501100000690) was succeeded by UKRI (10.13039/100014013).
# All awards/grants were transferred, so we're also remapping the funder IDs to
# point to the UKRI ROR ID (001aqnf71).
"10.13039/501100000690": "001aqnf71",
"10.13039/100014013": "001aqnf71",
"10.13039/501100001602": "0271asj38",
Expand All @@ -39,4 +40,34 @@
"10.13039/501100006364": "03m8vkq32",
}

# Funder DOI (Crossref Funder Registry ID) -> acronym used in the legacy
# Zenodo grant serialization.
FUNDER_ACRONYMS = {
    # 10.13039/501100001665 is Agence Nationale de la Recherche (ANR); ASAP has
    # its own entry below (10.13039/100018231).
    "10.13039/501100001665": "ANR",
    "10.13039/501100002341": "AKA",
    "10.13039/501100000923": "ARC",
    "10.13039/100018231": "ASAP",
    "10.13039/501100000024": "CIHR",
    "10.13039/501100000780": "EC",
    "10.13039/501100000806": "EEA",
    "10.13039/501100001871": "FCT",
    "10.13039/501100002428": "FWF",
    "10.13039/501100004488": "HRZZ",
    "10.13039/501100004564": "MESTD",
    "10.13039/501100000925": "NHMRC",
    "10.13039/100000002": "NIH",
    "10.13039/501100000038": "NSERC",
    "10.13039/100000001": "NSF",
    "10.13039/501100003246": "NWO",
    "10.13039/501100000690": "RCUK",
    "10.13039/100014013": "UKRI",
    "10.13039/501100001602": "SFI",
    "10.13039/501100001711": "SNSF",
    "10.13039/100001345": "SSHRC",
    "10.13039/501100004410": "TUBITAK",
    "10.13039/100004440": "WT",
    "10.13039/501100006364": "INCa",
}


# Reverse lookup: ROR ID -> funder DOI. Where several DOIs share one ROR ID,
# the DOI listed last in FUNDER_DOI_TO_ROR is the one kept.
FUNDER_ROR_TO_DOI = dict(zip(FUNDER_DOI_TO_ROR.values(), FUNDER_DOI_TO_ROR.keys()))
# NOTE: Both RCUK and UKRI map to the same ROR ID; pin the reverse lookup to
# the UKRI DOI so we always resolve to the UKRI award.
FUNDER_ROR_TO_DOI.update({"001aqnf71": "10.13039/100014013"})
2 changes: 0 additions & 2 deletions site/tests/legacy/deposits/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@

import pytest

from zenodo_rdm.legacy.resources import LegacyRecordResourceConfig


@pytest.fixture(scope="function")
def test_data():
Expand Down
2 changes: 0 additions & 2 deletions site/tests/legacy/deposits/test_rest_api_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,6 @@ def test_input_output(
ignored_keys = set()

# doi is returned as a top level key (and not inside metadata)
# TODO: Verify behaviour and fix
# assert data["doi"] == test_data["metadata"]["doi"]
ignored_keys.add("doi")

differences = list(
Expand Down
81 changes: 41 additions & 40 deletions site/zenodo_rdm/legacy/serializers/schemas/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from invenio_communities.proxies import current_communities
from marshmallow import Schema, fields, missing, post_dump, pre_dump
from marshmallow_utils.fields import EDTFDateString, SanitizedHTML, SanitizedUnicode
from zenodo_legacy.funders import FUNDER_ROR_TO_DOI
from zenodo_legacy.funders import FUNDER_ACRONYMS, FUNDER_ROR_TO_DOI
from zenodo_legacy.licenses import rdm_to_legacy


Expand Down Expand Up @@ -270,51 +270,52 @@ def dump_access_right(self, obj):

return legacy_access

def _funder(self, funder):
"""Serialize RDM funder into Zenodo legacy funder."""
legacy_funder = {"name": funder["name"]}
def _grant(self, award, funder):
"""Serialize an RDM award and funder into a legacy Zenodo grant."""
funder_id = funder.get("id")
funder_id = FUNDER_ROR_TO_DOI.get(funder_id, funder_id)
award_number = award.get("number")
if not (funder_id and award_number):
return

grant = {
"code": award_number,
"internal_id": f"{funder_id}::{award_number}",
"funder": {"name": funder["name"]},
}

for identifier in funder.get("identifiers"):
# Add more funder fields
for identifier in funder.get("identifiers", []):
scheme = identifier["scheme"]

if scheme == "doi":
legacy_funder["doi"] = identifier["identifier"]

value = funder.get("country")
if value:
legacy_funder["country"] = value

return legacy_funder

def _award(self, award):
"""Serialize an RDM award into a legacy Zenodo grant."""
funder_ror = award["funder"]["id"]
funder_doi_or_ror = FUNDER_ROR_TO_DOI.get(funder_ror, funder_ror)
legacy_grant = {
"code": award["number"],
"internal_id": f"{funder_doi_or_ror}::{award['id']}",
}

try:
title = award["title"].get("en", next(iter(award["title"])))
legacy_grant["title"] = title
except StopIteration:
pass

value = award.get("acronym")
if value:
legacy_grant["acronym"] = value

for identifier in award.get("identifiers"):
grant["funder"]["doi"] = identifier["identifier"]
if "doi" not in grant["funder"] and funder_id.startswith("10.13039/"):
grant["funder"]["doi"] = funder_id
country = funder.get("country")
if country:
grant["funder"]["country"] = country
acronym = FUNDER_ACRONYMS.get(funder_id) or funder.get("acronym")
if acronym:
grant["funder"]["acronym"] = acronym

# Add more award fields
i18n_title = award.get("title") or {}
title = i18n_title.get("en") or next(iter(i18n_title.values()), None)
if title:
grant["title"] = title

for key in ("acronym", "program"):
value = award.get(key)
if value:
grant[key] = value

for identifier in award.get("identifiers", []):
scheme = identifier["scheme"]

if scheme == "url":
legacy_grant["url"] = identifier["identifier"]

grant["url"] = identifier["identifier"]
if scheme == "doi":
legacy_grant["doi"] = identifier["doi"]

return legacy_grant
grant["doi"] = identifier["doi"]
return grant

@post_dump(pass_original=True)
def dump_additional_descriptions(self, result, original, **kwargs):
Expand Down
46 changes: 10 additions & 36 deletions site/zenodo_rdm/legacy/serializers/schemas/legacyjson.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,8 @@

"""Zenodo legacy format serializer schemas."""

from invenio_access.permissions import system_identity
from invenio_pidstore.errors import PIDDeletedError, PIDDoesNotExistError
from invenio_records_resources.proxies import current_service_registry
from marshmallow import fields, missing, post_dump, pre_dump
from marshmallow_utils.fields import SanitizedUnicode
from zenodo_legacy.funders import FUNDER_ROR_TO_DOI

from . import common

Expand All @@ -24,44 +20,22 @@ class MetadataSchema(common.MetadataSchema):

def dump_grants(self, obj):
    """Dump grants from the funding field as legacy grant ID references.

    :param obj: Record metadata dict; its ``funding`` entry is a list of
        items holding an ``award`` and a ``funder``.
    :returns: A list of ``{"id": ...}`` dicts, or ``missing`` when there is
        no funding or none of the items can be expressed as a legacy grant.
    """
    funding = obj.get("funding", [])
    if not funding:
        return missing

    ret = []
    for funding_item in funding:
        award = funding_item.get("award")
        funder = funding_item.get("funder")
        # Funder-only funding has no award to serialize; skip it here rather
        # than handing ``None`` to the grant serializer.
        if not (award and funder):
            continue
        legacy_grant = self._grant(award, funder)
        if not legacy_grant:
            continue
        grant_id = legacy_grant["internal_id"]
        # FP7 grants are referenced by their bare code instead of the
        # "<funder>::<code>" internal ID.
        if legacy_grant.get("program") == "FP7":
            grant_id = legacy_grant["code"]
        ret.append({"id": grant_id})
    return ret or missing

license = SanitizedUnicode()

Expand Down
50 changes: 19 additions & 31 deletions site/zenodo_rdm/legacy/serializers/schemas/zenodojson.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@

"""Zenodo serializer schemas."""

from marshmallow import Schema, fields, missing, post_dump, pre_dump
from marshmallow_utils.fields import EDTFDateString, SanitizedHTML, SanitizedUnicode
from marshmallow import Schema, fields, missing
from marshmallow_utils.fields import SanitizedUnicode

from . import common

Expand Down Expand Up @@ -65,34 +65,6 @@ class ThesisSchema(Schema):
supervisors = fields.Nested(common.CreatorSchema, many=True)


class FunderSchema(Schema):
"""Schema for a funder."""

doi = fields.Str()
name = fields.Str(dump_only=True)
acronyms = fields.List(fields.Str(), dump_only=True)
links = fields.Method("get_funder_url", dump_only=True)

def get_funder_url(self, obj):
"""Get grant url."""
return dict(self=common.api_link_for("funder", id=obj["doi"]))


class GrantSchema(Schema):
"""Schema for a grant."""

title = fields.Str(dump_only=True)
code = fields.Str()
program = fields.Str(dump_only=True)
acronym = fields.Str(dump_only=True)
funder = fields.Nested(FunderSchema)
links = fields.Method("get_grant_url", dump_only=True)

def get_grant_url(self, obj):
"""Get grant url."""
return dict(self=common.api_link_for("grant", id=obj["internal_id"]))


class FilesSchema(Schema):
"""Files metadata schema."""

Expand Down Expand Up @@ -125,10 +97,26 @@ class MetadataSchema(common.MetadataSchema):
alternate_identifiers = fields.Method("dump_alternate_identifiers")

license = fields.Nested({"id": fields.Function(lambda x: x)})
grants = fields.Nested(GrantSchema, many=True)
grants = fields.Method("dump_grants")
communities = fields.Method("dump_communities")
relations = fields.Method("dump_relations")

def dump_grants(self, obj):
    """Dump grants from the funding field as full legacy grant objects.

    :param obj: Record metadata dict; its ``funding`` entry is a list of
        items holding an ``award`` and a ``funder``.
    :returns: A list of legacy grant dicts, or ``missing`` when there is no
        funding or none of the items can be expressed as a legacy grant.
    """
    funding = obj.get("funding", [])
    if not funding:
        return missing

    ret = []
    for funding_item in funding:
        award = funding_item.get("award")
        funder = funding_item.get("funder")
        # Funder-only funding has no award to serialize; skip it here rather
        # than handing ``None`` to the grant serializer.
        if not (award and funder):
            continue
        legacy_grant = self._grant(award, funder)
        if legacy_grant:
            ret.append(legacy_grant)
    return ret or missing

def dump_communities(self, obj):
"""Dump communities."""
community_slugs = obj.get("_communities", [])
Expand Down
Loading