From 449dc5bd5ad7b16eaee49b3b98aeca40684f965c Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Thu, 5 Dec 2024 12:35:19 +0100 Subject: [PATCH] names: fix affiliations deduplication --- invenio_vocabularies/contrib/names/datastreams.py | 10 ++++------ tests/contrib/names/test_names_datastreams.py | 14 ++++++++++++++ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/invenio_vocabularies/contrib/names/datastreams.py b/invenio_vocabularies/contrib/names/datastreams.py index 007c5cda..2c7c9288 100644 --- a/invenio_vocabularies/contrib/names/datastreams.py +++ b/invenio_vocabularies/contrib/names/datastreams.py @@ -260,15 +260,12 @@ def _extract_affiliations(self, record): employment.get("employment-summary", {}) for employment in employments ] - history = set() for employment in employments: terminated = employment.get("end-date") - org = employment["organization"] - - if terminated or org["name"] in history: + if terminated: continue - history.add(org["name"]) + org = employment["organization"] aff = {"name": org["name"]} # Extract the org ID, to link to the affiliation vocabulary @@ -276,7 +273,8 @@ def _extract_affiliations(self, record): if aff_id: aff["id"] = aff_id - result.append(aff) + if aff not in result: + result.append(aff) except Exception: pass return result diff --git a/tests/contrib/names/test_names_datastreams.py b/tests/contrib/names/test_names_datastreams.py index 3abd82ee..c9920947 100644 --- a/tests/contrib/names/test_names_datastreams.py +++ b/tests/contrib/names/test_names_datastreams.py @@ -240,6 +240,12 @@ def orcid_data(): "employments": {"affiliation-group": employments[0]} }, }, + "duplicate_affiliations": { + **base, + "activities-summary": { + "employments": {"affiliation-group": employments + employments}, + }, + }, }, } @@ -267,6 +273,14 @@ def expected_from_xml(): **base, "affiliations": [{"id": "01ggx4157", "name": "CERN"}], }, + "duplicate_affiliations": { + **base, + "affiliations": [ + # Affiliations are deduplicated + {"id": "01ggx4157", "name": "CERN"}, + {"name": "ACME Inc."}, + ], + }, }