From dba4d512da0554184be739d95ce923e3844de144 Mon Sep 17 00:00:00 2001 From: Abram Booth Date: Wed, 10 Mar 2021 17:16:57 -0500 Subject: [PATCH] fix subject deduping bug --- share/disambiguation/strategies/graph.py | 1 + share/schema/schema-spec.yaml | 1 + share/util/graph.py | 4 ++-- .../share/regulate/steps/test_deduplicate.py | 22 ++++++++++++++++++- 4 files changed, 25 insertions(+), 3 deletions(-) diff --git a/share/disambiguation/strategies/graph.py b/share/disambiguation/strategies/graph.py index b4ee2e745..8485a99ad 100644 --- a/share/disambiguation/strategies/graph.py +++ b/share/disambiguation/strategies/graph.py @@ -60,6 +60,7 @@ def match_subjects(self, nodes): matches = [ n for n in graph_nodes if n != node + and n['parent'] == node['parent'] and n['central_synonym'] == node['central_synonym'] and (equal_not_none(n['uri'], node['uri']) or equal_not_none(n['name'], node['name'])) ] diff --git a/share/schema/schema-spec.yaml b/share/schema/schema-spec.yaml index 508c21316..04b29380a 100644 --- a/share/schema/schema-spec.yaml +++ b/share/schema/schema-spec.yaml @@ -261,6 +261,7 @@ - name: central_synonym relation_shape: many_to_one related_concrete_type: Subject + inverse_relation: custom_synonyms - concrete_type: ThroughSubjects attributes: diff --git a/share/util/graph.py b/share/util/graph.py index 5d04d59f5..314f1e8af 100644 --- a/share/util/graph.py +++ b/share/util/graph.py @@ -560,13 +560,13 @@ def delete(self, cascade=True): self.graph.remove_node(self.id, cascade) self.__graph = None - def to_jsonld(self, ref=False, in_edges=True): + def to_jsonld(self, ref=False, in_edges=False): ld_node = { '@id': self.id, '@type': self.type, } if not ref: - ld_node.update(self.relations(in_edges=False, jsonld=True)) + ld_node.update(self.relations(in_edges=in_edges, jsonld=True)) ld_node.update(self.attrs()) return ld_node diff --git a/tests/share/regulate/steps/test_deduplicate.py b/tests/share/regulate/steps/test_deduplicate.py index c96ea242c..c7084f55d 100644 --- a/tests/share/regulate/steps/test_deduplicate.py +++ b/tests/share/regulate/steps/test_deduplicate.py @@ -32,7 +32,27 @@ def test_no_change(self, Graph, input): WorkIdentifier(4), WorkIdentifier(uri='http://osf.io/guidguid'), ]), - ]) + ]), + ([ + Registration(0, subjects=[ + Subject( + 0, + name='custom-child', + central_synonym=Subject(1, name='central-child', parent=Subject(3, name='central-parent')), + parent=Subject(2, name='custom-parent', central_synonym=Subject(3, name='central-parent')), + ) + for _ in range(3) + ]), + ], [ + Registration(0, subjects=[ + Subject( + 0, + name='custom-child', + central_synonym=Subject(1, name='central-child', parent=Subject(3, id='central-parent', name='central-parent')), + parent=Subject(2, name='custom-parent', central_synonym=Subject(id='central-parent')), + ) + ]), + ]), ]) def test_deduplicate(self, Graph, input, output): graph = Graph(input)