Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

places, concepts: GND closeMatch corrections #174

Merged
merged 1 commit into from
Nov 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 11 additions & 12 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions rero_mef/alembic/d8536341fc5e_delete_identifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def upgrade():
ids.append(id_)
rec = agent_cls.get_record(id_)
rec.pop("identifier", None)
rec.update(data=rec, bcommit=False, reindex=True)
rec.update(data=rec, dbcommit=False, reindex=True)
if idx % 1000 == 0:
print(f" {idx} commit", end=" | ", flush=True)
db.session.commit()
Expand Down Expand Up @@ -102,7 +102,7 @@ def downgrade():
ids.append(id_)
rec = agent_cls.get_record(id_)
rec["identifier"] = f'"{url}{rec.pid}"'
rec.update(data=rec, bcommit=False, reindex=True)
rec.update(data=rec, dbcommit=False, reindex=True)
if idx % 1000 == 0:
print(f" {idx} commit", end=" | ", flush=True)
db.session.commit()
Expand Down
33 changes: 20 additions & 13 deletions rero_mef/marctojson/do_gnd_concepts.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,14 +215,8 @@ def trans_gnd_relation(self):
if value:
self.json_dict[relation] = value

def trans_gnd_classification(self):
"""Transformation classification from field 686."""
if self.logger and self.verbose:
self.logger.info("Call Function", "trans_gnd_classification")
# TODO: find classification

def trans_gnd_match(self):
"""Transformation closeMatch and exactfrom field 750."""
"""Transformation closeMatch and exactMatch from field 750."""
if self.logger and self.verbose:
self.logger.info("Call Function", "trans_gnd_match")
for field_750 in self.marc.get_fields("750"):
Expand Down Expand Up @@ -253,29 +247,42 @@ def trans_gnd_match(self):
if authorized_ap := build_string_from_field(
field=field_750, subfields=subfields, tag_grouping=tag_grouping
):
match = {
match_data = {
"authorized_access_point": authorized_ap,
"source": "GND",
}
identified_by = []
other_source = None
for subfield_0 in field_750.get_subfields("0"):
if subfield_0.startswith("http"):
match.setdefault("identifiedBy", []).append(
identified_by.insert(
0,
{
"type": "uri",
"value": subfield_0,
}
},
)
if other_source:
rerowep marked this conversation as resolved.
Show resolved Hide resolved
identified_by[0]["source"] = other_source
else:
source, id_ = get_source_and_id(subfield_0)
if source:
match.setdefault("identifiedBy", []).append(
insert_pos = -1
if source != "GND":
other_source = source
match_data["source"] = other_source
insert_pos = 0
identified_by.insert(
insert_pos,
{
"source": source,
"type": "bf:Nbn",
"value": id_,
}
},
)
self.json_dict.setdefault(match_type, []).append(match)
if identified_by:
match_data["identifiedBy"] = identified_by
self.json_dict.setdefault(match_type, []).append(match_data)

def trans_gnd_note(self):
"""Transformation notes from field.
Expand Down
25 changes: 19 additions & 6 deletions rero_mef/marctojson/do_gnd_places.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,29 +270,42 @@ def trans_gnd_match(self):
if authorized_ap := build_string_from_field(
field=field_751, subfields=subfields, tag_grouping=tag_grouping
):
match = {
match_data = {
"authorized_access_point": authorized_ap,
"source": "GND",
}
identified_by = []
other_source = None
for subfield_0 in field_751.get_subfields("0"):
if subfield_0.startswith("http"):
match.setdefault("identifiedBy", []).append(
identified_by.insert(
0,
{
"type": "uri",
"value": subfield_0,
}
},
)
if other_source:
rerowep marked this conversation as resolved.
Show resolved Hide resolved
identified_by[0]["source"] = other_source
else:
source, id_ = get_source_and_id(subfield_0)
if source:
match.setdefault("identifiedBy", []).append(
insert_pos = -1
if source != "GND":
other_source = source
match_data["source"] = other_source
insert_pos = 0
identified_by.insert(
insert_pos,
{
"source": source,
"type": "bf:Nbn",
"value": id_,
}
},
)
self.json_dict.setdefault(match_type, []).append(match)
if identified_by:
match_data["identifiedBy"] = identified_by
self.json_dict.setdefault(match_type, []).append(match_data)

def trans_gnd_note(self):
"""Transformation notes from field.
Expand Down
61 changes: 46 additions & 15 deletions tests/unit/concepts/examples/xml_minimal_record.xml
Original file line number Diff line number Diff line change
@@ -1,24 +1,55 @@

<record>
<leader>00589nx a2200193 45 </leader>
<datafield tag="035" ind1=" " ind2=" ">
<subfield code="a">027630501</subfield>
<subfield code="9">sudoc</subfield>
<datafield tag="670" ind1=" " ind2=" ">
<subfield code="a">
Grand Larousse universel (art. : Livre)
</subfield>
</datafield>
<datafield tag="035" ind1=" " ind2=" ">
<subfield code="a">frBN001940328</subfield>
<datafield tag="675" ind1=" " ind2=" ">
<subfield code="a">
Laval RVM (en ligne), 2004-11-23
</subfield>
</datafield>
<datafield tag="035" ind1=" " ind2=" ">
<subfield code="a">frBN000000089</subfield>
<datafield tag="680" ind1=" " ind2=" ">
<subfield code="a">
Mers profondément engagées dans la masse des continents
</subfield>
</datafield>
<datafield tag="035" ind1=" " ind2=" ">
<subfield code="a">FRBNF118620892</subfield>
<subfield code="z">FRBNF11862089</subfield>
<datafield tag="667" ind1=" " ind2=" ">
<subfield code="a">Note interne</subfield>
</datafield>
<datafield tag="035" ind1=" " ind2=" ">
<subfield code="a">http://viaf.org/viaf/124265140</subfield>
<subfield code="2">VIAF</subfield>
<subfield code="C">VIAF</subfield>
<subfield code="d">20200302</subfield>
<datafield tag="260" ind1=" " ind2="9">
<subfield code="a">
Voir le descripteur Opposition (science politique)
</subfield>
</datafield>
<datafield tag="260" ind1=" " ind2="9">
<subfield code="a">
Combiner un des descripteurs Mouvements contestataires
</subfield>
</datafield>
<datafield tag="260" ind1=" " ind2=" ">
<subfield code="a">
Voir les vedettes : Mouvements contestataires ; Opposition
</subfield>
</datafield>
<datafield tag="260" ind1=" " ind2=" ">
<subfield code="a">
Voir les vedettes du type : Antifascisme ; Mouvements
</subfield>
</datafield>
<datafield tag="260" ind1=" " ind2=" ">
<subfield code="a">
Voir aux mouvements d'opposition particuliers, par ex. : Combat
</subfield>
</datafield>
<datafield tag="360" ind1=" " ind2=" ">
<subfield code="a">
Voir aussi aux mers et océans particuliers
</subfield>
</datafield>
<datafield tag="016" ind1=" " ind2=" ">
<subfield code="9">VF3, NC3, NC30</subfield>
</datafield>
</record>
23 changes: 14 additions & 9 deletions tests/unit/concepts/test_concepts_gnd_transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,33 +174,38 @@ def test_gnd_close_match():
"closeMatch": [
{
"authorized_access_point": "Atlases",
"source": "GND",
"source": "DLC",
"identifiedBy": [
{
"source": "GND",
"type": "bf:Nbn",
"value": "(DE-101)1134384173",
"source": "DLC",
"type": "uri",
"value": "http://id.loc.gov/authorities/subjects/sh85009231",
},
{
"source": "DLC",
"type": "bf:Nbn",
"value": "sh85009231",
},
{
"type": "uri",
"value": "http://id.loc.gov/authorities/subjects/sh85009231",
"source": "GND",
"type": "bf:Nbn",
"value": "(DE-101)1134384173",
},
],
}
],
"exactMatch": [
{
"authorized_access_point": "Atlas",
"source": "GND",
"source": "DNLM",
"identifiedBy": [
{"source": "GND", "type": "bf:Nbn", "value": "(DE-101)125348144X"},
{
"source": "DNLM",
"type": "uri",
"value": "http://id.nlm.nih.gov/mesh/D020466",
},
{"source": "DNLM", "type": "bf:Nbn", "value": "D020466"},
{"type": "uri", "value": "http://id.nlm.nih.gov/mesh/D020466"},
{"source": "GND", "type": "bf:Nbn", "value": "(DE-101)125348144X"},
],
},
],
Expand Down
10 changes: 5 additions & 5 deletions tests/unit/places/test_places_gnd_transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,17 +153,17 @@ def test_gnd_close_match():
"exactMatch": [
{
"authorized_access_point": "Venedig",
"source": "GND",
"source": "ZBW",
"identifiedBy": [
{
"source": "GND",
"source": "ZBW",
"type": "bf:Nbn",
"value": "(DE-101)997977663",
"value": "091419204",
},
{
"source": "ZBW",
"source": "GND",
"type": "bf:Nbn",
"value": "091419204",
"value": "(DE-101)997977663",
},
],
}
Expand Down
Loading